opencode-semantic-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencode_memory/__init__.py +3 -0
- opencode_memory/cache.py +261 -0
- opencode_memory/cli.py +794 -0
- opencode_memory/config.py +89 -0
- opencode_memory/daemon.py +879 -0
- opencode_memory/enrichment/__init__.py +0 -0
- opencode_memory/enrichment/gitlab.py +237 -0
- opencode_memory/extraction.py +225 -0
- opencode_memory/historical_ingest.py +142 -0
- opencode_memory/http_server.py +464 -0
- opencode_memory/ingestion/__init__.py +7 -0
- opencode_memory/ingestion/embeddings.py +211 -0
- opencode_memory/ingestion/extractors.py +287 -0
- opencode_memory/ingestion/opencode_db.py +448 -0
- opencode_memory/ingestion/parser.py +344 -0
- opencode_memory/ingestion/watcher.py +88 -0
- opencode_memory/linking/__init__.py +5 -0
- opencode_memory/linking/linker.py +323 -0
- opencode_memory/metrics.py +273 -0
- opencode_memory/models.py +171 -0
- opencode_memory/project.py +86 -0
- opencode_memory/query/__init__.py +5 -0
- opencode_memory/query/hybrid.py +196 -0
- opencode_memory/server.py +2795 -0
- opencode_memory/session/__init__.py +5 -0
- opencode_memory/session/registry.py +57 -0
- opencode_memory/storage/__init__.py +6 -0
- opencode_memory/storage/sqlite.py +1608 -0
- opencode_memory/storage/vectors.py +199 -0
- opencode_semantic_memory-0.1.0.dist-info/METADATA +531 -0
- opencode_semantic_memory-0.1.0.dist-info/RECORD +33 -0
- opencode_semantic_memory-0.1.0.dist-info/WHEEL +4 -0
- opencode_semantic_memory-0.1.0.dist-info/entry_points.txt +3 -0
opencode_memory/cache.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""LRU cache for memories with background prefetching of linked memories."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
from collections import OrderedDict
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from opencode_memory.models import Memory
|
|
13
|
+
from opencode_memory.storage.sqlite import SQLiteStorage
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)

# Tuning knobs for the memory cache
DEFAULT_MAX_SIZE = 50_000  # Entry cap (~100MB if a memory averages 2KB)
DEFAULT_TTL_SECONDS = 24 * 60 * 60  # One day; effectively no expiry for active use
PREFETCH_BATCH_SIZE = 20  # Upper bound on linked memories fetched per prefetch
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class CacheEntry:
    """One cache slot: a memory plus bookkeeping about its time in the cache."""

    # The cached Memory object
    memory: "Memory"
    # time.time() timestamp taken when the entry was created
    cached_at: float = field(default_factory=time.time)
    # Counter bumped by the cache each time the entry is served
    access_count: int = 0
    # time.time() timestamp of the most recent access
    last_accessed: float = field(default_factory=time.time)

    def is_expired(self, ttl_seconds: float) -> bool:
        """Return True once the entry has been cached longer than ``ttl_seconds``."""
        age = time.time() - self.cached_at
        return age > ttl_seconds
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class MemoryCache:
    """Thread-safe LRU cache for Memory objects with TTL and prefetching.

    Features:
    - LRU eviction when max size reached
    - TTL-based expiration (checked on lookup, or in bulk via cleanup_expired)
    - Background prefetching of linked memories
    - Cache invalidation on memory updates/deletes
    - Thread-safe operations (cache state is guarded by a re-entrant lock)

    A ``max_size`` of zero or less disables caching: new entries are simply
    not stored instead of failing while evicting from an empty cache.
    """

    def __init__(
        self,
        storage: "SQLiteStorage",
        max_size: int = DEFAULT_MAX_SIZE,
        ttl_seconds: float = DEFAULT_TTL_SECONDS,
    ):
        """Create an empty cache.

        Args:
            storage: Backend used for prefetching; ``get_linked_memory_ids``
                and ``get_memories_by_ids`` are called on it.
            max_size: Number of entries kept before the least recently used
                entry is evicted.
            ttl_seconds: Age in seconds after which an entry counts as expired.
        """
        self.storage = storage
        self.max_size = max_size
        self.ttl_seconds = ttl_seconds

        # OrderedDict for LRU behavior (most recently used at end)
        self._cache: OrderedDict[int, CacheEntry] = OrderedDict()
        self._lock = threading.RLock()

        # Track pending prefetch tasks to avoid duplicates
        self._pending_prefetch: set[int] = set()
        self._prefetch_lock = threading.Lock()

        # Stats
        self._hits = 0
        self._misses = 0

    def _touch_locked(self, memory_id: int) -> "CacheEntry | None":
        """Look up a live entry and record the hit or miss.

        The caller must hold ``self._lock``. An expired entry is removed and
        counted as a miss. A live entry has its access metadata updated and
        is moved to the most-recently-used end.
        """
        entry = self._cache.get(memory_id)
        if entry is None:
            self._misses += 1
            return None

        if entry.is_expired(self.ttl_seconds):
            del self._cache[memory_id]
            self._misses += 1
            return None

        entry.access_count += 1
        entry.last_accessed = time.time()
        self._cache.move_to_end(memory_id)
        self._hits += 1
        return entry

    def _store_locked(self, memory: "Memory") -> None:
        """Insert or replace the entry for ``memory``.

        The caller must hold ``self._lock`` and must have checked that
        ``memory.id`` is not None.
        """
        if memory.id in self._cache:
            # Replace with a fresh entry (resets the TTL) and mark as most recent
            self._cache[memory.id] = CacheEntry(memory=memory)
            self._cache.move_to_end(memory.id)
            return

        if self.max_size <= 0:
            # Caching disabled; without this guard the eviction loop below
            # would call popitem() on an empty dict and raise KeyError.
            return

        # Evict least recently used entries until there is room
        while len(self._cache) >= self.max_size:
            self._cache.popitem(last=False)

        self._cache[memory.id] = CacheEntry(memory=memory)

    def get(self, memory_id: int) -> "Memory | None":
        """Get a memory from cache, returning None if not cached or expired."""
        with self._lock:
            entry = self._touch_locked(memory_id)
            return entry.memory if entry is not None else None

    def get_many(self, memory_ids: list[int]) -> dict[int, "Memory"]:
        """Get multiple memories from cache.

        Returns a dict mapping each found (non-expired) id to its memory;
        ids that are missing or expired are omitted and counted as misses.
        """
        result = {}
        with self._lock:
            for memory_id in memory_ids:
                entry = self._touch_locked(memory_id)
                if entry is not None:
                    result[memory_id] = entry.memory
        return result

    def put(self, memory: "Memory") -> None:
        """Add a memory to the cache. Memories without an id are ignored."""
        if memory.id is None:
            return

        with self._lock:
            self._store_locked(memory)

    def put_many(self, memories: list["Memory"]) -> None:
        """Add multiple memories to the cache, skipping any without an id."""
        with self._lock:
            for memory in memories:
                if memory.id is not None:
                    self._store_locked(memory)

    def invalidate(self, memory_id: int) -> bool:
        """Remove a memory from cache. Returns True if it was cached."""
        with self._lock:
            return self._cache.pop(memory_id, None) is not None

    def invalidate_many(self, memory_ids: list[int]) -> int:
        """Remove multiple memories from cache. Returns count removed."""
        with self._lock:
            return sum(
                1 for memory_id in memory_ids if self._cache.pop(memory_id, None) is not None
            )

    def clear(self) -> int:
        """Clear all cached entries. Returns count cleared."""
        with self._lock:
            count = len(self._cache)
            self._cache.clear()
            return count

    def prefetch_linked(self, memory_id: int) -> None:
        """Schedule background prefetch of memories linked to this one.

        This is fire-and-forget - errors are logged but not raised.
        Duplicate prefetch requests are ignored.
        """
        with self._prefetch_lock:
            if memory_id in self._pending_prefetch:
                return
            self._pending_prefetch.add(memory_id)

        # Run prefetch in background thread
        def do_prefetch():
            try:
                self._do_prefetch_sync(memory_id)
            finally:
                with self._prefetch_lock:
                    self._pending_prefetch.discard(memory_id)

        try:
            threading.Thread(target=do_prefetch, daemon=True).start()
        except RuntimeError as e:
            # The thread never ran, so do_prefetch's cleanup will not happen;
            # release the marker here or this id could never be prefetched again.
            with self._prefetch_lock:
                self._pending_prefetch.discard(memory_id)
            logger.warning("Failed to start prefetch thread for %s: %s", memory_id, e)

    async def prefetch_linked_async(self, memory_id: int) -> None:
        """Async version of prefetch_linked for use in async contexts.

        Unlike prefetch_linked, this awaits completion of the prefetch, which
        runs in the running loop's default executor. Duplicate requests for an
        id that is already being prefetched return immediately.
        """
        with self._prefetch_lock:
            if memory_id in self._pending_prefetch:
                return
            self._pending_prefetch.add(memory_id)

        try:
            # Run sync DB operations in thread pool. get_running_loop() is the
            # supported way to obtain the loop from inside a coroutine.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, self._do_prefetch_sync, memory_id)
        finally:
            with self._prefetch_lock:
                self._pending_prefetch.discard(memory_id)

    def _do_prefetch_sync(self, memory_id: int) -> None:
        """Synchronously prefetch linked memories.

        Looks up the ids linked to ``memory_id``, considers at most
        PREFETCH_BATCH_SIZE of them, and loads those that are not cached (or
        whose cached entry has expired) from storage in one batch. Any
        exception is logged as a warning and swallowed (best effort).
        """
        try:
            # Get linked memory IDs
            linked_ids = self.storage.get_linked_memory_ids(memory_id)
            if not linked_ids:
                return

            # Filter out ids that already have a live entry. Expired entries
            # are treated as uncached so that prefetching refreshes them.
            with self._lock:
                uncached_ids = []
                for mid in linked_ids[:PREFETCH_BATCH_SIZE]:
                    entry = self._cache.get(mid)
                    if entry is None or entry.is_expired(self.ttl_seconds):
                        uncached_ids.append(mid)

            if not uncached_ids:
                return

            # Batch fetch from database (done outside the lock)
            memories_map = self.storage.get_memories_by_ids(uncached_ids)

            # Cache the fetched memories
            self.put_many(list(memories_map.values()))

            logger.debug(
                "Prefetched %s linked memories for memory %s", len(memories_map), memory_id
            )
        except Exception as e:
            logger.warning("Failed to prefetch linked memories for %s: %s", memory_id, e)

    def get_stats(self) -> dict:
        """Get cache statistics.

        Returns a dict with size, max_size, hits, misses, hit_rate (rounded
        to 3 decimals), expired_entries, pending_prefetch and ttl_seconds.
        """
        with self._lock:
            total_requests = self._hits + self._misses
            hit_rate = self._hits / total_requests if total_requests > 0 else 0.0

            # Count expired entries without removing them
            expired_count = sum(
                1 for entry in self._cache.values() if entry.is_expired(self.ttl_seconds)
            )

            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": round(hit_rate, 3),
                "expired_entries": expired_count,
                "pending_prefetch": len(self._pending_prefetch),
                "ttl_seconds": self.ttl_seconds,
            }

    def cleanup_expired(self) -> int:
        """Remove all expired entries. Returns count removed."""
        with self._lock:
            # Collect ids first; the dict must not change size while iterating
            expired_ids = [
                mid for mid, entry in self._cache.items() if entry.is_expired(self.ttl_seconds)
            ]
            for mid in expired_ids:
                del self._cache[mid]
            return len(expired_ids)
|