opencode-semantic-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencode_memory/__init__.py +3 -0
- opencode_memory/cache.py +261 -0
- opencode_memory/cli.py +794 -0
- opencode_memory/config.py +89 -0
- opencode_memory/daemon.py +879 -0
- opencode_memory/enrichment/__init__.py +0 -0
- opencode_memory/enrichment/gitlab.py +237 -0
- opencode_memory/extraction.py +225 -0
- opencode_memory/historical_ingest.py +142 -0
- opencode_memory/http_server.py +464 -0
- opencode_memory/ingestion/__init__.py +7 -0
- opencode_memory/ingestion/embeddings.py +211 -0
- opencode_memory/ingestion/extractors.py +287 -0
- opencode_memory/ingestion/opencode_db.py +448 -0
- opencode_memory/ingestion/parser.py +344 -0
- opencode_memory/ingestion/watcher.py +88 -0
- opencode_memory/linking/__init__.py +5 -0
- opencode_memory/linking/linker.py +323 -0
- opencode_memory/metrics.py +273 -0
- opencode_memory/models.py +171 -0
- opencode_memory/project.py +86 -0
- opencode_memory/query/__init__.py +5 -0
- opencode_memory/query/hybrid.py +196 -0
- opencode_memory/server.py +2795 -0
- opencode_memory/session/__init__.py +5 -0
- opencode_memory/session/registry.py +57 -0
- opencode_memory/storage/__init__.py +6 -0
- opencode_memory/storage/sqlite.py +1608 -0
- opencode_memory/storage/vectors.py +199 -0
- opencode_semantic_memory-0.1.0.dist-info/METADATA +531 -0
- opencode_semantic_memory-0.1.0.dist-info/RECORD +33 -0
- opencode_semantic_memory-0.1.0.dist-info/WHEEL +4 -0
- opencode_semantic_memory-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
"""HTTP server for opencode-memory with streamable HTTP transport.
|
|
2
|
+
|
|
3
|
+
This allows multiple OpenCode sessions to share a single memory server,
|
|
4
|
+
avoiding the overhead of loading the embedding model multiple times.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
# Start the server (once, e.g., via systemd user service)
|
|
8
|
+
python -m opencode_memory.http_server
|
|
9
|
+
|
|
10
|
+
# Configure OpenCode to connect via remote MCP
|
|
11
|
+
# In opencode.json:
|
|
12
|
+
# {
|
|
13
|
+
# "mcp": {
|
|
14
|
+
# "memory": {
|
|
15
|
+
# "type": "remote",
|
|
16
|
+
# "url": "http://localhost:9824/mcp"
|
|
17
|
+
# }
|
|
18
|
+
# }
|
|
19
|
+
# }
|
|
20
|
+
|
|
21
|
+
Environment variables:
|
|
22
|
+
OPENCODE_MEMORY_HOST: Host to bind to (default: 127.0.0.1)
|
|
23
|
+
OPENCODE_MEMORY_PORT: Port to bind to (default: 9824)
|
|
24
|
+
OPENCODE_MEMORY_API_KEY: Optional API key for authentication
|
|
25
|
+
OPENCODE_MEMORY_RATE_LIMIT: Requests per minute per client (default: 60)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
import logging
|
|
30
|
+
import os
|
|
31
|
+
import secrets
|
|
32
|
+
import sys
|
|
33
|
+
import time
|
|
34
|
+
from collections import defaultdict
|
|
35
|
+
from contextlib import asynccontextmanager
|
|
36
|
+
from typing import Any
|
|
37
|
+
|
|
38
|
+
from mcp.server.streamable_http import StreamableHTTPServerTransport
|
|
39
|
+
|
|
40
|
+
from opencode_memory import metrics
|
|
41
|
+
from opencode_memory.daemon import background_throttle
|
|
42
|
+
from opencode_memory.server import MemoryServer
|
|
43
|
+
|
|
44
|
+
# starlette and uvicorn are optional extras. Fail fast with an actionable
# hint instead of a bare ImportError traceback when they are missing.
try:
    import uvicorn
    from starlette.applications import Starlette
    from starlette.responses import JSONResponse
except ImportError:
    # Diagnostics go to stderr so they never mix with piped stdout output.
    print("HTTP server requires additional dependencies:", file=sys.stderr)
    print(" pip install starlette uvicorn", file=sys.stderr)
    sys.exit(1)
|
|
52
|
+
|
|
53
|
+
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or blank.

    Returns:
        The parsed integer, or ``default``.

    Raises:
        ValueError: If the variable is set to something that is not an
            integer. The message names the variable so a bad deployment
            setting is easy to spot.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"{name} must be an integer, got {raw!r}") from None


# All settings are read once, at import time.
DEFAULT_HOST = os.environ.get("OPENCODE_MEMORY_HOST", "127.0.0.1")
DEFAULT_PORT = _env_int("OPENCODE_MEMORY_PORT", 9824)
API_KEY = os.environ.get("OPENCODE_MEMORY_API_KEY")  # Optional auth
RATE_LIMIT = _env_int("OPENCODE_MEMORY_RATE_LIMIT", 60)  # per minute
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class RateLimiter:
    """Simple in-memory, per-client sliding-window rate limiter.

    Each client may make at most ``requests_per_minute`` requests within
    any ``window_seconds`` (60 s) interval. State lives only in this
    process and is lost on restart.
    """

    def __init__(self, requests_per_minute: int = 60):
        """
        Args:
            requests_per_minute: Maximum accepted requests per client per window.
        """
        self.requests_per_minute = requests_per_minute
        self.window_seconds = 60
        # client id -> timestamps (time.time()) of accepted requests, oldest first
        self._requests: dict[str, list[float]] = defaultdict(list)
        # Time of the last full sweep; used to bound memory growth.
        self._last_cleanup = time.time()

    def _get_client_id(self, scope: dict) -> str:
        """Return the identifier used to bucket requests for this ASGI scope.

        Uses the first entry of ``X-Forwarded-For`` when present, otherwise
        the peer IP from ``scope["client"]``, otherwise ``"unknown"``.
        """
        # NOTE(security): X-Forwarded-For is client-controlled unless a trusted
        # proxy overwrites it, so a direct client can pick its own bucket.
        headers = dict(scope.get("headers", []))
        # latin-1 maps every byte, so a malformed header cannot raise here.
        forwarded = headers.get(b"x-forwarded-for", b"").decode("latin-1")
        if forwarded:
            return forwarded.split(",")[0].strip()
        client = scope.get("client", ("unknown", 0))
        return client[0] if client else "unknown"

    def is_allowed(self, scope: dict) -> tuple[bool, int]:
        """Check whether the request may proceed and record it if so.

        Returns:
            ``(allowed, retry_after_seconds)``; ``retry_after_seconds`` is 0
            when the request is allowed.
        """
        client_id = self._get_client_id(scope)
        now = time.time()
        window_start = now - self.window_seconds

        # Sweep idle clients at most once per window so the table cannot grow
        # without bound even if nobody calls cleanup() explicitly.
        if now - self._last_cleanup >= self.window_seconds:
            self.cleanup()

        # Drop this client's timestamps that have left the window.
        recent = [ts for ts in self._requests[client_id] if ts > window_start]
        self._requests[client_id] = recent

        if len(recent) >= self.requests_per_minute:
            # default=now covers a limit <= 0, where nothing is ever recorded
            # and min() would otherwise raise on the empty list.
            oldest = min(recent, default=now)
            retry_after = int(oldest + self.window_seconds - now) + 1
            return False, retry_after

        recent.append(now)
        return True, 0

    def cleanup(self) -> None:
        """Remove clients whose recorded requests have all left the window."""
        now = time.time()
        window_start = now - self.window_seconds
        # <= mirrors is_allowed(), which keeps only ts > window_start.
        stale_clients = [
            client
            for client, stamps in self._requests.items()
            if all(ts <= window_start for ts in stamps)
        ]
        for client in stale_clients:
            del self._requests[client]
        self._last_cleanup = now
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Process-wide limiter consulted by handle_mcp() for every /mcp request.
_rate_limiter = RateLimiter(RATE_LIMIT)

# Assigned by lifespan() during startup; stays None until then.
_memory_server: MemoryServer | None = None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class ClientTracker:
    """Track active clients based on recent request activity."""

    def __init__(self, active_window_seconds: int = 300):
        """
        Args:
            active_window_seconds: Consider client active if seen within this window (default 5 min)
        """
        self.active_window = active_window_seconds
        # client_id -> {"first_seen", "last_seen", "request_count"}
        self._clients: dict[str, dict] = {}
        # Time of the last stale-entry sweep; used to bound memory growth.
        self._last_prune = time.time()

    def record_request(self, scope: dict) -> None:
        """Record a request from the client identified by this ASGI scope."""
        client_id = self._get_client_id(scope)
        now = time.time()

        # Sweep at most once per window so the table stays bounded even when
        # get_active_clients() is never called.
        if now - self._last_prune >= self.active_window:
            self._prune(now)

        info = self._clients.get(client_id)
        if info is None:
            self._clients[client_id] = {
                "first_seen": now,
                "last_seen": now,
                "request_count": 1,
            }
        else:
            info["last_seen"] = now
            info["request_count"] += 1

    def _get_client_id(self, scope: dict) -> str:
        """Get client identifier from request.

        Uses the X-OpenCode-Session header if present, then the first
        X-Forwarded-For entry, then the peer IP, then ``"unknown"``.
        """
        headers = dict(scope.get("headers", []))

        # latin-1 maps every byte, so malformed header bytes cannot raise.
        session_id = headers.get(b"x-opencode-session", b"").decode("latin-1")
        if session_id:
            # Truncated for readability; sessions sharing the first 16
            # characters are therefore counted as one client.
            return f"session:{session_id[:16]}"

        # Fall back to IP (without port - ephemeral ports change per request)
        forwarded = headers.get(b"x-forwarded-for", b"").decode("latin-1")
        if forwarded:
            return forwarded.split(",")[0].strip()

        client = scope.get("client", ("unknown", 0))
        return client[0] if client else "unknown"

    def _prune(self, now: float) -> None:
        """Forget clients not seen within the active window ending at ``now``."""
        cutoff = now - self.active_window
        stale = [cid for cid, info in self._clients.items() if info["last_seen"] <= cutoff]
        for cid in stale:
            del self._clients[cid]
        self._last_prune = now

    def get_active_clients(self) -> dict:
        """Get info about currently active clients.

        Stale clients are removed as a side effect.

        Returns:
            A dict with ``active_count``, ``active_window_seconds`` and
            ``clients`` (most recently seen first).
        """
        now = time.time()
        self._prune(now)

        active = [
            {
                "client": client_id,
                "last_seen_seconds_ago": int(now - info["last_seen"]),
                "requests": info["request_count"],
                "session_duration_seconds": int(now - info["first_seen"]),
            }
            for client_id, info in self._clients.items()
        ]

        return {
            "active_count": len(active),
            "active_window_seconds": self.active_window,
            "clients": sorted(active, key=lambda x: x["last_seen_seconds_ago"]),
        }
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# Module-level tracker shared by the /mcp handler and the /stats endpoint.
_client_tracker = ClientTracker()


def get_active_clients() -> dict | None:
    """Return a summary of recently active clients.

    Delegates to the module-level ``ClientTracker``. The tracker is created
    at import time, so this always returns a dict; ``None`` remains in the
    annotation only so existing callers that handle it keep type-checking.
    """
    return _client_tracker.get_active_clients()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _check_auth(scope: dict) -> bool:
    """Check API key authentication if configured.

    Accepts the key either as ``Authorization: Bearer <key>`` or in an
    ``X-API-Key`` header. When no API key is configured every request is
    accepted.

    Args:
        scope: ASGI HTTP scope whose ``headers`` are inspected.

    Returns:
        True if the request is authorized. On failure the auth-failure
        metric is incremented and False is returned.
    """
    if not API_KEY:
        return True  # No auth configured

    headers = dict(scope.get("headers", []))

    # Compare raw bytes: compare_digest() raises TypeError for non-ASCII str
    # arguments, and decoding attacker-supplied bytes can raise as well, so a
    # malformed credential must not turn into a 500 instead of a 401.
    expected = API_KEY.encode("utf-8")

    # Authorization header (Bearer token); the scheme name is case-insensitive.
    scheme, _, token = headers.get(b"authorization", b"").partition(b" ")
    if scheme.lower() == b"bearer" and secrets.compare_digest(token, expected):
        return True

    # X-API-Key header
    api_key = headers.get(b"x-api-key", b"")
    if api_key and secrets.compare_digest(api_key, expected):
        return True

    metrics.auth_failures.inc()
    return False
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
async def handle_mcp(scope: dict[str, Any], receive: Any, send: Any) -> None:
    """Handle MCP requests via streamable HTTP transport - stateless mode.

    Every request is authenticated, recorded in the client tracker and
    rate limited. It then gets its own transport and its own run of the
    MCP server, both torn down when the request finishes.

    Responds with 401 on failed auth, 429 (with ``Retry-After``) when rate
    limited, and 503 while the memory server is not initialized.
    """
    # Check authentication
    if not _check_auth(scope):
        response = JSONResponse(
            {"error": "Unauthorized", "message": "Invalid or missing API key"},
            status_code=401,
        )
        await response(scope, receive, send)
        return

    # Track client activity
    _client_tracker.record_request(scope)

    # Check rate limit
    allowed, retry_after = _rate_limiter.is_allowed(scope)
    if not allowed:
        metrics.rate_limit_rejections.inc()
        response = JSONResponse(
            {
                "error": "Rate limited",
                "message": f"Too many requests. Retry after {retry_after} seconds.",
                "retry_after": retry_after,
            },
            status_code=429,
            headers={"Retry-After": str(retry_after)},
        )
        await response(scope, receive, send)
        return

    if _memory_server is None:
        response = JSONResponse({"error": "Server not initialized"}, status_code=503)
        await response(scope, receive, send)
        return

    transport = StreamableHTTPServerTransport(
        mcp_session_id=None,
        is_json_response_enabled=True,
    )
    ready_event = asyncio.Event()

    async def run_server() -> None:
        async with transport.connect() as (read_stream, write_stream):
            # Signal that the transport streams exist before serving.
            ready_event.set()
            await _memory_server.server.run(
                read_stream,
                write_stream,
                _memory_server.server.create_initialization_options(),
                stateless=True,
            )

    async with background_throttle.priority_context():
        server_task = asyncio.create_task(run_server())
        ready_waiter = asyncio.create_task(ready_event.wait())
        try:
            # Wait for readiness OR for the server task to end. If
            # run_server() fails before setting the event, waiting on the
            # event alone would hang this request forever.
            await asyncio.wait(
                {server_task, ready_waiter},
                return_when=asyncio.FIRST_COMPLETED,
            )
            if not ready_event.is_set():
                # Re-raises the startup failure, if any.
                await server_task
                raise RuntimeError("MCP server task exited before the transport was ready")
            await transport.handle_request(scope, receive, send)
        finally:
            ready_waiter.cancel()
            try:
                await transport.terminate()
            finally:
                # Always reap the server task, even if terminate() fails.
                server_task.cancel()
                try:
                    await server_task
                except asyncio.CancelledError:
                    pass
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
async def _prewarm_embeddings(server: MemoryServer) -> None:
    """Pre-warm the embedding model in background.

    Embeds a throwaway string after a short delay. Failures are logged and
    swallowed: warm-up is best-effort and must not take the server down.

    Args:
        server: Memory server whose ``embeddings`` engine is warmed.
    """
    await asyncio.sleep(0.5)  # Let server finish startup first
    try:
        logger.info("Pre-warming embedding model...")
        await server.embeddings.embed_async("warmup")
        logger.info("Embedding model ready")
    except Exception as e:
        logger.warning("Failed to pre-warm embeddings: %s", e)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@asynccontextmanager
async def lifespan(app):
    """Manage server lifecycle.

    Startup creates the global ``MemoryServer``, starts its daemon if it
    has one, and schedules embedding warm-up. Shutdown waits briefly for
    pending embeddings, then closes the enricher and stops the daemon.

    The logged URLs use the DEFAULT_HOST/DEFAULT_PORT constants, which can
    differ from the address uvicorn binds when ``main()`` receives
    ``--host``/``--port`` overrides.
    """
    global _memory_server

    logger.info("Starting opencode-memory HTTP server...")

    _memory_server = MemoryServer(enable_daemon=True)

    if _memory_server.daemon:
        await _memory_server.daemon.start()

    # Keep a strong reference: the event loop holds tasks only weakly, so an
    # unreferenced task can be garbage-collected before it finishes.
    prewarm_task = asyncio.create_task(_prewarm_embeddings(_memory_server))

    logger.info(f"Server ready at http://{DEFAULT_HOST}:{DEFAULT_PORT}")
    logger.info(f"MCP endpoint: http://{DEFAULT_HOST}:{DEFAULT_PORT}/mcp")

    try:
        yield
    finally:
        # Run shutdown even if an exception is thrown into the context.
        logger.info("Shutting down...")

        # Stop warm-up if it is still running and reap the task.
        if not prewarm_task.done():
            prewarm_task.cancel()
        await asyncio.gather(prewarm_task, return_exceptions=True)

        if _memory_server:
            # Wait for any pending background embeddings
            still_pending = await MemoryServer.wait_for_pending_embeddings(timeout=5.0)
            if still_pending > 0:
                logger.warning(f"Shutdown with {still_pending} embeddings still pending")

            try:
                await _memory_server.enricher.close()
            finally:
                # Stop the daemon even when closing the enricher fails.
                if _memory_server.daemon:
                    await _memory_server.daemon.stop()
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
class MCPApp:
    """Bare ASGI callable that dispatches on the request path.

    Routes: ``/health`` (no auth), ``/stats`` (auth), ``/metrics``,
    ``/mcp``; anything else gets a JSON 404. Non-HTTP scopes are ignored.
    """

    async def __call__(self, scope: dict[str, Any], receive: Any, send: Any) -> None:
        if scope["type"] != "http":
            return

        route = scope.get("path", "")
        if route == "/mcp":
            await handle_mcp(scope, receive, send)
            return

        if route == "/health":
            reply = self._health_reply()
        elif route == "/stats":
            reply = self._stats_reply(scope)
        elif route == "/metrics":
            reply = self._metrics_reply()
        else:
            reply = JSONResponse({"error": "Not found"}, status_code=404)
        await reply(scope, receive, send)

    @staticmethod
    def _health_reply() -> Any:
        """Build the unauthenticated health response (200 ok / 503 degraded)."""
        payload = {"status": "ok", "server": "opencode-memory"}
        server = _memory_server

        if server:
            # Daemon health info, when a daemon exists.
            if server.daemon:
                payload["daemon"] = {
                    "running": server.daemon.is_running,
                }

            # A backed-up embedding queue marks the server as degraded.
            backlog = len(MemoryServer._pending_embeddings)
            if backlog > 50:
                payload["status"] = "degraded"
                payload["warning"] = f"Embedding queue backed up: {backlog} pending"

        code = 503 if payload["status"] != "ok" else 200
        return JSONResponse(payload, status_code=code)

    @staticmethod
    def _stats_reply(scope: dict[str, Any]) -> Any:
        """Build the monitoring response, or a 401 when auth fails."""
        if not _check_auth(scope):
            return JSONResponse({"error": "Unauthorized"}, status_code=401)

        report = {
            "status": "ok",
            "server": "opencode-memory",
            "rate_limit": {
                "requests_per_minute": RATE_LIMIT,
            },
            "clients": _client_tracker.get_active_clients(),
        }
        if _memory_server:
            snapshot = _memory_server._get_status()
            for section in ("embedding_queue", "storage", "memories", "cache", "links"):
                report[section] = snapshot.get(section)
            if _memory_server.daemon:
                report["daemon"] = {
                    "running": _memory_server.daemon.is_running,
                }
        return JSONResponse(report)

    @staticmethod
    def _metrics_reply() -> Any:
        """Refresh gauges from server status and render Prometheus text."""
        if _memory_server:
            metrics.update_from_status(_memory_server._get_status())

        from starlette.responses import Response

        return Response(
            content=metrics.registry.render(),
            media_type="text/plain; version=0.0.4; charset=utf-8",
        )
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def create_app() -> Starlette:
    """Build the Starlette app: no routes or middleware, only ``lifespan``."""
    settings = {
        "routes": [],
        "lifespan": lifespan,
        "middleware": [],
    }
    return Starlette(**settings)
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def create_asgi_app():
    """Create ASGI app with MCP handling.

    Returns:
        An ASGI callable that forwards ``lifespan`` events to the Starlette
        app (which owns startup/shutdown) and every other scope to ``MCPApp``.
    """
    starlette_app = create_app()
    # MCPApp holds no per-request state, so one shared instance is enough;
    # there is no need to allocate a new one for every request.
    mcp_app = MCPApp()

    async def app(scope: dict[str, Any], receive: Any, send: Any) -> None:
        if scope["type"] == "lifespan":
            await starlette_app(scope, receive, send)
        else:
            await mcp_app(scope, receive, send)

    return app
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def main(host: str = DEFAULT_HOST, port: int = DEFAULT_PORT) -> None:
    """Parse ``--host``/``--port`` and serve the ASGI app with uvicorn.

    Args:
        host: Bind address used when ``--host`` is not given.
        port: Port used when ``--port`` is not given.
    """
    import argparse

    cli = argparse.ArgumentParser(description="OpenCode Memory HTTP Server")
    cli.add_argument("--host", default=host, help=f"Host to bind to (default: {host})")
    cli.add_argument("--port", type=int, default=port, help=f"Port to bind to (default: {port})")
    options = cli.parse_args()

    uvicorn.run(
        create_asgi_app(),
        host=options.host,
        port=options.port,
        log_level="info",
    )


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Ingestion modules for opencode-memory."""
|
|
2
|
+
|
|
3
|
+
from opencode_memory.ingestion.embeddings import EmbeddingEngine
|
|
4
|
+
from opencode_memory.ingestion.parser import MarkdownParser
|
|
5
|
+
from opencode_memory.ingestion.watcher import FileWatcher
|
|
6
|
+
|
|
7
|
+
__all__ = ["EmbeddingEngine", "MarkdownParser", "FileWatcher"]
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Local embedding generation using sentence-transformers.
|
|
2
|
+
|
|
3
|
+
All processing is done locally - no data is sent to any external service.
|
|
4
|
+
The model is downloaded once from HuggingFace and cached locally.
|
|
5
|
+
|
|
6
|
+
Uses ProcessPoolExecutor to avoid GIL blocking during CPU-intensive embedding.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import multiprocessing
|
|
13
|
+
import os
|
|
14
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
|
15
|
+
from typing import TYPE_CHECKING, Any
|
|
16
|
+
|
|
17
|
+
# Use 'spawn' instead of 'fork' to avoid deadlocks in multi-threaded processes.
# This is the default on macOS and Windows, but not on Linux.
# NOTE: this runs at import time and sets the start method for the whole
# process, not just for this module's pool.
try:
    multiprocessing.set_start_method("spawn", force=False)
except RuntimeError:
    # A start method was already chosen elsewhere; leave it as it is.
    pass
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from sentence_transformers import SentenceTransformer
|
|
27
|
+
|
|
28
|
+
# Dimension reported by EmbeddingEngine.dimension before a model is loaded.
DEFAULT_EMBEDDING_DIM = 384
DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2"

# Created lazily by _get_thread_executor() / _get_process_executor().
_thread_executor: ThreadPoolExecutor | None = None
_process_executor: ProcessPoolExecutor | None = None
# Per-process model cache filled by the *_in_subprocess helpers.
_subprocess_model: Any = None
# True once _shutdown_executors has been registered with atexit.
_atexit_registered: bool = False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _shutdown_executors() -> None:
    """Shut down both lazily created pools without waiting, then forget them.

    Registered with ``atexit`` so worker threads and processes are released
    when the interpreter exits.
    """
    module_state = globals()
    # Thread pool first, then process pool.
    for slot in ("_thread_executor", "_process_executor"):
        pool = module_state[slot]
        if pool is None:
            continue
        pool.shutdown(wait=False)
        module_state[slot] = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _get_thread_executor() -> ThreadPoolExecutor:
    """Return the shared thread pool, creating it on first use.

    The first creation of either pool also registers the atexit shutdown
    hook, exactly once.
    """
    global _thread_executor, _atexit_registered

    if _thread_executor is not None:
        return _thread_executor

    _thread_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="embedding")
    if _atexit_registered:
        return _thread_executor

    import atexit

    atexit.register(_shutdown_executors)
    _atexit_registered = True
    return _thread_executor
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _get_process_executor() -> ProcessPoolExecutor:
    """Return the shared process pool, creating it on first use.

    The first creation of either pool also registers the atexit shutdown
    hook, exactly once.
    """
    global _process_executor, _atexit_registered

    if _process_executor is not None:
        return _process_executor

    _process_executor = ProcessPoolExecutor(max_workers=2)
    if _atexit_registered:
        return _process_executor

    import atexit

    atexit.register(_shutdown_executors)
    _atexit_registered = True
    return _process_executor
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _configure_thread_limits() -> None:
    """Cap native math-library thread pools via environment variables.

    Must run before torch/numpy are imported. Each variable is set,
    overwriting any existing value, to a quarter of the CPU count
    (at least 1).
    """
    cores = os.cpu_count() or 4
    limit = str(max(1, cores // 4))
    for variable in (
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    ):
        os.environ[variable] = limit
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _load_model(model_name: str) -> Any:
    """Load the sentence transformer model (deferred import).

    torch and sentence_transformers are imported here rather than at module
    level, so importing this module stays cheap.

    Args:
        model_name: Name passed to ``SentenceTransformer``.

    Returns:
        The loaded ``SentenceTransformer`` instance.
    """
    _configure_thread_limits()  # Must be before torch import
    # Force offline mode: the model must already be in the local cache.
    # NOTE(review): the module docstring says the model is downloaded once;
    # presumably another code path does that download - confirm.
    os.environ["HF_HUB_OFFLINE"] = "1"
    os.environ["TRANSFORMERS_OFFLINE"] = "1"

    import torch

    num_threads = max(1, (os.cpu_count() or 4) // 4)
    torch.set_num_threads(num_threads)
    try:
        torch.set_num_interop_threads(2)
    except RuntimeError:
        # torch allows this setting only once per process and only before
        # parallel work has started; on a later load keep the current value
        # instead of failing the whole model load.
        pass

    from sentence_transformers import SentenceTransformer

    return SentenceTransformer(model_name)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Name of the model currently cached in _subprocess_model (None until loaded).
_subprocess_model_name: str | None = None


def _get_subprocess_model(model_name: str) -> Any:
    """Return this worker's cached model, (re)loading it when the name changes."""
    global _subprocess_model, _subprocess_model_name
    # Reusing the cache without comparing names would silently answer a
    # request for one model with embeddings produced by another.
    if _subprocess_model is None or _subprocess_model_name != model_name:
        _subprocess_model = _load_model(model_name)
        _subprocess_model_name = model_name
    return _subprocess_model


def _embed_in_subprocess(text: str, model_name: str = DEFAULT_MODEL_NAME) -> list[float]:
    """Embed text in subprocess (loads model if needed).

    Args:
        text: Text to embed.
        model_name: Model to embed with.

    Returns:
        The embedding as a plain list of floats.
    """
    embedding = _get_subprocess_model(model_name).encode(text, convert_to_numpy=True)
    return embedding.tolist()


def _embed_batch_in_subprocess(
    texts: list[str], model_name: str = DEFAULT_MODEL_NAME
) -> list[list[float]]:
    """Embed batch in subprocess (loads model if needed).

    Args:
        texts: Texts to embed.
        model_name: Model to embed with.

    Returns:
        One embedding (list of floats) per input text, in input order.
    """
    embeddings = _get_subprocess_model(model_name).encode(texts, convert_to_numpy=True)
    return embeddings.tolist()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class EmbeddingEngine:
    """Generate embeddings using a local sentence-transformers model.

    With ``use_process_pool=True`` (the default) all embedding work is
    submitted to the shared process pool, so the model is loaded in worker
    processes rather than in the main process. With
    ``use_process_pool=False`` the model is loaded lazily in this process.
    """

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME, use_process_pool: bool = True):
        """
        Args:
            model_name: Model name passed to sentence-transformers.
            use_process_pool: Embed in worker processes instead of in-process.
        """
        self.model_name = model_name
        self._model: SentenceTransformer | None = None
        self._dimension: int | None = None
        self._use_process_pool = use_process_pool

    @property
    def model(self) -> SentenceTransformer:
        """Lazy-load the model in main process.

        Note: Prefer embed() which uses subprocess to avoid loading model
        in main process.
        """
        if self._model is None:
            loaded = _load_model(self.model_name)
            # The dimension accessor is named differently across
            # sentence-transformers releases; use whichever one exists.
            get_dimension = getattr(loaded, "get_embedding_dimension", None) or getattr(
                loaded, "get_sentence_embedding_dimension"
            )
            self._dimension = get_dimension()
            self._model = loaded
        return self._model

    @property
    def dimension(self) -> int:
        """Get the embedding dimension without loading the model.

        Returns the dimension recorded when the model was loaded in this
        process, otherwise DEFAULT_EMBEDDING_DIM (384, the value for
        all-MiniLM-L6-v2).

        NOTE(review): for any other ``model_name`` the fallback may be
        wrong until the model has been loaded in-process - confirm callers
        only rely on it for the default model.
        """
        if self._dimension is not None:
            return self._dimension
        return DEFAULT_EMBEDDING_DIM

    def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text.

        Blocks until the result is available. When use_process_pool=True
        (default), runs in subprocess to avoid loading model in main process.
        """
        if self._use_process_pool:
            future = _get_process_executor().submit(_embed_in_subprocess, text, self.model_name)
            return future.result()
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()  # type: ignore[no-any-return]

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts.

        Blocks until the result is available. When use_process_pool=True
        (default), runs in subprocess to avoid loading model in main process.
        """
        if self._use_process_pool:
            future = _get_process_executor().submit(
                _embed_batch_in_subprocess, texts, self.model_name
            )
            return future.result()
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        return embeddings.tolist()  # type: ignore[no-any-return]

    async def embed_async(self, text: str) -> list[float]:
        """Generate embedding for a single text without blocking the event loop.

        Runs in the process pool, or in the thread pool when
        ``use_process_pool`` is False.
        """
        # Inside a coroutine the running loop is the one to use.
        loop = asyncio.get_running_loop()
        if self._use_process_pool:
            return await loop.run_in_executor(
                _get_process_executor(), _embed_in_subprocess, text, self.model_name
            )
        return await loop.run_in_executor(_get_thread_executor(), self.embed, text)

    async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts without blocking the event loop.

        Runs in the process pool, or in the thread pool when
        ``use_process_pool`` is False.
        """
        loop = asyncio.get_running_loop()
        if self._use_process_pool:
            return await loop.run_in_executor(
                _get_process_executor(), _embed_batch_in_subprocess, texts, self.model_name
            )
        return await loop.run_in_executor(_get_thread_executor(), self.embed_batch, texts)
|