opencode-semantic-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,464 @@
1
+ """HTTP server for opencode-memory with streamable HTTP transport.
2
+
3
+ This allows multiple OpenCode sessions to share a single memory server,
4
+ avoiding the overhead of loading the embedding model multiple times.
5
+
6
+ Usage:
7
+ # Start the server (once, e.g., via systemd user service)
8
+ python -m opencode_memory.http_server
9
+
10
+ # Configure OpenCode to connect via remote MCP
11
+ # In opencode.json:
12
+ # {
13
+ # "mcp": {
14
+ # "memory": {
15
+ # "type": "remote",
16
+ # "url": "http://localhost:9824/mcp"
17
+ # }
18
+ # }
19
+ # }
20
+
21
+ Environment variables:
22
+ OPENCODE_MEMORY_HOST: Host to bind to (default: 127.0.0.1)
23
+ OPENCODE_MEMORY_PORT: Port to bind to (default: 9824)
24
+ OPENCODE_MEMORY_API_KEY: Optional API key for authentication
25
+ OPENCODE_MEMORY_RATE_LIMIT: Requests per minute per client (default: 60)
26
+ """
27
+
28
+ import asyncio
29
+ import logging
30
+ import os
31
+ import secrets
32
+ import sys
33
+ import time
34
+ from collections import defaultdict
35
+ from contextlib import asynccontextmanager
36
+ from typing import Any
37
+
38
+ from mcp.server.streamable_http import StreamableHTTPServerTransport
39
+
40
+ from opencode_memory import metrics
41
+ from opencode_memory.daemon import background_throttle
42
+ from opencode_memory.server import MemoryServer
43
+
44
+ try:
45
+ import uvicorn
46
+ from starlette.applications import Starlette
47
+ from starlette.responses import JSONResponse
48
+ except ImportError:
49
+ print("HTTP server requires additional dependencies:")
50
+ print(" pip install starlette uvicorn")
51
+ sys.exit(1)
52
+
53
# Root logging is configured as a side effect of importing this module.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Runtime settings, read from the environment once at import time.
# The integer settings raise ValueError here if the variable is not numeric.
DEFAULT_HOST = os.getenv("OPENCODE_MEMORY_HOST", "127.0.0.1")
DEFAULT_PORT = int(os.getenv("OPENCODE_MEMORY_PORT", "9824"))
API_KEY = os.getenv("OPENCODE_MEMORY_API_KEY")  # None/empty disables auth
RATE_LIMIT = int(os.getenv("OPENCODE_MEMORY_RATE_LIMIT", "60"))  # requests per minute
63
+
64
+
65
class RateLimiter:
    """Simple in-memory rate limiter using a sliding window.

    Each client gets a list of request timestamps; a request is accepted
    while fewer than ``requests_per_minute`` timestamps fall inside the
    last ``window_seconds`` seconds.
    """

    def __init__(self, requests_per_minute: int = 60):
        """
        Args:
            requests_per_minute: Maximum accepted requests per client per
                window. A value of 0 or less rejects every request.
        """
        self.requests_per_minute = requests_per_minute
        self.window_seconds = 60
        self._requests: dict[str, list[float]] = defaultdict(list)

    def _get_client_id(self, scope: dict) -> str:
        """Return the client identifier for an ASGI scope.

        Uses the first entry of X-Forwarded-For when present, otherwise the
        peer address from ``scope["client"]``, otherwise ``"unknown"``.
        """
        # NOTE(review): X-Forwarded-For is client-controlled unless a trusted
        # proxy overwrites it, so a direct client can pick its own bucket.
        headers = dict(scope.get("headers", []))
        # errors="replace": a malformed header must not crash the request.
        forwarded = headers.get(b"x-forwarded-for", b"").decode("utf-8", errors="replace")
        if forwarded:
            return forwarded.split(",")[0].strip()

        client = scope.get("client", ("unknown", 0))
        return client[0] if client else "unknown"

    def is_allowed(self, scope: dict) -> tuple[bool, int]:
        """Check whether a request is allowed.

        Returns:
            ``(allowed, retry_after_seconds)``; ``retry_after_seconds`` is 0
            when the request is allowed.
        """
        client_id = self._get_client_id(scope)
        now = time.time()
        window_start = now - self.window_seconds

        # Keep only the timestamps that are still inside the window.
        recent = [ts for ts in self._requests[client_id] if ts > window_start]

        if len(recent) >= self.requests_per_minute:
            if recent:
                self._requests[client_id] = recent
                # The oldest request leaving the window frees the next slot.
                retry_after = int(min(recent) + self.window_seconds - now) + 1
            else:
                # Only reachable with a limit <= 0: nothing ever frees up, so
                # there is no "oldest" request. Do not keep an empty entry.
                self._requests.pop(client_id, None)
                retry_after = self.window_seconds
            return False, retry_after

        recent.append(now)
        self._requests[client_id] = recent
        return True, 0

    def cleanup(self) -> None:
        """Remove clients whose timestamps have all left the window (call periodically)."""
        window_start = time.time() - self.window_seconds
        # Same boundary as is_allowed(), which keeps only ts > window_start.
        stale_clients = [
            client
            for client, stamps in self._requests.items()
            if all(ts <= window_start for ts in stamps)
        ]
        for client in stale_clients:
            del self._requests[client]
114
+
115
+
116
# Single limiter instance shared by every request, sized from RATE_LIMIT.
_rate_limiter = RateLimiter(requests_per_minute=RATE_LIMIT)

# Set by lifespan() during startup; handlers treat None as "not initialized".
_memory_server: MemoryServer | None = None
119
+
120
+
121
class ClientTracker:
    """Track active clients based on recent request activity."""

    def __init__(self, active_window_seconds: int = 300):
        """
        Args:
            active_window_seconds: Consider client active if seen within this window (default 5 min)
        """
        self.active_window = active_window_seconds
        # client_id -> {"first_seen", "last_seen", "request_count"}
        self._clients: dict[str, dict] = {}

    def record_request(self, scope: dict) -> None:
        """Record one request from the client identified by ``scope``."""
        client_id = self._get_client_id(scope)
        now = time.time()

        entry = self._clients.get(client_id)
        if entry is None:
            self._clients[client_id] = {
                "first_seen": now,
                "last_seen": now,
                "request_count": 1,
            }
        else:
            entry["last_seen"] = now
            entry["request_count"] += 1

    def _get_client_id(self, scope: dict) -> str:
        """Get client identifier from request.

        Uses the X-OpenCode-Session header if present, then the first entry
        of X-Forwarded-For, then the peer IP, and finally ``"unknown"``.
        """
        headers = dict(scope.get("headers", []))

        # errors="replace": a malformed header must not crash the request.
        session_id = headers.get(b"x-opencode-session", b"").decode("utf-8", errors="replace")
        if session_id:
            return f"session:{session_id[:16]}"  # Truncate for readability

        # Fall back to IP (without port - ephemeral ports change per request)
        forwarded = headers.get(b"x-forwarded-for", b"").decode("utf-8", errors="replace")
        if forwarded:
            return forwarded.split(",")[0].strip()

        client = scope.get("client", ("unknown", 0))
        return client[0] if client else "unknown"

    def get_active_clients(self) -> dict:
        """Summarise clients seen within the active window.

        Clients last seen outside the window are removed from the tracker as
        a side effect of this call.

        Returns:
            Dict with ``active_count``, ``active_window_seconds`` and
            ``clients`` (most recently seen first).
        """
        now = time.time()
        cutoff = now - self.active_window

        active = []
        stale = []
        for client_id, info in self._clients.items():
            if info["last_seen"] > cutoff:
                active.append(
                    {
                        "client": client_id,
                        "last_seen_seconds_ago": int(now - info["last_seen"]),
                        "requests": info["request_count"],
                        "session_duration_seconds": int(now - info["first_seen"]),
                    }
                )
            else:
                stale.append(client_id)

        # Deleted after the loop: the dict must not change while iterating it.
        for client_id in stale:
            del self._clients[client_id]

        return {
            "active_count": len(active),
            "active_window_seconds": self.active_window,
            "clients": sorted(active, key=lambda item: item["last_seen_seconds_ago"]),
        }
199
+
200
+
201
# Module-wide tracker, created at import time with the default activity window.
_client_tracker = ClientTracker()
202
+
203
+
204
def get_active_clients() -> dict | None:
    """Return the module-level tracker's summary of recently active clients.

    This is a thin wrapper around ``_client_tracker.get_active_clients()``
    and returns whatever that call returns.
    """
    summary = _client_tracker.get_active_clients()
    return summary
207
+
208
+
209
def _check_auth(scope: dict) -> bool:
    """Check API key authentication if configured.

    When ``API_KEY`` is unset or empty every request is accepted. Otherwise
    the key must arrive either as ``Authorization: Bearer <key>`` or in an
    ``X-API-Key`` header.

    Args:
        scope: ASGI HTTP scope; its ``headers`` are (name, value) byte pairs.

    Returns:
        True when the request is authorized. A rejected request increments
        ``metrics.auth_failures`` before returning False.
    """
    if not API_KEY:
        return True  # No auth configured

    # Compare as bytes: secrets.compare_digest() raises TypeError for str
    # arguments containing non-ASCII characters, and decoding arbitrary
    # header bytes can raise UnicodeDecodeError. Either would turn a bad
    # credential into a server error instead of a rejection.
    expected = API_KEY.encode()
    headers = dict(scope.get("headers", []))

    candidates: list[bytes] = []

    # Authorization header (Bearer token); the scheme is matched case-insensitively.
    authorization = headers.get(b"authorization", b"")
    if authorization[:7].lower() == b"bearer ":
        candidates.append(authorization[7:])

    # X-API-Key header
    api_key_header = headers.get(b"x-api-key", b"")
    if api_key_header:
        candidates.append(api_key_header)

    for candidate in candidates:
        if secrets.compare_digest(candidate, expected):
            return True

    metrics.auth_failures.inc()
    return False
231
+
232
+
233
async def handle_mcp(scope: dict[str, Any], receive: Any, send: Any) -> None:
    """Handle MCP requests via streamable HTTP transport - stateless mode.

    Order of checks: authentication (401), client tracking, rate limit (429
    with a Retry-After header), server availability (503). A request that
    passes all of them gets its own transport and its own server task, both
    torn down when the request finishes.

    Args:
        scope: ASGI HTTP scope of the request.
        receive: ASGI receive callable.
        send: ASGI send callable.
    """
    # Check authentication
    if not _check_auth(scope):
        response = JSONResponse(
            {"error": "Unauthorized", "message": "Invalid or missing API key"},
            status_code=401,
        )
        await response(scope, receive, send)
        return

    # Track client activity
    _client_tracker.record_request(scope)

    # Check rate limit
    allowed, retry_after = _rate_limiter.is_allowed(scope)
    if not allowed:
        metrics.rate_limit_rejections.inc()
        response = JSONResponse(
            {
                "error": "Rate limited",
                "message": f"Too many requests. Retry after {retry_after} seconds.",
                "retry_after": retry_after,
            },
            status_code=429,
            headers={"Retry-After": str(retry_after)},
        )
        await response(scope, receive, send)
        return

    memory_server = _memory_server
    if memory_server is None:
        response = JSONResponse({"error": "Server not initialized"}, status_code=503)
        await response(scope, receive, send)
        return

    transport = StreamableHTTPServerTransport(
        mcp_session_id=None,
        is_json_response_enabled=True,
    )
    ready_event = asyncio.Event()

    async def run_server():
        async with transport.connect() as (read_stream, write_stream):
            ready_event.set()
            await memory_server.server.run(
                read_stream,
                write_stream,
                memory_server.server.create_initialization_options(),
                stateless=True,
            )

    async with background_throttle.priority_context():
        server_task = asyncio.create_task(run_server())
        ready_waiter = asyncio.create_task(ready_event.wait())
        connected = False
        try:
            # Wait for the streams to connect OR for the server task to end.
            # Waiting on the event alone would hang this request forever if
            # run_server() failed before it could set the event.
            await asyncio.wait(
                {server_task, ready_waiter},
                return_when=asyncio.FIRST_COMPLETED,
            )
            connected = ready_event.is_set()
            if connected:
                await transport.handle_request(scope, receive, send)
            else:
                failure = None if server_task.cancelled() else server_task.exception()
                logger.error("MCP transport failed to start: %r", failure)
                response = JSONResponse(
                    {"error": "MCP transport failed to start"},
                    status_code=500,
                )
                await response(scope, receive, send)
        finally:
            ready_waiter.cancel()
            if connected:
                await transport.terminate()
            server_task.cancel()
            try:
                await server_task
            except asyncio.CancelledError:
                pass
            except Exception:
                # A start-up failure was already logged and answered above;
                # failures after a successful connect still propagate.
                if connected:
                    raise
296
+
297
+
298
async def _prewarm_embeddings(server: MemoryServer) -> None:
    """Warm up the embedding model by embedding one throwaway string.

    Any ordinary exception from the warm-up call is logged as a warning and
    not re-raised.

    Args:
        server: Memory server whose ``embeddings.embed_async`` is called.
    """
    # Short pause so the rest of server startup can finish first.
    await asyncio.sleep(0.5)
    try:
        logger.info("Pre-warming embedding model...")
        await server.embeddings.embed_async("warmup")
    except Exception as e:
        logger.warning(f"Failed to pre-warm embeddings: {e}")
    else:
        logger.info("Embedding model ready")
309
+
310
+
311
@asynccontextmanager
async def lifespan(app):
    """Manage server lifecycle.

    Startup creates the shared ``MemoryServer``, starts its daemon when it
    has one, and schedules the embedding pre-warm in the background.
    Shutdown waits up to 5 seconds for pending embeddings, closes the
    enricher and stops the daemon.

    Args:
        app: The application passed in by the ASGI framework (unused).
    """
    global _memory_server

    logger.info("Starting opencode-memory HTTP server...")

    _memory_server = MemoryServer(enable_daemon=True)

    if _memory_server.daemon:
        await _memory_server.daemon.start()

    # Keep a reference: the event loop only holds weak references to tasks,
    # so an unreferenced task can be garbage-collected before it finishes.
    prewarm_task = asyncio.create_task(_prewarm_embeddings(_memory_server))

    logger.info(f"Server ready at http://{DEFAULT_HOST}:{DEFAULT_PORT}")
    logger.info(f"MCP endpoint: http://{DEFAULT_HOST}:{DEFAULT_PORT}/mcp")

    try:
        yield
    finally:
        # Runs even when an exception is thrown into the context manager.
        logger.info("Shutting down...")

        if not prewarm_task.done():
            prewarm_task.cancel()
            try:
                await prewarm_task
            except asyncio.CancelledError:
                pass

        if _memory_server:
            # Wait for any pending background embeddings
            still_pending = await MemoryServer.wait_for_pending_embeddings(timeout=5.0)
            if still_pending > 0:
                logger.warning(f"Shutdown with {still_pending} embeddings still pending")

            await _memory_server.enricher.close()
            if _memory_server.daemon:
                await _memory_server.daemon.stop()
341
+
342
+
343
class MCPApp:
    """ASGI application dispatching on the request path.

    Routes: ``/health`` (no auth), ``/stats`` (auth checked), ``/metrics``
    (Prometheus text format), ``/mcp`` (delegated to ``handle_mcp``); any
    other path gets a JSON 404. Scopes that are not HTTP are ignored.
    """

    async def __call__(self, scope: dict[str, Any], receive: Any, send: Any) -> None:
        if scope["type"] != "http":
            return

        route = scope.get("path", "")
        if route == "/mcp":
            await handle_mcp(scope, receive, send)
            return

        if route == "/health":
            reply = self._health_response()
        elif route == "/stats":
            reply = self._stats_response(scope)
        elif route == "/metrics":
            reply = self._metrics_response()
        else:
            reply = JSONResponse({"error": "Not found"}, status_code=404)
        await reply(scope, receive, send)

    @staticmethod
    def _health_response():
        """Build the /health reply: 200 when status is "ok", otherwise 503."""
        server = _memory_server
        body = {"status": "ok", "server": "opencode-memory"}

        # Daemon state is reported only when a daemon exists.
        if server and server.daemon:
            body["daemon"] = {"running": server.daemon.is_running}

        # More than 50 pending embeddings marks the server as degraded.
        if server:
            pending = len(MemoryServer._pending_embeddings)
            if pending > 50:
                body["status"] = "degraded"
                body["warning"] = f"Embedding queue backed up: {pending} pending"

        code = 200 if body["status"] == "ok" else 503
        return JSONResponse(body, status_code=code)

    @staticmethod
    def _stats_response(scope: dict[str, Any]):
        """Build the /stats reply, or a 401 reply when the auth check fails."""
        if not _check_auth(scope):
            return JSONResponse({"error": "Unauthorized"}, status_code=401)

        body = {
            "status": "ok",
            "server": "opencode-memory",
            "rate_limit": {"requests_per_minute": RATE_LIMIT},
            "clients": _client_tracker.get_active_clients(),
        }
        server = _memory_server
        if server:
            snapshot = server._get_status()
            for section in ("embedding_queue", "storage", "memories", "cache", "links"):
                body[section] = snapshot.get(section)
            if server.daemon:
                body["daemon"] = {"running": server.daemon.is_running}

        return JSONResponse(body)

    @staticmethod
    def _metrics_response():
        """Refresh metrics from the server status, then render the registry."""
        if _memory_server:
            metrics.update_from_status(_memory_server._get_status())

        from starlette.responses import Response

        return Response(
            content=metrics.registry.render(),
            media_type="text/plain; version=0.0.4; charset=utf-8",
        )
419
+
420
+
421
def create_app() -> Starlette:
    """Build a Starlette app with no routes or middleware, wired to ``lifespan``."""
    starlette_app = Starlette(routes=[], lifespan=lifespan, middleware=[])
    return starlette_app
428
+
429
+
430
def create_asgi_app():
    """Create ASGI app with MCP handling.

    Returns:
        An ASGI callable that forwards ``lifespan`` scopes to the Starlette
        app from ``create_app()`` and every other scope to ``MCPApp``.
    """
    starlette_app = create_app()
    # MCPApp keeps no per-request state, so one instance serves all requests
    # instead of allocating a new one for every call.
    mcp_app = MCPApp()

    async def app(scope: dict[str, Any], receive: Any, send: Any) -> None:
        if scope["type"] == "lifespan":
            await starlette_app(scope, receive, send)
        else:
            await mcp_app(scope, receive, send)

    return app
442
+
443
+
444
def main(host: str = DEFAULT_HOST, port: int = DEFAULT_PORT) -> None:
    """Parse ``--host`` / ``--port`` from the command line and run uvicorn.

    Args:
        host: Default bind host when ``--host`` is not given.
        port: Default bind port when ``--port`` is not given.
    """
    import argparse

    cli = argparse.ArgumentParser(description="OpenCode Memory HTTP Server")
    cli.add_argument("--host", default=host, help=f"Host to bind to (default: {host})")
    cli.add_argument("--port", type=int, default=port, help=f"Port to bind to (default: {port})")
    options = cli.parse_args()

    asgi_app = create_asgi_app()
    uvicorn.run(asgi_app, host=options.host, port=options.port, log_level="info")
461
+
462
+
463
# Script entry point: start the server only when run directly, not on import.
if __name__ == "__main__":
    main()
@@ -0,0 +1,7 @@
1
+ """Ingestion modules for opencode-memory."""
2
+
3
+ from opencode_memory.ingestion.embeddings import EmbeddingEngine
4
+ from opencode_memory.ingestion.parser import MarkdownParser
5
+ from opencode_memory.ingestion.watcher import FileWatcher
6
+
7
# Names re-exported from the ingestion package.
__all__ = [
    "EmbeddingEngine",
    "MarkdownParser",
    "FileWatcher",
]
@@ -0,0 +1,211 @@
1
+ """Local embedding generation using sentence-transformers.
2
+
3
+ All processing is done locally - no data is sent to any external service.
4
+ The model is downloaded once from HuggingFace and cached locally.
5
+
6
+ Uses ProcessPoolExecutor to avoid GIL blocking during CPU-intensive embedding.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import multiprocessing
13
+ import os
14
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
15
+ from typing import TYPE_CHECKING, Any
16
+
17
+ # Use 'spawn' instead of 'fork' to avoid deadlocks in multi-threaded processes
18
+ # This is the default on macOS and Windows, but not on Linux
19
# Ask for the "spawn" start method as soon as this module is imported.
# Without force, set_start_method() raises RuntimeError when a start method
# has already been fixed; in that case the existing choice is kept.
try:
    multiprocessing.set_start_method("spawn")
except RuntimeError:
    pass
24
+
25
+ if TYPE_CHECKING:
26
+ from sentence_transformers import SentenceTransformer
27
+
28
+ DEFAULT_EMBEDDING_DIM = 384
29
+ DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2"
30
+
31
+ _thread_executor: ThreadPoolExecutor | None = None
32
+ _process_executor: ProcessPoolExecutor | None = None
33
+ _subprocess_model: Any = None
34
+ _atexit_registered: bool = False
35
+
36
+
37
def _shutdown_executors() -> None:
    """Shut down both module-level executors without waiting and clear them.

    Registered as an atexit hook by the executor getters.
    """
    global _thread_executor, _process_executor

    thread_pool = _thread_executor
    if thread_pool is not None:
        thread_pool.shutdown(wait=False)
        _thread_executor = None

    process_pool = _process_executor
    if process_pool is not None:
        process_pool.shutdown(wait=False)
        _process_executor = None
46
+
47
+
48
def _get_thread_executor() -> ThreadPoolExecutor:
    """Return the shared thread pool, creating it on first use.

    Creating the pool also registers ``_shutdown_executors`` with atexit,
    unless that has already been done.
    """
    global _thread_executor, _atexit_registered

    if _thread_executor is not None:
        return _thread_executor

    _thread_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="embedding")
    if not _atexit_registered:
        import atexit

        atexit.register(_shutdown_executors)
        _atexit_registered = True
    return _thread_executor
59
+
60
+
61
def _get_process_executor() -> ProcessPoolExecutor:
    """Return the shared two-worker process pool, creating it on first use.

    Creating the pool also registers ``_shutdown_executors`` with atexit,
    unless that has already been done.
    """
    global _process_executor, _atexit_registered

    if _process_executor is not None:
        return _process_executor

    _process_executor = ProcessPoolExecutor(max_workers=2)
    if not _atexit_registered:
        import atexit

        atexit.register(_shutdown_executors)
        _atexit_registered = True
    return _process_executor
72
+
73
+
74
+ def _configure_thread_limits() -> None:
75
+ """Configure thread limits before any torch/numpy imports."""
76
+ num_threads = max(1, (os.cpu_count() or 4) // 4) # ~5-6 threads per worker
77
+ os.environ["OMP_NUM_THREADS"] = str(num_threads)
78
+ os.environ["MKL_NUM_THREADS"] = str(num_threads)
79
+ os.environ["OPENBLAS_NUM_THREADS"] = str(num_threads)
80
+ os.environ["VECLIB_MAXIMUM_THREADS"] = str(num_threads)
81
+ os.environ["NUMEXPR_NUM_THREADS"] = str(num_threads)
82
+
83
+
84
def _load_model(model_name: str) -> Any:
    """Load the sentence transformer model (deferred import).

    Sets the thread-limit and Hugging Face offline environment variables,
    then imports torch and sentence_transformers inside the function.

    Args:
        model_name: Model identifier passed to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    _configure_thread_limits()  # Must be before torch import
    # NOTE(review): offline mode is forced here, so this presumably relies on
    # the model already being in the local cache - confirm where the first
    # download happens.
    os.environ["HF_HUB_OFFLINE"] = "1"
    os.environ["TRANSFORMERS_OFFLINE"] = "1"

    import torch

    num_threads = max(1, (os.cpu_count() or 4) // 4)
    torch.set_num_threads(num_threads)
    try:
        torch.set_num_interop_threads(2)
    except RuntimeError:
        # PyTorch rejects this call once it has already been made or once
        # parallel work has started. A second load in the same process must
        # not fail for that reason; the existing setting stays in effect.
        pass

    from sentence_transformers import SentenceTransformer

    return SentenceTransformer(model_name)
99
+
100
+
101
def _embed_in_subprocess(text: str, model_name: str = DEFAULT_MODEL_NAME) -> list[float]:
    """Embed one text using the model cached in this process.

    The model is loaded on the first call. Later calls reuse the cached
    model and do not look at ``model_name`` again.

    Args:
        text: Text to embed.
        model_name: Model to load when no model is cached yet.

    Returns:
        The embedding converted with ``tolist()``.
    """
    global _subprocess_model

    model = _subprocess_model
    if model is None:
        model = _subprocess_model = _load_model(model_name)
    vector = model.encode(text, convert_to_numpy=True)
    return vector.tolist()
108
+
109
+
110
def _embed_batch_in_subprocess(
    texts: list[str], model_name: str = DEFAULT_MODEL_NAME
) -> list[list[float]]:
    """Embed a batch of texts using the model cached in this process.

    The model is loaded on the first call. Later calls reuse the cached
    model and do not look at ``model_name`` again.

    Args:
        texts: Texts to embed in a single ``encode`` call.
        model_name: Model to load when no model is cached yet.

    Returns:
        The embeddings converted with ``tolist()``.
    """
    global _subprocess_model

    model = _subprocess_model
    if model is None:
        model = _subprocess_model = _load_model(model_name)
    vectors = model.encode(texts, convert_to_numpy=True)
    return vectors.tolist()
119
+
120
+
121
class EmbeddingEngine:
    """Generate embeddings using a local sentence-transformers model.

    With ``use_process_pool=True`` (the default) every embedding call is
    submitted to the shared process pool, so the model is not loaded in the
    calling process. With ``use_process_pool=False`` the model is loaded
    lazily in this process through the ``model`` property.
    """

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME, use_process_pool: bool = True):
        """
        Args:
            model_name: Model identifier handed to the loader.
            use_process_pool: Run embeddings in the process pool when True,
                otherwise in this process.
        """
        self.model_name = model_name
        self._model: SentenceTransformer | None = None
        self._dimension: int | None = None
        self._use_process_pool = use_process_pool

    @property
    def model(self) -> SentenceTransformer:
        """Lazy-load the model in main process.

        Note: Prefer embed() which uses subprocess to avoid loading model
        in main process.
        """
        if self._model is None:
            loaded = _load_model(self.model_name)
            self._model = loaded
            # NOTE(review): the dimension accessor name appears to differ
            # between sentence-transformers releases; try the name this file
            # used first, then the older name - confirm against the pinned
            # version.
            dimension_getter = getattr(loaded, "get_embedding_dimension", None)
            if dimension_getter is None:
                dimension_getter = loaded.get_sentence_embedding_dimension
            self._dimension = dimension_getter()
        return self._model

    @property
    def dimension(self) -> int:
        """Get the embedding dimension without loading the model.

        Returns the dimension recorded when the model was loaded in this
        process, otherwise ``DEFAULT_EMBEDDING_DIM`` (384).
        """
        if self._dimension is not None:
            return self._dimension
        return DEFAULT_EMBEDDING_DIM

    def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text.

        Blocks until the result is available. When use_process_pool=True
        (default), runs in subprocess to avoid loading model in main process.
        """
        if self._use_process_pool:
            future = _get_process_executor().submit(_embed_in_subprocess, text, self.model_name)
            return future.result()

        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()  # type: ignore[no-any-return]

    def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts.

        Blocks until the result is available. When use_process_pool=True
        (default), runs in subprocess to avoid loading model in main process.
        """
        if self._use_process_pool:
            future = _get_process_executor().submit(
                _embed_batch_in_subprocess, texts, self.model_name
            )
            return future.result()

        embeddings = self.model.encode(texts, convert_to_numpy=True)
        return embeddings.tolist()  # type: ignore[no-any-return]

    async def embed_async(self, text: str) -> list[float]:
        """Generate embedding for a single text without blocking the event loop.

        Runs in the process pool when use_process_pool=True, otherwise runs
        ``embed`` in the thread pool.
        """
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        if self._use_process_pool:
            return await loop.run_in_executor(
                _get_process_executor(), _embed_in_subprocess, text, self.model_name
            )
        return await loop.run_in_executor(_get_thread_executor(), self.embed, text)

    async def embed_batch_async(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts without blocking the event loop.

        Runs in the process pool when use_process_pool=True, otherwise runs
        ``embed_batch`` in the thread pool.
        """
        loop = asyncio.get_running_loop()
        if self._use_process_pool:
            return await loop.run_in_executor(
                _get_process_executor(), _embed_batch_in_subprocess, texts, self.model_name
            )
        return await loop.run_in_executor(_get_thread_executor(), self.embed_batch, texts)