npm - claude-memory-agent - Versions diffs - 3.0.1 → 3.0.2 - Mend

claude-memory-agent 3.0.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/hooks/detect-correction.py +22 -18
package/install.py +1 -1
package/main.py +175 -5
package/mcp_server.py +68 -0
package/package.json +1 -1
package/run_server.py +26 -13
package/services/auth.py +5 -16
package/services/embedding_pipeline.py +1 -1
package/services/embeddings.py +7 -7
package/services/llm_analyzer.py +4 -4
package/services/response_manager.py +3 -6
package/start_daemon.py +50 -19

package/hooks/detect-correction.py CHANGED Viewed

@@ -17,7 +17,8 @@ import os
 import sys
 import json
 import re
-import requests
+import urllib.request
+import urllib.error
 from pathlib import Path
 # Configuration from environment
@@ -57,24 +58,27 @@ def get_session_id():
 def call_memory_agent(skill_id: str, params: dict) -> dict:
     """Call the memory agent API."""
     try:
-        response = requests.post(
-            f"{MEMORY_AGENT_URL}/a2a",
-            json={
-                "jsonrpc": "2.0",
-                "id": "correction-hook",
-                "method": "tasks/send",
-                "params": {
-                    "message": {"parts": [{"type": "text", "text": ""}]},
-                    "metadata": {
-                        "skill_id": skill_id,
-                        "params": params
-                    }
+        payload = json.dumps({
+            "jsonrpc": "2.0",
+            "id": "correction-hook",
+            "method": "tasks/send",
+            "params": {
+                "message": {"parts": [{"type": "text", "text": ""}]},
+                "metadata": {
+                    "skill_id": skill_id,
+                    "params": params
                 }
-            },
-            timeout=API_TIMEOUT
+            }
+        }).encode("utf-8")
+        req = urllib.request.Request(
+            f"{MEMORY_AGENT_URL}/a2a",
+            data=payload,
+            headers={"Content-Type": "application/json"},
+            method="POST"
         )
-        return response.json()
-    except:
+        with urllib.request.urlopen(req, timeout=API_TIMEOUT) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    except Exception:
         return None
 def detect_correction(text: str) -> tuple[bool, str]:
@@ -103,7 +107,7 @@ def main():
     # Read hook input from stdin
     try:
         hook_input = json.load(sys.stdin)
-    except:
+    except (json.JSONDecodeError, ValueError, EOFError):
         sys.exit(0)
     # Get user message

package/install.py CHANGED Viewed

@@ -964,7 +964,7 @@ def main():
     else:
         try:
             subprocess.run(
-                [sys.executable, str(AGENT_DIR / "memory-agent"), "start"],
+                [sys.executable, str(AGENT_DIR / "main.py")],
                 cwd=str(AGENT_DIR),
                 timeout=30
             )

package/main.py CHANGED Viewed

@@ -329,7 +329,7 @@ async def lifespan(app: FastAPI):
         from services.terminal_ui import print_splash, setup_rich_logging
         print_splash(
-            version="2.4.0",
+            version="3.0.1",
             port=int(os.getenv("PORT", 8102)),
             auth_enabled=auth_stats.get("enabled", False),
             auth_keys=auth_stats.get("active_keys", 0),
@@ -348,7 +348,7 @@ async def lifespan(app: FastAPI):
     except ImportError:
         # Fallback to plain output if rich unavailable
-        print(f"Memory Agent v2.4.0 (CLaRa) started on port {os.getenv('PORT', 8102)}")
+        print(f"Memory Agent v3.0.1 (CLaRa) started on port {os.getenv('PORT', 8102)}")
         if auth_stats.get("enabled"):
             print(f"Authentication: ENABLED ({auth_stats.get('active_keys', 0)} active keys)")
         else:
@@ -375,7 +375,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="Claude Memory Agent",
     description="Persistent semantic memory for Claude Code sessions with cross-project support",
-    version="2.4.0",
+    version="3.0.1",
     lifespan=lifespan
 )
@@ -2141,6 +2141,176 @@ async def api_get_timeline(
         return {"success": False, "error": str(e), "events": []}
+# ---------------------------------------------------------------------------
+# REST write endpoints (POST/DELETE) for memories, patterns, timeline
+# These allow the dashboard and external tools to create/delete data
+# without going through the skill dispatch system.
+# ---------------------------------------------------------------------------
+@app.post("/api/memories")
+async def api_create_memory(request: Request):
+    """Create a new memory via REST API."""
+    try:
+        body = await request.json()
+        content = body.get("content")
+        if not content:
+            return {"success": False, "error": "content is required"}
+        result = await store_memory(
+            db=db,
+            embeddings=embeddings,
+            content=content,
+            memory_type=body.get("type", "chunk"),
+            metadata=body.get("metadata"),
+            session_id=body.get("session_id"),
+            project_path=body.get("project_path"),
+            project_name=body.get("project_name"),
+            project_type=body.get("project_type"),
+            tech_stack=body.get("tech_stack"),
+            agent_type=body.get("agent_type"),
+            tags=body.get("tags"),
+            importance=body.get("importance", 5),
+            confidence=body.get("confidence", 0.5),
+            outcome=body.get("outcome"),
+            success=body.get("success"),
+        )
+        try:
+            await broadcast_event(
+                EventTypes.MEMORY_STORED,
+                {"memory_id": result.get("memory_id"), "type": body.get("type", "chunk")},
+                body.get("project_path")
+            )
+        except Exception:
+            pass
+        return result
+    except Exception as e:
+        logger.error(f"Failed to create memory: {e}")
+        return {"success": False, "error": str(e)}
+@app.delete("/api/memory/{memory_id}")
+async def api_delete_memory(memory_id: str):
+    """Delete a memory by ID."""
+    try:
+        existing = await db.execute_query(
+            "SELECT id FROM memories WHERE id = ?", [memory_id]
+        )
+        if not existing:
+            return {"success": False, "error": "Memory not found"}
+        await db.execute_write("DELETE FROM memories WHERE id = ?", [memory_id])
+        try:
+            await broadcast_event(
+                EventTypes.MEMORY_STORED,
+                {"memory_id": memory_id, "action": "deleted"},
+                None
+            )
+        except Exception:
+            pass
+        return {"success": True, "deleted": memory_id}
+    except Exception as e:
+        logger.error(f"Failed to delete memory: {e}")
+        return {"success": False, "error": str(e)}
+@app.post("/api/patterns")
+async def api_create_pattern(request: Request):
+    """Create a new solution pattern via REST API."""
+    try:
+        body = await request.json()
+        name = body.get("name")
+        solution = body.get("solution")
+        if not name or not solution:
+            return {"success": False, "error": "name and solution are required"}
+        result = await store_pattern(
+            db=db,
+            embeddings=embeddings,
+            name=name,
+            solution=solution,
+            problem_type=body.get("problem_type"),
+            tech_context=body.get("tech_context"),
+            metadata=body.get("metadata"),
+        )
+        return result
+    except Exception as e:
+        logger.error(f"Failed to create pattern: {e}")
+        return {"success": False, "error": str(e)}
+@app.post("/api/timeline")
+async def api_create_timeline_event(request: Request):
+    """Create a timeline event via REST API."""
+    try:
+        body = await request.json()
+        summary = body.get("summary")
+        if not summary:
+            return {"success": False, "error": "summary is required"}
+        result = await timeline_log(
+            db=db,
+            embeddings=embeddings,
+            session_id=body.get("session_id", str(uuid.uuid4())),
+            event_type=body.get("event_type", "observation"),
+            summary=summary,
+            details=body.get("details"),
+            project_path=body.get("project_path"),
+            parent_event_id=body.get("parent_event_id"),
+            root_event_id=body.get("root_event_id"),
+            entities=body.get("entities"),
+            status=body.get("status", "completed"),
+            outcome=body.get("outcome"),
+            confidence=body.get("confidence"),
+            is_anchor=body.get("is_anchor", False),
+        )
+        try:
+            await broadcast_event(
+                EventTypes.TIMELINE_LOGGED,
+                {"event_id": result.get("event_id"), "event_type": body.get("event_type", "observation")},
+                body.get("project_path")
+            )
+        except Exception:
+            pass
+        return result
+    except Exception as e:
+        logger.error(f"Failed to create timeline event: {e}")
+        return {"success": False, "error": str(e)}
+@app.delete("/api/timeline/{event_id}")
+async def api_delete_timeline_event(event_id: str):
+    """Delete a timeline event by ID."""
+    try:
+        existing = await db.execute_query(
+            "SELECT id FROM timeline_events WHERE id = ?", [event_id]
+        )
+        if not existing:
+            return {"success": False, "error": "Timeline event not found"}
+        await db.execute_write("DELETE FROM timeline_events WHERE id = ?", [event_id])
+        return {"success": True, "deleted": event_id}
+    except Exception as e:
+        logger.error(f"Failed to delete timeline event: {e}")
+        return {"success": False, "error": str(e)}
+@app.delete("/api/pattern/{pattern_id}")
+async def api_delete_pattern(pattern_id: str):
+    """Delete a pattern by ID."""
+    try:
+        existing = await db.execute_query(
+            "SELECT id FROM patterns WHERE id = ?", [pattern_id]
+        )
+        if not existing:
+            return {"success": False, "error": "Pattern not found"}
+        await db.execute_write("DELETE FROM patterns WHERE id = ?", [pattern_id])
+        return {"success": True, "deleted": pattern_id}
+    except Exception as e:
+        logger.error(f"Failed to delete pattern: {e}")
+        return {"success": False, "error": str(e)}
 @app.get("/dashboard")
 async def serve_dashboard():
     """Serve the monitoring dashboard."""
@@ -2680,12 +2850,12 @@ async def health_check():
     return {
         "status": status,
-        "version": "2.0.0",
+        "version": "3.0.1",
         "timestamp": datetime.now().isoformat(),
         "components": {
             "agent": {
                 "healthy": True,
-                "version": "2.0.0"
+                "version": "3.0.1"
             },
             "database": {
                 "healthy": db_healthy,

package/mcp_server.py CHANGED Viewed

@@ -40,6 +40,7 @@ if AGENT_DIR not in sys.path:
 # ── Imports ─────────────────────────────────────────────────────────────
 import json
+import uuid
 from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
@@ -63,6 +64,7 @@ from config import config
 # Direct skill imports - no HTTP, no FastAPI dependency
 from skills.store import store_memory, store_project, store_pattern
 from skills.search import semantic_search, search_patterns, get_project_context
+from skills.timeline import timeline_log
 # ── Lifespan: DB + Embeddings initialization ───────────────────────────
@@ -159,6 +161,29 @@ async def memory_store(
         tech_stack=tech_stack,
         agent_type=agent_type,
     )
+    # Auto-create a timeline event for every stored memory
+    try:
+        event_type_map = {
+            "decision": "decision",
+            "error": "error",
+            "code": "action",
+            "session": "checkpoint",
+            "preference": "observation",
+            "chunk": "observation",
+        }
+        await timeline_log(
+            db=app.db,
+            embeddings=app.embeddings,
+            session_id=str(uuid.uuid4()),
+            event_type=event_type_map.get(memory_type, "observation"),
+            summary=content[:200],
+            details=content if len(content) > 200 else None,
+            project_path=project_path,
+        )
+    except Exception as e:
+        logger.debug(f"Timeline piggyback failed (non-fatal): {e}")
     return json.dumps(result, default=str)
@@ -367,6 +392,49 @@ async def memory_context(
     return json.dumps(result, default=str)
+@mcp_server.tool()
+async def memory_timeline_log(
+    ctx: Context,
+    summary: str,
+    event_type: str = "observation",
+    details: Optional[str] = None,
+    project_path: Optional[str] = None,
+    session_id: Optional[str] = None,
+    status: str = "completed",
+    outcome: Optional[str] = None,
+    is_anchor: bool = False,
+) -> str:
+    """Log an event to the session timeline.
+    Use this to record significant events: decisions made, errors encountered,
+    actions taken, or observations during a session.
+    Args:
+        summary: Brief description of the event (<200 chars)
+        event_type: Type: user_request, clarification, action, decision, observation, error, checkpoint
+        details: Full context (optional, for longer descriptions)
+        project_path: Project path
+        session_id: Session identifier (auto-generated if omitted)
+        status: Event status: completed, in_progress, failed, reverted
+        outcome: Result description
+        is_anchor: Mark as verified/anchor fact
+    """
+    app = _get_app(ctx)
+    result = await timeline_log(
+        db=app.db,
+        embeddings=app.embeddings,
+        session_id=session_id or str(uuid.uuid4()),
+        event_type=event_type,
+        summary=summary,
+        details=details,
+        project_path=project_path,
+        status=status,
+        outcome=outcome,
+        is_anchor=is_anchor,
+    )
+    return json.dumps(result, default=str)
 @mcp_server.tool()
 async def memory_stats(ctx: Context) -> str:
     """Get memory statistics including total memories, database size, and breakdown by type."""

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-memory-agent",
-  "version": "3.0.1",
+  "version": "3.0.2",
   "description": "Persistent semantic memory system for Claude Code sessions with anti-hallucination grounding",
   "keywords": [
     "claude",

package/run_server.py CHANGED Viewed

@@ -1,6 +1,8 @@
 """Run the memory agent server (for background/production use).
-Uses Windows file locking (msvcrt.locking) for a true process mutex.
+Uses file locking for a true process mutex:
+- Windows: msvcrt.locking()
+- macOS/Linux: fcntl.flock()
 The lock is held for the entire lifetime of the server, ensuring only
 one instance can run at a time.
 """
@@ -9,9 +11,12 @@ import sys
 import time
 import atexit
 import signal
+import platform
 import uvicorn
 from dotenv import load_dotenv
+IS_WINDOWS = platform.system() == "Windows"
 load_dotenv()
 AGENT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -31,26 +36,30 @@ def is_port_in_use(port: int) -> bool:
 def acquire_server_lock() -> bool:
-    """Acquire exclusive server lock using Windows file locking.
+    """Acquire exclusive server lock using platform-appropriate file locking.
+    - Windows: msvcrt.locking() with LK_NBLCK for non-blocking exclusive lock
+    - macOS/Linux: fcntl.flock() with LOCK_EX | LOCK_NB for non-blocking exclusive lock
-    This uses msvcrt.locking() which provides mandatory file locking on Windows.
     The lock is held as long as the file handle remains open.
     """
     global _lock_handle
-    import msvcrt
     my_pid = os.getpid()
     try:
         # Open file for read/write, create if doesn't exist
-        # Using os.open to get a file descriptor for msvcrt.locking
         _lock_handle = open(LOCK_FILE, 'w+')
-        # Try to acquire exclusive lock (non-blocking)
-        # msvcrt.LK_NBLCK = non-blocking exclusive lock
+        # Try to acquire exclusive lock (non-blocking), platform-specific
         try:
-            msvcrt.locking(_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
-        except (IOError, OSError) as e:
+            if IS_WINDOWS:
+                import msvcrt
+                msvcrt.locking(_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
+            else:
+                import fcntl
+                fcntl.flock(_lock_handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except (IOError, OSError):
             # Lock is held by another process
             print(f"[MUTEX] Cannot acquire lock - another instance is running")
             _lock_handle.close()
@@ -86,13 +95,17 @@ def acquire_server_lock() -> bool:
 def release_server_lock():
     """Release the server lock on exit."""
     global _lock_handle
-    import msvcrt
     try:
         if _lock_handle:
             try:
-                # Unlock the file
-                msvcrt.locking(_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
+                # Unlock the file, platform-specific
+                if IS_WINDOWS:
+                    import msvcrt
+                    msvcrt.locking(_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
+                else:
+                    import fcntl
+                    fcntl.flock(_lock_handle.fileno(), fcntl.LOCK_UN)
             except:
                 pass
             _lock_handle.close()
@@ -129,7 +142,7 @@ if __name__ == "__main__":
     # Note: The lock is held because _lock_handle stays open
     uvicorn.run(
         "main:app",
-        host=os.getenv("HOST", "0.0.0.0"),
+        host=os.getenv("HOST", "127.0.0.1"),
         port=PORT,
         reload=False,
         log_level="warning"

package/services/auth.py CHANGED Viewed

@@ -26,6 +26,8 @@ DEFAULT_RATE_LIMIT = int(os.getenv("AUTH_RATE_LIMIT", "100"))  # requests per mi
 RATE_LIMIT_WINDOW = int(os.getenv("AUTH_RATE_WINDOW", "60"))  # seconds
 # Endpoints that don't require authentication
+# This is a local-only tool, so all API endpoints are exempt by default.
+# When AUTH_ENABLED=true, only /skills/call and /tasks/send require a key.
 EXEMPT_ENDPOINTS = [
     "/health",
     "/health/live",
@@ -33,24 +35,11 @@ EXEMPT_ENDPOINTS = [
     "/.well-known/agent.json",
     "/docs",
     "/openapi.json",
-    "/api/auth/stats",  # Allow checking auth status without key
-    "/dashboard",  # Dashboard needs initial access
+    "/dashboard",
     "/favicon.ico",
-    # Dashboard API endpoints
-    "/api/stats",
-    "/api/projects",
-    "/api/agents",
-    "/api/mcps",
-    "/api/hooks",
-    "/api/sessions",
     "/ws",  # WebSocket
-    "/a2a",  # Agent-to-Agent protocol (dashboard uses this)
-    "/api/project/",  # Project config endpoints
-    # Automation endpoints
-    "/api/inject",
-    "/api/memory/natural",
-    "/api/memory/",  # Covers confidence, verify, outdated
-    "/api/claude-md",
+    "/a2a",  # Agent-to-Agent protocol
+    "/api/",  # All dashboard and REST API endpoints
 ]

package/services/embedding_pipeline.py CHANGED Viewed

@@ -81,7 +81,7 @@ class EmbeddingCache:
             'hits': self._hits,
             'misses': self._misses,
             'hit_rate': round(self._hits / total, 4) if total > 0 else 0.0,
-            'estimated_memory_mb': round(len(self._cache) * 768 * 4 / 1024 / 1024, 2)
+            'estimated_memory_mb': round(len(self._cache) * config.get("EMBEDDING_DIM", 1024) * 4 / 1024 / 1024, 2)
         }

package/services/embeddings.py CHANGED Viewed

@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
 load_dotenv()
 OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
-DEFAULT_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")
+DEFAULT_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")  # Ollama default; sentence-transformers uses config.py
 HEALTH_CHECK_TIMEOUT = float(os.getenv("OLLAMA_HEALTH_TIMEOUT", "5.0"))
 HEALTH_CACHE_TTL = float(os.getenv("OLLAMA_HEALTH_CACHE_TTL", "30.0"))
@@ -178,7 +178,7 @@ class SentenceTransformerProvider(EmbeddingProvider):
             )
         self.model_name = model
-        self._model = SentenceTransformer(model, trust_remote_code=True)
+        self._model = SentenceTransformer(model, trust_remote_code=False)
         self._dimension = self._model.get_sentence_embedding_dimension()
     def embed(self, text: str) -> List[float]:
@@ -333,7 +333,7 @@ class EmbeddingService:
         start_time = time.time()
         try:
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
             health_result = await asyncio.wait_for(
                 loop.run_in_executor(None, self._provider.check_health),
@@ -447,7 +447,7 @@ class EmbeddingService:
                 )
         try:
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
             def _embed():
                 return self._provider.embed(text)
@@ -527,7 +527,7 @@ class EmbeddingService:
                 )
         try:
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
             def _embed():
                 return self._provider.embed(text)
@@ -618,7 +618,7 @@ class EmbeddingService:
         # sentence-transformers has efficient native batching
         if self.provider_type == "sentence-transformers":
             try:
-                loop = asyncio.get_event_loop()
+                loop = asyncio.get_running_loop()
                 def _batch_embed():
                     return self._provider.embed_batch(texts)
@@ -712,7 +712,7 @@ class EmbeddingService:
             return self._available_models
         try:
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
             provider: OllamaProvider = self._provider  # type: ignore[assignment]
             models = await loop.run_in_executor(None, provider.client.list)
             model_names = [

package/services/llm_analyzer.py CHANGED Viewed

@@ -143,7 +143,7 @@ class LLMAnalyzer:
             return not self._degraded_mode
         try:
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
             await asyncio.wait_for(
                 loop.run_in_executor(None, lambda: self.client.list()),
                 timeout=2.0
@@ -281,7 +281,7 @@ Rules:
 - Only include meaningful, actionable items"""
         try:
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
             def _generate():
                 return self.client.generate(
@@ -409,7 +409,7 @@ Return JSON only:
 {{"has_contradiction": true/false, "conflicting_fact": "the fact it conflicts with or null", "reason": "brief explanation or null", "confidence": 0.0-1.0}}"""
             try:
-                loop = asyncio.get_event_loop()
+                loop = asyncio.get_running_loop()
                 def _generate():
                     return self.client.generate(
@@ -493,7 +493,7 @@ Recent events:
 Write a brief summary focusing on: what's being worked on, key decisions made, current status."""
             try:
-                loop = asyncio.get_event_loop()
+                loop = asyncio.get_running_loop()
                 def _generate():
                     return self.client.generate(

package/services/response_manager.py CHANGED Viewed

@@ -127,23 +127,20 @@ def fit_response(
     if len(output) <= max_chars:
         return _with_meta(output, working, level, max_chars)
-    # Level 5: emergency hard truncation
+    # Level 5: emergency hard truncation — return valid JSON
     level = 5
     logger.warning(
         "Response required emergency truncation: %d -> %d chars",
         len(output), max_chars,
     )
-    output = output[:max_chars - 100]
-    # Append a valid JSON suffix with metadata
-    meta = json.dumps({
+    return json.dumps({
         "_response_meta": {
             "degradation_level": level,
             "truncated": True,
-            "original_chars": _json_size(data, indent=None),
+            "original_chars": len(output),
             "note": "Response was emergency-truncated. Use specific queries to retrieve full data.",
         }
     })
-    return output + "\n" + meta
 def _with_meta(

package/start_daemon.py CHANGED Viewed

@@ -1,13 +1,22 @@
-"""Start the memory agent as a proper background daemon on Windows.
+"""Start the memory agent as a proper background daemon.
-Uses msvcrt.locking() for a true Windows mutex to prevent multiple
-simultaneous startup attempts. The server itself has its own mutex.
+Uses platform-appropriate file locking to prevent multiple simultaneous
+startup attempts. The server itself has its own mutex.
+- Windows: msvcrt.locking()
+- macOS/Linux: fcntl.flock()
 """
 import subprocess
 import sys
 import os
 import time
-import msvcrt
+import platform
+IS_WINDOWS = platform.system() == "Windows"
+if IS_WINDOWS:
+    import msvcrt
+else:
+    import fcntl
 AGENT_DIR = os.path.dirname(os.path.abspath(__file__))
 LOG_FILE = os.path.join(AGENT_DIR, "memory-agent.log")
@@ -19,10 +28,13 @@ _startup_lock_handle = None
 def acquire_startup_lock() -> bool:
-    """Acquire startup mutex using Windows file locking (msvcrt.locking).
+    """Acquire startup mutex using platform-appropriate file locking.
     This prevents multiple hooks from trying to start the agent simultaneously.
     The lock is held until release_startup_lock() is called.
+    Windows: msvcrt.locking() with LK_NBLCK
+    macOS/Linux: fcntl.flock() with LOCK_EX | LOCK_NB
     """
     global _startup_lock_handle
@@ -32,7 +44,10 @@ def acquire_startup_lock() -> bool:
         # Try non-blocking exclusive lock
         try:
-            msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
+            if IS_WINDOWS:
+                msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
+            else:
+                fcntl.flock(_startup_lock_handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
         except (IOError, OSError):
             # Lock held by another process - they're already starting the agent
             _startup_lock_handle.close()
@@ -64,7 +79,10 @@ def release_startup_lock():
     try:
         if _startup_lock_handle:
             try:
-                msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
+                if IS_WINDOWS:
+                    msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
+                else:
+                    fcntl.flock(_startup_lock_handle.fileno(), fcntl.LOCK_UN)
             except:
                 pass
             _startup_lock_handle.close()
@@ -76,10 +94,12 @@ def release_startup_lock():
 def is_running():
     """Check if agent is already running via health endpoint."""
     try:
-        import requests
+        from urllib.request import urlopen, Request
+        from urllib.error import URLError
         url = os.getenv("MEMORY_AGENT_URL", "http://localhost:8102")
-        r = requests.get(f"{url}/health", timeout=2)
-        return r.status_code == 200
+        req = Request(f"{url}/health")
+        response = urlopen(req, timeout=2)
+        return response.status == 200
     except Exception:
         return False
@@ -115,19 +135,30 @@ def start_daemon():
         return False
     try:
-        # Windows-specific flags for detached process
-        DETACHED_PROCESS = 0x00000008
-        CREATE_NO_WINDOW = 0x08000000
-        CREATE_NEW_PROCESS_GROUP = 0x00000200
         with open(LOG_FILE, "w") as log:
-            proc = subprocess.Popen(
-                [sys.executable, "run_server.py"],
+            popen_kwargs = dict(
                 cwd=AGENT_DIR,
                 stdout=log,
                 stderr=subprocess.STDOUT,
-                creationflags=DETACHED_PROCESS | CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP,
-                close_fds=True
+            )
+            if IS_WINDOWS:
+                # Windows-specific flags for detached process
+                DETACHED_PROCESS = 0x00000008
+                CREATE_NO_WINDOW = 0x08000000
+                CREATE_NEW_PROCESS_GROUP = 0x00000200
+                popen_kwargs["creationflags"] = (
+                    DETACHED_PROCESS | CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP
+                )
+                popen_kwargs["close_fds"] = True
+            else:
+                # Unix: start in a new session so the process is detached
+                popen_kwargs["start_new_session"] = True
+                popen_kwargs["close_fds"] = True
+            proc = subprocess.Popen(
+                [sys.executable, "run_server.py"],
+                **popen_kwargs
             )
             # Save PID for future reference