claude-memory-agent 3.0.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/detect-correction.py +22 -18
- package/install.py +1 -1
- package/main.py +175 -5
- package/mcp_server.py +68 -0
- package/package.json +1 -1
- package/run_server.py +26 -13
- package/services/auth.py +5 -16
- package/services/embedding_pipeline.py +1 -1
- package/services/embeddings.py +7 -7
- package/services/llm_analyzer.py +4 -4
- package/services/response_manager.py +3 -6
- package/start_daemon.py +50 -19
|
@@ -17,7 +17,8 @@ import os
|
|
|
17
17
|
import sys
|
|
18
18
|
import json
|
|
19
19
|
import re
|
|
20
|
-
import
|
|
20
|
+
import urllib.request
|
|
21
|
+
import urllib.error
|
|
21
22
|
from pathlib import Path
|
|
22
23
|
|
|
23
24
|
# Configuration from environment
|
|
@@ -57,24 +58,27 @@ def get_session_id():
|
|
|
57
58
|
def call_memory_agent(skill_id: str, params: dict) -> dict:
|
|
58
59
|
"""Call the memory agent API."""
|
|
59
60
|
try:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
"
|
|
66
|
-
"
|
|
67
|
-
"
|
|
68
|
-
"
|
|
69
|
-
"skill_id": skill_id,
|
|
70
|
-
"params": params
|
|
71
|
-
}
|
|
61
|
+
payload = json.dumps({
|
|
62
|
+
"jsonrpc": "2.0",
|
|
63
|
+
"id": "correction-hook",
|
|
64
|
+
"method": "tasks/send",
|
|
65
|
+
"params": {
|
|
66
|
+
"message": {"parts": [{"type": "text", "text": ""}]},
|
|
67
|
+
"metadata": {
|
|
68
|
+
"skill_id": skill_id,
|
|
69
|
+
"params": params
|
|
72
70
|
}
|
|
73
|
-
}
|
|
74
|
-
|
|
71
|
+
}
|
|
72
|
+
}).encode("utf-8")
|
|
73
|
+
req = urllib.request.Request(
|
|
74
|
+
f"{MEMORY_AGENT_URL}/a2a",
|
|
75
|
+
data=payload,
|
|
76
|
+
headers={"Content-Type": "application/json"},
|
|
77
|
+
method="POST"
|
|
75
78
|
)
|
|
76
|
-
|
|
77
|
-
|
|
79
|
+
with urllib.request.urlopen(req, timeout=API_TIMEOUT) as resp:
|
|
80
|
+
return json.loads(resp.read().decode("utf-8"))
|
|
81
|
+
except Exception:
|
|
78
82
|
return None
|
|
79
83
|
|
|
80
84
|
def detect_correction(text: str) -> tuple[bool, str]:
|
|
@@ -103,7 +107,7 @@ def main():
|
|
|
103
107
|
# Read hook input from stdin
|
|
104
108
|
try:
|
|
105
109
|
hook_input = json.load(sys.stdin)
|
|
106
|
-
except:
|
|
110
|
+
except (json.JSONDecodeError, ValueError, EOFError):
|
|
107
111
|
sys.exit(0)
|
|
108
112
|
|
|
109
113
|
# Get user message
|
package/install.py
CHANGED
package/main.py
CHANGED
|
@@ -329,7 +329,7 @@ async def lifespan(app: FastAPI):
|
|
|
329
329
|
from services.terminal_ui import print_splash, setup_rich_logging
|
|
330
330
|
|
|
331
331
|
print_splash(
|
|
332
|
-
version="
|
|
332
|
+
version="3.0.1",
|
|
333
333
|
port=int(os.getenv("PORT", 8102)),
|
|
334
334
|
auth_enabled=auth_stats.get("enabled", False),
|
|
335
335
|
auth_keys=auth_stats.get("active_keys", 0),
|
|
@@ -348,7 +348,7 @@ async def lifespan(app: FastAPI):
|
|
|
348
348
|
|
|
349
349
|
except ImportError:
|
|
350
350
|
# Fallback to plain output if rich unavailable
|
|
351
|
-
print(f"Memory Agent
|
|
351
|
+
print(f"Memory Agent v3.0.1 (CLaRa) started on port {os.getenv('PORT', 8102)}")
|
|
352
352
|
if auth_stats.get("enabled"):
|
|
353
353
|
print(f"Authentication: ENABLED ({auth_stats.get('active_keys', 0)} active keys)")
|
|
354
354
|
else:
|
|
@@ -375,7 +375,7 @@ async def lifespan(app: FastAPI):
|
|
|
375
375
|
app = FastAPI(
|
|
376
376
|
title="Claude Memory Agent",
|
|
377
377
|
description="Persistent semantic memory for Claude Code sessions with cross-project support",
|
|
378
|
-
version="
|
|
378
|
+
version="3.0.1",
|
|
379
379
|
lifespan=lifespan
|
|
380
380
|
)
|
|
381
381
|
|
|
@@ -2141,6 +2141,176 @@ async def api_get_timeline(
|
|
|
2141
2141
|
return {"success": False, "error": str(e), "events": []}
|
|
2142
2142
|
|
|
2143
2143
|
|
|
2144
|
+
# ---------------------------------------------------------------------------
|
|
2145
|
+
# REST write endpoints (POST/DELETE) for memories, patterns, timeline
|
|
2146
|
+
# These allow the dashboard and external tools to create/delete data
|
|
2147
|
+
# without going through the skill dispatch system.
|
|
2148
|
+
# ---------------------------------------------------------------------------
|
|
2149
|
+
|
|
2150
|
+
@app.post("/api/memories")
|
|
2151
|
+
async def api_create_memory(request: Request):
|
|
2152
|
+
"""Create a new memory via REST API."""
|
|
2153
|
+
try:
|
|
2154
|
+
body = await request.json()
|
|
2155
|
+
content = body.get("content")
|
|
2156
|
+
if not content:
|
|
2157
|
+
return {"success": False, "error": "content is required"}
|
|
2158
|
+
|
|
2159
|
+
result = await store_memory(
|
|
2160
|
+
db=db,
|
|
2161
|
+
embeddings=embeddings,
|
|
2162
|
+
content=content,
|
|
2163
|
+
memory_type=body.get("type", "chunk"),
|
|
2164
|
+
metadata=body.get("metadata"),
|
|
2165
|
+
session_id=body.get("session_id"),
|
|
2166
|
+
project_path=body.get("project_path"),
|
|
2167
|
+
project_name=body.get("project_name"),
|
|
2168
|
+
project_type=body.get("project_type"),
|
|
2169
|
+
tech_stack=body.get("tech_stack"),
|
|
2170
|
+
agent_type=body.get("agent_type"),
|
|
2171
|
+
tags=body.get("tags"),
|
|
2172
|
+
importance=body.get("importance", 5),
|
|
2173
|
+
confidence=body.get("confidence", 0.5),
|
|
2174
|
+
outcome=body.get("outcome"),
|
|
2175
|
+
success=body.get("success"),
|
|
2176
|
+
)
|
|
2177
|
+
try:
|
|
2178
|
+
await broadcast_event(
|
|
2179
|
+
EventTypes.MEMORY_STORED,
|
|
2180
|
+
{"memory_id": result.get("memory_id"), "type": body.get("type", "chunk")},
|
|
2181
|
+
body.get("project_path")
|
|
2182
|
+
)
|
|
2183
|
+
except Exception:
|
|
2184
|
+
pass
|
|
2185
|
+
return result
|
|
2186
|
+
except Exception as e:
|
|
2187
|
+
logger.error(f"Failed to create memory: {e}")
|
|
2188
|
+
return {"success": False, "error": str(e)}
|
|
2189
|
+
|
|
2190
|
+
|
|
2191
|
+
@app.delete("/api/memory/{memory_id}")
|
|
2192
|
+
async def api_delete_memory(memory_id: str):
|
|
2193
|
+
"""Delete a memory by ID."""
|
|
2194
|
+
try:
|
|
2195
|
+
existing = await db.execute_query(
|
|
2196
|
+
"SELECT id FROM memories WHERE id = ?", [memory_id]
|
|
2197
|
+
)
|
|
2198
|
+
if not existing:
|
|
2199
|
+
return {"success": False, "error": "Memory not found"}
|
|
2200
|
+
|
|
2201
|
+
await db.execute_write("DELETE FROM memories WHERE id = ?", [memory_id])
|
|
2202
|
+
try:
|
|
2203
|
+
await broadcast_event(
|
|
2204
|
+
EventTypes.MEMORY_STORED,
|
|
2205
|
+
{"memory_id": memory_id, "action": "deleted"},
|
|
2206
|
+
None
|
|
2207
|
+
)
|
|
2208
|
+
except Exception:
|
|
2209
|
+
pass
|
|
2210
|
+
return {"success": True, "deleted": memory_id}
|
|
2211
|
+
except Exception as e:
|
|
2212
|
+
logger.error(f"Failed to delete memory: {e}")
|
|
2213
|
+
return {"success": False, "error": str(e)}
|
|
2214
|
+
|
|
2215
|
+
|
|
2216
|
+
@app.post("/api/patterns")
|
|
2217
|
+
async def api_create_pattern(request: Request):
|
|
2218
|
+
"""Create a new solution pattern via REST API."""
|
|
2219
|
+
try:
|
|
2220
|
+
body = await request.json()
|
|
2221
|
+
name = body.get("name")
|
|
2222
|
+
solution = body.get("solution")
|
|
2223
|
+
if not name or not solution:
|
|
2224
|
+
return {"success": False, "error": "name and solution are required"}
|
|
2225
|
+
|
|
2226
|
+
result = await store_pattern(
|
|
2227
|
+
db=db,
|
|
2228
|
+
embeddings=embeddings,
|
|
2229
|
+
name=name,
|
|
2230
|
+
solution=solution,
|
|
2231
|
+
problem_type=body.get("problem_type"),
|
|
2232
|
+
tech_context=body.get("tech_context"),
|
|
2233
|
+
metadata=body.get("metadata"),
|
|
2234
|
+
)
|
|
2235
|
+
return result
|
|
2236
|
+
except Exception as e:
|
|
2237
|
+
logger.error(f"Failed to create pattern: {e}")
|
|
2238
|
+
return {"success": False, "error": str(e)}
|
|
2239
|
+
|
|
2240
|
+
|
|
2241
|
+
@app.post("/api/timeline")
|
|
2242
|
+
async def api_create_timeline_event(request: Request):
|
|
2243
|
+
"""Create a timeline event via REST API."""
|
|
2244
|
+
try:
|
|
2245
|
+
body = await request.json()
|
|
2246
|
+
summary = body.get("summary")
|
|
2247
|
+
if not summary:
|
|
2248
|
+
return {"success": False, "error": "summary is required"}
|
|
2249
|
+
|
|
2250
|
+
result = await timeline_log(
|
|
2251
|
+
db=db,
|
|
2252
|
+
embeddings=embeddings,
|
|
2253
|
+
session_id=body.get("session_id", str(uuid.uuid4())),
|
|
2254
|
+
event_type=body.get("event_type", "observation"),
|
|
2255
|
+
summary=summary,
|
|
2256
|
+
details=body.get("details"),
|
|
2257
|
+
project_path=body.get("project_path"),
|
|
2258
|
+
parent_event_id=body.get("parent_event_id"),
|
|
2259
|
+
root_event_id=body.get("root_event_id"),
|
|
2260
|
+
entities=body.get("entities"),
|
|
2261
|
+
status=body.get("status", "completed"),
|
|
2262
|
+
outcome=body.get("outcome"),
|
|
2263
|
+
confidence=body.get("confidence"),
|
|
2264
|
+
is_anchor=body.get("is_anchor", False),
|
|
2265
|
+
)
|
|
2266
|
+
try:
|
|
2267
|
+
await broadcast_event(
|
|
2268
|
+
EventTypes.TIMELINE_LOGGED,
|
|
2269
|
+
{"event_id": result.get("event_id"), "event_type": body.get("event_type", "observation")},
|
|
2270
|
+
body.get("project_path")
|
|
2271
|
+
)
|
|
2272
|
+
except Exception:
|
|
2273
|
+
pass
|
|
2274
|
+
return result
|
|
2275
|
+
except Exception as e:
|
|
2276
|
+
logger.error(f"Failed to create timeline event: {e}")
|
|
2277
|
+
return {"success": False, "error": str(e)}
|
|
2278
|
+
|
|
2279
|
+
|
|
2280
|
+
@app.delete("/api/timeline/{event_id}")
|
|
2281
|
+
async def api_delete_timeline_event(event_id: str):
|
|
2282
|
+
"""Delete a timeline event by ID."""
|
|
2283
|
+
try:
|
|
2284
|
+
existing = await db.execute_query(
|
|
2285
|
+
"SELECT id FROM timeline_events WHERE id = ?", [event_id]
|
|
2286
|
+
)
|
|
2287
|
+
if not existing:
|
|
2288
|
+
return {"success": False, "error": "Timeline event not found"}
|
|
2289
|
+
|
|
2290
|
+
await db.execute_write("DELETE FROM timeline_events WHERE id = ?", [event_id])
|
|
2291
|
+
return {"success": True, "deleted": event_id}
|
|
2292
|
+
except Exception as e:
|
|
2293
|
+
logger.error(f"Failed to delete timeline event: {e}")
|
|
2294
|
+
return {"success": False, "error": str(e)}
|
|
2295
|
+
|
|
2296
|
+
|
|
2297
|
+
@app.delete("/api/pattern/{pattern_id}")
|
|
2298
|
+
async def api_delete_pattern(pattern_id: str):
|
|
2299
|
+
"""Delete a pattern by ID."""
|
|
2300
|
+
try:
|
|
2301
|
+
existing = await db.execute_query(
|
|
2302
|
+
"SELECT id FROM patterns WHERE id = ?", [pattern_id]
|
|
2303
|
+
)
|
|
2304
|
+
if not existing:
|
|
2305
|
+
return {"success": False, "error": "Pattern not found"}
|
|
2306
|
+
|
|
2307
|
+
await db.execute_write("DELETE FROM patterns WHERE id = ?", [pattern_id])
|
|
2308
|
+
return {"success": True, "deleted": pattern_id}
|
|
2309
|
+
except Exception as e:
|
|
2310
|
+
logger.error(f"Failed to delete pattern: {e}")
|
|
2311
|
+
return {"success": False, "error": str(e)}
|
|
2312
|
+
|
|
2313
|
+
|
|
2144
2314
|
@app.get("/dashboard")
|
|
2145
2315
|
async def serve_dashboard():
|
|
2146
2316
|
"""Serve the monitoring dashboard."""
|
|
@@ -2680,12 +2850,12 @@ async def health_check():
|
|
|
2680
2850
|
|
|
2681
2851
|
return {
|
|
2682
2852
|
"status": status,
|
|
2683
|
-
"version": "
|
|
2853
|
+
"version": "3.0.1",
|
|
2684
2854
|
"timestamp": datetime.now().isoformat(),
|
|
2685
2855
|
"components": {
|
|
2686
2856
|
"agent": {
|
|
2687
2857
|
"healthy": True,
|
|
2688
|
-
"version": "
|
|
2858
|
+
"version": "3.0.1"
|
|
2689
2859
|
},
|
|
2690
2860
|
"database": {
|
|
2691
2861
|
"healthy": db_healthy,
|
package/mcp_server.py
CHANGED
|
@@ -40,6 +40,7 @@ if AGENT_DIR not in sys.path:
|
|
|
40
40
|
# ── Imports ─────────────────────────────────────────────────────────────
|
|
41
41
|
|
|
42
42
|
import json
|
|
43
|
+
import uuid
|
|
43
44
|
from collections.abc import AsyncIterator
|
|
44
45
|
from contextlib import asynccontextmanager
|
|
45
46
|
from dataclasses import dataclass
|
|
@@ -63,6 +64,7 @@ from config import config
|
|
|
63
64
|
# Direct skill imports - no HTTP, no FastAPI dependency
|
|
64
65
|
from skills.store import store_memory, store_project, store_pattern
|
|
65
66
|
from skills.search import semantic_search, search_patterns, get_project_context
|
|
67
|
+
from skills.timeline import timeline_log
|
|
66
68
|
|
|
67
69
|
|
|
68
70
|
# ── Lifespan: DB + Embeddings initialization ───────────────────────────
|
|
@@ -159,6 +161,29 @@ async def memory_store(
|
|
|
159
161
|
tech_stack=tech_stack,
|
|
160
162
|
agent_type=agent_type,
|
|
161
163
|
)
|
|
164
|
+
|
|
165
|
+
# Auto-create a timeline event for every stored memory
|
|
166
|
+
try:
|
|
167
|
+
event_type_map = {
|
|
168
|
+
"decision": "decision",
|
|
169
|
+
"error": "error",
|
|
170
|
+
"code": "action",
|
|
171
|
+
"session": "checkpoint",
|
|
172
|
+
"preference": "observation",
|
|
173
|
+
"chunk": "observation",
|
|
174
|
+
}
|
|
175
|
+
await timeline_log(
|
|
176
|
+
db=app.db,
|
|
177
|
+
embeddings=app.embeddings,
|
|
178
|
+
session_id=str(uuid.uuid4()),
|
|
179
|
+
event_type=event_type_map.get(memory_type, "observation"),
|
|
180
|
+
summary=content[:200],
|
|
181
|
+
details=content if len(content) > 200 else None,
|
|
182
|
+
project_path=project_path,
|
|
183
|
+
)
|
|
184
|
+
except Exception as e:
|
|
185
|
+
logger.debug(f"Timeline piggyback failed (non-fatal): {e}")
|
|
186
|
+
|
|
162
187
|
return json.dumps(result, default=str)
|
|
163
188
|
|
|
164
189
|
|
|
@@ -367,6 +392,49 @@ async def memory_context(
|
|
|
367
392
|
return json.dumps(result, default=str)
|
|
368
393
|
|
|
369
394
|
|
|
395
|
+
@mcp_server.tool()
|
|
396
|
+
async def memory_timeline_log(
|
|
397
|
+
ctx: Context,
|
|
398
|
+
summary: str,
|
|
399
|
+
event_type: str = "observation",
|
|
400
|
+
details: Optional[str] = None,
|
|
401
|
+
project_path: Optional[str] = None,
|
|
402
|
+
session_id: Optional[str] = None,
|
|
403
|
+
status: str = "completed",
|
|
404
|
+
outcome: Optional[str] = None,
|
|
405
|
+
is_anchor: bool = False,
|
|
406
|
+
) -> str:
|
|
407
|
+
"""Log an event to the session timeline.
|
|
408
|
+
|
|
409
|
+
Use this to record significant events: decisions made, errors encountered,
|
|
410
|
+
actions taken, or observations during a session.
|
|
411
|
+
|
|
412
|
+
Args:
|
|
413
|
+
summary: Brief description of the event (<200 chars)
|
|
414
|
+
event_type: Type: user_request, clarification, action, decision, observation, error, checkpoint
|
|
415
|
+
details: Full context (optional, for longer descriptions)
|
|
416
|
+
project_path: Project path
|
|
417
|
+
session_id: Session identifier (auto-generated if omitted)
|
|
418
|
+
status: Event status: completed, in_progress, failed, reverted
|
|
419
|
+
outcome: Result description
|
|
420
|
+
is_anchor: Mark as verified/anchor fact
|
|
421
|
+
"""
|
|
422
|
+
app = _get_app(ctx)
|
|
423
|
+
result = await timeline_log(
|
|
424
|
+
db=app.db,
|
|
425
|
+
embeddings=app.embeddings,
|
|
426
|
+
session_id=session_id or str(uuid.uuid4()),
|
|
427
|
+
event_type=event_type,
|
|
428
|
+
summary=summary,
|
|
429
|
+
details=details,
|
|
430
|
+
project_path=project_path,
|
|
431
|
+
status=status,
|
|
432
|
+
outcome=outcome,
|
|
433
|
+
is_anchor=is_anchor,
|
|
434
|
+
)
|
|
435
|
+
return json.dumps(result, default=str)
|
|
436
|
+
|
|
437
|
+
|
|
370
438
|
@mcp_server.tool()
|
|
371
439
|
async def memory_stats(ctx: Context) -> str:
|
|
372
440
|
"""Get memory statistics including total memories, database size, and breakdown by type."""
|
package/package.json
CHANGED
package/run_server.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Run the memory agent server (for background/production use).
|
|
2
2
|
|
|
3
|
-
Uses
|
|
3
|
+
Uses file locking for a true process mutex:
|
|
4
|
+
- Windows: msvcrt.locking()
|
|
5
|
+
- macOS/Linux: fcntl.flock()
|
|
4
6
|
The lock is held for the entire lifetime of the server, ensuring only
|
|
5
7
|
one instance can run at a time.
|
|
6
8
|
"""
|
|
@@ -9,9 +11,12 @@ import sys
|
|
|
9
11
|
import time
|
|
10
12
|
import atexit
|
|
11
13
|
import signal
|
|
14
|
+
import platform
|
|
12
15
|
import uvicorn
|
|
13
16
|
from dotenv import load_dotenv
|
|
14
17
|
|
|
18
|
+
IS_WINDOWS = platform.system() == "Windows"
|
|
19
|
+
|
|
15
20
|
load_dotenv()
|
|
16
21
|
|
|
17
22
|
AGENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -31,26 +36,30 @@ def is_port_in_use(port: int) -> bool:
|
|
|
31
36
|
|
|
32
37
|
|
|
33
38
|
def acquire_server_lock() -> bool:
|
|
34
|
-
"""Acquire exclusive server lock using
|
|
39
|
+
"""Acquire exclusive server lock using platform-appropriate file locking.
|
|
40
|
+
|
|
41
|
+
- Windows: msvcrt.locking() with LK_NBLCK for non-blocking exclusive lock
|
|
42
|
+
- macOS/Linux: fcntl.flock() with LOCK_EX | LOCK_NB for non-blocking exclusive lock
|
|
35
43
|
|
|
36
|
-
This uses msvcrt.locking() which provides mandatory file locking on Windows.
|
|
37
44
|
The lock is held as long as the file handle remains open.
|
|
38
45
|
"""
|
|
39
46
|
global _lock_handle
|
|
40
|
-
import msvcrt
|
|
41
47
|
|
|
42
48
|
my_pid = os.getpid()
|
|
43
49
|
|
|
44
50
|
try:
|
|
45
51
|
# Open file for read/write, create if doesn't exist
|
|
46
|
-
# Using os.open to get a file descriptor for msvcrt.locking
|
|
47
52
|
_lock_handle = open(LOCK_FILE, 'w+')
|
|
48
53
|
|
|
49
|
-
# Try to acquire exclusive lock (non-blocking)
|
|
50
|
-
# msvcrt.LK_NBLCK = non-blocking exclusive lock
|
|
54
|
+
# Try to acquire exclusive lock (non-blocking), platform-specific
|
|
51
55
|
try:
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
if IS_WINDOWS:
|
|
57
|
+
import msvcrt
|
|
58
|
+
msvcrt.locking(_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
|
|
59
|
+
else:
|
|
60
|
+
import fcntl
|
|
61
|
+
fcntl.flock(_lock_handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
62
|
+
except (IOError, OSError):
|
|
54
63
|
# Lock is held by another process
|
|
55
64
|
print(f"[MUTEX] Cannot acquire lock - another instance is running")
|
|
56
65
|
_lock_handle.close()
|
|
@@ -86,13 +95,17 @@ def acquire_server_lock() -> bool:
|
|
|
86
95
|
def release_server_lock():
|
|
87
96
|
"""Release the server lock on exit."""
|
|
88
97
|
global _lock_handle
|
|
89
|
-
import msvcrt
|
|
90
98
|
|
|
91
99
|
try:
|
|
92
100
|
if _lock_handle:
|
|
93
101
|
try:
|
|
94
|
-
# Unlock the file
|
|
95
|
-
|
|
102
|
+
# Unlock the file, platform-specific
|
|
103
|
+
if IS_WINDOWS:
|
|
104
|
+
import msvcrt
|
|
105
|
+
msvcrt.locking(_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
|
|
106
|
+
else:
|
|
107
|
+
import fcntl
|
|
108
|
+
fcntl.flock(_lock_handle.fileno(), fcntl.LOCK_UN)
|
|
96
109
|
except:
|
|
97
110
|
pass
|
|
98
111
|
_lock_handle.close()
|
|
@@ -129,7 +142,7 @@ if __name__ == "__main__":
|
|
|
129
142
|
# Note: The lock is held because _lock_handle stays open
|
|
130
143
|
uvicorn.run(
|
|
131
144
|
"main:app",
|
|
132
|
-
host=os.getenv("HOST", "
|
|
145
|
+
host=os.getenv("HOST", "127.0.0.1"),
|
|
133
146
|
port=PORT,
|
|
134
147
|
reload=False,
|
|
135
148
|
log_level="warning"
|
package/services/auth.py
CHANGED
|
@@ -26,6 +26,8 @@ DEFAULT_RATE_LIMIT = int(os.getenv("AUTH_RATE_LIMIT", "100")) # requests per mi
|
|
|
26
26
|
RATE_LIMIT_WINDOW = int(os.getenv("AUTH_RATE_WINDOW", "60")) # seconds
|
|
27
27
|
|
|
28
28
|
# Endpoints that don't require authentication
|
|
29
|
+
# This is a local-only tool, so all API endpoints are exempt by default.
|
|
30
|
+
# When AUTH_ENABLED=true, only /skills/call and /tasks/send require a key.
|
|
29
31
|
EXEMPT_ENDPOINTS = [
|
|
30
32
|
"/health",
|
|
31
33
|
"/health/live",
|
|
@@ -33,24 +35,11 @@ EXEMPT_ENDPOINTS = [
|
|
|
33
35
|
"/.well-known/agent.json",
|
|
34
36
|
"/docs",
|
|
35
37
|
"/openapi.json",
|
|
36
|
-
"/
|
|
37
|
-
"/dashboard", # Dashboard needs initial access
|
|
38
|
+
"/dashboard",
|
|
38
39
|
"/favicon.ico",
|
|
39
|
-
# Dashboard API endpoints
|
|
40
|
-
"/api/stats",
|
|
41
|
-
"/api/projects",
|
|
42
|
-
"/api/agents",
|
|
43
|
-
"/api/mcps",
|
|
44
|
-
"/api/hooks",
|
|
45
|
-
"/api/sessions",
|
|
46
40
|
"/ws", # WebSocket
|
|
47
|
-
"/a2a", # Agent-to-Agent protocol
|
|
48
|
-
"/api/
|
|
49
|
-
# Automation endpoints
|
|
50
|
-
"/api/inject",
|
|
51
|
-
"/api/memory/natural",
|
|
52
|
-
"/api/memory/", # Covers confidence, verify, outdated
|
|
53
|
-
"/api/claude-md",
|
|
41
|
+
"/a2a", # Agent-to-Agent protocol
|
|
42
|
+
"/api/", # All dashboard and REST API endpoints
|
|
54
43
|
]
|
|
55
44
|
|
|
56
45
|
|
|
@@ -81,7 +81,7 @@ class EmbeddingCache:
|
|
|
81
81
|
'hits': self._hits,
|
|
82
82
|
'misses': self._misses,
|
|
83
83
|
'hit_rate': round(self._hits / total, 4) if total > 0 else 0.0,
|
|
84
|
-
'estimated_memory_mb': round(len(self._cache) *
|
|
84
|
+
'estimated_memory_mb': round(len(self._cache) * config.get("EMBEDDING_DIM", 1024) * 4 / 1024 / 1024, 2)
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
|
package/services/embeddings.py
CHANGED
|
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
load_dotenv()
|
|
27
27
|
|
|
28
28
|
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
|
|
29
|
-
DEFAULT_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")
|
|
29
|
+
DEFAULT_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text") # Ollama default; sentence-transformers uses config.py
|
|
30
30
|
HEALTH_CHECK_TIMEOUT = float(os.getenv("OLLAMA_HEALTH_TIMEOUT", "5.0"))
|
|
31
31
|
HEALTH_CACHE_TTL = float(os.getenv("OLLAMA_HEALTH_CACHE_TTL", "30.0"))
|
|
32
32
|
|
|
@@ -178,7 +178,7 @@ class SentenceTransformerProvider(EmbeddingProvider):
|
|
|
178
178
|
)
|
|
179
179
|
|
|
180
180
|
self.model_name = model
|
|
181
|
-
self._model = SentenceTransformer(model, trust_remote_code=
|
|
181
|
+
self._model = SentenceTransformer(model, trust_remote_code=False)
|
|
182
182
|
self._dimension = self._model.get_sentence_embedding_dimension()
|
|
183
183
|
|
|
184
184
|
def embed(self, text: str) -> List[float]:
|
|
@@ -333,7 +333,7 @@ class EmbeddingService:
|
|
|
333
333
|
|
|
334
334
|
start_time = time.time()
|
|
335
335
|
try:
|
|
336
|
-
loop = asyncio.
|
|
336
|
+
loop = asyncio.get_running_loop()
|
|
337
337
|
|
|
338
338
|
health_result = await asyncio.wait_for(
|
|
339
339
|
loop.run_in_executor(None, self._provider.check_health),
|
|
@@ -447,7 +447,7 @@ class EmbeddingService:
|
|
|
447
447
|
)
|
|
448
448
|
|
|
449
449
|
try:
|
|
450
|
-
loop = asyncio.
|
|
450
|
+
loop = asyncio.get_running_loop()
|
|
451
451
|
|
|
452
452
|
def _embed():
|
|
453
453
|
return self._provider.embed(text)
|
|
@@ -527,7 +527,7 @@ class EmbeddingService:
|
|
|
527
527
|
)
|
|
528
528
|
|
|
529
529
|
try:
|
|
530
|
-
loop = asyncio.
|
|
530
|
+
loop = asyncio.get_running_loop()
|
|
531
531
|
|
|
532
532
|
def _embed():
|
|
533
533
|
return self._provider.embed(text)
|
|
@@ -618,7 +618,7 @@ class EmbeddingService:
|
|
|
618
618
|
# sentence-transformers has efficient native batching
|
|
619
619
|
if self.provider_type == "sentence-transformers":
|
|
620
620
|
try:
|
|
621
|
-
loop = asyncio.
|
|
621
|
+
loop = asyncio.get_running_loop()
|
|
622
622
|
|
|
623
623
|
def _batch_embed():
|
|
624
624
|
return self._provider.embed_batch(texts)
|
|
@@ -712,7 +712,7 @@ class EmbeddingService:
|
|
|
712
712
|
return self._available_models
|
|
713
713
|
|
|
714
714
|
try:
|
|
715
|
-
loop = asyncio.
|
|
715
|
+
loop = asyncio.get_running_loop()
|
|
716
716
|
provider: OllamaProvider = self._provider # type: ignore[assignment]
|
|
717
717
|
models = await loop.run_in_executor(None, provider.client.list)
|
|
718
718
|
model_names = [
|
package/services/llm_analyzer.py
CHANGED
|
@@ -143,7 +143,7 @@ class LLMAnalyzer:
|
|
|
143
143
|
return not self._degraded_mode
|
|
144
144
|
|
|
145
145
|
try:
|
|
146
|
-
loop = asyncio.
|
|
146
|
+
loop = asyncio.get_running_loop()
|
|
147
147
|
await asyncio.wait_for(
|
|
148
148
|
loop.run_in_executor(None, lambda: self.client.list()),
|
|
149
149
|
timeout=2.0
|
|
@@ -281,7 +281,7 @@ Rules:
|
|
|
281
281
|
- Only include meaningful, actionable items"""
|
|
282
282
|
|
|
283
283
|
try:
|
|
284
|
-
loop = asyncio.
|
|
284
|
+
loop = asyncio.get_running_loop()
|
|
285
285
|
|
|
286
286
|
def _generate():
|
|
287
287
|
return self.client.generate(
|
|
@@ -409,7 +409,7 @@ Return JSON only:
|
|
|
409
409
|
{{"has_contradiction": true/false, "conflicting_fact": "the fact it conflicts with or null", "reason": "brief explanation or null", "confidence": 0.0-1.0}}"""
|
|
410
410
|
|
|
411
411
|
try:
|
|
412
|
-
loop = asyncio.
|
|
412
|
+
loop = asyncio.get_running_loop()
|
|
413
413
|
|
|
414
414
|
def _generate():
|
|
415
415
|
return self.client.generate(
|
|
@@ -493,7 +493,7 @@ Recent events:
|
|
|
493
493
|
Write a brief summary focusing on: what's being worked on, key decisions made, current status."""
|
|
494
494
|
|
|
495
495
|
try:
|
|
496
|
-
loop = asyncio.
|
|
496
|
+
loop = asyncio.get_running_loop()
|
|
497
497
|
|
|
498
498
|
def _generate():
|
|
499
499
|
return self.client.generate(
|
|
@@ -127,23 +127,20 @@ def fit_response(
|
|
|
127
127
|
if len(output) <= max_chars:
|
|
128
128
|
return _with_meta(output, working, level, max_chars)
|
|
129
129
|
|
|
130
|
-
# Level 5: emergency hard truncation
|
|
130
|
+
# Level 5: emergency hard truncation — return valid JSON
|
|
131
131
|
level = 5
|
|
132
132
|
logger.warning(
|
|
133
133
|
"Response required emergency truncation: %d -> %d chars",
|
|
134
134
|
len(output), max_chars,
|
|
135
135
|
)
|
|
136
|
-
|
|
137
|
-
# Append a valid JSON suffix with metadata
|
|
138
|
-
meta = json.dumps({
|
|
136
|
+
return json.dumps({
|
|
139
137
|
"_response_meta": {
|
|
140
138
|
"degradation_level": level,
|
|
141
139
|
"truncated": True,
|
|
142
|
-
"original_chars":
|
|
140
|
+
"original_chars": len(output),
|
|
143
141
|
"note": "Response was emergency-truncated. Use specific queries to retrieve full data.",
|
|
144
142
|
}
|
|
145
143
|
})
|
|
146
|
-
return output + "\n" + meta
|
|
147
144
|
|
|
148
145
|
|
|
149
146
|
def _with_meta(
|
package/start_daemon.py
CHANGED
|
@@ -1,13 +1,22 @@
|
|
|
1
|
-
"""Start the memory agent as a proper background daemon
|
|
1
|
+
"""Start the memory agent as a proper background daemon.
|
|
2
2
|
|
|
3
|
-
Uses
|
|
4
|
-
|
|
3
|
+
Uses platform-appropriate file locking to prevent multiple simultaneous
|
|
4
|
+
startup attempts. The server itself has its own mutex.
|
|
5
|
+
- Windows: msvcrt.locking()
|
|
6
|
+
- macOS/Linux: fcntl.flock()
|
|
5
7
|
"""
|
|
6
8
|
import subprocess
|
|
7
9
|
import sys
|
|
8
10
|
import os
|
|
9
11
|
import time
|
|
10
|
-
import
|
|
12
|
+
import platform
|
|
13
|
+
|
|
14
|
+
IS_WINDOWS = platform.system() == "Windows"
|
|
15
|
+
|
|
16
|
+
if IS_WINDOWS:
|
|
17
|
+
import msvcrt
|
|
18
|
+
else:
|
|
19
|
+
import fcntl
|
|
11
20
|
|
|
12
21
|
AGENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
13
22
|
LOG_FILE = os.path.join(AGENT_DIR, "memory-agent.log")
|
|
@@ -19,10 +28,13 @@ _startup_lock_handle = None
|
|
|
19
28
|
|
|
20
29
|
|
|
21
30
|
def acquire_startup_lock() -> bool:
|
|
22
|
-
"""Acquire startup mutex using
|
|
31
|
+
"""Acquire startup mutex using platform-appropriate file locking.
|
|
23
32
|
|
|
24
33
|
This prevents multiple hooks from trying to start the agent simultaneously.
|
|
25
34
|
The lock is held until release_startup_lock() is called.
|
|
35
|
+
|
|
36
|
+
Windows: msvcrt.locking() with LK_NBLCK
|
|
37
|
+
macOS/Linux: fcntl.flock() with LOCK_EX | LOCK_NB
|
|
26
38
|
"""
|
|
27
39
|
global _startup_lock_handle
|
|
28
40
|
|
|
@@ -32,7 +44,10 @@ def acquire_startup_lock() -> bool:
|
|
|
32
44
|
|
|
33
45
|
# Try non-blocking exclusive lock
|
|
34
46
|
try:
|
|
35
|
-
|
|
47
|
+
if IS_WINDOWS:
|
|
48
|
+
msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
|
|
49
|
+
else:
|
|
50
|
+
fcntl.flock(_startup_lock_handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
36
51
|
except (IOError, OSError):
|
|
37
52
|
# Lock held by another process - they're already starting the agent
|
|
38
53
|
_startup_lock_handle.close()
|
|
@@ -64,7 +79,10 @@ def release_startup_lock():
|
|
|
64
79
|
try:
|
|
65
80
|
if _startup_lock_handle:
|
|
66
81
|
try:
|
|
67
|
-
|
|
82
|
+
if IS_WINDOWS:
|
|
83
|
+
msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
|
|
84
|
+
else:
|
|
85
|
+
fcntl.flock(_startup_lock_handle.fileno(), fcntl.LOCK_UN)
|
|
68
86
|
except:
|
|
69
87
|
pass
|
|
70
88
|
_startup_lock_handle.close()
|
|
@@ -76,10 +94,12 @@ def release_startup_lock():
|
|
|
76
94
|
def is_running():
|
|
77
95
|
"""Check if agent is already running via health endpoint."""
|
|
78
96
|
try:
|
|
79
|
-
import
|
|
97
|
+
from urllib.request import urlopen, Request
|
|
98
|
+
from urllib.error import URLError
|
|
80
99
|
url = os.getenv("MEMORY_AGENT_URL", "http://localhost:8102")
|
|
81
|
-
|
|
82
|
-
|
|
100
|
+
req = Request(f"{url}/health")
|
|
101
|
+
response = urlopen(req, timeout=2)
|
|
102
|
+
return response.status == 200
|
|
83
103
|
except Exception:
|
|
84
104
|
return False
|
|
85
105
|
|
|
@@ -115,19 +135,30 @@ def start_daemon():
|
|
|
115
135
|
return False
|
|
116
136
|
|
|
117
137
|
try:
|
|
118
|
-
# Windows-specific flags for detached process
|
|
119
|
-
DETACHED_PROCESS = 0x00000008
|
|
120
|
-
CREATE_NO_WINDOW = 0x08000000
|
|
121
|
-
CREATE_NEW_PROCESS_GROUP = 0x00000200
|
|
122
|
-
|
|
123
138
|
with open(LOG_FILE, "w") as log:
|
|
124
|
-
|
|
125
|
-
[sys.executable, "run_server.py"],
|
|
139
|
+
popen_kwargs = dict(
|
|
126
140
|
cwd=AGENT_DIR,
|
|
127
141
|
stdout=log,
|
|
128
142
|
stderr=subprocess.STDOUT,
|
|
129
|
-
|
|
130
|
-
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
if IS_WINDOWS:
|
|
146
|
+
# Windows-specific flags for detached process
|
|
147
|
+
DETACHED_PROCESS = 0x00000008
|
|
148
|
+
CREATE_NO_WINDOW = 0x08000000
|
|
149
|
+
CREATE_NEW_PROCESS_GROUP = 0x00000200
|
|
150
|
+
popen_kwargs["creationflags"] = (
|
|
151
|
+
DETACHED_PROCESS | CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP
|
|
152
|
+
)
|
|
153
|
+
popen_kwargs["close_fds"] = True
|
|
154
|
+
else:
|
|
155
|
+
# Unix: start in a new session so the process is detached
|
|
156
|
+
popen_kwargs["start_new_session"] = True
|
|
157
|
+
popen_kwargs["close_fds"] = True
|
|
158
|
+
|
|
159
|
+
proc = subprocess.Popen(
|
|
160
|
+
[sys.executable, "run_server.py"],
|
|
161
|
+
**popen_kwargs
|
|
131
162
|
)
|
|
132
163
|
|
|
133
164
|
# Save PID for future reference
|