claude-memory-agent 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +107 -0
- package/README.md +200 -0
- package/agent_card.py +512 -0
- package/bin/cli.js +181 -0
- package/bin/postinstall.js +216 -0
- package/config.py +104 -0
- package/dashboard.html +2689 -0
- package/hooks/README.md +196 -0
- package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
- package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
- package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
- package/hooks/auto-detect-response.py +348 -0
- package/hooks/auto_capture.py +255 -0
- package/hooks/detect-correction.py +173 -0
- package/hooks/grounding-hook.py +348 -0
- package/hooks/log-tool-use.py +234 -0
- package/hooks/log-user-request.py +208 -0
- package/hooks/pre-tool-decision.py +218 -0
- package/hooks/problem-detector.py +343 -0
- package/hooks/session_end.py +192 -0
- package/hooks/session_start.py +227 -0
- package/install.py +887 -0
- package/main.py +2859 -0
- package/manager.py +997 -0
- package/package.json +55 -0
- package/requirements.txt +8 -0
- package/run_server.py +136 -0
- package/services/__init__.py +50 -0
- package/services/__pycache__/__init__.cpython-312.pyc +0 -0
- package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
- package/services/__pycache__/auth.cpython-312.pyc +0 -0
- package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
- package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
- package/services/__pycache__/confidence.cpython-312.pyc +0 -0
- package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
- package/services/__pycache__/database.cpython-312.pyc +0 -0
- package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
- package/services/__pycache__/insights.cpython-312.pyc +0 -0
- package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
- package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
- package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
- package/services/__pycache__/timeline.cpython-312.pyc +0 -0
- package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
- package/services/__pycache__/websocket.cpython-312.pyc +0 -0
- package/services/agent_registry.py +753 -0
- package/services/auth.py +331 -0
- package/services/auto_inject.py +250 -0
- package/services/claude_md_sync.py +275 -0
- package/services/cleanup.py +667 -0
- package/services/compaction_flush.py +447 -0
- package/services/confidence.py +301 -0
- package/services/daily_log.py +333 -0
- package/services/database.py +2485 -0
- package/services/embeddings.py +358 -0
- package/services/insights.py +632 -0
- package/services/llm_analyzer.py +595 -0
- package/services/memory_md_sync.py +409 -0
- package/services/retry_queue.py +453 -0
- package/services/timeline.py +579 -0
- package/services/vector_index.py +398 -0
- package/services/websocket.py +257 -0
- package/skills/__init__.py +6 -0
- package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
- package/skills/__pycache__/admin.cpython-312.pyc +0 -0
- package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
- package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
- package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
- package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
- package/skills/__pycache__/insights.cpython-312.pyc +0 -0
- package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
- package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
- package/skills/__pycache__/search.cpython-312.pyc +0 -0
- package/skills/__pycache__/state.cpython-312.pyc +0 -0
- package/skills/__pycache__/store.cpython-312.pyc +0 -0
- package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
- package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
- package/skills/__pycache__/verification.cpython-312.pyc +0 -0
- package/skills/admin.py +469 -0
- package/skills/checkpoint.py +198 -0
- package/skills/claude_md.py +363 -0
- package/skills/cleanup.py +241 -0
- package/skills/grounding.py +801 -0
- package/skills/insights.py +231 -0
- package/skills/natural_language.py +277 -0
- package/skills/retrieve.py +67 -0
- package/skills/search.py +213 -0
- package/skills/state.py +182 -0
- package/skills/store.py +179 -0
- package/skills/summarize.py +588 -0
- package/skills/timeline.py +387 -0
- package/skills/verification.py +391 -0
- package/start_daemon.py +155 -0
- package/test_automation.py +221 -0
- package/test_complete.py +338 -0
- package/test_full.py +322 -0
- package/update_system.py +817 -0
- package/verify_db.py +134 -0
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""Verification skills for anti-hallucination - Best-of-N and Quote Extraction."""
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import asyncio
|
|
5
|
+
from typing import Dict, Any, Optional, List
|
|
6
|
+
from services.database import DatabaseService
|
|
7
|
+
from services.embeddings import EmbeddingService
|
|
8
|
+
|
|
9
|
+
# Check if LLM analysis is available
|
|
10
|
+
USE_LLM_ANALYSIS = os.getenv("USE_LLM_ANALYSIS", "true").lower() == "true"
|
|
11
|
+
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
|
|
12
|
+
VERIFICATION_MODEL = os.getenv("VERIFICATION_MODEL", "llama3.2:3b")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def best_of_n_verify(
    query: str,
    n: int = 3,
    context: Optional[str] = None,
    threshold: float = 0.7
) -> Dict[str, Any]:
    """Run the same query N times and flag inconsistent answers.

    Inconsistent outputs across runs are a signal of likely hallucination.

    Args:
        query: The question/task to verify.
        n: Number of runs (default 3).
        context: Optional context to include in the prompt.
        threshold: Similarity threshold for consistency (0-1).

    Returns:
        Dict with verification results.
    """
    if not USE_LLM_ANALYSIS:
        return {
            "success": False,
            "error": "LLM analysis not available",
            "recommendation": "Enable USE_LLM_ANALYSIS or install Ollama"
        }

    try:
        import ollama
        client = ollama.Client(host=OLLAMA_HOST)
    except Exception as exc:
        return {
            "success": False,
            "error": f"Ollama not available: {exc}"
        }

    # Assemble the prompt; the context line is optional.
    context_part = f"Context: {context}\n" if context else ""
    prompt = (
        "Answer this question concisely and factually.\n"
        f"{context_part}\n"
        f"Question: {query}\n\n"
        "Answer (be specific and factual):"
    )

    # Collect one answer per run; a failed call is recorded in-line so a
    # single bad request does not abort the whole verification.
    answers: List[str] = []
    for _ in range(n):
        try:
            reply = client.generate(
                model=VERIFICATION_MODEL,
                prompt=prompt,
                options={
                    "temperature": 0.7,  # Some variation to test consistency
                    "num_predict": 200
                }
            )
            answers.append(reply.get("response", "").strip())
        except Exception as exc:
            answers.append(f"[Error: {exc}]")

    # Score agreement between the runs
    consistency = await _analyze_consistency(answers, threshold)

    verdict = (
        "Answers are consistent - likely reliable"
        if consistency["is_consistent"]
        else "INCONSISTENT answers detected - verify manually before trusting"
    )

    return {
        "success": True,
        "query": query,
        "n_runs": n,
        "responses": answers,
        "is_consistent": consistency["is_consistent"],
        "consistency_score": consistency["score"],
        "consensus_answer": consistency.get("consensus"),
        "inconsistencies": consistency.get("inconsistencies", []),
        "recommendation": verdict
    }
|
|
95
|
+
|
|
96
|
+
async def _analyze_consistency(responses: List[str], threshold: float) -> Dict[str, Any]:
|
|
97
|
+
"""Analyze consistency across multiple responses."""
|
|
98
|
+
if len(responses) < 2:
|
|
99
|
+
return {"is_consistent": True, "score": 1.0, "consensus": responses[0] if responses else None}
|
|
100
|
+
|
|
101
|
+
# Simple word overlap consistency check
|
|
102
|
+
def get_key_words(text: str) -> set:
|
|
103
|
+
# Extract significant words (longer than 3 chars, not common)
|
|
104
|
+
common_words = {'the', 'and', 'for', 'that', 'this', 'with', 'are', 'was', 'were', 'been', 'have', 'has', 'will', 'would', 'could', 'should'}
|
|
105
|
+
words = set(w.lower() for w in text.split() if len(w) > 3 and w.lower() not in common_words)
|
|
106
|
+
return words
|
|
107
|
+
|
|
108
|
+
word_sets = [get_key_words(r) for r in responses]
|
|
109
|
+
|
|
110
|
+
# Calculate pairwise overlap
|
|
111
|
+
overlaps = []
|
|
112
|
+
for i in range(len(word_sets)):
|
|
113
|
+
for j in range(i + 1, len(word_sets)):
|
|
114
|
+
if word_sets[i] and word_sets[j]:
|
|
115
|
+
intersection = word_sets[i] & word_sets[j]
|
|
116
|
+
union = word_sets[i] | word_sets[j]
|
|
117
|
+
overlap = len(intersection) / len(union) if union else 0
|
|
118
|
+
overlaps.append(overlap)
|
|
119
|
+
|
|
120
|
+
avg_overlap = sum(overlaps) / len(overlaps) if overlaps else 0
|
|
121
|
+
|
|
122
|
+
# Find inconsistencies
|
|
123
|
+
inconsistencies = []
|
|
124
|
+
if avg_overlap < threshold:
|
|
125
|
+
# Find which responses differ most
|
|
126
|
+
all_words = set()
|
|
127
|
+
for ws in word_sets:
|
|
128
|
+
all_words.update(ws)
|
|
129
|
+
|
|
130
|
+
# Words that appear in some but not all responses
|
|
131
|
+
for word in all_words:
|
|
132
|
+
present_in = sum(1 for ws in word_sets if word in ws)
|
|
133
|
+
if 0 < present_in < len(word_sets):
|
|
134
|
+
inconsistencies.append(f"'{word}' appears in {present_in}/{len(word_sets)} responses")
|
|
135
|
+
|
|
136
|
+
# Find consensus (most common response pattern)
|
|
137
|
+
consensus = responses[0] if responses else None
|
|
138
|
+
|
|
139
|
+
return {
|
|
140
|
+
"is_consistent": avg_overlap >= threshold,
|
|
141
|
+
"score": round(avg_overlap, 3),
|
|
142
|
+
"consensus": consensus,
|
|
143
|
+
"inconsistencies": inconsistencies[:5] # Limit to 5
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
async def extract_quotes(
    document: str,
    query: str,
    max_quotes: int = 5,
    min_length: int = 20
) -> Dict[str, Any]:
    """
    Extract direct quotes from a document that are relevant to a query.

    Forces verbatim grounding - Claude must work from exact quotes.

    Args:
        document: The source document text
        query: What we're looking for
        max_quotes: Maximum quotes to extract
        min_length: Minimum quote length

    Returns:
        Dict with extracted quotes
    """
    if not document or not query:
        return {
            "success": False,
            "error": "Document and query are required"
        }

    if not USE_LLM_ANALYSIS:
        # Fallback: simple keyword-based extraction
        return await _extract_quotes_keyword(document, query, max_quotes, min_length)

    try:
        import ollama
        client = ollama.Client(host=OLLAMA_HOST)
    except Exception:  # fix: was a bare `except:` (would also swallow SystemExit/KeyboardInterrupt)
        return await _extract_quotes_keyword(document, query, max_quotes, min_length)

    prompt = f"""Extract exact, word-for-word quotes from this document that are relevant to the query.

DOCUMENT:
{document[:5000]}

QUERY: {query}

Return ONLY a JSON array of exact quotes from the document. Do not paraphrase or modify.
Example format: ["exact quote 1", "exact quote 2"]

Quotes (JSON array only):"""

    try:
        response = client.generate(
            model=VERIFICATION_MODEL,
            prompt=prompt,
            options={
                "temperature": 0.1,  # Low temperature for accuracy
                "num_predict": 500
            }
        )

        result_text = response.get("response", "[]")

        # Parse the first JSON array found in the model output
        json_start = result_text.find("[")
        json_end = result_text.rfind("]") + 1

        if json_start >= 0 and json_end > json_start:
            quotes = json.loads(result_text[json_start:json_end])

            # Verify quotes actually exist in document
            verified_quotes = []
            for quote in quotes[:max_quotes]:
                if isinstance(quote, str) and len(quote) >= min_length:
                    # Check if quote (or close match) exists in document
                    quote_lower = quote.lower()
                    doc_lower = document.lower()
                    if quote_lower in doc_lower or _fuzzy_match(quote_lower, doc_lower):
                        verified_quotes.append({
                            "quote": quote,
                            "verified": True
                        })
                    else:
                        verified_quotes.append({
                            "quote": quote,
                            "verified": False,
                            "warning": "Quote not found verbatim in document"
                        })

            return {
                "success": True,
                "query": query,
                "quotes": verified_quotes,
                "total_found": len(verified_quotes),
                "all_verified": all(q["verified"] for q in verified_quotes),
                "grounding_instruction": (
                    "Use ONLY these verified quotes to answer. "
                    "Do not add information not in the quotes."
                )
            }

    except Exception:  # fix: dropped unused `as e`; any LLM/JSON failure falls through to the keyword fallback
        pass

    # Fallback to keyword extraction
    return await _extract_quotes_keyword(document, query, max_quotes, min_length)
+
|
|
251
|
+
|
|
252
|
+
def _fuzzy_match(quote: str, document: str, threshold: float = 0.8) -> bool:
|
|
253
|
+
"""Check if quote approximately matches something in document."""
|
|
254
|
+
# Simple check: do most words appear in sequence?
|
|
255
|
+
words = quote.split()
|
|
256
|
+
if len(words) < 3:
|
|
257
|
+
return False
|
|
258
|
+
|
|
259
|
+
# Check if 80% of words appear near each other in document
|
|
260
|
+
matches = 0
|
|
261
|
+
for word in words:
|
|
262
|
+
if word in document:
|
|
263
|
+
matches += 1
|
|
264
|
+
|
|
265
|
+
return (matches / len(words)) >= threshold
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
async def _extract_quotes_keyword(
|
|
269
|
+
document: str,
|
|
270
|
+
query: str,
|
|
271
|
+
max_quotes: int,
|
|
272
|
+
min_length: int
|
|
273
|
+
) -> Dict[str, Any]:
|
|
274
|
+
"""Fallback keyword-based quote extraction."""
|
|
275
|
+
# Split query into keywords
|
|
276
|
+
keywords = [w.lower() for w in query.split() if len(w) > 3]
|
|
277
|
+
|
|
278
|
+
# Split document into sentences
|
|
279
|
+
sentences = []
|
|
280
|
+
for sep in ['. ', '.\n', '! ', '? ', '\n\n']:
|
|
281
|
+
if sep in document:
|
|
282
|
+
parts = document.split(sep)
|
|
283
|
+
for part in parts:
|
|
284
|
+
if len(part.strip()) >= min_length:
|
|
285
|
+
sentences.append(part.strip())
|
|
286
|
+
|
|
287
|
+
if not sentences:
|
|
288
|
+
sentences = [document[i:i+200] for i in range(0, len(document), 150)]
|
|
289
|
+
|
|
290
|
+
# Score sentences by keyword matches
|
|
291
|
+
scored = []
|
|
292
|
+
for sentence in sentences:
|
|
293
|
+
sentence_lower = sentence.lower()
|
|
294
|
+
score = sum(1 for kw in keywords if kw in sentence_lower)
|
|
295
|
+
if score > 0:
|
|
296
|
+
scored.append((score, sentence))
|
|
297
|
+
|
|
298
|
+
# Sort by score and take top N
|
|
299
|
+
scored.sort(reverse=True)
|
|
300
|
+
quotes = [{"quote": s, "verified": True, "keyword_matches": score} for score, s in scored[:max_quotes]]
|
|
301
|
+
|
|
302
|
+
return {
|
|
303
|
+
"success": True,
|
|
304
|
+
"query": query,
|
|
305
|
+
"quotes": quotes,
|
|
306
|
+
"total_found": len(quotes),
|
|
307
|
+
"method": "keyword_extraction",
|
|
308
|
+
"grounding_instruction": (
|
|
309
|
+
"Use these extracted sections to answer. "
|
|
310
|
+
"Cite specific quotes when making claims."
|
|
311
|
+
)
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
async def require_grounding(
    db: DatabaseService,
    session_id: str,
    statement: str,
    source_type: str = "any"
) -> Dict[str, Any]:
    """
    Require that a statement be grounded in stored facts before accepting it.

    Args:
        db: Database service
        session_id: Current session
        statement: The statement to verify
        source_type: Type of source required ("anchor", "memory", "any")

    Returns:
        Dict with grounding verification
    """
    grounding_sources = []

    # Check against anchors
    # NOTE(review): anchors are consulted regardless of source_type; only the
    # memory lookup below is gated — confirm this asymmetry is intended.
    events = await db.get_timeline_events(
        session_id=session_id,
        limit=50,
        anchors_only=True
    )

    statement_lower = statement.lower()

    for event in events:
        if event.get("is_anchor"):
            summary_lower = event["summary"].lower()
            # Check for keyword overlap (words longer than 3 chars)
            overlap = sum(1 for word in statement_lower.split() if len(word) > 3 and word in summary_lower)
            if overlap >= 2:
                grounding_sources.append({
                    "type": "anchor",
                    "content": event["summary"],
                    "match_strength": "keyword_overlap"
                })

    # Check against memories only when anchors provided no grounding
    if source_type in ["memory", "any"] and not grounding_sources:
        try:
            # Semantic search via embeddings; best-effort only.
            # (EmbeddingService is imported at module level — the previous
            # redundant function-local import was removed.)
            embeddings = EmbeddingService()
            embedding = await embeddings.generate_embedding(statement)

            memories = await db.search_similar(
                embedding=embedding,
                limit=3,
                threshold=0.7
            )

            for memory in memories:
                grounding_sources.append({
                    "type": "memory",
                    "content": memory.get("content", "")[:200],
                    "similarity": memory.get("similarity")
                })
        except Exception:  # fix: was a bare `except:`; embedding backend is optional
            pass

    is_grounded = len(grounding_sources) > 0

    return {
        "success": True,
        "statement": statement,
        "is_grounded": is_grounded,
        "grounding_sources": grounding_sources,
        "source_count": len(grounding_sources),
        "recommendation": (
            "Statement is grounded in stored facts"
            if is_grounded
            else "WARNING: Statement has no grounding. Verify before using."
        )
    }
|
package/start_daemon.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Start the memory agent as a proper background daemon on Windows.
|
|
2
|
+
|
|
3
|
+
Uses msvcrt.locking() for a true Windows mutex to prevent multiple
|
|
4
|
+
simultaneous startup attempts. The server itself has its own mutex.
|
|
5
|
+
"""
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import time
|
|
10
|
+
import msvcrt
|
|
11
|
+
|
|
12
|
+
AGENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
13
|
+
LOG_FILE = os.path.join(AGENT_DIR, "memory-agent.log")
|
|
14
|
+
STARTUP_LOCK_FILE = os.path.join(AGENT_DIR, "memory-agent-startup.lock")
|
|
15
|
+
PID_FILE = os.path.join(AGENT_DIR, "memory-agent.pid")
|
|
16
|
+
|
|
17
|
+
# Global handle - must stay open for lock to persist
|
|
18
|
+
_startup_lock_handle = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def acquire_startup_lock() -> bool:
    """Acquire startup mutex using Windows file locking (msvcrt.locking).

    This prevents multiple hooks from trying to start the agent simultaneously.
    The lock is held (via the open module-level handle) until
    release_startup_lock() is called.

    Returns:
        True when this process now owns the startup lock, False otherwise.
    """
    global _startup_lock_handle

    try:
        # Open/create the lock file; the handle must stay open for the
        # byte-range lock below to persist.
        _startup_lock_handle = open(STARTUP_LOCK_FILE, 'w+')

        # Try a non-blocking exclusive lock on the first byte
        try:
            msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_NBLCK, 1)
        except (IOError, OSError):
            # Lock held by another process - they're already starting the agent
            _startup_lock_handle.close()
            _startup_lock_handle = None
            return False

        # We have the lock - write our PID for debugging
        _startup_lock_handle.seek(0)
        _startup_lock_handle.truncate()
        _startup_lock_handle.write(str(os.getpid()))
        _startup_lock_handle.flush()
        return True

    except Exception as e:
        print(f"[STARTUP] Failed to acquire lock: {e}")
        if _startup_lock_handle:
            try:
                _startup_lock_handle.close()
            except Exception:  # fix: was a bare `except:` (would swallow SystemExit/KeyboardInterrupt)
                pass
        _startup_lock_handle = None
        return False
58
|
+
|
|
59
|
+
|
|
60
|
+
def release_startup_lock():
    """Release the startup mutex and close the lock-file handle.

    Safe to call when no lock is held; failures are swallowed because
    releasing is best-effort cleanup.
    """
    global _startup_lock_handle

    try:
        if _startup_lock_handle:
            try:
                msvcrt.locking(_startup_lock_handle.fileno(), msvcrt.LK_UNLCK, 1)
            except Exception:  # fix: was a bare `except:` (would swallow SystemExit/KeyboardInterrupt)
                pass
            _startup_lock_handle.close()
            _startup_lock_handle = None
    except Exception:
        pass
74
|
+
|
|
75
|
+
|
|
76
|
+
def is_running():
    """Check if agent is already running via health endpoint.

    Any failure (requests missing, connection refused, timeout) is treated
    as "not running".
    """
    base_url = os.getenv("MEMORY_AGENT_URL", "http://localhost:8102")
    try:
        import requests
        resp = requests.get(f"{base_url}/health", timeout=2)
    except Exception:
        return False
    return resp.status_code == 200
85
|
+
|
|
86
|
+
|
|
87
|
+
def read_pid():
    """Read the PID from the PID file if it exists.

    Returns the integer PID, or None when the file is missing, unreadable,
    or does not contain an integer.
    """
    # EAFP: attempt the read and map every failure to "no recorded PID".
    try:
        with open(PID_FILE, 'r') as f:
            return int(f.read().strip())
    except Exception:
        return None
96
|
+
|
|
97
|
+
|
|
98
|
+
def start_daemon():
    """Start the memory agent as a detached background process.

    Returns True when a healthy agent is (or becomes) reachable, False
    otherwise. The startup mutex is always released on exit.
    """
    # Fast path: a healthy agent is already listening.
    if is_running():
        print("Memory agent is already running!")
        return True

    # Only one process may perform the startup sequence at a time.
    if not acquire_startup_lock():
        # Another startup is in progress - poll until it finishes.
        print("Waiting for other startup to complete...")
        for _ in range(10):
            time.sleep(0.5)
            if is_running():
                print("Memory agent started by another process!")
                return True
        print("Other startup failed or timed out")
        return False

    try:
        # Windows creation flags: fully detach the child from this console.
        detached = 0x00000008    # DETACHED_PROCESS
        no_window = 0x08000000   # CREATE_NO_WINDOW
        new_group = 0x00000200   # CREATE_NEW_PROCESS_GROUP

        with open(LOG_FILE, "w") as log:
            proc = subprocess.Popen(
                [sys.executable, "run_server.py"],
                cwd=AGENT_DIR,
                stdout=log,
                stderr=subprocess.STDOUT,
                creationflags=detached | no_window | new_group,
                close_fds=True
            )

        # Save PID for future reference
        with open(PID_FILE, 'w') as f:
            f.write(str(proc.pid))

        print(f"Started memory agent (PID: {proc.pid})")

        # Poll the health endpoint for up to ~5 seconds.
        for _ in range(10):
            time.sleep(0.5)
            if is_running():
                print("Memory agent is now running!")
                return True

        print("Warning: Agent started but health check failed. Check log file.")
        return False

    finally:
        # Always release lock when done (success or failure)
        release_startup_lock()
152
|
+
|
|
153
|
+
|
|
154
|
+
if __name__ == "__main__":
    # CLI entry point: start the daemon (idempotent if one is already running).
    start_daemon()
|