npm - claude-memory-agent - Versions diffs - 2.1.0 → 2.2.1 - Mend

claude-memory-agent 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/bin/cli.js +11 -1
package/bin/lib/banner.js +39 -0
package/bin/lib/environment.js +166 -0
package/bin/lib/installer.js +291 -0
package/bin/lib/models.js +95 -0
package/bin/lib/steps/advanced.js +101 -0
package/bin/lib/steps/confirm.js +87 -0
package/bin/lib/steps/model.js +57 -0
package/bin/lib/steps/provider.js +65 -0
package/bin/lib/steps/scope.js +59 -0
package/bin/lib/steps/server.js +74 -0
package/bin/lib/ui.js +75 -0
package/bin/onboarding.js +164 -0
package/bin/postinstall.js +22 -257
package/config.py +103 -4
package/dashboard.html +697 -27
package/hooks/extract_memories.py +439 -0
package/hooks/pre_compact_hook.py +76 -0
package/hooks/session_end_hook.py +149 -0
package/hooks/stop_hook.py +372 -0
package/install.py +91 -37
package/main.py +1636 -892
package/mcp_server.py +451 -0
package/package.json +14 -3
package/requirements.txt +12 -8
package/services/adaptive_ranker.py +272 -0
package/services/agent_catalog.json +153 -0
package/services/agent_registry.py +245 -730
package/services/claude_md_sync.py +320 -4
package/services/consolidation.py +417 -0
package/services/database.py +586 -105
package/services/embedding_pipeline.py +262 -0
package/services/embeddings.py +493 -85
package/services/memory_decay.py +408 -0
package/services/native_memory_paths.py +86 -0
package/services/native_memory_sync.py +496 -0
package/services/response_manager.py +183 -0
package/services/terminal_ui.py +199 -0
package/services/tier_manager.py +235 -0
package/services/websocket.py +26 -6
package/skills/search.py +136 -61
package/skills/session_review.py +210 -23
package/skills/store.py +125 -18
package/terminal_dashboard.py +474 -0
package/hooks/__pycache__/auto-detect-response.cpython-312.pyc +0 -0
package/hooks/__pycache__/auto_capture.cpython-312.pyc +0 -0
package/hooks/__pycache__/grounding-hook.cpython-312.pyc +0 -0
package/hooks/__pycache__/session_end.cpython-312.pyc +0 -0
package/hooks/__pycache__/session_start.cpython-312.pyc +0 -0
package/services/__pycache__/__init__.cpython-312.pyc +0 -0
package/services/__pycache__/agent_registry.cpython-312.pyc +0 -0
package/services/__pycache__/auth.cpython-312.pyc +0 -0
package/services/__pycache__/auto_inject.cpython-312.pyc +0 -0
package/services/__pycache__/claude_md_sync.cpython-312.pyc +0 -0
package/services/__pycache__/cleanup.cpython-312.pyc +0 -0
package/services/__pycache__/compaction_flush.cpython-312.pyc +0 -0
package/services/__pycache__/confidence.cpython-312.pyc +0 -0
package/services/__pycache__/curator.cpython-312.pyc +0 -0
package/services/__pycache__/daily_log.cpython-312.pyc +0 -0
package/services/__pycache__/database.cpython-312.pyc +0 -0
package/services/__pycache__/embeddings.cpython-312.pyc +0 -0
package/services/__pycache__/insights.cpython-312.pyc +0 -0
package/services/__pycache__/llm_analyzer.cpython-312.pyc +0 -0
package/services/__pycache__/memory_md_sync.cpython-312.pyc +0 -0
package/services/__pycache__/retry_queue.cpython-312.pyc +0 -0
package/services/__pycache__/timeline.cpython-312.pyc +0 -0
package/services/__pycache__/vector_index.cpython-312.pyc +0 -0
package/services/__pycache__/websocket.cpython-312.pyc +0 -0
package/skills/__pycache__/__init__.cpython-312.pyc +0 -0
package/skills/__pycache__/admin.cpython-312.pyc +0 -0
package/skills/__pycache__/checkpoint.cpython-312.pyc +0 -0
package/skills/__pycache__/claude_md.cpython-312.pyc +0 -0
package/skills/__pycache__/cleanup.cpython-312.pyc +0 -0
package/skills/__pycache__/confidence_tracker.cpython-312.pyc +0 -0
package/skills/__pycache__/context.cpython-312.pyc +0 -0
package/skills/__pycache__/curator.cpython-312.pyc +0 -0
package/skills/__pycache__/grounding.cpython-312.pyc +0 -0
package/skills/__pycache__/insights.cpython-312.pyc +0 -0
package/skills/__pycache__/natural_language.cpython-312.pyc +0 -0
package/skills/__pycache__/retrieve.cpython-312.pyc +0 -0
package/skills/__pycache__/search.cpython-312.pyc +0 -0
package/skills/__pycache__/session_review.cpython-312.pyc +0 -0
package/skills/__pycache__/state.cpython-312.pyc +0 -0
package/skills/__pycache__/store.cpython-312.pyc +0 -0
package/skills/__pycache__/summarize.cpython-312.pyc +0 -0
package/skills/__pycache__/timeline.cpython-312.pyc +0 -0
package/skills/__pycache__/verification.cpython-312.pyc +0 -0
package/test_automation.py +0 -221
package/test_complete.py +0 -338
package/test_full.py +0 -322
package/verify_db.py +0 -134

package/hooks/stop_hook.py ADDED Viewed

@@ -0,0 +1,372 @@
+#!/usr/bin/env python3
+"""
+Stop hook for Claude Code.
+Fires after every Claude response. Unlike PreCompact/SessionEnd hooks which
+scan the full transcript, this hook analyzes ONLY the latest assistant
+response for high-signal content worth persisting immediately.
+Design constraints:
+  - Runs after EVERY response -- must complete in < 2 seconds
+  - Extracts at most 2 memories per invocation
+  - Focuses only on explicit, high-confidence signals (decisions, error
+    resolutions, architecture notes)
+  - Shares the cursor dedup hash list with extract_memories.py so the
+    heavier hooks don't re-extract the same content
+  - Uses stdlib only (no pip dependencies)
+  - Always exits 0 -- never blocks the user
+Stdin JSON schema (provided by Claude Code):
+  {
+    "session_id": "...",
+    "transcript_path": "...",
+    "hook_event_name": "Stop",
+    "cwd": "...",
+    "stop_hook_active": true,
+    ... (assistant's last response in transcript)
+  }
+"""
+import os
+import sys
+import json
+import re
+import time
+import hashlib
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+MEMORY_AGENT_URL = os.getenv("MEMORY_AGENT_URL", "http://localhost:8102")  # Base URL of the memory agent; "/a2a" is appended when storing
+API_KEY = os.getenv("MEMORY_API_KEY", "")  # Optional; sent as the X-Memory-Key header only when non-empty
+CURSOR_DIR = Path.home() / ".claude"  # Per-user directory that holds the cursor file
+CURSOR_FILE = CURSOR_DIR / "memory-agent-cursor.json"  # JSON object keyed by session id (dedup hashes live here)
+MAX_MEMORIES_PER_STOP = 2        # Hard cap on memories extracted per invocation -- stay fast
+MAX_CONTENT_LENGTH = 500         # Stored content is truncated to this many characters (plus "...")
+API_TIMEOUT_SECONDS = 1.5        # Tight timeout applied to EACH API call
+TOTAL_TIME_BUDGET = 2.0          # Total budget in seconds; checked before each API call, not during one
+# ---------------------------------------------------------------------------
+# High-signal extraction patterns (intentionally narrow)
+#
+# These are stricter than the ones in extract_memories.py because the Stop
+# hook runs on every response and must avoid false positives.  The heavier
+# PreCompact/SessionEnd hooks catch the rest.
+# ---------------------------------------------------------------------------
+# Explicit decisions -- strong first-person phrasing
+DECISION_PATTERNS = [
+    re.compile(
+        r"(?:^|\n)\s*(?:I decided to|I've decided to|Let's go with|The approach will be|"  # trigger phrase must begin a line (leading whitespace allowed)
+        r"We(?:'ll| will) go with|The decision is to) (.{20,}?)(?:\.|$)",  # group 1: >= 20 chars, lazily, up to the first "." or end of line
+        re.IGNORECASE | re.MULTILINE,  # case-insensitive; ^ and $ match at every line boundary
+    ),
+]
+# Error resolutions -- explicit fix language
+ERROR_RESOLUTION_PATTERNS = [
+    re.compile(
+        r"(?:^|\n)\s*(?:The fix is|The fix was|Root cause was|Root cause:|"  # trigger phrase must begin a line (leading whitespace allowed)
+        r"This was caused by|The bug was|The issue was|Resolution:) (.{20,}?)(?:\.|$)",  # group 1: >= 20 chars, lazily, up to the first "." or end of line
+        re.IGNORECASE | re.MULTILINE,  # case-insensitive; ^ and $ match at every line boundary
+    ),
+]
+# Architecture / convention notes
+ARCHITECTURE_PATTERNS = [
+    re.compile(
+        r"(?:^|\n)\s*(?:The architecture|This pattern|Convention:|"  # trigger phrase must begin a line (leading whitespace allowed)
+        r"The convention is|Key pattern:|Architecture note:) (.{20,}?)(?:\.|$)",  # group 1: >= 20 chars, lazily, up to the first "." or end of line
+        re.IGNORECASE | re.MULTILINE,  # case-insensitive; ^ and $ match at every line boundary
+    ),
+]
+# ---------------------------------------------------------------------------
+# Cursor interaction (reuses same file as extract_memories.py)
+# ---------------------------------------------------------------------------
+def _load_cursor_hashes(session_id: str) -> set:
+    """Load the set of already-extracted content hashes for this session."""
+    try:
+        if CURSOR_FILE.exists():
+            data = json.loads(CURSOR_FILE.read_text(encoding="utf-8"))
+            session = data.get(session_id, {})
+            return set(session.get("extracted_hashes", []))
+    except (json.JSONDecodeError, OSError):
+        pass
+    return set()
+def _save_cursor_hashes(session_id: str, new_hashes: List[str]):
+    """Append new hashes to the session's cursor entry."""
+    try:
+        CURSOR_DIR.mkdir(parents=True, exist_ok=True)
+        data = {}
+        if CURSOR_FILE.exists():
+            try:
+                data = json.loads(CURSOR_FILE.read_text(encoding="utf-8"))
+            except (json.JSONDecodeError, OSError):
+                data = {}
+        session = data.get(session_id, {"byte_offset": 0, "extracted_hashes": []})
+        existing = set(session.get("extracted_hashes", []))
+        merged = list(existing | set(new_hashes))
+        # Cap to prevent unbounded growth
+        if len(merged) > 200:
+            merged = merged[-200:]
+        session["extracted_hashes"] = merged
+        data[session_id] = session
+        CURSOR_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
+    except OSError:
+        pass  # Fail silently
+def _content_hash(text: str) -> str:
+    """Short MD5 prefix for dedup -- matches extract_memories.content_hash."""
+    return hashlib.md5(text.strip().lower().encode("utf-8")).hexdigest()[:12]
+# ---------------------------------------------------------------------------
+# Response extraction
+# ---------------------------------------------------------------------------
+def _get_latest_response(transcript_path: str) -> str:
+    """
+    Read the transcript file and return only the last assistant response.
+    Claude Code transcripts are JSONL where each line is a message object.
+    We read the file from the end backwards to find the last assistant turn.
+    For speed we only read the trailing portion of the file (last 32 KB max).
+    """
+    path = Path(transcript_path)
+    if not path.exists():
+        return ""
+    try:
+        file_size = path.stat().st_size
+        if file_size == 0:
+            return ""
+        # Read at most the last 32 KB -- the latest response should be there
+        read_start = max(0, file_size - 32768)
+        with open(path, "r", encoding="utf-8", errors="replace") as f:
+            if read_start > 0:
+                f.seek(read_start)
+                # Skip partial line
+                f.readline()
+            tail = f.read()
+        if not tail.strip():
+            return ""
+        # Walk lines in reverse to find last assistant message
+        lines = tail.strip().split('\n')
+        for line in reversed(lines):
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                msg = json.loads(line)
+                # Claude Code JSONL format: {"role": "assistant", "content": ...}
+                if msg.get("role") == "assistant":
+                    content = msg.get("content", "")
+                    if isinstance(content, list):
+                        # Multi-part content (text blocks)
+                        parts = []
+                        for part in content:
+                            if isinstance(part, dict) and part.get("type") == "text":
+                                parts.append(part.get("text", ""))
+                            elif isinstance(part, str):
+                                parts.append(part)
+                        return "\n".join(parts)
+                    elif isinstance(content, str):
+                        return content
+            except (json.JSONDecodeError, TypeError):
+                continue
+        # Fallback: if JSONL parsing fails, return last chunk of raw text
+        # (transcript might be plain text rather than JSONL)
+        return tail[-8192:] if len(tail) > 8192 else tail
+    except OSError:
+        return ""
+def _extract_high_signal(text: str, existing_hashes: set) -> List[Dict[str, Any]]:
+    """
+    Scan text for high-signal patterns.  Returns at most MAX_MEMORIES_PER_STOP items.
+    """
+    extractions: List[Dict[str, Any]] = []
+    seen = set(existing_hashes)
+    def _try_add(content: str, mem_type: str, importance: int, tags: List[str]):
+        if len(extractions) >= MAX_MEMORIES_PER_STOP:
+            return
+        h = _content_hash(content)
+        if h in seen:
+            return
+        seen.add(h)
+        if len(content) > MAX_CONTENT_LENGTH:
+            content = content[:MAX_CONTENT_LENGTH] + "..."
+        extractions.append({
+            "content": content,
+            "type": mem_type,
+            "importance": importance,
+            "tags": tags + ["auto-extracted", "stop-hook"],
+            "hash": h,
+        })
+    def _context_around(match_obj, source_text: str, chars: int = 200) -> str:
+        """Grab surrounding context aligned to line boundaries."""
+        start = max(0, match_obj.start() - chars)
+        end = min(len(source_text), match_obj.end() + chars)
+        while start > 0 and source_text[start] != '\n':
+            start -= 1
+        while end < len(source_text) and source_text[end] != '\n':
+            end += 1
+        return source_text[start:end].strip()
+    # --- Decisions (importance 7 -- higher than extract_memories' 6 because
+    #     these patterns are narrower / higher confidence) ---
+    for pat in DECISION_PATTERNS:
+        for m in pat.finditer(text):
+            ctx = _context_around(m, text)
+            if len(ctx) > 30:
+                _try_add(ctx, "decision", 7, ["decision"])
+    # --- Error resolutions (importance 7) ---
+    for pat in ERROR_RESOLUTION_PATTERNS:
+        for m in pat.finditer(text):
+            ctx = _context_around(m, text)
+            if len(ctx) > 30:
+                _try_add(ctx, "error", 7, ["error", "resolution"])
+    # --- Architecture notes (importance 6) ---
+    for pat in ARCHITECTURE_PATTERNS:
+        for m in pat.finditer(text):
+            ctx = _context_around(m, text)
+            if len(ctx) > 30:
+                _try_add(ctx, "decision", 6, ["architecture", "pattern"])
+    return extractions
+# ---------------------------------------------------------------------------
+# API call (mirrors extract_memories.store_memory_sync, tighter timeout)
+# ---------------------------------------------------------------------------
+def _store_memory(extraction: Dict[str, Any], project_path: Optional[str] = None) -> bool:
+    """Store a single memory via the memory agent A2A endpoint."""
+    import urllib.request
+    import urllib.error
+    payload = {
+        "jsonrpc": "2.0",
+        "method": "tasks/send",
+        "params": {
+            "message": {"parts": [{"type": "text", "text": ""}]},
+            "metadata": {
+                "skill_id": "store_memory",
+                "params": {
+                    "content": extraction["content"],
+                    "type": extraction["type"],
+                    "importance": extraction["importance"],
+                    "tags": extraction["tags"],
+                    "project_path": project_path,
+                    "agent_type": "stop-hook",
+                    "outcome_status": "pending",
+                    "confidence": 0.45,  # Slightly above auto-extracted (0.4)
+                },
+            },
+        },
+        "id": f"stop-{extraction['hash']}-{int(time.time())}",
+    }
+    headers = {"Content-Type": "application/json"}
+    if API_KEY:
+        headers["X-Memory-Key"] = API_KEY
+    try:
+        data = json.dumps(payload).encode("utf-8")
+        req = urllib.request.Request(
+            f"{MEMORY_AGENT_URL}/a2a",
+            data=data,
+            headers=headers,
+            method="POST",
+        )
+        with urllib.request.urlopen(req, timeout=API_TIMEOUT_SECONDS) as resp:
+            return resp.status == 200
+    except (urllib.error.URLError, urllib.error.HTTPError, OSError, TimeoutError):
+        return False
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+def main():
+    start = time.time()
+    try:
+        # --- Read stdin JSON ---
+        hook_data: Dict[str, Any] = {}
+        if not sys.stdin.isatty():
+            raw = sys.stdin.read()
+            if raw.strip():
+                hook_data = json.loads(raw)
+        session_id = hook_data.get("session_id", "")
+        transcript_path = hook_data.get("transcript_path", "")
+        project_path = hook_data.get("cwd") or hook_data.get("project_path", "")
+        if not transcript_path or not session_id:
+            sys.exit(0)
+        # --- Load existing hashes for dedup ---
+        existing_hashes = _load_cursor_hashes(session_id)
+        # --- Get only the latest assistant response ---
+        response_text = _get_latest_response(transcript_path)
+        if not response_text or len(response_text) < 40:
+            sys.exit(0)
+        # --- Extract high-signal content ---
+        extractions = _extract_high_signal(response_text, existing_hashes)
+        if not extractions:
+            sys.exit(0)
+        # --- Store via API (with time budget) ---
+        stored_hashes: List[str] = []
+        for extraction in extractions:
+            elapsed = time.time() - start
+            if elapsed >= TOTAL_TIME_BUDGET:
+                break
+            if _store_memory(extraction, project_path):
+                stored_hashes.append(extraction["hash"])
+        # --- Persist new hashes to cursor file ---
+        if stored_hashes:
+            _save_cursor_hashes(session_id, stored_hashes)
+        elapsed_total = round(time.time() - start, 3)
+        print(
+            f"[Stop] session={session_id} "
+            f"found={len(extractions)} stored={len(stored_hashes)} "
+            f"elapsed={elapsed_total}s",
+            file=sys.stderr,
+        )
+    except Exception as e:
+        elapsed = round(time.time() - start, 3)
+        print(f"[Stop] Error (non-fatal): {e} [{elapsed}s]", file=sys.stderr)
+    sys.exit(0)
+if __name__ == "__main__":
+    main()

package/install.py CHANGED Viewed

@@ -36,7 +36,8 @@ DEFAULT_CONFIG = {
     "HOST": "0.0.0.0",
     "MEMORY_AGENT_URL": "http://localhost:8102",
     "OLLAMA_HOST": "http://localhost:11434",
-    "EMBEDDING_MODEL": "nomic-embed-text",
+    "EMBEDDING_MODEL": "Alibaba-NLP/gte-large-en-v1.5",
+    "EMBEDDING_PROVIDER": "sentence-transformers",
     "LOG_LEVEL": "INFO",
     "USE_VECTOR_INDEX": "true",
     "DB_POOL_SIZE": "5",
@@ -47,12 +48,7 @@ DEFAULT_CONFIG = {
 # Claude Code settings paths
 def get_claude_settings_dir() -> Path:
     """Get the Claude Code settings directory."""
-    if sys.platform == "win32":
-        return Path.home() / ".claude"
-    elif sys.platform == "darwin":
-        return Path.home() / ".claude"
-    else:  # Linux
-        return Path.home() / ".claude"
+    return Path.home() / ".claude"
 def get_claude_settings_file() -> Path:
     """Get the Claude Code settings.json file path."""
@@ -280,17 +276,18 @@ def check_ollama() -> bool:
     print_warning("Ollama not detected")
     print("")
     print("  " + "="*56)
-    print("  OLLAMA REQUIRED FOR SEMANTIC SEARCH")
+    print("  OLLAMA (OPTIONAL)")
     print("  " + "="*56)
     print("")
-    print("  The memory agent needs Ollama for embeddings.")
-    print("  Without it, semantic search will not work.")
+    print("  Ollama is optional. The default provider (sentence-transformers)")
+    print("  runs locally without Ollama. Install Ollama only if you prefer")
+    print("  the Ollama provider.")
     print("")
-    print("  To install Ollama:")
+    print("  To install Ollama (if desired):")
     print("    1. Download from: https://ollama.ai/download")
     print("    2. Install and run: ollama pull nomic-embed-text")
     print("    3. Start Ollama: ollama serve")
-    print("    4. Re-run this installer")
+    print("    4. Set EMBEDDING_PROVIDER=ollama in .env")
     print("")
     return False
@@ -357,10 +354,13 @@ def create_env_file(config: Dict[str, str], force: bool = False) -> bool:
         f"PORT={config['PORT']}",
         f"MEMORY_AGENT_URL={config['MEMORY_AGENT_URL']}",
         "",
-        "# Ollama Configuration",
-        f"OLLAMA_HOST={config['OLLAMA_HOST']}",
+        "# Embedding Configuration",
+        f"EMBEDDING_PROVIDER={config.get('EMBEDDING_PROVIDER', 'sentence-transformers')}",
         f"EMBEDDING_MODEL={config['EMBEDDING_MODEL']}",
         "",
+        "# Ollama Configuration (only needed if EMBEDDING_PROVIDER=ollama)",
+        f"OLLAMA_HOST={config['OLLAMA_HOST']}",
+        "",
         "# Database Configuration",
         f"DATABASE_PATH={AGENT_DIR / 'memories.db'}",
         f"USE_VECTOR_INDEX={config['USE_VECTOR_INDEX']}",
@@ -452,10 +452,9 @@ echo "Memory Agent started (PID: $!)"
         return False
-def configure_claude_mcp(config: Dict[str, str]) -> bool:
-    """Configure Claude Code MCP settings."""
-    settings_file = get_claude_settings_file()
-    settings_dir = get_claude_settings_dir()
+def _write_mcp_settings(settings_file: Path, config: Dict[str, str]) -> bool:
+    """Write MCP settings to a given settings file."""
+    settings_dir = settings_file.parent
     # Ensure settings directory exists
     settings_dir.mkdir(parents=True, exist_ok=True)
@@ -465,7 +464,7 @@ def configure_claude_mcp(config: Dict[str, str]) -> bool:
         try:
             settings = json.loads(settings_file.read_text())
         except json.JSONDecodeError:
-            print_warning("Existing settings.json is invalid, creating backup")
+            print_warning(f"Existing {settings_file.name} is invalid, creating backup")
             shutil.copy(settings_file, settings_file.with_suffix(".json.bak"))
             settings = {}
     else:
@@ -478,7 +477,7 @@ def configure_claude_mcp(config: Dict[str, str]) -> bool:
     # Add/update claude-memory server configuration
     settings["mcpServers"]["claude-memory"] = {
         "command": sys.executable,
-        "args": [str(AGENT_DIR / "main.py")],
+        "args": [str(AGENT_DIR / "mcp_server.py")],
         "env": {
             "MEMORY_AGENT_URL": config["MEMORY_AGENT_URL"],
             "PORT": config["PORT"],
@@ -494,6 +493,35 @@ def configure_claude_mcp(config: Dict[str, str]) -> bool:
         return False
+def configure_claude_mcp(config: Dict[str, str], scope: str = "global", project_path: Optional[str] = None) -> bool:
+    """Configure Claude Code MCP settings.
+    Args:
+        config: Configuration dictionary with PORT, MEMORY_AGENT_URL, etc.
+        scope: Installation scope - 'global', 'project', or 'both'.
+        project_path: Project directory path for project-specific installation.
+    """
+    success = True
+    if scope in ("global", "both"):
+        settings_file = get_claude_settings_file()
+        if not _write_mcp_settings(settings_file, config):
+            success = False
+    if scope in ("project", "both"):
+        if project_path:
+            project_settings_dir = Path(project_path) / ".claude"
+            project_settings_file = project_settings_dir / "settings.local.json"
+            if not _write_mcp_settings(project_settings_file, config):
+                success = False
+        else:
+            print_warning("Project path not specified, skipping project-level MCP settings")
+            if scope == "project":
+                success = False
+    return success
 def setup_hooks(config: Dict[str, str]) -> bool:
     """Set up Claude Code hooks for auto-start and context injection."""
     hooks_dir = get_hooks_dir()
@@ -535,7 +563,7 @@ def setup_hooks(config: Dict[str, str]) -> bool:
     return True
-def configure_hooks_json() -> bool:
+def configure_hooks_json(auto: bool = False) -> bool:
     """Configure hooks.json to enable the hooks."""
     hooks_file = get_claude_settings_dir() / "hooks.json"
@@ -568,8 +596,9 @@ def configure_hooks_json() -> bool:
     if hooks_file.exists():
         try:
             existing = json.loads(hooks_file.read_text())
-            # Don't overwrite if user has customized
-            if prompt_yes_no("hooks.json exists. Update with memory agent hooks?", default=True):
+            # In auto mode, always merge; otherwise ask
+            should_update = auto or prompt_yes_no("hooks.json exists. Update with memory agent hooks?", default=True)
+            if should_update:
                 if "hooks" not in existing:
                     existing["hooks"] = {}
                 existing["hooks"].update(hooks_config["hooks"])
@@ -679,9 +708,10 @@ def print_post_install_instructions(config: Dict[str, str]):
     print("Next steps:")
     print("")
-    print("1. Make sure Ollama is running with the embedding model:")
-    print(f"   ollama pull {config['EMBEDDING_MODEL']}")
+    print("1. (Optional) If using Ollama provider, make sure Ollama is running:")
+    print(f"   ollama pull nomic-embed-text")
     print(f"   ollama serve")
+    print(f"   Then set EMBEDDING_PROVIDER=ollama in .env")
     print("")
     print("2. Start the Memory Agent:")
     print(f"   cd \"{AGENT_DIR}\"")
@@ -770,6 +800,28 @@ def main():
         action="store_true",
         help="Skip Claude Code installation check (for standalone use)"
     )
+    parser.add_argument(
+        "--skip-env",
+        action="store_true",
+        help="Skip .env file creation (already created by Node.js wizard)"
+    )
+    parser.add_argument(
+        "--scope",
+        choices=["global", "project", "both"],
+        default="global",
+        help="Installation scope for Claude Code settings"
+    )
+    parser.add_argument(
+        "--project-path",
+        type=str,
+        default=None,
+        help="Project path for project-specific installation"
+    )
+    parser.add_argument(
+        "--no-start",
+        action="store_true",
+        help="Don't auto-start the agent after installation"
+    )
     args = parser.parse_args()
@@ -810,7 +862,7 @@ def main():
                     if not install_claude_code():
                         print_error("Could not install Claude Code automatically.")
                         print("Please install manually: npm install -g @anthropic-ai/claude-code")
-                        if not prompt_yes_no("Continue anyway (memory agent only)?", default=False):
+                        if not args.auto and not prompt_yes_no("Continue anyway (memory agent only)?", default=False):
                             return 1
                     else:
                         claude_ok = True
@@ -841,7 +893,7 @@ def main():
                 config["OLLAMA_HOST"]
             )
-        if prompt_yes_no("Use default embedding model (nomic-embed-text)?"):
+        if prompt_yes_no("Use default embedding model (gte-large-en-v1.5 via sentence-transformers)?"):
             pass
         else:
             config["EMBEDDING_MODEL"] = prompt_value(
@@ -861,8 +913,11 @@ def main():
     # Step 4: Create .env file
     print_step(4, total_steps, "Creating configuration file...")
-    if not create_env_file(config, force=args.auto):
-        return 1
+    if not args.skip_env:
+        if not create_env_file(config, force=args.auto):
+            return 1
+    else:
+        print_success("Skipped .env creation (--skip-env)")
     # Step 5: Fix hardcoded values
     print_step(5, total_steps, "Fixing hardcoded values...")
@@ -879,11 +934,11 @@ def main():
     if claude_ok:
         if args.auto or prompt_yes_no("Configure Claude Code MCP settings?"):
-            configure_claude_mcp(config)
+            configure_claude_mcp(config, scope=args.scope, project_path=args.project_path)
         if args.auto or prompt_yes_no("Install Claude Code hooks?"):
             setup_hooks(config)
-            configure_hooks_json()
+            configure_hooks_json(auto=args.auto)
     else:
         print_warning("Skipping Claude Code configuration (Claude Code not installed)")
         print("  Run 'python install.py' again after installing Claude Code")
@@ -892,9 +947,11 @@ def main():
     print_step(8, total_steps, "Verifying installation...")
     verify_installation()
-    # Step 9: Auto-start agent if Ollama is ready
+    # Step 9: Auto-start agent
     print_step(9, total_steps, "Starting Memory Agent...")
-    if ollama_ok:
+    if args.no_start:
+        print_success("Skipped auto-start (--no-start)")
+    else:
         try:
             subprocess.run(
                 [sys.executable, str(AGENT_DIR / "memory-agent"), "start"],
@@ -904,10 +961,7 @@ def main():
             print_success("Memory Agent started!")
         except Exception as e:
             print_warning(f"Could not auto-start agent: {e}")
-            print("  Start manually with: claude-memory-agent start")
-    else:
-        print_warning("Skipping auto-start (Ollama not running)")
-        print("  After installing Ollama, run: claude-memory-agent start")
+            print("  Start manually with: python main.py")
     # Done!
     print_post_install_instructions(config)