claude-self-reflect 2.5.10 → 2.5.12

This diff shows the contents of publicly available package versions as released to the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registries.
@@ -0,0 +1,26 @@
1
+ FROM python:3.11-slim
2
+
3
+ # Install system dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ gcc \
6
+ g++ \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Set working directory
10
+ WORKDIR /app
11
+
12
+ # Copy requirements
13
+ COPY scripts/requirements.txt /app/
14
+
15
+ # Install Python dependencies
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy the streaming importer script
19
+ COPY scripts/streaming-importer.py /scripts/
20
+
21
+ # Set environment variables for memory management
22
+ ENV MALLOC_ARENA_MAX=2
23
+ ENV PYTHONUNBUFFERED=1
24
+
25
+ # Run the streaming importer
26
+ CMD ["python", "/scripts/streaming-importer.py"]
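This new Dockerfile packages the streaming importer as its own image. A minimal sketch of building and running it standalone, assuming it is saved as `Dockerfile.async-importer` (the filename referenced by the `async-importer` service in docker-compose below) and that a Qdrant instance is already reachable; paths and environment variables here mirror the compose service and are illustrative only.

```bash
# Build from the repository root (the -f filename is an assumption based on
# the docker-compose reference further down in this diff).
docker build -f Dockerfile.async-importer -t claude-reflect-importer .

# Run against an existing Qdrant; mounts and env vars follow the compose
# service definition and are examples, not authoritative defaults.
docker run --rm \
  -v ~/.claude/projects:/logs:ro \
  -v ~/.claude-self-reflect/config:/config \
  -e QDRANT_URL=http://host.docker.internal:6333 \
  claude-reflect-importer
```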
package/README.md CHANGED
@@ -6,6 +6,8 @@ Claude forgets everything. This fixes that.
6
6
 
7
7
  Ask Claude about past conversations. Get actual answers. **100% local by default** - your conversations never leave your machine. Cloud-enhanced search available when you need it.
8
8
 
9
+ **✅ Proven at Scale**: Successfully indexed 682 conversation files with 100% reliability. No data loss, no corruption, just seamless conversation memory that works.
10
+
9
11
  **Before**: "I don't have access to previous conversations"
10
12
  **After**:
11
13
  ```
@@ -129,9 +131,10 @@ Claude: [Searches across ALL your projects]
129
131
  Recent conversations matter more. Old ones fade. Like your brain, but reliable.
130
132
 
131
133
  ### 🚀 Performance
132
- - **Search**: 200-350ms response time
133
- - **Import**: 2-second response for new conversations
134
+ - **Search**: 200-350ms response time across 682 indexed conversations
135
+ - **Import**: 2-second response for new conversations
134
136
  - **Memory**: 50MB operational target with smart chunking
137
+ - **Scale**: 100% indexing success rate across all conversation types
135
138
 
136
139
  ## The Technical Stack
137
140
 
@@ -150,13 +153,14 @@ Recent conversations matter more. Old ones fade. Like your brain, but reliable.
150
153
 
151
154
  ## What's New
152
155
 
156
+ - **v2.5.11** - Critical cloud mode fix - Environment variables now properly passed to MCP server
157
+ - **v2.5.10** - Emergency hotfix for MCP server startup failure (dead code removal)
153
158
  - **v2.5.6** - Tool Output Extraction - Captures git changes & tool outputs for cross-agent discovery
154
159
  - **v2.5.5** - Critical dependency fix & streaming importer enhancements
155
160
  - **v2.5.4** - Documentation & bug fixes (import path & state file compatibility)
156
161
  - **v2.5.3** - Streamlined README & import architecture diagram
157
162
  - **v2.5.2** - State file compatibility fix
158
163
  - **v2.4.5** - 10-40x performance boost
159
- - **v2.4.3** - Project-scoped search
160
164
 
161
165
  [Full changelog](docs/release-history.md)
162
166
 
@@ -8,7 +8,7 @@ services:
8
8
  command: chown -R 1000:1000 /config
9
9
  volumes:
10
10
  - ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
11
- profiles: ["watch", "mcp", "import"]
11
+ profiles: ["watch", "mcp", "import", "async"]
12
12
 
13
13
  # Qdrant vector database - the heart of semantic search
14
14
  qdrant:
@@ -104,18 +104,59 @@ services:
104
104
  - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
105
105
  - VOYAGE_KEY=${VOYAGE_KEY:-}
106
106
  - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
107
- - WATCH_INTERVAL=${WATCH_INTERVAL:-5} # Testing with 5 second interval
108
- - MAX_MEMORY_MB=${MAX_MEMORY_MB:-350} # Total memory including model
109
- - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-100} # Memory for operations (increased for large file handling)
110
- - CHUNK_SIZE=${CHUNK_SIZE:-5}
107
+ - WATCH_INTERVAL=${WATCH_INTERVAL:-1} # Aggressive: 5x faster detection (minimum 1 second)
108
+ - MAX_MEMORY_MB=${MAX_MEMORY_MB:-2000} # Ultra conservative to prevent memory leak
109
+ - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-1500} # 1.5GB operational (25% of 8GB)
110
+ - CHUNK_SIZE=${CHUNK_SIZE:-5} # Minimal batch size
111
+ - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15} # Keep files HOT longer
112
+ - MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-5} # Single file processing
113
+ - PARALLEL_WORKERS=${PARALLEL_WORKERS:-8} # Enable parallel embedding workers
111
114
  - PYTHONUNBUFFERED=1
112
115
  - LOGS_DIR=/logs
113
116
  - FASTEMBED_CACHE_PATH=/root/.cache/fastembed
114
117
  - CURRENT_PROJECT_PATH=${PWD} # Pass current project path for prioritization
118
+ - MALLOC_ARENA_MAX=2 # MEMORY LEAK FIX: Limit glibc malloc arenas
119
+ - THREAD_POOL_WORKERS=${THREAD_POOL_WORKERS:-2} # AsyncEmbedder thread pool size (speed vs stability)
120
+ - THREAD_POOL_RECYCLE_FILES=${THREAD_POOL_RECYCLE_FILES:-50} # Files before recycling thread pool
115
121
  restart: unless-stopped
116
122
  profiles: ["watch"]
117
- mem_limit: 1g
118
- memswap_limit: 1g
123
+ mem_limit: 8g
124
+ memswap_limit: 8g
125
+
126
+ # Async streaming importer - Ground-up async rewrite
127
+ async-importer:
128
+ build:
129
+ context: .
130
+ dockerfile: Dockerfile.async-importer
131
+ container_name: claude-reflection-async
132
+ depends_on:
133
+ - qdrant
134
+ volumes:
135
+ - ${CLAUDE_LOGS_PATH:-~/.claude/projects}:/logs:ro
136
+ - ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
137
+ - ./scripts:/scripts:ro
138
+ environment:
139
+ - QDRANT_URL=http://qdrant:6333
140
+ - STATE_FILE=/config/imported-files.json
141
+ - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
142
+ - VOYAGE_KEY=${VOYAGE_KEY:-}
143
+ - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
144
+ - WATCH_INTERVAL=${WATCH_INTERVAL:-5}
145
+ - MAX_MEMORY_MB=${MAX_MEMORY_MB:-2000}
146
+ - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-1500}
147
+ - CHUNK_SIZE=${CHUNK_SIZE:-5}
148
+ - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15}
149
+ - MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-5}
150
+ - THREAD_POOL_WORKERS=${THREAD_POOL_WORKERS:-2}
151
+ - PYTHONUNBUFFERED=1
152
+ - LOGS_DIR=/logs
153
+ - FASTEMBED_CACHE_PATH=/root/.cache/fastembed
154
+ - CURRENT_PROJECT_PATH=${PWD}
155
+ - MALLOC_ARENA_MAX=2
156
+ restart: unless-stopped
157
+ profiles: ["async"]
158
+ mem_limit: 4g
159
+ memswap_limit: 4g
119
160
 
120
161
  # MCP server for Claude integration
121
162
  mcp-server:
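The new `async` profile (also added to the setup-helper's profile list above) gates the async-importer service, so it only starts when explicitly requested. A hedged usage sketch: the variable names come straight from the service definition, while the override values are examples only and fall back to the defaults shown above when unset.

```bash
# Start the services that carry the new "async" profile
docker compose --profile async up -d

# Override the new tuning knobs without editing the compose file; compose
# interpolates ${VAR:-default} from the shell environment.
WATCH_INTERVAL=5 MAX_COLD_FILES_PER_CYCLE=2 THREAD_POOL_WORKERS=2 \
  docker compose --profile async up -d async-importer
```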
package/installer/cli.js CHANGED
@@ -10,6 +10,7 @@ const __dirname = dirname(__filename);
10
10
 
11
11
  const commands = {
12
12
  setup: 'Run the setup wizard to configure Claude Self-Reflect',
13
+ status: 'Get indexing status as JSON (overall + per-project breakdown)',
13
14
  doctor: 'Check your installation and diagnose issues',
14
15
  help: 'Show this help message'
15
16
  };
@@ -27,6 +28,53 @@ async function setup() {
27
28
  });
28
29
  }
29
30
 
31
+ async function status() {
32
+ // Call the Python MCP server's --status command
33
+ const mcpServerPath = join(__dirname, '..', 'mcp-server');
34
+ const venvPython = join(mcpServerPath, 'venv', 'bin', 'python');
35
+ const mcpModule = join(mcpServerPath, 'src');
36
+
37
+ try {
38
+ const child = spawn(venvPython, ['-m', 'src', '--status'], {
39
+ cwd: mcpServerPath,
40
+ stdio: ['inherit', 'pipe', 'pipe']
41
+ });
42
+
43
+ let stdout = '';
44
+ let stderr = '';
45
+
46
+ child.stdout.on('data', (data) => {
47
+ stdout += data.toString();
48
+ });
49
+
50
+ child.stderr.on('data', (data) => {
51
+ stderr += data.toString();
52
+ });
53
+
54
+ child.on('exit', (code) => {
55
+ if (code === 0) {
56
+ // Output the JSON directly for other tools to parse
57
+ process.stdout.write(stdout);
58
+ process.exit(0);
59
+ } else {
60
+ console.error('Error getting status:', stderr || 'Unknown error');
61
+ process.exit(1);
62
+ }
63
+ });
64
+
65
+ // Handle timeout
66
+ setTimeout(() => {
67
+ child.kill('SIGTERM');
68
+ console.error('Status check timed out');
69
+ process.exit(1);
70
+ }, 10000); // 10 second timeout
71
+
72
+ } catch (error) {
73
+ console.error('Failed to execute status command:', error.message);
74
+ process.exit(1);
75
+ }
76
+ }
77
+
30
78
  async function doctor() {
31
79
  console.log('🔍 Checking Claude Self-Reflect installation...\n');
32
80
 
@@ -124,8 +172,11 @@ function help() {
124
172
  console.log(' claude-self-reflect setup --voyage-key=pa-1234567890');
125
173
  console.log(' claude-self-reflect setup --local');
126
174
  console.log(' claude-self-reflect setup --debug # For troubleshooting');
175
+ console.log(' claude-self-reflect status # Get indexing status as JSON');
127
176
 
128
- console.log('\nFor more information: https://github.com/ramakay/claude-self-reflect');
177
+ console.log('\nFor more information:');
178
+ console.log(' Documentation: https://github.com/ramakay/claude-self-reflect');
179
+ console.log(' Status API: See docs/api-reference.md#cli-status-interface');
129
180
  }
130
181
 
131
182
  // Main
@@ -135,6 +186,9 @@ switch (command) {
135
186
  case 'setup':
136
187
  setup();
137
188
  break;
189
+ case 'status':
190
+ status();
191
+ break;
138
192
  case 'doctor':
139
193
  doctor();
140
194
  break;
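With the `status` subcommand wired into the CLI above, other tooling can poll indexing progress as machine-readable JSON. A small usage sketch; the `.overall.percentage` field name follows the structure produced by the status module further down in this diff, and `jq` is assumed to be installed.

```bash
# Print the full status document
claude-self-reflect status

# Pull just the overall completion percentage, e.g. for a shell status bar
claude-self-reflect status | jq '.overall.percentage'
```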
@@ -21,5 +21,53 @@ else
21
21
  source venv/bin/activate
22
22
  fi
23
23
 
24
+ # CRITICAL FIX: Pass through environment variables from Claude Code
25
+ # These environment variables are set by `claude mcp add -e KEY=value`
26
+ # Export them so the Python process can access them
27
+ if [ ! -z "$VOYAGE_KEY" ]; then
28
+ export VOYAGE_KEY="$VOYAGE_KEY"
29
+ fi
30
+
31
+ if [ ! -z "$VOYAGE_KEY_2" ]; then
32
+ export VOYAGE_KEY_2="$VOYAGE_KEY_2"
33
+ fi
34
+
35
+ if [ ! -z "$PREFER_LOCAL_EMBEDDINGS" ]; then
36
+ export PREFER_LOCAL_EMBEDDINGS="$PREFER_LOCAL_EMBEDDINGS"
37
+ fi
38
+
39
+ if [ ! -z "$QDRANT_URL" ]; then
40
+ export QDRANT_URL="$QDRANT_URL"
41
+ fi
42
+
43
+ if [ ! -z "$ENABLE_MEMORY_DECAY" ]; then
44
+ export ENABLE_MEMORY_DECAY="$ENABLE_MEMORY_DECAY"
45
+ fi
46
+
47
+ if [ ! -z "$DECAY_WEIGHT" ]; then
48
+ export DECAY_WEIGHT="$DECAY_WEIGHT"
49
+ fi
50
+
51
+ if [ ! -z "$DECAY_SCALE_DAYS" ]; then
52
+ export DECAY_SCALE_DAYS="$DECAY_SCALE_DAYS"
53
+ fi
54
+
55
+ if [ ! -z "$EMBEDDING_MODEL" ]; then
56
+ export EMBEDDING_MODEL="$EMBEDDING_MODEL"
57
+ fi
58
+
59
+ # The embedding manager now handles cache properly in a controlled directory
60
+ # Set to 'false' if you want to use HuggingFace instead of Qdrant CDN
61
+ if [ -z "$FASTEMBED_SKIP_HUGGINGFACE" ]; then
62
+ export FASTEMBED_SKIP_HUGGINGFACE=true
63
+ fi
64
+
65
+ # Debug: Show what environment variables are being passed
66
+ echo "[DEBUG] Environment variables for MCP server:"
67
+ echo "[DEBUG] VOYAGE_KEY: ${VOYAGE_KEY:+set}"
68
+ echo "[DEBUG] PREFER_LOCAL_EMBEDDINGS: ${PREFER_LOCAL_EMBEDDINGS:-not set}"
69
+ echo "[DEBUG] QDRANT_URL: ${QDRANT_URL:-not set}"
70
+ echo "[DEBUG] ENABLE_MEMORY_DECAY: ${ENABLE_MEMORY_DECAY:-not set}"
71
+
24
72
  # Run the MCP server
25
73
  exec python -m src
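These exports matter because Claude Code launches this wrapper with whatever `-e` values were supplied when the server was registered. A hedged registration example: the server name and script path are placeholders for a local checkout, and the exact flag ordering can differ between Claude Code versions, so check `claude mcp add --help` before copying.

```bash
# Placeholder name/path; variables listed here are ones the wrapper forwards.
claude mcp add claude-self-reflect \
  -e PREFER_LOCAL_EMBEDDINGS=true \
  -e QDRANT_URL=http://localhost:6333 \
  -e ENABLE_MEMORY_DECAY=true \
  -- /path/to/mcp-server/run-mcp.sh
```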
@@ -10,9 +10,22 @@ def main():
10
10
  "--transport",
11
11
  choices=["stdio", "sse"],
12
12
  default="stdio",
13
+ help="Transport protocol for MCP server (default: stdio)"
14
+ )
15
+ parser.add_argument(
16
+ "--status",
17
+ action="store_true",
18
+ help="Get indexing status as JSON with overall and per-project breakdown"
13
19
  )
14
20
  args = parser.parse_args()
15
21
 
22
+ # Handle status request with early exit (avoid loading heavy MCP dependencies)
23
+ if args.status:
24
+ from .status import get_status
25
+ import json
26
+ print(json.dumps(get_status()))
27
+ return
28
+
16
29
  # Import is done here to make sure environment variables are loaded
17
30
  from .server import mcp
18
31
 
@@ -0,0 +1,237 @@
1
+ """Robust embedding model manager with proper cache handling."""
2
+
3
+ import os
4
+ import sys
5
+ import time
6
+ import logging
7
+ import shutil
8
+ from typing import Optional, List, Union
9
+ from pathlib import Path
10
+ import threading
11
+ import signal
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class EmbeddingManager:
16
+ """Manages embedding models with proper cache and lock handling."""
17
+
18
+ def __init__(self):
19
+ self.model = None
20
+ self.model_type = None # 'local' or 'voyage'
21
+ self.voyage_client = None
22
+
23
+ # Configuration
24
+ self.prefer_local = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
25
+ self.voyage_key = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
26
+ self.embedding_model = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
27
+ self.download_timeout = int(os.getenv('FASTEMBED_DOWNLOAD_TIMEOUT', '30'))
28
+
29
+ # Set cache directory to our controlled location
30
+ self.cache_dir = Path(__file__).parent.parent / '.fastembed-cache'
31
+
32
+ def _clean_stale_locks(self):
33
+ """Clean up any stale lock files from previous runs."""
34
+ locks_dir = self.cache_dir / '.locks'
35
+ if locks_dir.exists():
36
+ logger.info(f"Cleaning stale locks in {locks_dir}")
37
+ try:
38
+ # Remove all lock files older than 5 minutes
39
+ import time
40
+ current_time = time.time()
41
+ for lock_file in locks_dir.glob('**/*.lock'):
42
+ try:
43
+ age = current_time - lock_file.stat().st_mtime
44
+ if age > 300: # 5 minutes
45
+ lock_file.unlink()
46
+ logger.debug(f"Removed stale lock: {lock_file.name}")
47
+ except Exception as e:
48
+ logger.debug(f"Could not remove lock {lock_file}: {e}")
49
+ except Exception as e:
50
+ logger.warning(f"Error cleaning locks: {e}")
51
+
52
+ def initialize(self) -> bool:
53
+ """Initialize embedding model based on user preference."""
54
+ logger.info("Initializing embedding manager...")
55
+
56
+ # Clean up any stale locks first
57
+ self._clean_stale_locks()
58
+
59
+ if self.prefer_local:
60
+ # User wants local - try local only, don't fallback to cloud
61
+ if self._try_initialize_local():
62
+ return True
63
+ logger.error("Local embeddings failed and user prefers local - not falling back to cloud")
64
+ return False
65
+ else:
66
+ # User prefers Voyage AI
67
+ if self.voyage_key and self._try_initialize_voyage():
68
+ return True
69
+ logger.warning("Voyage AI failed, trying local as fallback...")
70
+ if self._try_initialize_local():
71
+ return True
72
+ logger.error("Both Voyage AI and local embeddings failed")
73
+ return False
74
+
75
+ def _try_initialize_local(self) -> bool:
76
+ """Try to initialize local FastEmbed model with timeout and optimizations."""
77
+ try:
78
+ logger.info(f"Attempting to load local model: {self.embedding_model}")
79
+
80
+ # CRITICAL OPTIMIZATION: Set thread limits BEFORE loading model
81
+ # This prevents ONNX Runtime and BLAS from over-subscribing CPU
82
+ os.environ['OMP_NUM_THREADS'] = '1'
83
+ os.environ['MKL_NUM_THREADS'] = '1'
84
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
85
+ os.environ['NUMEXPR_NUM_THREADS'] = '1'
86
+ logger.info("Set thread limits to prevent CPU over-subscription")
87
+
88
+ # Ensure cache directory exists and is writable
89
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
90
+
91
+ # Set FASTEMBED_CACHE_PATH to our controlled directory
92
+ os.environ['FASTEMBED_CACHE_PATH'] = str(self.cache_dir)
93
+ logger.info(f"Using cache directory: {self.cache_dir}")
94
+
95
+ # Also set HF_HOME to avoid any HuggingFace cache issues
96
+ os.environ['HF_HOME'] = str(self.cache_dir / 'huggingface')
97
+
98
+ model_cache = self.cache_dir / 'models--qdrant--all-MiniLM-L6-v2-onnx'
99
+
100
+ if model_cache.exists():
101
+ logger.info("Model cache found, loading from cache...")
102
+ else:
103
+ logger.info(f"Model cache not found, will download (timeout: {self.download_timeout}s)")
104
+ logger.info("Note: First download may take 1-2 minutes")
105
+
106
+ # Force alternative download if HuggingFace is problematic
107
+ # This uses Qdrant's CDN which is more reliable
108
+ if os.getenv('FASTEMBED_SKIP_HUGGINGFACE', 'true').lower() == 'true':
109
+ os.environ['HF_HUB_OFFLINE'] = '1'
110
+ logger.info("Using alternative download sources (Qdrant CDN)")
111
+
112
+ # Use a thread with timeout for model initialization
113
+ success = False
114
+ error = None
115
+
116
+ def init_model():
117
+ nonlocal success, error
118
+ try:
119
+ from fastembed import TextEmbedding
120
+ # Initialize with optimized settings
121
+ # Note: FastEmbed uses these environment variables internally
122
+ self.model = TextEmbedding(
123
+ model_name=self.embedding_model,
124
+ threads=1 # Single thread per worker to prevent over-subscription
125
+ )
126
+ self.model_type = 'local'
127
+ success = True
128
+ logger.info(f"Successfully initialized local model: {self.embedding_model} with single-thread mode")
129
+ except Exception as e:
130
+ error = e
131
+ logger.error(f"Failed to initialize local model: {e}")
132
+
133
+ # Start initialization in a thread
134
+ thread = threading.Thread(target=init_model)
135
+ thread.daemon = True
136
+ thread.start()
137
+ thread.join(timeout=self.download_timeout)
138
+
139
+ if thread.is_alive():
140
+ logger.error(f"Model initialization timed out after {self.download_timeout}s")
141
+ logger.info("Tip: Set FASTEMBED_SKIP_HUGGINGFACE=true to use alternative download sources")
142
+ # Thread will continue in background but we move on
143
+ return False
144
+
145
+ return success
146
+
147
+ except ImportError:
148
+ logger.error("FastEmbed not installed. Install with: pip install fastembed")
149
+ return False
150
+ except Exception as e:
151
+ logger.error(f"Unexpected error initializing local embeddings: {e}")
152
+ return False
153
+
154
+ def _try_initialize_voyage(self) -> bool:
155
+ """Try to initialize Voyage AI client."""
156
+ try:
157
+ logger.info("Attempting to initialize Voyage AI...")
158
+ import voyageai
159
+ self.voyage_client = voyageai.Client(api_key=self.voyage_key)
160
+
161
+ # Test the client with a simple embedding
162
+ test_result = self.voyage_client.embed(
163
+ texts=["test"],
164
+ model="voyage-3",
165
+ input_type="document"
166
+ )
167
+
168
+ if test_result and test_result.embeddings:
169
+ self.model_type = 'voyage'
170
+ logger.info("Successfully initialized Voyage AI")
171
+ return True
172
+ else:
173
+ logger.error("Voyage AI test embedding failed")
174
+ return False
175
+
176
+ except Exception as e:
177
+ logger.error(f"Failed to initialize Voyage AI: {e}")
178
+ return False
179
+
180
+ def embed(self, texts: Union[str, List[str]], input_type: str = "document") -> Optional[List[List[float]]]:
181
+ """Generate embeddings using the active model."""
182
+ if not self.model and not self.voyage_client:
183
+ logger.error("No embedding model initialized")
184
+ return None
185
+
186
+ # Ensure texts is a list
187
+ if isinstance(texts, str):
188
+ texts = [texts]
189
+
190
+ try:
191
+ if self.model_type == 'local':
192
+ # FastEmbed returns a generator, convert to list
193
+ embeddings = list(self.model.embed(texts))
194
+ return [emb.tolist() for emb in embeddings]
195
+
196
+ elif self.model_type == 'voyage':
197
+ result = self.voyage_client.embed(
198
+ texts=texts,
199
+ model="voyage-3-lite" if input_type == "query" else "voyage-3",
200
+ input_type=input_type
201
+ )
202
+ return result.embeddings
203
+
204
+ except Exception as e:
205
+ logger.error(f"Error generating embeddings: {e}")
206
+ return None
207
+
208
+ def get_vector_dimension(self) -> int:
209
+ """Get the dimension of embeddings."""
210
+ if self.model_type == 'local':
211
+ return 384 # all-MiniLM-L6-v2 dimension
212
+ elif self.model_type == 'voyage':
213
+ return 1024 # voyage-3 dimension
214
+ return 0
215
+
216
+ def get_model_info(self) -> dict:
217
+ """Get information about the active model."""
218
+ return {
219
+ 'type': self.model_type,
220
+ 'model': self.embedding_model if self.model_type == 'local' else 'voyage-3',
221
+ 'dimension': self.get_vector_dimension(),
222
+ 'prefer_local': self.prefer_local,
223
+ 'has_voyage_key': bool(self.voyage_key)
224
+ }
225
+
226
+
227
+ # Global instance
228
+ _embedding_manager = None
229
+
230
+ def get_embedding_manager() -> EmbeddingManager:
231
+ """Get or create the global embedding manager."""
232
+ global _embedding_manager
233
+ if _embedding_manager is None:
234
+ _embedding_manager = EmbeddingManager()
235
+ if not _embedding_manager.initialize():
236
+ raise RuntimeError("Failed to initialize any embedding model")
237
+ return _embedding_manager
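A minimal sketch of exercising the manager outside the MCP server, for example to verify that the cache directory and thread-limit handling behave as expected. The module path is an assumption based on server.py importing it as `.embedding_manager` from the `src` package; run from the mcp-server directory with its venv active, and keep `PREFER_LOCAL_EMBEDDINGS=true` to stay offline-friendly.

```bash
PREFER_LOCAL_EMBEDDINGS=true python - <<'PY'
from src.embedding_manager import get_embedding_manager  # assumed module path

manager = get_embedding_manager()          # raises RuntimeError if init fails
print(manager.get_model_info())            # type/model/dimension summary
vectors = manager.embed(["hello world"])   # list of vectors (384-dim for local)
print(len(vectors[0]))
PY
```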
@@ -36,37 +36,48 @@ except ImportError:
36
36
  import voyageai
37
37
  from dotenv import load_dotenv
38
38
 
39
- # Load environment variables
39
+ # Load environment variables from .env file (fallback only)
40
40
  env_path = Path(__file__).parent.parent.parent / '.env'
41
- load_dotenv(env_path)
41
+ load_dotenv(env_path, override=False) # Don't override process environment
42
42
 
43
- # Configuration
43
+ # Configuration - prioritize process environment variables over .env file
44
44
  QDRANT_URL = os.getenv('QDRANT_URL', 'http://localhost:6333')
45
- VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
45
+ VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2') or os.getenv('VOYAGE_KEY_2')
46
46
  ENABLE_MEMORY_DECAY = os.getenv('ENABLE_MEMORY_DECAY', 'false').lower() == 'true'
47
47
  DECAY_WEIGHT = float(os.getenv('DECAY_WEIGHT', '0.3'))
48
48
  DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
49
49
  USE_NATIVE_DECAY = os.getenv('USE_NATIVE_DECAY', 'false').lower() == 'true'
50
50
 
51
- # Embedding configuration
52
- PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'false').lower() == 'true'
51
+ # Embedding configuration - now using lazy initialization
52
+ # CRITICAL: Default changed to 'true' for local embeddings for privacy
53
+ PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
53
54
  EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
54
55
 
55
- # Initialize Voyage AI client (only if not using local embeddings)
56
- voyage_client = None
57
- if not PREFER_LOCAL_EMBEDDINGS and VOYAGE_API_KEY:
58
- voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
56
+ # Import the robust embedding manager
57
+ from .embedding_manager import get_embedding_manager
59
58
 
60
- # Initialize local embedding model if needed
61
- local_embedding_model = None
62
- if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
59
+ # Lazy initialization - models will be loaded on first use
60
+ embedding_manager = None
61
+ voyage_client = None # Keep for backward compatibility
62
+ local_embedding_model = None # Keep for backward compatibility
63
+
64
+ def initialize_embeddings():
65
+ """Initialize embedding models with robust fallback."""
66
+ global embedding_manager, voyage_client, local_embedding_model
63
67
  try:
64
- from fastembed import TextEmbedding
65
- local_embedding_model = TextEmbedding(model_name=EMBEDDING_MODEL)
66
- print(f"[DEBUG] Initialized local embedding model: {EMBEDDING_MODEL}")
67
- except ImportError:
68
- print("[ERROR] FastEmbed not available. Install with: pip install fastembed")
69
- raise
68
+ embedding_manager = get_embedding_manager()
69
+ print(f"[INFO] Embedding manager initialized: {embedding_manager.get_model_info()}")
70
+
71
+ # Set backward compatibility references
72
+ if embedding_manager.model_type == 'voyage':
73
+ voyage_client = embedding_manager.voyage_client
74
+ elif embedding_manager.model_type == 'local':
75
+ local_embedding_model = embedding_manager.model
76
+
77
+ return True
78
+ except Exception as e:
79
+ print(f"[ERROR] Failed to initialize embeddings: {e}")
80
+ return False
70
81
 
71
82
  # Debug environment loading
72
83
  print(f"[DEBUG] Environment variables loaded:")
@@ -88,6 +99,7 @@ class SearchResult(BaseModel):
88
99
  excerpt: str
89
100
  project_name: str
90
101
  conversation_id: Optional[str] = None
102
+ base_conversation_id: Optional[str] = None
91
103
  collection_name: str
92
104
  raw_payload: Optional[Dict[str, Any]] = None # Full Qdrant payload when debug mode enabled
93
105
 
@@ -100,6 +112,99 @@ mcp = FastMCP(
100
112
 
101
113
  # Create Qdrant client
102
114
  qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
115
+
116
+ # Track indexing status (updated periodically)
117
+ indexing_status = {
118
+ "last_check": 0,
119
+ "indexed_conversations": 0,
120
+ "total_conversations": 0,
121
+ "percentage": 100.0,
122
+ "backlog_count": 0,
123
+ "is_checking": False
124
+ }
125
+
126
+ async def update_indexing_status():
127
+ """Update indexing status by checking JSONL files vs Qdrant collections.
128
+ This is a lightweight check that compares file counts, not full content."""
129
+ global indexing_status
130
+
131
+ # Don't run concurrent checks
132
+ if indexing_status["is_checking"]:
133
+ return
134
+
135
+ # Only check every 5 minutes to avoid overhead
136
+ current_time = time.time()
137
+ if current_time - indexing_status["last_check"] < 300: # 5 minutes
138
+ return
139
+
140
+ indexing_status["is_checking"] = True
141
+
142
+ try:
143
+ # Count total JSONL files
144
+ projects_dir = Path.home() / ".claude" / "projects"
145
+ total_files = 0
146
+ indexed_files = 0
147
+
148
+ if projects_dir.exists():
149
+ # Get all JSONL files
150
+ jsonl_files = list(projects_dir.glob("**/*.jsonl"))
151
+ total_files = len(jsonl_files)
152
+
153
+ # Check imported-files.json to see what's been imported
154
+ # The streaming importer uses imported-files.json with nested structure
155
+ # Try multiple possible locations for the config file
156
+ possible_paths = [
157
+ Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
158
+ Path(__file__).parent.parent.parent / "config" / "imported-files.json",
159
+ Path("/config/imported-files.json") # Docker path if running in container
160
+ ]
161
+
162
+ imported_files_path = None
163
+ for path in possible_paths:
164
+ if path.exists():
165
+ imported_files_path = path
166
+ break
167
+
168
+ if imported_files_path and imported_files_path.exists():
169
+ with open(imported_files_path, 'r') as f:
170
+ imported_data = json.load(f)
171
+ # The file has nested structure: {stream_position: {file: position}, imported_files: {file: lines}}
172
+ # Handle new nested structure
173
+ stream_position = imported_data.get("stream_position", {})
174
+ imported_files_list = stream_position.get("imported_files", [])
175
+ file_metadata = stream_position.get("file_metadata", {})
176
+
177
+ # Count files that have been imported
178
+ for file_path in jsonl_files:
179
+ # Try multiple path formats to match Docker's state file
180
+ file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
181
+ # Also try without .claude/projects prefix (Docker mounts directly)
182
+ file_str_alt = file_str.replace("/.claude/projects", "")
183
+
184
+ # Check if file is in imported_files list (fully imported)
185
+ if file_str in imported_files_list or file_str_alt in imported_files_list:
186
+ indexed_files += 1
187
+ # Or if it has metadata with position > 0 (partially imported)
188
+ elif file_str in file_metadata and file_metadata[file_str].get("position", 0) > 0:
189
+ indexed_files += 1
190
+ elif file_str_alt in file_metadata and file_metadata[file_str_alt].get("position", 0) > 0:
191
+ indexed_files += 1
192
+
193
+ # Update status
194
+ indexing_status["last_check"] = current_time
195
+ indexing_status["total_conversations"] = total_files
196
+ indexing_status["indexed_conversations"] = indexed_files
197
+ indexing_status["backlog_count"] = total_files - indexed_files
198
+
199
+ if total_files > 0:
200
+ indexing_status["percentage"] = (indexed_files / total_files) * 100
201
+ else:
202
+ indexing_status["percentage"] = 100.0
203
+
204
+ except Exception as e:
205
+ print(f"[WARNING] Failed to update indexing status: {e}")
206
+ finally:
207
+ indexing_status["is_checking"] = False
103
208
 
104
209
  async def get_all_collections() -> List[str]:
105
210
  """Get all collections (both Voyage and local)."""
@@ -115,12 +220,23 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
115
220
  text: Text to embed
116
221
  force_type: Force specific embedding type ('local' or 'voyage')
117
222
  """
118
- use_local = force_type == 'local' if force_type else (PREFER_LOCAL_EMBEDDINGS or not voyage_client)
223
+ global embedding_manager, voyage_client, local_embedding_model
224
+
225
+ # Initialize on first use
226
+ if embedding_manager is None:
227
+ if not initialize_embeddings():
228
+ raise RuntimeError("Failed to initialize any embedding model. Check logs for details.")
229
+
230
+ # Determine which type to use
231
+ if force_type:
232
+ use_local = force_type == 'local'
233
+ else:
234
+ use_local = embedding_manager.model_type == 'local'
119
235
 
120
236
  if use_local:
121
237
  # Use local embeddings
122
238
  if not local_embedding_model:
123
- raise ValueError("Local embedding model not initialized")
239
+ raise ValueError("Local embedding model not available")
124
240
 
125
241
  # Run in executor since fastembed is synchronous
126
242
  loop = asyncio.get_event_loop()
@@ -131,7 +247,7 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
131
247
  else:
132
248
  # Use Voyage AI
133
249
  if not voyage_client:
134
- raise ValueError("Voyage client not initialized")
250
+ raise ValueError("Voyage client not available")
135
251
  result = voyage_client.embed(
136
252
  texts=[text],
137
253
  model="voyage-3-large",
@@ -417,6 +533,7 @@ async def reflect_on_past(
417
533
  excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
418
534
  project_name=point_project,
419
535
  conversation_id=point.payload.get('conversation_id'),
536
+ base_conversation_id=point.payload.get('base_conversation_id'),
420
537
  collection_name=collection_name,
421
538
  raw_payload=point.payload if include_raw else None
422
539
  ))
@@ -496,6 +613,7 @@ async def reflect_on_past(
496
613
  excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
497
614
  project_name=point_project,
498
615
  conversation_id=point.payload.get('conversation_id'),
616
+ base_conversation_id=point.payload.get('base_conversation_id'),
499
617
  collection_name=collection_name,
500
618
  raw_payload=point.payload if include_raw else None
501
619
  ))
@@ -532,6 +650,7 @@ async def reflect_on_past(
532
650
  excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
533
651
  project_name=point_project,
534
652
  conversation_id=point.payload.get('conversation_id'),
653
+ base_conversation_id=point.payload.get('base_conversation_id'),
535
654
  collection_name=collection_name,
536
655
  raw_payload=point.payload if include_raw else None
537
656
  ))
@@ -552,6 +671,30 @@ async def reflect_on_past(
552
671
  message="Search complete, processing results"
553
672
  )
554
673
 
674
+ # Apply base_conversation_id boosting before sorting
675
+ timing_info['boost_start'] = time.time()
676
+
677
+ # Group results by base_conversation_id to identify related chunks
678
+ base_conversation_groups = {}
679
+ for result in all_results:
680
+ base_id = result.base_conversation_id
681
+ if base_id:
682
+ if base_id not in base_conversation_groups:
683
+ base_conversation_groups[base_id] = []
684
+ base_conversation_groups[base_id].append(result)
685
+
686
+ # Apply boost to results from base conversations with multiple high-scoring chunks
687
+ base_conversation_boost = 0.1 # Boost factor for base conversation matching
688
+ for base_id, group_results in base_conversation_groups.items():
689
+ if len(group_results) > 1: # Multiple chunks from same base conversation
690
+ avg_score = sum(r.score for r in group_results) / len(group_results)
691
+ if avg_score > 0.8: # Only boost high-quality base conversations
692
+ for result in group_results:
693
+ result.score += base_conversation_boost
694
+ await ctx.debug(f"Boosted result from base_conversation_id {base_id}: {result.score:.3f}")
695
+
696
+ timing_info['boost_end'] = time.time()
697
+
555
698
  # Sort by score and limit
556
699
  timing_info['sort_start'] = time.time()
557
700
  all_results.sort(key=lambda x: x.score, reverse=True)
@@ -561,12 +704,89 @@ async def reflect_on_past(
561
704
  if not all_results:
562
705
  return f"No conversations found matching '{query}'. Try different keywords or check if conversations have been imported."
563
706
 
707
+ # Update indexing status before returning results
708
+ await update_indexing_status()
709
+
564
710
  # Format results based on response_format
565
711
  timing_info['format_start'] = time.time()
566
712
 
567
713
  if response_format == "xml":
714
+ # Add upfront summary for immediate visibility (before collapsible XML)
715
+ upfront_summary = ""
716
+
717
+ # Show indexing status prominently
718
+ if indexing_status["percentage"] < 95.0:
719
+ upfront_summary += f"📊 INDEXING: {indexing_status['indexed_conversations']}/{indexing_status['total_conversations']} conversations ({indexing_status['percentage']:.1f}% complete, {indexing_status['backlog_count']} pending)\n"
720
+
721
+ # Show result summary
722
+ if all_results:
723
+ score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
724
+ upfront_summary += f"🎯 RESULTS: {len(all_results)} matches ({score_info} relevance, top score: {all_results[0].score:.3f})\n"
725
+
726
+ # Show performance
727
+ total_time = time.time() - start_time
728
+ upfront_summary += f"⚡ PERFORMANCE: {int(total_time * 1000)}ms total ({len(collections_to_search)} collections searched)\n"
729
+ else:
730
+ upfront_summary += f"❌ NO RESULTS: No conversations found matching '{query}'\n"
731
+
568
732
  # XML format (compact tags for performance)
569
- result_text = "<search>\n"
733
+ result_text = upfront_summary + "\n<search>\n"
734
+
735
+ # Add indexing status if not fully baselined - put key stats in opening tag for immediate visibility
736
+ if indexing_status["percentage"] < 95.0:
737
+ result_text += f' <info status="indexing" progress="{indexing_status["percentage"]:.1f}%" backlog="{indexing_status["backlog_count"]}">\n'
738
+ result_text += f' <message>📊 Indexing: {indexing_status["indexed_conversations"]}/{indexing_status["total_conversations"]} conversations ({indexing_status["percentage"]:.1f}% complete, {indexing_status["backlog_count"]} pending)</message>\n'
739
+ result_text += f" </info>\n"
740
+
741
+ # Add high-level result summary
742
+ if all_results:
743
+ # Count today's results
744
+ now = datetime.now(timezone.utc)
745
+ today_count = 0
746
+ yesterday_count = 0
747
+ week_count = 0
748
+
749
+ for result in all_results:
750
+ timestamp_clean = result.timestamp.replace('Z', '+00:00') if result.timestamp.endswith('Z') else result.timestamp
751
+ timestamp_dt = datetime.fromisoformat(timestamp_clean)
752
+ if timestamp_dt.tzinfo is None:
753
+ timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
754
+
755
+ days_ago = (now - timestamp_dt).days
756
+ if days_ago == 0:
757
+ today_count += 1
758
+ elif days_ago == 1:
759
+ yesterday_count += 1
760
+ if days_ago <= 7:
761
+ week_count += 1
762
+
763
+ # Compact summary with key info in opening tag
764
+ time_info = ""
765
+ if today_count > 0:
766
+ time_info = f"{today_count} today"
767
+ elif yesterday_count > 0:
768
+ time_info = f"{yesterday_count} yesterday"
769
+ elif week_count > 0:
770
+ time_info = f"{week_count} this week"
771
+ else:
772
+ time_info = "older results"
773
+
774
+ score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
775
+
776
+ result_text += f' <summary count="{len(all_results)}" relevance="{score_info}" recency="{time_info}" top-score="{all_results[0].score:.3f}">\n'
777
+
778
+ # Short preview of top result
779
+ top_excerpt = all_results[0].excerpt[:100].strip()
780
+ if '...' not in top_excerpt:
781
+ top_excerpt += "..."
782
+ result_text += f' <preview>{top_excerpt}</preview>\n'
783
+ result_text += f" </summary>\n"
784
+ else:
785
+ result_text += f" <result-summary>\n"
786
+ result_text += f" <headline>No matches found</headline>\n"
787
+ result_text += f" <relevance>No conversations matched your query</relevance>\n"
788
+ result_text += f" </result-summary>\n"
789
+
570
790
  result_text += f" <meta>\n"
571
791
  result_text += f" <q>{query}</q>\n"
572
792
  result_text += f" <scope>{target_project if target_project != 'all' else 'all'}</scope>\n"
@@ -0,0 +1,135 @@
1
+ """Ultra-fast status checker for Claude Self Reflect indexing progress.
2
+
3
+ This module provides lightweight indexing status without loading heavy MCP dependencies.
4
+ Designed for <20ms execution time to support status bars and shell scripts.
5
+ """
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from collections import defaultdict
10
+
11
+
12
+ def extract_project_name_from_path(file_path: str) -> str:
13
+ """Extract project name from JSONL file path.
14
+
15
+ Handles paths like:
16
+ - ~/.claude/projects/-Users-ramakrishnanannaswamy-projects-claude-self-reflect/file.jsonl
17
+ - /logs/-Users-ramakrishnanannaswamy-projects-n8n-builder/file.jsonl
18
+ """
19
+ # Get the directory name containing the JSONL file
20
+ path_obj = Path(file_path)
21
+ dir_name = path_obj.parent.name
22
+
23
+ # Extract project name from dash-encoded path
24
+ # Format: -Users-username-projects-PROJECT_NAME (PROJECT_NAME can have dashes)
25
+ if dir_name.startswith('-') and 'projects' in dir_name:
26
+ parts = dir_name.split('-')
27
+ # Find 'projects' and take everything after it as the project name
28
+ try:
29
+ projects_idx = parts.index('projects')
30
+ if projects_idx + 1 < len(parts):
31
+ # Join all parts after 'projects' to handle multi-part project names
32
+ # like "claude-self-reflect", "n8n-builder", etc.
33
+ project_parts = parts[projects_idx + 1:]
34
+ return '-'.join(project_parts)
35
+ except ValueError:
36
+ pass
37
+
38
+ # Fallback: use the directory name as-is
39
+ return dir_name.lstrip('-')
40
+
41
+
42
+ def get_status() -> dict:
43
+ """Get indexing status with overall stats and per-project breakdown.
44
+
45
+ Returns:
46
+ dict: JSON structure with overall and per-project indexing status
47
+ """
48
+ projects_dir = Path.home() / ".claude" / "projects"
49
+ project_stats = defaultdict(lambda: {"indexed": 0, "total": 0})
50
+
51
+ # Count total JSONL files per project
52
+ if projects_dir.exists():
53
+ for jsonl_file in projects_dir.glob("**/*.jsonl"):
54
+ project_name = extract_project_name_from_path(str(jsonl_file))
55
+ project_stats[project_name]["total"] += 1
56
+
57
+ # Read imported-files.json to count indexed files per project
58
+ config_paths = [
59
+ Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
60
+ Path(__file__).parent.parent.parent / "config" / "imported-files.json",
61
+ Path("/config/imported-files.json") # Docker path
62
+ ]
63
+
64
+ imported_files_path = None
65
+ for path in config_paths:
66
+ if path.exists():
67
+ imported_files_path = path
68
+ break
69
+
70
+ if imported_files_path:
71
+ try:
72
+ with open(imported_files_path, 'r') as f:
73
+ data = json.load(f)
74
+
75
+ # Handle both old and new config file formats
76
+ if "stream_position" in data:
77
+ # New format with stream_position
78
+ stream_pos = data.get("stream_position", {})
79
+ imported_files = stream_pos.get("imported_files", [])
80
+ file_metadata = stream_pos.get("file_metadata", {})
81
+
82
+ # Count fully imported files
83
+ for file_path in imported_files:
84
+ project_name = extract_project_name_from_path(file_path)
85
+ project_stats[project_name]["indexed"] += 1
86
+
87
+ # Count partially imported files (files with position > 0)
88
+ for file_path, metadata in file_metadata.items():
89
+ if isinstance(metadata, dict) and metadata.get("position", 0) > 0:
90
+ # Only count if not already in imported_files
91
+ if file_path not in imported_files:
92
+ project_name = extract_project_name_from_path(file_path)
93
+ project_stats[project_name]["indexed"] += 1
94
+ else:
95
+ # Legacy format with imported_files as top-level object
96
+ imported_files = data.get("imported_files", {})
97
+
98
+ # Count all files in imported_files object (they are all fully imported)
99
+ for file_path in imported_files.keys():
100
+ project_name = extract_project_name_from_path(file_path)
101
+ project_stats[project_name]["indexed"] += 1
102
+ except (json.JSONDecodeError, KeyError, OSError):
103
+ # If config file is corrupted or unreadable, continue with zero indexed counts
104
+ pass
105
+
106
+ # Calculate overall stats
107
+ total_all = sum(p["total"] for p in project_stats.values())
108
+ indexed_all = sum(p["indexed"] for p in project_stats.values())
109
+
110
+ # Build response structure
111
+ result = {
112
+ "overall": {
113
+ "percentage": round((indexed_all / total_all * 100) if total_all > 0 else 100.0, 1),
114
+ "indexed": indexed_all,
115
+ "total": total_all,
116
+ "backlog": total_all - indexed_all
117
+ },
118
+ "projects": {}
119
+ }
120
+
121
+ # Add per-project stats with percentages
122
+ for project, stats in project_stats.items():
123
+ result["projects"][project] = {
124
+ "percentage": round((stats["indexed"] / stats["total"] * 100) if stats["total"] > 0 else 100.0, 1),
125
+ "indexed": stats["indexed"],
126
+ "total": stats["total"]
127
+ }
128
+
129
+ return result
130
+
131
+
132
+ if __name__ == "__main__":
133
+ # Allow running as standalone script for testing
134
+ import sys
135
+ print(json.dumps(get_status(), indent=2))
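Because `get_status()` avoids the heavy MCP imports, it can be called directly for scripting. Both entry points below come from this diff: the `--status` flag added to `__main__.py` and the npm `status` subcommand added in installer/cli.js; run the Python form from the mcp-server directory with its venv active.

```bash
# Python entry point added in __main__.py (prints compact JSON; pretty-print it)
python -m src --status | python -m json.tool

# npm wrapper added in installer/cli.js
claude-self-reflect status
```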
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-self-reflect",
3
- "version": "2.5.10",
3
+ "version": "2.5.12",
4
4
  "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
5
5
  "keywords": [
6
6
  "claude",