claude-self-reflect 2.7.3 → 2.8.0

@@ -30,8 +30,11 @@ RUN mkdir -p /root/.cache/fastembed && \
  # Set working directory
  WORKDIR /app
 
- # Copy scripts
- COPY scripts/ /scripts/
+ # Copy application scripts
+ COPY scripts/ /app/scripts/
+
+ # Make watcher-loop.sh executable
+ RUN chmod +x /app/scripts/watcher-loop.sh
 
  # Create config directory
  RUN mkdir -p /config
@@ -41,4 +44,4 @@ ENV PYTHONUNBUFFERED=1
  ENV MALLOC_ARENA_MAX=2
 
  # Run the watcher loop
- CMD ["/scripts/watcher-loop.sh"]
+ CMD ["/app/scripts/watcher-loop.sh"]
package/README.md CHANGED
@@ -149,10 +149,17 @@ Here's how your conversations get imported and prioritized:
 
  ![Import Architecture](docs/diagrams/import-architecture.png)
 
- **The system intelligently prioritizes your conversations:**
- - **šŸ”„ HOT** (< 5 minutes): Switches to 2-second intervals for near real-time import
- - **šŸŒ”ļø WARM** (< 24 hours): Normal priority, processed every 60 seconds
- - **ā„ļø COLD** (> 24 hours): Batch processed, max 5 per cycle to prevent blocking
+ **The system intelligently processes your conversations:**
+ - Runs every 60 seconds checking for new conversations
+ - Processes newest conversations first (delta import pattern)
+ - Maintains low memory usage (<50MB) through streaming
+ - Handles up to 5 files per cycle to prevent blocking
+
+ **HOT/WARM/COLD Intelligent Prioritization:**
+ - **šŸ”„ HOT** (< 5 minutes): Switches to 2-second intervals for near real-time import
+ - **šŸŒ”ļø WARM** (< 24 hours): Normal priority with starvation prevention (urgent after a 30-minute wait)
+ - **ā„ļø COLD** (> 24 hours): Batch processed, max 5 per cycle to prevent blocking new content
+ - Files are categorized by age and processed with priority queuing, so the newest content is imported quickly while older files are never starved
 
  ## Using It
 
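To make the README's prioritization concrete, here is a minimal sketch of age-based HOT/WARM/COLD categorization with starvation prevention. This is an illustration written for this diff, not the package's actual watcher code; the thresholds mirror the documented defaults (5 minutes, 24 hours, 30-minute warm wait, 5 cold files per cycle).

```python
# Illustrative sketch only; constant names echo the documented env vars.
HOT_WINDOW_S = 5 * 60          # < 5 min  -> HOT
WARM_WINDOW_S = 24 * 60 * 60   # < 24 hrs -> WARM
MAX_WARM_WAIT_S = 30 * 60      # WARM files become urgent after 30 min
MAX_COLD_FILES = 5             # COLD files are batched, 5 per cycle

def categorize(age_s: float, waited_s: float) -> str:
    """Bucket a file by age; promote WARM files that have waited too long."""
    if age_s < HOT_WINDOW_S:
        return "HOT"           # polled on the fast 2-second interval
    if age_s < WARM_WINDOW_S:
        # Starvation prevention: urgent once it has waited past the cap
        return "WARM-URGENT" if waited_s > MAX_WARM_WAIT_S else "WARM"
    return "COLD"              # batched, at most MAX_COLD_FILES per cycle

print(categorize(age_s=120, waited_s=0))        # HOT
print(categorize(age_s=3_600, waited_s=2_400))  # WARM-URGENT
print(categorize(age_s=172_800, waited_s=0))    # COLD
```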
@@ -42,6 +42,7 @@ services:
      environment:
        - QDRANT_URL=http://qdrant:6333
        - STATE_FILE=/config/imported-files.json
+      - LOGS_DIR=/logs
       - OPENAI_API_KEY=${OPENAI_API_KEY:-}
       - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
       - VOYAGE_KEY=${VOYAGE_KEY:-}
@@ -176,21 +177,29 @@ services:
       - ./scripts:/scripts:ro
     environment:
       - QDRANT_URL=http://qdrant:6333
-     - STATE_FILE=/config/watcher-state.json
+     - STATE_FILE=/config/csr-watcher.json
+     - LOGS_DIR=/logs # Fixed: Point to mounted volume
      - VOYAGE_KEY=${VOYAGE_KEY:-}
      - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
-     - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15}
-     - MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-3}
-     - MAX_MEMORY_MB=${MAX_MEMORY_MB:-300}
-     - WATCH_INTERVAL_SECONDS=${WATCH_INTERVAL_SECONDS:-30}
-     - MAX_FILES_PER_CYCLE=${MAX_FILES_PER_CYCLE:-10}
+     - ENABLE_MEMORY_DECAY=${ENABLE_MEMORY_DECAY:-false}
+     - DECAY_WEIGHT=${DECAY_WEIGHT:-0.3}
+     - DECAY_SCALE_DAYS=${DECAY_SCALE_DAYS:-90}
+     - CHECK_INTERVAL_S=${CHECK_INTERVAL_S:-60}
+     - HOT_CHECK_INTERVAL_S=${HOT_CHECK_INTERVAL_S:-2}
+     - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-5}
+     - WARM_WINDOW_HOURS=${WARM_WINDOW_HOURS:-24}
+     - MAX_COLD_FILES=${MAX_COLD_FILES:-5}
+     - MAX_WARM_WAIT_MINUTES=${MAX_WARM_WAIT_MINUTES:-30}
+     - MAX_MESSAGES_PER_CHUNK=${MAX_MESSAGES_PER_CHUNK:-10}
      - MAX_CHUNK_SIZE=${MAX_CHUNK_SIZE:-50} # Messages per chunk for streaming
+     - MEMORY_LIMIT_MB=${MEMORY_LIMIT_MB:-1000}
+     - MEMORY_WARNING_MB=${MEMORY_WARNING_MB:-500}
      - PYTHONUNBUFFERED=1
      - MALLOC_ARENA_MAX=2
-   restart: "no" # Manual start only - prevent system overload
-   profiles: ["safe-watch"] # Requires explicit profile to run
-   mem_limit: 600m # Increased from 400m to handle large files safely
-   memswap_limit: 600m
+   restart: unless-stopped
+   profiles: ["safe-watch", "watch"] # Requires explicit profile to run
+   mem_limit: 1g # Increased to 1GB to match MEMORY_LIMIT_MB
+   memswap_limit: 1g
    cpus: 1.0 # Single CPU core limit
 
    # MCP server for Claude integration
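The 2.7.x tuning knobs are replaced wholesale here. As a rough sketch of how a consumer process would read the new variables (illustrative only; the actual handling lives in the package's streaming watcher, and the defaults simply mirror the compose file above):

```python
import os

# Defaults mirror the compose defaults above; names are the documented env vars.
CHECK_INTERVAL_S = int(os.getenv("CHECK_INTERVAL_S", "60"))          # normal poll
HOT_CHECK_INTERVAL_S = int(os.getenv("HOT_CHECK_INTERVAL_S", "2"))   # fast poll
HOT_WINDOW_MINUTES = int(os.getenv("HOT_WINDOW_MINUTES", "5"))
WARM_WINDOW_HOURS = int(os.getenv("WARM_WINDOW_HOURS", "24"))
MAX_COLD_FILES = int(os.getenv("MAX_COLD_FILES", "5"))
MAX_WARM_WAIT_MINUTES = int(os.getenv("MAX_WARM_WAIT_MINUTES", "30"))
MEMORY_LIMIT_MB = int(os.getenv("MEMORY_LIMIT_MB", "1000"))
MEMORY_WARNING_MB = int(os.getenv("MEMORY_WARNING_MB", "500"))
```

Note that `mem_limit: 1g` in the compose file is deliberately kept in step with `MEMORY_LIMIT_MB=1000`, so the container's hard cap matches the watcher's own self-imposed limit.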
@@ -454,6 +454,26 @@ async function enrichMetadata() {
    }
  }
 
+ async function startWatcher() {
+   console.log('\nšŸ”„ Starting the streaming watcher...');
+   console.log(' • HOT files (<5 min): 2-second processing');
+   console.log(' • WARM files (<24 hrs): Normal priority');
+   console.log(' • COLD files (>24 hrs): Batch processing');
+
+   try {
+     safeExec('docker', ['compose', '--profile', 'watch', 'up', '-d', 'safe-watcher'], {
+       cwd: projectRoot,
+       stdio: 'inherit'
+     });
+     console.log('āœ… Watcher started successfully!');
+     return true;
+   } catch (error) {
+     console.log('āš ļø Could not start watcher automatically');
+     console.log(' You can start it manually with: docker compose --profile watch up -d');
+     return false;
+   }
+ }
+
  async function showFinalInstructions() {
    console.log('\nāœ… Setup complete!');
 
@@ -461,7 +481,7 @@ async function showFinalInstructions() {
    console.log(' • 🌐 Qdrant Dashboard: http://localhost:6333/dashboard/');
    console.log(' • šŸ“Š Status: All services running');
    console.log(' • šŸ” Search: Semantic search with memory decay enabled');
-   console.log(' • šŸš€ Import: Watcher checking every 60 seconds');
+   console.log(' • šŸš€ Watcher: HOT/WARM/COLD prioritization active');
 
    console.log('\nšŸ“‹ Quick Reference Commands:');
    console.log(' • Check status: docker compose ps');
@@ -568,6 +588,9 @@ async function main() {
    // Enrich metadata (new in v2.5.19)
    await enrichMetadata();
 
+   // Start the watcher
+   await startWatcher();
+
    // Show final instructions
    await showFinalInstructions();
 
@@ -9,6 +9,7 @@ import json
  import numpy as np
  import hashlib
  import time
+ import logging
 
  from fastmcp import FastMCP, Context
  from .utils import normalize_project_name
@@ -124,18 +125,48 @@ indexing_status = {
      "is_checking": False
  }
 
- async def update_indexing_status():
+ # Cache for indexing status (5-second TTL)
+ _indexing_cache = {"result": None, "timestamp": 0}
+
+ # Setup logger
+ logger = logging.getLogger(__name__)
+
+ def normalize_path(path_str: str) -> str:
+     """Normalize path for consistent comparison across platforms.
+
+     Args:
+         path_str: Path string to normalize
+
+     Returns:
+         Normalized path string with consistent separators
+     """
+     if not path_str:
+         return path_str
+     p = Path(path_str).expanduser().resolve()
+     return str(p).replace('\\', '/')  # Consistent separators for all platforms
+
+ async def update_indexing_status(cache_ttl: int = 5):
      """Update indexing status by checking JSONL files vs Qdrant collections.
-     This is a lightweight check that compares file counts, not full content."""
-     global indexing_status
+     This is a lightweight check that compares file counts, not full content.
+
+     Args:
+         cache_ttl: Cache time-to-live in seconds (default: 5)
+     """
+     global indexing_status, _indexing_cache
+
+     # Check cache first (5-second TTL to prevent performance issues)
+     current_time = time.time()
+     if _indexing_cache["result"] and current_time - _indexing_cache["timestamp"] < cache_ttl:
+         # Use cached result
+         indexing_status = _indexing_cache["result"].copy()
+         return
 
      # Don't run concurrent checks
      if indexing_status["is_checking"]:
          return
 
-     # Only check every 5 minutes to avoid overhead
-     current_time = time.time()
-     if current_time - indexing_status["last_check"] < 300:  # 5 minutes
+     # Check immediately on first call, then every 60 seconds to avoid overhead
+     if indexing_status["last_check"] > 0 and current_time - indexing_status["last_check"] < 60:  # 1 minute
          return
 
      indexing_status["is_checking"] = True
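A quick note on what `normalize_path` buys: tilde expansion, symlink resolution, and separator normalization collapse equivalent paths to one canonical form, so membership checks against merged state files become reliable. The outputs below are illustrative usage of the function defined above, assuming made-up home directories:

```python
# Hypothetical results of the normalize_path defined above:
normalize_path("~/.claude/projects/a.jsonl")
#   on POSIX (home /home/alice) -> "/home/alice/.claude/projects/a.jsonl"
normalize_path("C:\\Users\\alice\\.claude\\projects\\a.jsonl")
#   on Windows -> "C:/Users/alice/.claude/projects/a.jsonl"  (backslashes normalized)
```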
@@ -151,46 +182,107 @@ async def update_indexing_status():
          jsonl_files = list(projects_dir.glob("**/*.jsonl"))
          total_files = len(jsonl_files)
 
-         # Check imported-files.json to see what's been imported
-         # The streaming importer uses imported-files.json with nested structure
-         # Try multiple possible locations for the config file
+         # Check imported-files.json AND watcher state files to see what's been imported
+         # The system uses multiple state files that need to be merged
+         all_imported_files = set()  # Use set to avoid duplicates
+         file_metadata = {}
+
+         # 1. Check imported-files.json (batch importer)
          possible_paths = [
              Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
              Path(__file__).parent.parent.parent / "config" / "imported-files.json",
              Path("/config/imported-files.json")  # Docker path if running in container
          ]
 
-         imported_files_path = None
          for path in possible_paths:
              if path.exists():
-                 imported_files_path = path
-                 break
+                 try:
+                     with open(path, 'r') as f:
+                         imported_data = json.load(f)
+                     imported_files_dict = imported_data.get("imported_files", {})
+                     file_metadata.update(imported_data.get("file_metadata", {}))
+                     # Normalize paths before adding to set
+                     normalized_files = {normalize_path(k) for k in imported_files_dict.keys()}
+                     all_imported_files.update(normalized_files)
+                 except (json.JSONDecodeError, IOError) as e:
+                     logger.debug(f"Failed to read state file {path}: {e}")
+                     pass  # Continue if file is corrupted
 
-         if imported_files_path and imported_files_path.exists():
-             with open(imported_files_path, 'r') as f:
-                 imported_data = json.load(f)
-             # The actual structure has imported_files and file_metadata at the top level
-             # NOT nested under stream_position as previously assumed
-             imported_files_dict = imported_data.get("imported_files", {})
-             file_metadata = imported_data.get("file_metadata", {})
-
-             # Convert dict keys to list for compatibility with existing logic
-             imported_files_list = list(imported_files_dict.keys())
+         # 2. Check csr-watcher.json (streaming watcher - local mode)
+         watcher_paths = [
+             Path.home() / ".claude-self-reflect" / "config" / "csr-watcher.json",
+             Path("/config/csr-watcher.json")  # Docker path
+         ]
+
+         for path in watcher_paths:
+             if path.exists():
+                 try:
+                     with open(path, 'r') as f:
+                         watcher_data = json.load(f)
+                     watcher_files = watcher_data.get("imported_files", {})
+                     # Normalize paths before adding to set
+                     normalized_files = {normalize_path(k) for k in watcher_files.keys()}
+                     all_imported_files.update(normalized_files)
+                     # Add to metadata with normalized paths
+                     for file_path, info in watcher_files.items():
+                         normalized = normalize_path(file_path)
+                         if normalized not in file_metadata:
+                             file_metadata[normalized] = {
+                                 "position": 1,
+                                 "chunks": info.get("chunks", 0)
+                             }
+                 except (json.JSONDecodeError, IOError) as e:
+                     logger.debug(f"Failed to read watcher state file {path}: {e}")
+                     pass  # Continue if file is corrupted
+
+         # 3. Check csr-watcher-cloud.json (streaming watcher - cloud mode)
+         cloud_watcher_path = Path.home() / ".claude-self-reflect" / "config" / "csr-watcher-cloud.json"
+         if cloud_watcher_path.exists():
+             try:
+                 with open(cloud_watcher_path, 'r') as f:
+                     cloud_data = json.load(f)
+                 cloud_files = cloud_data.get("imported_files", {})
+                 # Normalize paths before adding to set
+                 normalized_files = {normalize_path(k) for k in cloud_files.keys()}
+                 all_imported_files.update(normalized_files)
+                 # Add to metadata with normalized paths
+                 for file_path, info in cloud_files.items():
+                     normalized = normalize_path(file_path)
+                     if normalized not in file_metadata:
+                         file_metadata[normalized] = {
+                             "position": 1,
+                             "chunks": info.get("chunks", 0)
+                         }
+             except (json.JSONDecodeError, IOError) as e:
+                 logger.debug(f"Failed to read cloud watcher state file {cloud_watcher_path}: {e}")
+                 pass  # Continue if file is corrupted
+
+         # Convert set to list for compatibility
+         imported_files_list = list(all_imported_files)
 
          # Count files that have been imported
          for file_path in jsonl_files:
+             # Normalize the current file path for consistent comparison
+             normalized_file = normalize_path(str(file_path))
+
              # Try multiple path formats to match Docker's state file
              file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
              # Also try without .claude/projects prefix (Docker mounts directly)
              file_str_alt = file_str.replace("/.claude/projects", "")
 
+             # Normalize alternative paths as well
+             normalized_alt = normalize_path(file_str)
+             normalized_alt2 = normalize_path(file_str_alt)
+
              # Check if file is in imported_files list (fully imported)
-             if file_str in imported_files_list or file_str_alt in imported_files_list:
+             if normalized_file in imported_files_list or normalized_alt in imported_files_list or normalized_alt2 in imported_files_list:
                  indexed_files += 1
              # Or if it has metadata with position > 0 (partially imported)
-             elif file_str in file_metadata and file_metadata[file_str].get("position", 0) > 0:
+             elif normalized_file in file_metadata and file_metadata[normalized_file].get("position", 0) > 0:
+                 indexed_files += 1
+             elif normalized_alt in file_metadata and file_metadata[normalized_alt].get("position", 0) > 0:
                  indexed_files += 1
-             elif file_str_alt in file_metadata and file_metadata[file_str_alt].get("position", 0) > 0:
+             elif normalized_alt2 in file_metadata and file_metadata[normalized_alt2].get("position", 0) > 0:
                  indexed_files += 1
 
          # Update status
@@ -203,9 +295,14 @@ async def update_indexing_status():
              indexing_status["percentage"] = (indexed_files / total_files) * 100
          else:
              indexing_status["percentage"] = 100.0
+
+         # Update cache
+         _indexing_cache["result"] = indexing_status.copy()
+         _indexing_cache["timestamp"] = current_time
 
      except Exception as e:
          print(f"[WARNING] Failed to update indexing status: {e}")
+         logger.error(f"Failed to update indexing status: {e}", exc_info=True)
      finally:
          indexing_status["is_checking"] = False
 
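Since three separate state files are now merged, it helps to see the shape being read. The sketch below is an assumed structure inferred from the keys the code above accesses (`imported_files`, `file_metadata`, per-file `position` and `chunks`); the paths and values are invented for illustration:

```python
# Assumed shape of ~/.claude-self-reflect/config/csr-watcher.json
# (and its cloud-mode sibling); values are made up for illustration.
example_watcher_state = {
    "imported_files": {
        "/logs/-Users-alice-projects-myapp/session-1234.jsonl": {
            "chunks": 42,  # chunks already streamed into Qdrant
        },
    },
}

# imported-files.json (batch importer) additionally carries file_metadata:
example_importer_state = {
    "imported_files": {"/logs/-Users-alice-projects-myapp/session-1234.jsonl": {}},
    "file_metadata": {
        "/logs/-Users-alice-projects-myapp/session-1234.jsonl": {
            "position": 1,  # > 0 means at least partially imported
            "chunks": 42,
        },
    },
}
```

Merging through a set of normalized paths means a file counted by the batch importer and the watcher is only tallied once.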
@@ -5,6 +5,7 @@ Designed for <20ms execution time to support status bars and shell scripts.
  """
 
  import json
+ import time
  from pathlib import Path
  from collections import defaultdict
 
@@ -53,11 +54,36 @@ def normalize_file_path(file_path: str) -> str:
      return file_path
 
 
+ def get_watcher_status() -> dict:
+     """Get streaming watcher status if available."""
+     watcher_state_file = Path.home() / ".claude-self-reflect" / "config" / "csr-watcher.json"
+
+     if not watcher_state_file.exists():
+         return {"running": False, "status": "not configured"}
+
+     try:
+         with open(watcher_state_file) as f:
+             state = json.load(f)
+
+         # Check if watcher is active (modified recently)
+         file_age = time.time() - watcher_state_file.stat().st_mtime
+         is_active = file_age < 120  # Active if updated in last 2 minutes
+
+         return {
+             "running": is_active,
+             "files_processed": len(state.get("imported_files", {})),
+             "last_update_seconds": int(file_age),
+             "status": "🟢 active" if is_active else "šŸ”“ inactive"
+         }
+     except (json.JSONDecodeError, OSError):
+         return {"running": False, "status": "error reading state"}
+
+
  def get_status() -> dict:
      """Get indexing status with overall stats and per-project breakdown.
 
      Returns:
-         dict: JSON structure with overall and per-project indexing status
+         dict: JSON structure with overall and per-project indexing status, plus watcher status
      """
      projects_dir = Path.home() / ".claude" / "projects"
      project_stats = defaultdict(lambda: {"indexed": 0, "total": 0})
@@ -154,6 +180,9 @@ def get_status() -> dict:
              "total": stats["total"]
          }
 
+     # Add watcher status
+     result["watcher"] = get_watcher_status()
+
      return result
 
 
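For status-bar integrations, the new `watcher` key rides along with the existing per-project breakdown. A usage sketch of the `get_status` shown above; the output values are invented for illustration:

```python
# Assumes the status module above is importable; values are made up.
status = get_status()
print(status["watcher"])
# -> {'running': True, 'files_processed': 128,
#     'last_update_seconds': 30, 'status': '🟢 active'}
```

Liveness is inferred purely from the state file's mtime (updated within the last 2 minutes), so no IPC with the watcher container is needed, keeping the call within the module's <20ms budget.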
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "claude-self-reflect",
-   "version": "2.7.3",
+   "version": "2.8.0",
    "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
    "keywords": [
      "claude",
@@ -24,7 +24,8 @@ from qdrant_client.models import Filter, FieldCondition, MatchValue
  # Configuration
  QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
  LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
- STATE_FILE = os.getenv("STATE_FILE", "./config/delta-update-state.json")
+ # Use /config path if running in Docker, otherwise use ./config
+ STATE_FILE = os.getenv("STATE_FILE", "/config/delta-update-state.json" if os.path.exists("/config") else "./config/delta-update-state.json")
  PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true"
  DRY_RUN = os.getenv("DRY_RUN", "false").lower() == "true"
  DAYS_TO_UPDATE = int(os.getenv("DAYS_TO_UPDATE", "7"))
@@ -432,7 +433,7 @@ async def main_async():
      logger.info("=== Delta Update Complete ===")
      logger.info(f"Successfully updated: {success_count} conversations")
      logger.info(f"Failed: {failed_count} conversations")
-     logger.info(f"Total conversations in state: {len(state['updated_conversations'])}")
+     logger.info(f"Total conversations in state: {len(state.get('updated_conversations', {}))}")
 
  def main():
      """Entry point."""
@@ -57,7 +57,9 @@ else:
 
  def normalize_project_name(project_name: str) -> str:
      """Normalize project name for consistency."""
-     return project_name.replace("-Users-ramakrishnanannaswamy-projects-", "").replace("-", "_").lower()
+     # For compatibility with delta-metadata-update, just use the project name as-is
+     # This ensures collection names match between import and delta update scripts
+     return project_name
 
  def get_collection_name(project_path: Path) -> str:
      """Generate collection name from project path."""