claude-self-reflect 2.5.10 → 2.5.12

This diff shows the contents of publicly available package versions as released to the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registries.
@@ -0,0 +1,26 @@
1
+ FROM python:3.11-slim
2
+
3
+ # Install system dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ gcc \
6
+ g++ \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Set working directory
10
+ WORKDIR /app
11
+
12
+ # Copy requirements
13
+ COPY scripts/requirements.txt /app/
14
+
15
+ # Install Python dependencies
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy the streaming importer script
19
+ COPY scripts/streaming-importer.py /scripts/
20
+
21
+ # Set environment variables for memory management
22
+ ENV MALLOC_ARENA_MAX=2
23
+ ENV PYTHONUNBUFFERED=1
24
+
25
+ # Run the streaming importer
26
+ CMD ["python", "/scripts/streaming-importer.py"]
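This new Dockerfile packages the streaming importer as its own image. A minimal sketch of building and running it standalone, assuming it is saved as `Dockerfile.async-importer` (the filename referenced by the `async-importer` service in docker-compose below) and that a Qdrant instance is already reachable; paths and environment variables here mirror the compose service and are illustrative only.

```bash
# Build from the repository root (the -f filename is an assumption based on
# the docker-compose reference further down in this diff).
docker build -f Dockerfile.async-importer -t claude-reflect-importer .

# Run against an existing Qdrant; mounts and env vars follow the compose
# service definition and are examples, not authoritative defaults.
docker run --rm \
  -v ~/.claude/projects:/logs:ro \
  -v ~/.claude-self-reflect/config:/config \
  -e QDRANT_URL=http://host.docker.internal:6333 \
  claude-reflect-importer
```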
package/README.md CHANGED
@@ -6,6 +6,8 @@ Claude forgets everything. This fixes that.
6
6
 
7
7
  Ask Claude about past conversations. Get actual answers. **100% local by default** - your conversations never leave your machine. Cloud-enhanced search available when you need it.
8
8
 
9
+ **✅ Proven at Scale**: Successfully indexed 682 conversation files with 100% reliability. No data loss, no corruption, just seamless conversation memory that works.
10
+
9
11
  **Before**: "I don't have access to previous conversations"
10
12
  **After**:
11
13
  ```
@@ -129,9 +131,10 @@ Claude: [Searches across ALL your projects]
129
131
  Recent conversations matter more. Old ones fade. Like your brain, but reliable.
130
132
 
131
133
  ### 🚀 Performance
132
- - **Search**: 200-350ms response time
133
- - **Import**: 2-second response for new conversations
134
+ - **Search**: 200-350ms response time across 682 indexed conversations
135
+ - **Import**: 2-second response for new conversations
134
136
  - **Memory**: 50MB operational target with smart chunking
137
+ - **Scale**: 100% indexing success rate across all conversation types
135
138
 
136
139
  ## The Technical Stack
137
140
 
@@ -150,13 +153,14 @@ Recent conversations matter more. Old ones fade. Like your brain, but reliable.
150
153
 
151
154
  ## What's New
152
155
 
156
+ - **v2.5.11** - Critical cloud mode fix - Environment variables now properly passed to MCP server
157
+ - **v2.5.10** - Emergency hotfix for MCP server startup failure (dead code removal)
153
158
  - **v2.5.6** - Tool Output Extraction - Captures git changes & tool outputs for cross-agent discovery
154
159
  - **v2.5.5** - Critical dependency fix & streaming importer enhancements
155
160
  - **v2.5.4** - Documentation & bug fixes (import path & state file compatibility)
156
161
  - **v2.5.3** - Streamlined README & import architecture diagram
157
162
  - **v2.5.2** - State file compatibility fix
158
163
  - **v2.4.5** - 10-40x performance boost
159
- - **v2.4.3** - Project-scoped search
160
164
 
161
165
  [Full changelog](docs/release-history.md)
162
166
 
@@ -8,7 +8,7 @@ services:
8
8
  command: chown -R 1000:1000 /config
9
9
  volumes:
10
10
  - ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
11
- profiles: ["watch", "mcp", "import"]
11
+ profiles: ["watch", "mcp", "import", "async"]
12
12
 
13
13
  # Qdrant vector database - the heart of semantic search
14
14
  qdrant:
@@ -104,18 +104,59 @@ services:
104
104
  - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
105
105
  - VOYAGE_KEY=${VOYAGE_KEY:-}
106
106
  - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
107
- - WATCH_INTERVAL=${WATCH_INTERVAL:-5} # Testing with 5 second interval
108
- - MAX_MEMORY_MB=${MAX_MEMORY_MB:-350} # Total memory including model
109
- - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-100} # Memory for operations (increased for large file handling)
110
- - CHUNK_SIZE=${CHUNK_SIZE:-5}
107
+ - WATCH_INTERVAL=${WATCH_INTERVAL:-1} # Aggressive: 5x faster detection (minimum 1 second)
108
+ - MAX_MEMORY_MB=${MAX_MEMORY_MB:-2000} # Ultra conservative to prevent memory leak
109
+ - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-1500} # 1.5GB operational (25% of 8GB)
110
+ - CHUNK_SIZE=${CHUNK_SIZE:-5} # Minimal batch size
111
+ - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15} # Keep files HOT longer
112
+ - MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-5} # Single file processing
113
+ - PARALLEL_WORKERS=${PARALLEL_WORKERS:-8} # Enable parallel embedding workers
111
114
  - PYTHONUNBUFFERED=1
112
115
  - LOGS_DIR=/logs
113
116
  - FASTEMBED_CACHE_PATH=/root/.cache/fastembed
114
117
  - CURRENT_PROJECT_PATH=${PWD} # Pass current project path for prioritization
118
+ - MALLOC_ARENA_MAX=2 # MEMORY LEAK FIX: Limit glibc malloc arenas
119
+ - THREAD_POOL_WORKERS=${THREAD_POOL_WORKERS:-2} # AsyncEmbedder thread pool size (speed vs stability)
120
+ - THREAD_POOL_RECYCLE_FILES=${THREAD_POOL_RECYCLE_FILES:-50} # Files before recycling thread pool
115
121
  restart: unless-stopped
116
122
  profiles: ["watch"]
117
- mem_limit: 1g
118
- memswap_limit: 1g
123
+ mem_limit: 8g
124
+ memswap_limit: 8g
125
+
126
+ # Async streaming importer - Ground-up async rewrite
127
+ async-importer:
128
+ build:
129
+ context: .
130
+ dockerfile: Dockerfile.async-importer
131
+ container_name: claude-reflection-async
132
+ depends_on:
133
+ - qdrant
134
+ volumes:
135
+ - ${CLAUDE_LOGS_PATH:-~/.claude/projects}:/logs:ro
136
+ - ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
137
+ - ./scripts:/scripts:ro
138
+ environment:
139
+ - QDRANT_URL=http://qdrant:6333
140
+ - STATE_FILE=/config/imported-files.json
141
+ - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
142
+ - VOYAGE_KEY=${VOYAGE_KEY:-}
143
+ - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
144
+ - WATCH_INTERVAL=${WATCH_INTERVAL:-5}
145
+ - MAX_MEMORY_MB=${MAX_MEMORY_MB:-2000}
146
+ - OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-1500}
147
+ - CHUNK_SIZE=${CHUNK_SIZE:-5}
148
+ - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15}
149
+ - MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-5}
150
+ - THREAD_POOL_WORKERS=${THREAD_POOL_WORKERS:-2}
151
+ - PYTHONUNBUFFERED=1
152
+ - LOGS_DIR=/logs
153
+ - FASTEMBED_CACHE_PATH=/root/.cache/fastembed
154
+ - CURRENT_PROJECT_PATH=${PWD}
155
+ - MALLOC_ARENA_MAX=2
156
+ restart: unless-stopped
157
+ profiles: ["async"]
158
+ mem_limit: 4g
159
+ memswap_limit: 4g
119
160
 
120
161
  # MCP server for Claude integration
121
162
  mcp-server:
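The new `async` profile (also added to the setup-helper's profile list above) gates the async-importer service, so it only starts when explicitly requested. A hedged usage sketch: the variable names come straight from the service definition, while the override values are examples only and fall back to the defaults shown above when unset.

```bash
# Start the services that carry the new "async" profile
docker compose --profile async up -d

# Override the new tuning knobs without editing the compose file; compose
# interpolates ${VAR:-default} from the shell environment.
WATCH_INTERVAL=5 MAX_COLD_FILES_PER_CYCLE=2 THREAD_POOL_WORKERS=2 \
  docker compose --profile async up -d async-importer
```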
package/installer/cli.js CHANGED
@@ -10,6 +10,7 @@ const __dirname = dirname(__filename);
10
10
 
11
11
  const commands = {
12
12
  setup: 'Run the setup wizard to configure Claude Self-Reflect',
13
+ status: 'Get indexing status as JSON (overall + per-project breakdown)',
13
14
  doctor: 'Check your installation and diagnose issues',
14
15
  help: 'Show this help message'
15
16
  };
@@ -27,6 +28,53 @@ async function setup() {
27
28
  });
28
29
  }
29
30
 
31
+ async function status() {
32
+ // Call the Python MCP server's --status command
33
+ const mcpServerPath = join(__dirname, '..', 'mcp-server');
34
+ const venvPython = join(mcpServerPath, 'venv', 'bin', 'python');
35
+ const mcpModule = join(mcpServerPath, 'src');
36
+
37
+ try {
38
+ const child = spawn(venvPython, ['-m', 'src', '--status'], {
39
+ cwd: mcpServerPath,
40
+ stdio: ['inherit', 'pipe', 'pipe']
41
+ });
42
+
43
+ let stdout = '';
44
+ let stderr = '';
45
+
46
+ child.stdout.on('data', (data) => {
47
+ stdout += data.toString();
48
+ });
49
+
50
+ child.stderr.on('data', (data) => {
51
+ stderr += data.toString();
52
+ });
53
+
54
+ child.on('exit', (code) => {
55
+ if (code === 0) {
56
+ // Output the JSON directly for other tools to parse
57
+ process.stdout.write(stdout);
58
+ process.exit(0);
59
+ } else {
60
+ console.error('Error getting status:', stderr || 'Unknown error');
61
+ process.exit(1);
62
+ }
63
+ });
64
+
65
+ // Handle timeout
66
+ setTimeout(() => {
67
+ child.kill('SIGTERM');
68
+ console.error('Status check timed out');
69
+ process.exit(1);
70
+ }, 10000); // 10 second timeout
71
+
72
+ } catch (error) {
73
+ console.error('Failed to execute status command:', error.message);
74
+ process.exit(1);
75
+ }
76
+ }
77
+
30
78
  async function doctor() {
31
79
  console.log('🔍 Checking Claude Self-Reflect installation...\n');
32
80
 
@@ -124,8 +172,11 @@ function help() {
124
172
  console.log(' claude-self-reflect setup --voyage-key=pa-1234567890');
125
173
  console.log(' claude-self-reflect setup --local');
126
174
  console.log(' claude-self-reflect setup --debug # For troubleshooting');
175
+ console.log(' claude-self-reflect status # Get indexing status as JSON');
127
176
 
128
- console.log('\nFor more information: https://github.com/ramakay/claude-self-reflect');
177
+ console.log('\nFor more information:');
178
+ console.log(' Documentation: https://github.com/ramakay/claude-self-reflect');
179
+ console.log(' Status API: See docs/api-reference.md#cli-status-interface');
129
180
  }
130
181
 
131
182
  // Main
@@ -135,6 +186,9 @@ switch (command) {
135
186
  case 'setup':
136
187
  setup();
137
188
  break;
189
+ case 'status':
190
+ status();
191
+ break;
138
192
  case 'doctor':
139
193
  doctor();
140
194
  break;
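With the `status` subcommand wired into the CLI above, other tooling can poll indexing progress as machine-readable JSON. A small usage sketch; the `.overall.percentage` field name follows the structure produced by the status module further down in this diff, and `jq` is assumed to be installed.

```bash
# Print the full status document
claude-self-reflect status

# Pull just the overall completion percentage, e.g. for a shell status bar
claude-self-reflect status | jq '.overall.percentage'
```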
@@ -21,5 +21,53 @@ else
21
21
  source venv/bin/activate
22
22
  fi
23
23
 
24
+ # CRITICAL FIX: Pass through environment variables from Claude Code
25
+ # These environment variables are set by `claude mcp add -e KEY=value`
26
+ # Export them so the Python process can access them
27
+ if [ ! -z "$VOYAGE_KEY" ]; then
28
+ export VOYAGE_KEY="$VOYAGE_KEY"
29
+ fi
30
+
31
+ if [ ! -z "$VOYAGE_KEY_2" ]; then
32
+ export VOYAGE_KEY_2="$VOYAGE_KEY_2"
33
+ fi
34
+
35
+ if [ ! -z "$PREFER_LOCAL_EMBEDDINGS" ]; then
36
+ export PREFER_LOCAL_EMBEDDINGS="$PREFER_LOCAL_EMBEDDINGS"
37
+ fi
38
+
39
+ if [ ! -z "$QDRANT_URL" ]; then
40
+ export QDRANT_URL="$QDRANT_URL"
41
+ fi
42
+
43
+ if [ ! -z "$ENABLE_MEMORY_DECAY" ]; then
44
+ export ENABLE_MEMORY_DECAY="$ENABLE_MEMORY_DECAY"
45
+ fi
46
+
47
+ if [ ! -z "$DECAY_WEIGHT" ]; then
48
+ export DECAY_WEIGHT="$DECAY_WEIGHT"
49
+ fi
50
+
51
+ if [ ! -z "$DECAY_SCALE_DAYS" ]; then
52
+ export DECAY_SCALE_DAYS="$DECAY_SCALE_DAYS"
53
+ fi
54
+
55
+ if [ ! -z "$EMBEDDING_MODEL" ]; then
56
+ export EMBEDDING_MODEL="$EMBEDDING_MODEL"
57
+ fi
58
+
59
+ # The embedding manager now handles cache properly in a controlled directory
60
+ # Set to 'false' if you want to use HuggingFace instead of Qdrant CDN
61
+ if [ -z "$FASTEMBED_SKIP_HUGGINGFACE" ]; then
62
+ export FASTEMBED_SKIP_HUGGINGFACE=true
63
+ fi
64
+
65
+ # Debug: Show what environment variables are being passed
66
+ echo "[DEBUG] Environment variables for MCP server:"
67
+ echo "[DEBUG] VOYAGE_KEY: ${VOYAGE_KEY:+set}"
68
+ echo "[DEBUG] PREFER_LOCAL_EMBEDDINGS: ${PREFER_LOCAL_EMBEDDINGS:-not set}"
69
+ echo "[DEBUG] QDRANT_URL: ${QDRANT_URL:-not set}"
70
+ echo "[DEBUG] ENABLE_MEMORY_DECAY: ${ENABLE_MEMORY_DECAY:-not set}"
71
+
24
72
  # Run the MCP server
25
73
  exec python -m src
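These exports matter because Claude Code launches this wrapper with whatever `-e` values were supplied when the server was registered. A hedged registration example: the server name and script path are placeholders for a local checkout, and the exact flag ordering can differ between Claude Code versions, so check `claude mcp add --help` before copying.

```bash
# Placeholder name/path; variables listed here are ones the wrapper forwards.
claude mcp add claude-self-reflect \
  -e PREFER_LOCAL_EMBEDDINGS=true \
  -e QDRANT_URL=http://localhost:6333 \
  -e ENABLE_MEMORY_DECAY=true \
  -- /path/to/mcp-server/run-mcp.sh
```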
@@ -10,9 +10,22 @@ def main():
10
10
  "--transport",
11
11
  choices=["stdio", "sse"],
12
12
  default="stdio",
13
+ help="Transport protocol for MCP server (default: stdio)"
14
+ )
15
+ parser.add_argument(
16
+ "--status",
17
+ action="store_true",
18
+ help="Get indexing status as JSON with overall and per-project breakdown"
13
19
  )
14
20
  args = parser.parse_args()
15
21
 
22
+ # Handle status request with early exit (avoid loading heavy MCP dependencies)
23
+ if args.status:
24
+ from .status import get_status
25
+ import json
26
+ print(json.dumps(get_status()))
27
+ return
28
+
16
29
  # Import is done here to make sure environment variables are loaded
17
30
  from .server import mcp
18
31
 
@@ -0,0 +1,237 @@
1
+ """Robust embedding model manager with proper cache handling."""
2
+
3
+ import os
4
+ import sys
5
+ import time
6
+ import logging
7
+ import shutil
8
+ from typing import Optional, List, Union
9
+ from pathlib import Path
10
+ import threading
11
+ import signal
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class EmbeddingManager:
16
+ """Manages embedding models with proper cache and lock handling."""
17
+
18
+ def __init__(self):
19
+ self.model = None
20
+ self.model_type = None # 'local' or 'voyage'
21
+ self.voyage_client = None
22
+
23
+ # Configuration
24
+ self.prefer_local = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
25
+ self.voyage_key = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
26
+ self.embedding_model = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
27
+ self.download_timeout = int(os.getenv('FASTEMBED_DOWNLOAD_TIMEOUT', '30'))
28
+
29
+ # Set cache directory to our controlled location
30
+ self.cache_dir = Path(__file__).parent.parent / '.fastembed-cache'
31
+
32
+ def _clean_stale_locks(self):
33
+ """Clean up any stale lock files from previous runs."""
34
+ locks_dir = self.cache_dir / '.locks'
35
+ if locks_dir.exists():
36
+ logger.info(f"Cleaning stale locks in {locks_dir}")
37
+ try:
38
+ # Remove all lock files older than 5 minutes
39
+ import time
40
+ current_time = time.time()
41
+ for lock_file in locks_dir.glob('**/*.lock'):
42
+ try:
43
+ age = current_time - lock_file.stat().st_mtime
44
+ if age > 300: # 5 minutes
45
+ lock_file.unlink()
46
+ logger.debug(f"Removed stale lock: {lock_file.name}")
47
+ except Exception as e:
48
+ logger.debug(f"Could not remove lock {lock_file}: {e}")
49
+ except Exception as e:
50
+ logger.warning(f"Error cleaning locks: {e}")
51
+
52
+ def initialize(self) -> bool:
53
+ """Initialize embedding model based on user preference."""
54
+ logger.info("Initializing embedding manager...")
55
+
56
+ # Clean up any stale locks first
57
+ self._clean_stale_locks()
58
+
59
+ if self.prefer_local:
60
+ # User wants local - try local only, don't fallback to cloud
61
+ if self._try_initialize_local():
62
+ return True
63
+ logger.error("Local embeddings failed and user prefers local - not falling back to cloud")
64
+ return False
65
+ else:
66
+ # User prefers Voyage AI
67
+ if self.voyage_key and self._try_initialize_voyage():
68
+ return True
69
+ logger.warning("Voyage AI failed, trying local as fallback...")
70
+ if self._try_initialize_local():
71
+ return True
72
+ logger.error("Both Voyage AI and local embeddings failed")
73
+ return False
74
+
75
+ def _try_initialize_local(self) -> bool:
76
+ """Try to initialize local FastEmbed model with timeout and optimizations."""
77
+ try:
78
+ logger.info(f"Attempting to load local model: {self.embedding_model}")
79
+
80
+ # CRITICAL OPTIMIZATION: Set thread limits BEFORE loading model
81
+ # This prevents ONNX Runtime and BLAS from over-subscribing CPU
82
+ os.environ['OMP_NUM_THREADS'] = '1'
83
+ os.environ['MKL_NUM_THREADS'] = '1'
84
+ os.environ['OPENBLAS_NUM_THREADS'] = '1'
85
+ os.environ['NUMEXPR_NUM_THREADS'] = '1'
86
+ logger.info("Set thread limits to prevent CPU over-subscription")
87
+
88
+ # Ensure cache directory exists and is writable
89
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
90
+
91
+ # Set FASTEMBED_CACHE_PATH to our controlled directory
92
+ os.environ['FASTEMBED_CACHE_PATH'] = str(self.cache_dir)
93
+ logger.info(f"Using cache directory: {self.cache_dir}")
94
+
95
+ # Also set HF_HOME to avoid any HuggingFace cache issues
96
+ os.environ['HF_HOME'] = str(self.cache_dir / 'huggingface')
97
+
98
+ model_cache = self.cache_dir / 'models--qdrant--all-MiniLM-L6-v2-onnx'
99
+
100
+ if model_cache.exists():
101
+ logger.info("Model cache found, loading from cache...")
102
+ else:
103
+ logger.info(f"Model cache not found, will download (timeout: {self.download_timeout}s)")
104
+ logger.info("Note: First download may take 1-2 minutes")
105
+
106
+ # Force alternative download if HuggingFace is problematic
107
+ # This uses Qdrant's CDN which is more reliable
108
+ if os.getenv('FASTEMBED_SKIP_HUGGINGFACE', 'true').lower() == 'true':
109
+ os.environ['HF_HUB_OFFLINE'] = '1'
110
+ logger.info("Using alternative download sources (Qdrant CDN)")
111
+
112
+ # Use a thread with timeout for model initialization
113
+ success = False
114
+ error = None
115
+
116
+ def init_model():
117
+ nonlocal success, error
118
+ try:
119
+ from fastembed import TextEmbedding
120
+ # Initialize with optimized settings
121
+ # Note: FastEmbed uses these environment variables internally
122
+ self.model = TextEmbedding(
123
+ model_name=self.embedding_model,
124
+ threads=1 # Single thread per worker to prevent over-subscription
125
+ )
126
+ self.model_type = 'local'
127
+ success = True
128
+ logger.info(f"Successfully initialized local model: {self.embedding_model} with single-thread mode")
129
+ except Exception as e:
130
+ error = e
131
+ logger.error(f"Failed to initialize local model: {e}")
132
+
133
+ # Start initialization in a thread
134
+ thread = threading.Thread(target=init_model)
135
+ thread.daemon = True
136
+ thread.start()
137
+ thread.join(timeout=self.download_timeout)
138
+
139
+ if thread.is_alive():
140
+ logger.error(f"Model initialization timed out after {self.download_timeout}s")
141
+ logger.info("Tip: Set FASTEMBED_SKIP_HUGGINGFACE=true to use alternative download sources")
142
+ # Thread will continue in background but we move on
143
+ return False
144
+
145
+ return success
146
+
147
+ except ImportError:
148
+ logger.error("FastEmbed not installed. Install with: pip install fastembed")
149
+ return False
150
+ except Exception as e:
151
+ logger.error(f"Unexpected error initializing local embeddings: {e}")
152
+ return False
153
+
154
+ def _try_initialize_voyage(self) -> bool:
155
+ """Try to initialize Voyage AI client."""
156
+ try:
157
+ logger.info("Attempting to initialize Voyage AI...")
158
+ import voyageai
159
+ self.voyage_client = voyageai.Client(api_key=self.voyage_key)
160
+
161
+ # Test the client with a simple embedding
162
+ test_result = self.voyage_client.embed(
163
+ texts=["test"],
164
+ model="voyage-3",
165
+ input_type="document"
166
+ )
167
+
168
+ if test_result and test_result.embeddings:
169
+ self.model_type = 'voyage'
170
+ logger.info("Successfully initialized Voyage AI")
171
+ return True
172
+ else:
173
+ logger.error("Voyage AI test embedding failed")
174
+ return False
175
+
176
+ except Exception as e:
177
+ logger.error(f"Failed to initialize Voyage AI: {e}")
178
+ return False
179
+
180
+ def embed(self, texts: Union[str, List[str]], input_type: str = "document") -> Optional[List[List[float]]]:
181
+ """Generate embeddings using the active model."""
182
+ if not self.model and not self.voyage_client:
183
+ logger.error("No embedding model initialized")
184
+ return None
185
+
186
+ # Ensure texts is a list
187
+ if isinstance(texts, str):
188
+ texts = [texts]
189
+
190
+ try:
191
+ if self.model_type == 'local':
192
+ # FastEmbed returns a generator, convert to list
193
+ embeddings = list(self.model.embed(texts))
194
+ return [emb.tolist() for emb in embeddings]
195
+
196
+ elif self.model_type == 'voyage':
197
+ result = self.voyage_client.embed(
198
+ texts=texts,
199
+ model="voyage-3-lite" if input_type == "query" else "voyage-3",
200
+ input_type=input_type
201
+ )
202
+ return result.embeddings
203
+
204
+ except Exception as e:
205
+ logger.error(f"Error generating embeddings: {e}")
206
+ return None
207
+
208
+ def get_vector_dimension(self) -> int:
209
+ """Get the dimension of embeddings."""
210
+ if self.model_type == 'local':
211
+ return 384 # all-MiniLM-L6-v2 dimension
212
+ elif self.model_type == 'voyage':
213
+ return 1024 # voyage-3 dimension
214
+ return 0
215
+
216
+ def get_model_info(self) -> dict:
217
+ """Get information about the active model."""
218
+ return {
219
+ 'type': self.model_type,
220
+ 'model': self.embedding_model if self.model_type == 'local' else 'voyage-3',
221
+ 'dimension': self.get_vector_dimension(),
222
+ 'prefer_local': self.prefer_local,
223
+ 'has_voyage_key': bool(self.voyage_key)
224
+ }
225
+
226
+
227
+ # Global instance
228
+ _embedding_manager = None
229
+
230
+ def get_embedding_manager() -> EmbeddingManager:
231
+ """Get or create the global embedding manager."""
232
+ global _embedding_manager
233
+ if _embedding_manager is None:
234
+ _embedding_manager = EmbeddingManager()
235
+ if not _embedding_manager.initialize():
236
+ raise RuntimeError("Failed to initialize any embedding model")
237
+ return _embedding_manager
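A minimal sketch of exercising the manager outside the MCP server, for example to verify that the cache directory and thread-limit handling behave as expected. The module path is an assumption based on server.py importing it as `.embedding_manager` from the `src` package; run from the mcp-server directory with its venv active, and keep `PREFER_LOCAL_EMBEDDINGS=true` to stay offline-friendly.

```bash
PREFER_LOCAL_EMBEDDINGS=true python - <<'PY'
from src.embedding_manager import get_embedding_manager  # assumed module path

manager = get_embedding_manager()          # raises RuntimeError if init fails
print(manager.get_model_info())            # type/model/dimension summary
vectors = manager.embed(["hello world"])   # list of vectors (384-dim for local)
print(len(vectors[0]))
PY
```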
@@ -36,37 +36,48 @@ except ImportError:
36
36
  import voyageai
37
37
  from dotenv import load_dotenv
38
38
 
39
- # Load environment variables
39
+ # Load environment variables from .env file (fallback only)
40
40
  env_path = Path(__file__).parent.parent.parent / '.env'
41
- load_dotenv(env_path)
41
+ load_dotenv(env_path, override=False) # Don't override process environment
42
42
 
43
- # Configuration
43
+ # Configuration - prioritize process environment variables over .env file
44
44
  QDRANT_URL = os.getenv('QDRANT_URL', 'http://localhost:6333')
45
- VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
45
+ VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2') or os.getenv('VOYAGE_KEY_2')
46
46
  ENABLE_MEMORY_DECAY = os.getenv('ENABLE_MEMORY_DECAY', 'false').lower() == 'true'
47
47
  DECAY_WEIGHT = float(os.getenv('DECAY_WEIGHT', '0.3'))
48
48
  DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
49
49
  USE_NATIVE_DECAY = os.getenv('USE_NATIVE_DECAY', 'false').lower() == 'true'
50
50
 
51
- # Embedding configuration
52
- PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'false').lower() == 'true'
51
+ # Embedding configuration - now using lazy initialization
52
+ # CRITICAL: Default changed to 'true' for local embeddings for privacy
53
+ PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
53
54
  EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
54
55
 
55
- # Initialize Voyage AI client (only if not using local embeddings)
56
- voyage_client = None
57
- if not PREFER_LOCAL_EMBEDDINGS and VOYAGE_API_KEY:
58
- voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
56
+ # Import the robust embedding manager
57
+ from .embedding_manager import get_embedding_manager
59
58
 
60
- # Initialize local embedding model if needed
61
- local_embedding_model = None
62
- if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
59
+ # Lazy initialization - models will be loaded on first use
60
+ embedding_manager = None
61
+ voyage_client = None # Keep for backward compatibility
62
+ local_embedding_model = None # Keep for backward compatibility
63
+
64
+ def initialize_embeddings():
65
+ """Initialize embedding models with robust fallback."""
66
+ global embedding_manager, voyage_client, local_embedding_model
63
67
  try:
64
- from fastembed import TextEmbedding
65
- local_embedding_model = TextEmbedding(model_name=EMBEDDING_MODEL)
66
- print(f"[DEBUG] Initialized local embedding model: {EMBEDDING_MODEL}")
67
- except ImportError:
68
- print("[ERROR] FastEmbed not available. Install with: pip install fastembed")
69
- raise
68
+ embedding_manager = get_embedding_manager()
69
+ print(f"[INFO] Embedding manager initialized: {embedding_manager.get_model_info()}")
70
+
71
+ # Set backward compatibility references
72
+ if embedding_manager.model_type == 'voyage':
73
+ voyage_client = embedding_manager.voyage_client
74
+ elif embedding_manager.model_type == 'local':
75
+ local_embedding_model = embedding_manager.model
76
+
77
+ return True
78
+ except Exception as e:
79
+ print(f"[ERROR] Failed to initialize embeddings: {e}")
80
+ return False
70
81
 
71
82
  # Debug environment loading
72
83
  print(f"[DEBUG] Environment variables loaded:")
@@ -88,6 +99,7 @@ class SearchResult(BaseModel):
88
99
  excerpt: str
89
100
  project_name: str
90
101
  conversation_id: Optional[str] = None
102
+ base_conversation_id: Optional[str] = None
91
103
  collection_name: str
92
104
  raw_payload: Optional[Dict[str, Any]] = None # Full Qdrant payload when debug mode enabled
93
105
 
@@ -100,6 +112,99 @@ mcp = FastMCP(
100
112
 
101
113
  # Create Qdrant client
102
114
  qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
115
+
116
+ # Track indexing status (updated periodically)
117
+ indexing_status = {
118
+ "last_check": 0,
119
+ "indexed_conversations": 0,
120
+ "total_conversations": 0,
121
+ "percentage": 100.0,
122
+ "backlog_count": 0,
123
+ "is_checking": False
124
+ }
125
+
126
+ async def update_indexing_status():
127
+ """Update indexing status by checking JSONL files vs Qdrant collections.
128
+ This is a lightweight check that compares file counts, not full content."""
129
+ global indexing_status
130
+
131
+ # Don't run concurrent checks
132
+ if indexing_status["is_checking"]:
133
+ return
134
+
135
+ # Only check every 5 minutes to avoid overhead
136
+ current_time = time.time()
137
+ if current_time - indexing_status["last_check"] < 300: # 5 minutes
138
+ return
139
+
140
+ indexing_status["is_checking"] = True
141
+
142
+ try:
143
+ # Count total JSONL files
144
+ projects_dir = Path.home() / ".claude" / "projects"
145
+ total_files = 0
146
+ indexed_files = 0
147
+
148
+ if projects_dir.exists():
149
+ # Get all JSONL files
150
+ jsonl_files = list(projects_dir.glob("**/*.jsonl"))
151
+ total_files = len(jsonl_files)
152
+
153
+ # Check imported-files.json to see what's been imported
154
+ # The streaming importer uses imported-files.json with nested structure
155
+ # Try multiple possible locations for the config file
156
+ possible_paths = [
157
+ Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
158
+ Path(__file__).parent.parent.parent / "config" / "imported-files.json",
159
+ Path("/config/imported-files.json") # Docker path if running in container
160
+ ]
161
+
162
+ imported_files_path = None
163
+ for path in possible_paths:
164
+ if path.exists():
165
+ imported_files_path = path
166
+ break
167
+
168
+ if imported_files_path and imported_files_path.exists():
169
+ with open(imported_files_path, 'r') as f:
170
+ imported_data = json.load(f)
171
+ # The file has nested structure: {stream_position: {file: position}, imported_files: {file: lines}}
172
+ # Handle new nested structure
173
+ stream_position = imported_data.get("stream_position", {})
174
+ imported_files_list = stream_position.get("imported_files", [])
175
+ file_metadata = stream_position.get("file_metadata", {})
176
+
177
+ # Count files that have been imported
178
+ for file_path in jsonl_files:
179
+ # Try multiple path formats to match Docker's state file
180
+ file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
181
+ # Also try without .claude/projects prefix (Docker mounts directly)
182
+ file_str_alt = file_str.replace("/.claude/projects", "")
183
+
184
+ # Check if file is in imported_files list (fully imported)
185
+ if file_str in imported_files_list or file_str_alt in imported_files_list:
186
+ indexed_files += 1
187
+ # Or if it has metadata with position > 0 (partially imported)
188
+ elif file_str in file_metadata and file_metadata[file_str].get("position", 0) > 0:
189
+ indexed_files += 1
190
+ elif file_str_alt in file_metadata and file_metadata[file_str_alt].get("position", 0) > 0:
191
+ indexed_files += 1
192
+
193
+ # Update status
194
+ indexing_status["last_check"] = current_time
195
+ indexing_status["total_conversations"] = total_files
196
+ indexing_status["indexed_conversations"] = indexed_files
197
+ indexing_status["backlog_count"] = total_files - indexed_files
198
+
199
+ if total_files > 0:
200
+ indexing_status["percentage"] = (indexed_files / total_files) * 100
201
+ else:
202
+ indexing_status["percentage"] = 100.0
203
+
204
+ except Exception as e:
205
+ print(f"[WARNING] Failed to update indexing status: {e}")
206
+ finally:
207
+ indexing_status["is_checking"] = False
103
208
 
104
209
  async def get_all_collections() -> List[str]:
105
210
  """Get all collections (both Voyage and local)."""
@@ -115,12 +220,23 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
115
220
  text: Text to embed
116
221
  force_type: Force specific embedding type ('local' or 'voyage')
117
222
  """
118
- use_local = force_type == 'local' if force_type else (PREFER_LOCAL_EMBEDDINGS or not voyage_client)
223
+ global embedding_manager, voyage_client, local_embedding_model
224
+
225
+ # Initialize on first use
226
+ if embedding_manager is None:
227
+ if not initialize_embeddings():
228
+ raise RuntimeError("Failed to initialize any embedding model. Check logs for details.")
229
+
230
+ # Determine which type to use
231
+ if force_type:
232
+ use_local = force_type == 'local'
233
+ else:
234
+ use_local = embedding_manager.model_type == 'local'
119
235
 
120
236
  if use_local:
121
237
  # Use local embeddings
122
238
  if not local_embedding_model:
123
- raise ValueError("Local embedding model not initialized")
239
+ raise ValueError("Local embedding model not available")
124
240
 
125
241
  # Run in executor since fastembed is synchronous
126
242
  loop = asyncio.get_event_loop()
@@ -131,7 +247,7 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
131
247
  else:
132
248
  # Use Voyage AI
133
249
  if not voyage_client:
134
- raise ValueError("Voyage client not initialized")
250
+ raise ValueError("Voyage client not available")
135
251
  result = voyage_client.embed(
136
252
  texts=[text],
137
253
  model="voyage-3-large",
@@ -417,6 +533,7 @@ async def reflect_on_past(
417
533
  excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
418
534
  project_name=point_project,
419
535
  conversation_id=point.payload.get('conversation_id'),
536
+ base_conversation_id=point.payload.get('base_conversation_id'),
420
537
  collection_name=collection_name,
421
538
  raw_payload=point.payload if include_raw else None
422
539
  ))
@@ -496,6 +613,7 @@ async def reflect_on_past(
496
613
  excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
497
614
  project_name=point_project,
498
615
  conversation_id=point.payload.get('conversation_id'),
616
+ base_conversation_id=point.payload.get('base_conversation_id'),
499
617
  collection_name=collection_name,
500
618
  raw_payload=point.payload if include_raw else None
501
619
  ))
@@ -532,6 +650,7 @@ async def reflect_on_past(
532
650
  excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
533
651
  project_name=point_project,
534
652
  conversation_id=point.payload.get('conversation_id'),
653
+ base_conversation_id=point.payload.get('base_conversation_id'),
535
654
  collection_name=collection_name,
536
655
  raw_payload=point.payload if include_raw else None
537
656
  ))
@@ -552,6 +671,30 @@ async def reflect_on_past(
552
671
  message="Search complete, processing results"
553
672
  )
554
673
 
674
+ # Apply base_conversation_id boosting before sorting
675
+ timing_info['boost_start'] = time.time()
676
+
677
+ # Group results by base_conversation_id to identify related chunks
678
+ base_conversation_groups = {}
679
+ for result in all_results:
680
+ base_id = result.base_conversation_id
681
+ if base_id:
682
+ if base_id not in base_conversation_groups:
683
+ base_conversation_groups[base_id] = []
684
+ base_conversation_groups[base_id].append(result)
685
+
686
+ # Apply boost to results from base conversations with multiple high-scoring chunks
687
+ base_conversation_boost = 0.1 # Boost factor for base conversation matching
688
+ for base_id, group_results in base_conversation_groups.items():
689
+ if len(group_results) > 1: # Multiple chunks from same base conversation
690
+ avg_score = sum(r.score for r in group_results) / len(group_results)
691
+ if avg_score > 0.8: # Only boost high-quality base conversations
692
+ for result in group_results:
693
+ result.score += base_conversation_boost
694
+ await ctx.debug(f"Boosted result from base_conversation_id {base_id}: {result.score:.3f}")
695
+
696
+ timing_info['boost_end'] = time.time()
697
+
555
698
  # Sort by score and limit
556
699
  timing_info['sort_start'] = time.time()
557
700
  all_results.sort(key=lambda x: x.score, reverse=True)
@@ -561,12 +704,89 @@ async def reflect_on_past(
561
704
  if not all_results:
562
705
  return f"No conversations found matching '{query}'. Try different keywords or check if conversations have been imported."
563
706
 
707
+ # Update indexing status before returning results
708
+ await update_indexing_status()
709
+
564
710
  # Format results based on response_format
565
711
  timing_info['format_start'] = time.time()
566
712
 
567
713
  if response_format == "xml":
714
+ # Add upfront summary for immediate visibility (before collapsible XML)
715
+ upfront_summary = ""
716
+
717
+ # Show indexing status prominently
718
+ if indexing_status["percentage"] < 95.0:
719
+ upfront_summary += f"📊 INDEXING: {indexing_status['indexed_conversations']}/{indexing_status['total_conversations']} conversations ({indexing_status['percentage']:.1f}% complete, {indexing_status['backlog_count']} pending)\n"
720
+
721
+ # Show result summary
722
+ if all_results:
723
+ score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
724
+ upfront_summary += f"🎯 RESULTS: {len(all_results)} matches ({score_info} relevance, top score: {all_results[0].score:.3f})\n"
725
+
726
+ # Show performance
727
+ total_time = time.time() - start_time
728
+ upfront_summary += f"⚡ PERFORMANCE: {int(total_time * 1000)}ms total ({len(collections_to_search)} collections searched)\n"
729
+ else:
730
+ upfront_summary += f"❌ NO RESULTS: No conversations found matching '{query}'\n"
731
+
568
732
  # XML format (compact tags for performance)
569
- result_text = "<search>\n"
733
+ result_text = upfront_summary + "\n<search>\n"
734
+
735
+ # Add indexing status if not fully baselined - put key stats in opening tag for immediate visibility
736
+ if indexing_status["percentage"] < 95.0:
737
+ result_text += f' <info status="indexing" progress="{indexing_status["percentage"]:.1f}%" backlog="{indexing_status["backlog_count"]}">\n'
738
+ result_text += f' <message>📊 Indexing: {indexing_status["indexed_conversations"]}/{indexing_status["total_conversations"]} conversations ({indexing_status["percentage"]:.1f}% complete, {indexing_status["backlog_count"]} pending)</message>\n'
739
+ result_text += f" </info>\n"
740
+
741
+ # Add high-level result summary
742
+ if all_results:
743
+ # Count today's results
744
+ now = datetime.now(timezone.utc)
745
+ today_count = 0
746
+ yesterday_count = 0
747
+ week_count = 0
748
+
749
+ for result in all_results:
750
+ timestamp_clean = result.timestamp.replace('Z', '+00:00') if result.timestamp.endswith('Z') else result.timestamp
751
+ timestamp_dt = datetime.fromisoformat(timestamp_clean)
752
+ if timestamp_dt.tzinfo is None:
753
+ timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
754
+
755
+ days_ago = (now - timestamp_dt).days
756
+ if days_ago == 0:
757
+ today_count += 1
758
+ elif days_ago == 1:
759
+ yesterday_count += 1
760
+ if days_ago <= 7:
761
+ week_count += 1
762
+
763
+ # Compact summary with key info in opening tag
764
+ time_info = ""
765
+ if today_count > 0:
766
+ time_info = f"{today_count} today"
767
+ elif yesterday_count > 0:
768
+ time_info = f"{yesterday_count} yesterday"
769
+ elif week_count > 0:
770
+ time_info = f"{week_count} this week"
771
+ else:
772
+ time_info = "older results"
773
+
774
+ score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
775
+
776
+ result_text += f' <summary count="{len(all_results)}" relevance="{score_info}" recency="{time_info}" top-score="{all_results[0].score:.3f}">\n'
777
+
778
+ # Short preview of top result
779
+ top_excerpt = all_results[0].excerpt[:100].strip()
780
+ if '...' not in top_excerpt:
781
+ top_excerpt += "..."
782
+ result_text += f' <preview>{top_excerpt}</preview>\n'
783
+ result_text += f" </summary>\n"
784
+ else:
785
+ result_text += f" <result-summary>\n"
786
+ result_text += f" <headline>No matches found</headline>\n"
787
+ result_text += f" <relevance>No conversations matched your query</relevance>\n"
788
+ result_text += f" </result-summary>\n"
789
+
570
790
  result_text += f" <meta>\n"
571
791
  result_text += f" <q>{query}</q>\n"
572
792
  result_text += f" <scope>{target_project if target_project != 'all' else 'all'}</scope>\n"
@@ -0,0 +1,135 @@
1
+ """Ultra-fast status checker for Claude Self Reflect indexing progress.
2
+
3
+ This module provides lightweight indexing status without loading heavy MCP dependencies.
4
+ Designed for <20ms execution time to support status bars and shell scripts.
5
+ """
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from collections import defaultdict
10
+
11
+
12
+ def extract_project_name_from_path(file_path: str) -> str:
13
+ """Extract project name from JSONL file path.
14
+
15
+ Handles paths like:
16
+ - ~/.claude/projects/-Users-ramakrishnanannaswamy-projects-claude-self-reflect/file.jsonl
17
+ - /logs/-Users-ramakrishnanannaswamy-projects-n8n-builder/file.jsonl
18
+ """
19
+ # Get the directory name containing the JSONL file
20
+ path_obj = Path(file_path)
21
+ dir_name = path_obj.parent.name
22
+
23
+ # Extract project name from dash-encoded path
24
+ # Format: -Users-username-projects-PROJECT_NAME (PROJECT_NAME can have dashes)
25
+ if dir_name.startswith('-') and 'projects' in dir_name:
26
+ parts = dir_name.split('-')
27
+ # Find 'projects' and take everything after it as the project name
28
+ try:
29
+ projects_idx = parts.index('projects')
30
+ if projects_idx + 1 < len(parts):
31
+ # Join all parts after 'projects' to handle multi-part project names
32
+ # like "claude-self-reflect", "n8n-builder", etc.
33
+ project_parts = parts[projects_idx + 1:]
34
+ return '-'.join(project_parts)
35
+ except ValueError:
36
+ pass
37
+
38
+ # Fallback: use the directory name as-is
39
+ return dir_name.lstrip('-')
40
+
41
+
42
+ def get_status() -> dict:
43
+ """Get indexing status with overall stats and per-project breakdown.
44
+
45
+ Returns:
46
+ dict: JSON structure with overall and per-project indexing status
47
+ """
48
+ projects_dir = Path.home() / ".claude" / "projects"
49
+ project_stats = defaultdict(lambda: {"indexed": 0, "total": 0})
50
+
51
+ # Count total JSONL files per project
52
+ if projects_dir.exists():
53
+ for jsonl_file in projects_dir.glob("**/*.jsonl"):
54
+ project_name = extract_project_name_from_path(str(jsonl_file))
55
+ project_stats[project_name]["total"] += 1
56
+
57
+ # Read imported-files.json to count indexed files per project
58
+ config_paths = [
59
+ Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
60
+ Path(__file__).parent.parent.parent / "config" / "imported-files.json",
61
+ Path("/config/imported-files.json") # Docker path
62
+ ]
63
+
64
+ imported_files_path = None
65
+ for path in config_paths:
66
+ if path.exists():
67
+ imported_files_path = path
68
+ break
69
+
70
+ if imported_files_path:
71
+ try:
72
+ with open(imported_files_path, 'r') as f:
73
+ data = json.load(f)
74
+
75
+ # Handle both old and new config file formats
76
+ if "stream_position" in data:
77
+ # New format with stream_position
78
+ stream_pos = data.get("stream_position", {})
79
+ imported_files = stream_pos.get("imported_files", [])
80
+ file_metadata = stream_pos.get("file_metadata", {})
81
+
82
+ # Count fully imported files
83
+ for file_path in imported_files:
84
+ project_name = extract_project_name_from_path(file_path)
85
+ project_stats[project_name]["indexed"] += 1
86
+
87
+ # Count partially imported files (files with position > 0)
88
+ for file_path, metadata in file_metadata.items():
89
+ if isinstance(metadata, dict) and metadata.get("position", 0) > 0:
90
+ # Only count if not already in imported_files
91
+ if file_path not in imported_files:
92
+ project_name = extract_project_name_from_path(file_path)
93
+ project_stats[project_name]["indexed"] += 1
94
+ else:
95
+ # Legacy format with imported_files as top-level object
96
+ imported_files = data.get("imported_files", {})
97
+
98
+ # Count all files in imported_files object (they are all fully imported)
99
+ for file_path in imported_files.keys():
100
+ project_name = extract_project_name_from_path(file_path)
101
+ project_stats[project_name]["indexed"] += 1
102
+ except (json.JSONDecodeError, KeyError, OSError):
103
+ # If config file is corrupted or unreadable, continue with zero indexed counts
104
+ pass
105
+
106
+ # Calculate overall stats
107
+ total_all = sum(p["total"] for p in project_stats.values())
108
+ indexed_all = sum(p["indexed"] for p in project_stats.values())
109
+
110
+ # Build response structure
111
+ result = {
112
+ "overall": {
113
+ "percentage": round((indexed_all / total_all * 100) if total_all > 0 else 100.0, 1),
114
+ "indexed": indexed_all,
115
+ "total": total_all,
116
+ "backlog": total_all - indexed_all
117
+ },
118
+ "projects": {}
119
+ }
120
+
121
+ # Add per-project stats with percentages
122
+ for project, stats in project_stats.items():
123
+ result["projects"][project] = {
124
+ "percentage": round((stats["indexed"] / stats["total"] * 100) if stats["total"] > 0 else 100.0, 1),
125
+ "indexed": stats["indexed"],
126
+ "total": stats["total"]
127
+ }
128
+
129
+ return result
130
+
131
+
132
+ if __name__ == "__main__":
133
+ # Allow running as standalone script for testing
134
+ import sys
135
+ print(json.dumps(get_status(), indent=2))
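Because `get_status()` avoids the heavy MCP imports, it can be called directly for scripting. Both entry points below come from this diff: the `--status` flag added to `__main__.py` and the npm `status` subcommand added in installer/cli.js; run the Python form from the mcp-server directory with its venv active.

```bash
# Python entry point added in __main__.py (prints compact JSON; pretty-print it)
python -m src --status | python -m json.tool

# npm wrapper added in installer/cli.js
claude-self-reflect status
```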
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-self-reflect",
3
- "version": "2.5.10",
3
+ "version": "2.5.12",
4
4
  "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
5
5
  "keywords": [
6
6
  "claude",