claude-self-reflect 2.5.10 → 2.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile.async-importer +26 -0
- package/README.md +7 -3
- package/docker-compose.yaml +48 -7
- package/installer/cli.js +55 -1
- package/mcp-server/run-mcp.sh +48 -0
- package/mcp-server/src/__main__.py +13 -0
- package/mcp-server/src/embedding_manager.py +237 -0
- package/mcp-server/src/server.py +243 -23
- package/mcp-server/src/status.py +135 -0
- package/package.json +1 -1
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
FROM python:3.11-slim
|
|
2
|
+
|
|
3
|
+
# Install system dependencies
|
|
4
|
+
RUN apt-get update && apt-get install -y \
|
|
5
|
+
gcc \
|
|
6
|
+
g++ \
|
|
7
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
8
|
+
|
|
9
|
+
# Set working directory
|
|
10
|
+
WORKDIR /app
|
|
11
|
+
|
|
12
|
+
# Copy requirements
|
|
13
|
+
COPY scripts/requirements.txt /app/
|
|
14
|
+
|
|
15
|
+
# Install Python dependencies
|
|
16
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
17
|
+
|
|
18
|
+
# Copy the streaming importer script
|
|
19
|
+
COPY scripts/streaming-importer.py /scripts/
|
|
20
|
+
|
|
21
|
+
# Set environment variables for memory management
|
|
22
|
+
ENV MALLOC_ARENA_MAX=2
|
|
23
|
+
ENV PYTHONUNBUFFERED=1
|
|
24
|
+
|
|
25
|
+
# Run the streaming importer
|
|
26
|
+
CMD ["python", "/scripts/streaming-importer.py"]
|
package/README.md
CHANGED
|
@@ -6,6 +6,8 @@ Claude forgets everything. This fixes that.
|
|
|
6
6
|
|
|
7
7
|
Ask Claude about past conversations. Get actual answers. **100% local by default** - your conversations never leave your machine. Cloud-enhanced search available when you need it.
|
|
8
8
|
|
|
9
|
+
**✅ Proven at Scale**: Successfully indexed 682 conversation files with 100% reliability. No data loss, no corruption, just seamless conversation memory that works.
|
|
10
|
+
|
|
9
11
|
**Before**: "I don't have access to previous conversations"
|
|
10
12
|
**After**:
|
|
11
13
|
```
|
|
@@ -129,9 +131,10 @@ Claude: [Searches across ALL your projects]
|
|
|
129
131
|
Recent conversations matter more. Old ones fade. Like your brain, but reliable.
|
|
130
132
|
|
|
131
133
|
### 🚀 Performance
|
|
132
|
-
- **Search**: 200-350ms response time
|
|
133
|
-
- **Import**: 2-second response for new conversations
|
|
134
|
+
- **Search**: 200-350ms response time across 682 indexed conversations
|
|
135
|
+
- **Import**: 2-second response for new conversations
|
|
134
136
|
- **Memory**: 50MB operational target with smart chunking
|
|
137
|
+
- **Scale**: 100% indexing success rate across all conversation types
|
|
135
138
|
|
|
136
139
|
## The Technical Stack
|
|
137
140
|
|
|
@@ -150,13 +153,14 @@ Recent conversations matter more. Old ones fade. Like your brain, but reliable.
|
|
|
150
153
|
|
|
151
154
|
## What's New
|
|
152
155
|
|
|
156
|
+
- **v2.5.11** - Critical cloud mode fix - Environment variables now properly passed to MCP server
|
|
157
|
+
- **v2.5.10** - Emergency hotfix for MCP server startup failure (dead code removal)
|
|
153
158
|
- **v2.5.6** - Tool Output Extraction - Captures git changes & tool outputs for cross-agent discovery
|
|
154
159
|
- **v2.5.5** - Critical dependency fix & streaming importer enhancements
|
|
155
160
|
- **v2.5.4** - Documentation & bug fixes (import path & state file compatibility)
|
|
156
161
|
- **v2.5.3** - Streamlined README & import architecture diagram
|
|
157
162
|
- **v2.5.2** - State file compatibility fix
|
|
158
163
|
- **v2.4.5** - 10-40x performance boost
|
|
159
|
-
- **v2.4.3** - Project-scoped search
|
|
160
164
|
|
|
161
165
|
[Full changelog](docs/release-history.md)
|
|
162
166
|
|
package/docker-compose.yaml
CHANGED
|
@@ -8,7 +8,7 @@ services:
|
|
|
8
8
|
command: chown -R 1000:1000 /config
|
|
9
9
|
volumes:
|
|
10
10
|
- ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
|
|
11
|
-
profiles: ["watch", "mcp", "import"]
|
|
11
|
+
profiles: ["watch", "mcp", "import", "async"]
|
|
12
12
|
|
|
13
13
|
# Qdrant vector database - the heart of semantic search
|
|
14
14
|
qdrant:
|
|
@@ -104,18 +104,59 @@ services:
|
|
|
104
104
|
- VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
|
|
105
105
|
- VOYAGE_KEY=${VOYAGE_KEY:-}
|
|
106
106
|
- PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
|
|
107
|
-
- WATCH_INTERVAL=${WATCH_INTERVAL:-
|
|
108
|
-
- MAX_MEMORY_MB=${MAX_MEMORY_MB:-
|
|
109
|
-
- OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-
|
|
110
|
-
- CHUNK_SIZE=${CHUNK_SIZE:-5}
|
|
107
|
+
- WATCH_INTERVAL=${WATCH_INTERVAL:-1} # Aggressive: 5x faster detection (minimum 1 second)
|
|
108
|
+
- MAX_MEMORY_MB=${MAX_MEMORY_MB:-2000} # Ultra conservative to prevent memory leak
|
|
109
|
+
- OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-1500} # 1.5GB operational (25% of 8GB)
|
|
110
|
+
- CHUNK_SIZE=${CHUNK_SIZE:-5} # Minimal batch size
|
|
111
|
+
- HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15} # Keep files HOT longer
|
|
112
|
+
- MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-5} # Single file processing
|
|
113
|
+
- PARALLEL_WORKERS=${PARALLEL_WORKERS:-8} # Enable parallel embedding workers
|
|
111
114
|
- PYTHONUNBUFFERED=1
|
|
112
115
|
- LOGS_DIR=/logs
|
|
113
116
|
- FASTEMBED_CACHE_PATH=/root/.cache/fastembed
|
|
114
117
|
- CURRENT_PROJECT_PATH=${PWD} # Pass current project path for prioritization
|
|
118
|
+
- MALLOC_ARENA_MAX=2 # MEMORY LEAK FIX: Limit glibc malloc arenas
|
|
119
|
+
- THREAD_POOL_WORKERS=${THREAD_POOL_WORKERS:-2} # AsyncEmbedder thread pool size (speed vs stability)
|
|
120
|
+
- THREAD_POOL_RECYCLE_FILES=${THREAD_POOL_RECYCLE_FILES:-50} # Files before recycling thread pool
|
|
115
121
|
restart: unless-stopped
|
|
116
122
|
profiles: ["watch"]
|
|
117
|
-
mem_limit:
|
|
118
|
-
memswap_limit:
|
|
123
|
+
mem_limit: 8g
|
|
124
|
+
memswap_limit: 8g
|
|
125
|
+
|
|
126
|
+
# Async streaming importer - Ground-up async rewrite
|
|
127
|
+
async-importer:
|
|
128
|
+
build:
|
|
129
|
+
context: .
|
|
130
|
+
dockerfile: Dockerfile.async-importer
|
|
131
|
+
container_name: claude-reflection-async
|
|
132
|
+
depends_on:
|
|
133
|
+
- qdrant
|
|
134
|
+
volumes:
|
|
135
|
+
- ${CLAUDE_LOGS_PATH:-~/.claude/projects}:/logs:ro
|
|
136
|
+
- ${CONFIG_PATH:-~/.claude-self-reflect/config}:/config
|
|
137
|
+
- ./scripts:/scripts:ro
|
|
138
|
+
environment:
|
|
139
|
+
- QDRANT_URL=http://qdrant:6333
|
|
140
|
+
- STATE_FILE=/config/imported-files.json
|
|
141
|
+
- VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
|
|
142
|
+
- VOYAGE_KEY=${VOYAGE_KEY:-}
|
|
143
|
+
- PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
|
|
144
|
+
- WATCH_INTERVAL=${WATCH_INTERVAL:-5}
|
|
145
|
+
- MAX_MEMORY_MB=${MAX_MEMORY_MB:-2000}
|
|
146
|
+
- OPERATIONAL_MEMORY_MB=${OPERATIONAL_MEMORY_MB:-1500}
|
|
147
|
+
- CHUNK_SIZE=${CHUNK_SIZE:-5}
|
|
148
|
+
- HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15}
|
|
149
|
+
- MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-5}
|
|
150
|
+
- THREAD_POOL_WORKERS=${THREAD_POOL_WORKERS:-2}
|
|
151
|
+
- PYTHONUNBUFFERED=1
|
|
152
|
+
- LOGS_DIR=/logs
|
|
153
|
+
- FASTEMBED_CACHE_PATH=/root/.cache/fastembed
|
|
154
|
+
- CURRENT_PROJECT_PATH=${PWD}
|
|
155
|
+
- MALLOC_ARENA_MAX=2
|
|
156
|
+
restart: unless-stopped
|
|
157
|
+
profiles: ["async"]
|
|
158
|
+
mem_limit: 4g
|
|
159
|
+
memswap_limit: 4g
|
|
119
160
|
|
|
120
161
|
# MCP server for Claude integration
|
|
121
162
|
mcp-server:
|
package/installer/cli.js
CHANGED
|
@@ -10,6 +10,7 @@ const __dirname = dirname(__filename);
|
|
|
10
10
|
|
|
11
11
|
const commands = {
|
|
12
12
|
setup: 'Run the setup wizard to configure Claude Self-Reflect',
|
|
13
|
+
status: 'Get indexing status as JSON (overall + per-project breakdown)',
|
|
13
14
|
doctor: 'Check your installation and diagnose issues',
|
|
14
15
|
help: 'Show this help message'
|
|
15
16
|
};
|
|
@@ -27,6 +28,53 @@ async function setup() {
|
|
|
27
28
|
});
|
|
28
29
|
}
|
|
29
30
|
|
|
31
|
+
async function status() {
|
|
32
|
+
// Call the Python MCP server's --status command
|
|
33
|
+
const mcpServerPath = join(__dirname, '..', 'mcp-server');
|
|
34
|
+
const venvPython = join(mcpServerPath, 'venv', 'bin', 'python');
|
|
35
|
+
const mcpModule = join(mcpServerPath, 'src');
|
|
36
|
+
|
|
37
|
+
try {
|
|
38
|
+
const child = spawn(venvPython, ['-m', 'src', '--status'], {
|
|
39
|
+
cwd: mcpServerPath,
|
|
40
|
+
stdio: ['inherit', 'pipe', 'pipe']
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
let stdout = '';
|
|
44
|
+
let stderr = '';
|
|
45
|
+
|
|
46
|
+
child.stdout.on('data', (data) => {
|
|
47
|
+
stdout += data.toString();
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
child.stderr.on('data', (data) => {
|
|
51
|
+
stderr += data.toString();
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
child.on('exit', (code) => {
|
|
55
|
+
if (code === 0) {
|
|
56
|
+
// Output the JSON directly for other tools to parse
|
|
57
|
+
process.stdout.write(stdout);
|
|
58
|
+
process.exit(0);
|
|
59
|
+
} else {
|
|
60
|
+
console.error('Error getting status:', stderr || 'Unknown error');
|
|
61
|
+
process.exit(1);
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
// Handle timeout
|
|
66
|
+
setTimeout(() => {
|
|
67
|
+
child.kill('SIGTERM');
|
|
68
|
+
console.error('Status check timed out');
|
|
69
|
+
process.exit(1);
|
|
70
|
+
}, 10000); // 10 second timeout
|
|
71
|
+
|
|
72
|
+
} catch (error) {
|
|
73
|
+
console.error('Failed to execute status command:', error.message);
|
|
74
|
+
process.exit(1);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
30
78
|
async function doctor() {
|
|
31
79
|
console.log('🔍 Checking Claude Self-Reflect installation...\n');
|
|
32
80
|
|
|
@@ -124,8 +172,11 @@ function help() {
|
|
|
124
172
|
console.log(' claude-self-reflect setup --voyage-key=pa-1234567890');
|
|
125
173
|
console.log(' claude-self-reflect setup --local');
|
|
126
174
|
console.log(' claude-self-reflect setup --debug # For troubleshooting');
|
|
175
|
+
console.log(' claude-self-reflect status # Get indexing status as JSON');
|
|
127
176
|
|
|
128
|
-
console.log('\nFor more information:
|
|
177
|
+
console.log('\nFor more information:');
|
|
178
|
+
console.log(' Documentation: https://github.com/ramakay/claude-self-reflect');
|
|
179
|
+
console.log(' Status API: See docs/api-reference.md#cli-status-interface');
|
|
129
180
|
}
|
|
130
181
|
|
|
131
182
|
// Main
|
|
@@ -135,6 +186,9 @@ switch (command) {
|
|
|
135
186
|
case 'setup':
|
|
136
187
|
setup();
|
|
137
188
|
break;
|
|
189
|
+
case 'status':
|
|
190
|
+
status();
|
|
191
|
+
break;
|
|
138
192
|
case 'doctor':
|
|
139
193
|
doctor();
|
|
140
194
|
break;
|
package/mcp-server/run-mcp.sh
CHANGED
|
@@ -21,5 +21,53 @@ else
|
|
|
21
21
|
source venv/bin/activate
|
|
22
22
|
fi
|
|
23
23
|
|
|
24
|
+
# CRITICAL FIX: Pass through environment variables from Claude Code
|
|
25
|
+
# These environment variables are set by `claude mcp add -e KEY=value`
|
|
26
|
+
# Export them so the Python process can access them
|
|
27
|
+
if [ ! -z "$VOYAGE_KEY" ]; then
|
|
28
|
+
export VOYAGE_KEY="$VOYAGE_KEY"
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
if [ ! -z "$VOYAGE_KEY_2" ]; then
|
|
32
|
+
export VOYAGE_KEY_2="$VOYAGE_KEY_2"
|
|
33
|
+
fi
|
|
34
|
+
|
|
35
|
+
if [ ! -z "$PREFER_LOCAL_EMBEDDINGS" ]; then
|
|
36
|
+
export PREFER_LOCAL_EMBEDDINGS="$PREFER_LOCAL_EMBEDDINGS"
|
|
37
|
+
fi
|
|
38
|
+
|
|
39
|
+
if [ ! -z "$QDRANT_URL" ]; then
|
|
40
|
+
export QDRANT_URL="$QDRANT_URL"
|
|
41
|
+
fi
|
|
42
|
+
|
|
43
|
+
if [ ! -z "$ENABLE_MEMORY_DECAY" ]; then
|
|
44
|
+
export ENABLE_MEMORY_DECAY="$ENABLE_MEMORY_DECAY"
|
|
45
|
+
fi
|
|
46
|
+
|
|
47
|
+
if [ ! -z "$DECAY_WEIGHT" ]; then
|
|
48
|
+
export DECAY_WEIGHT="$DECAY_WEIGHT"
|
|
49
|
+
fi
|
|
50
|
+
|
|
51
|
+
if [ ! -z "$DECAY_SCALE_DAYS" ]; then
|
|
52
|
+
export DECAY_SCALE_DAYS="$DECAY_SCALE_DAYS"
|
|
53
|
+
fi
|
|
54
|
+
|
|
55
|
+
if [ ! -z "$EMBEDDING_MODEL" ]; then
|
|
56
|
+
export EMBEDDING_MODEL="$EMBEDDING_MODEL"
|
|
57
|
+
fi
|
|
58
|
+
|
|
59
|
+
# The embedding manager now handles cache properly in a controlled directory
|
|
60
|
+
# Set to 'false' if you want to use HuggingFace instead of Qdrant CDN
|
|
61
|
+
if [ -z "$FASTEMBED_SKIP_HUGGINGFACE" ]; then
|
|
62
|
+
export FASTEMBED_SKIP_HUGGINGFACE=true
|
|
63
|
+
fi
|
|
64
|
+
|
|
65
|
+
# Debug: Show what environment variables are being passed
|
|
66
|
+
echo "[DEBUG] Environment variables for MCP server:"
|
|
67
|
+
echo "[DEBUG] VOYAGE_KEY: ${VOYAGE_KEY:+set}"
|
|
68
|
+
echo "[DEBUG] PREFER_LOCAL_EMBEDDINGS: ${PREFER_LOCAL_EMBEDDINGS:-not set}"
|
|
69
|
+
echo "[DEBUG] QDRANT_URL: ${QDRANT_URL:-not set}"
|
|
70
|
+
echo "[DEBUG] ENABLE_MEMORY_DECAY: ${ENABLE_MEMORY_DECAY:-not set}"
|
|
71
|
+
|
|
24
72
|
# Run the MCP server
|
|
25
73
|
exec python -m src
|
|
@@ -10,9 +10,22 @@ def main():
|
|
|
10
10
|
"--transport",
|
|
11
11
|
choices=["stdio", "sse"],
|
|
12
12
|
default="stdio",
|
|
13
|
+
help="Transport protocol for MCP server (default: stdio)"
|
|
14
|
+
)
|
|
15
|
+
parser.add_argument(
|
|
16
|
+
"--status",
|
|
17
|
+
action="store_true",
|
|
18
|
+
help="Get indexing status as JSON with overall and per-project breakdown"
|
|
13
19
|
)
|
|
14
20
|
args = parser.parse_args()
|
|
15
21
|
|
|
22
|
+
# Handle status request with early exit (avoid loading heavy MCP dependencies)
|
|
23
|
+
if args.status:
|
|
24
|
+
from .status import get_status
|
|
25
|
+
import json
|
|
26
|
+
print(json.dumps(get_status()))
|
|
27
|
+
return
|
|
28
|
+
|
|
16
29
|
# Import is done here to make sure environment variables are loaded
|
|
17
30
|
from .server import mcp
|
|
18
31
|
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Robust embedding model manager with proper cache handling."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
import logging
|
|
7
|
+
import shutil
|
|
8
|
+
from typing import Optional, List, Union
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import threading
|
|
11
|
+
import signal
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
class EmbeddingManager:
|
|
16
|
+
"""Manages embedding models with proper cache and lock handling."""
|
|
17
|
+
|
|
18
|
+
def __init__(self):
|
|
19
|
+
self.model = None
|
|
20
|
+
self.model_type = None # 'local' or 'voyage'
|
|
21
|
+
self.voyage_client = None
|
|
22
|
+
|
|
23
|
+
# Configuration
|
|
24
|
+
self.prefer_local = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
|
|
25
|
+
self.voyage_key = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
|
|
26
|
+
self.embedding_model = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
|
|
27
|
+
self.download_timeout = int(os.getenv('FASTEMBED_DOWNLOAD_TIMEOUT', '30'))
|
|
28
|
+
|
|
29
|
+
# Set cache directory to our controlled location
|
|
30
|
+
self.cache_dir = Path(__file__).parent.parent / '.fastembed-cache'
|
|
31
|
+
|
|
32
|
+
def _clean_stale_locks(self):
|
|
33
|
+
"""Clean up any stale lock files from previous runs."""
|
|
34
|
+
locks_dir = self.cache_dir / '.locks'
|
|
35
|
+
if locks_dir.exists():
|
|
36
|
+
logger.info(f"Cleaning stale locks in {locks_dir}")
|
|
37
|
+
try:
|
|
38
|
+
# Remove all lock files older than 5 minutes
|
|
39
|
+
import time
|
|
40
|
+
current_time = time.time()
|
|
41
|
+
for lock_file in locks_dir.glob('**/*.lock'):
|
|
42
|
+
try:
|
|
43
|
+
age = current_time - lock_file.stat().st_mtime
|
|
44
|
+
if age > 300: # 5 minutes
|
|
45
|
+
lock_file.unlink()
|
|
46
|
+
logger.debug(f"Removed stale lock: {lock_file.name}")
|
|
47
|
+
except Exception as e:
|
|
48
|
+
logger.debug(f"Could not remove lock {lock_file}: {e}")
|
|
49
|
+
except Exception as e:
|
|
50
|
+
logger.warning(f"Error cleaning locks: {e}")
|
|
51
|
+
|
|
52
|
+
def initialize(self) -> bool:
|
|
53
|
+
"""Initialize embedding model based on user preference."""
|
|
54
|
+
logger.info("Initializing embedding manager...")
|
|
55
|
+
|
|
56
|
+
# Clean up any stale locks first
|
|
57
|
+
self._clean_stale_locks()
|
|
58
|
+
|
|
59
|
+
if self.prefer_local:
|
|
60
|
+
# User wants local - try local only, don't fallback to cloud
|
|
61
|
+
if self._try_initialize_local():
|
|
62
|
+
return True
|
|
63
|
+
logger.error("Local embeddings failed and user prefers local - not falling back to cloud")
|
|
64
|
+
return False
|
|
65
|
+
else:
|
|
66
|
+
# User prefers Voyage AI
|
|
67
|
+
if self.voyage_key and self._try_initialize_voyage():
|
|
68
|
+
return True
|
|
69
|
+
logger.warning("Voyage AI failed, trying local as fallback...")
|
|
70
|
+
if self._try_initialize_local():
|
|
71
|
+
return True
|
|
72
|
+
logger.error("Both Voyage AI and local embeddings failed")
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
def _try_initialize_local(self) -> bool:
|
|
76
|
+
"""Try to initialize local FastEmbed model with timeout and optimizations."""
|
|
77
|
+
try:
|
|
78
|
+
logger.info(f"Attempting to load local model: {self.embedding_model}")
|
|
79
|
+
|
|
80
|
+
# CRITICAL OPTIMIZATION: Set thread limits BEFORE loading model
|
|
81
|
+
# This prevents ONNX Runtime and BLAS from over-subscribing CPU
|
|
82
|
+
os.environ['OMP_NUM_THREADS'] = '1'
|
|
83
|
+
os.environ['MKL_NUM_THREADS'] = '1'
|
|
84
|
+
os.environ['OPENBLAS_NUM_THREADS'] = '1'
|
|
85
|
+
os.environ['NUMEXPR_NUM_THREADS'] = '1'
|
|
86
|
+
logger.info("Set thread limits to prevent CPU over-subscription")
|
|
87
|
+
|
|
88
|
+
# Ensure cache directory exists and is writable
|
|
89
|
+
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
90
|
+
|
|
91
|
+
# Set FASTEMBED_CACHE_PATH to our controlled directory
|
|
92
|
+
os.environ['FASTEMBED_CACHE_PATH'] = str(self.cache_dir)
|
|
93
|
+
logger.info(f"Using cache directory: {self.cache_dir}")
|
|
94
|
+
|
|
95
|
+
# Also set HF_HOME to avoid any HuggingFace cache issues
|
|
96
|
+
os.environ['HF_HOME'] = str(self.cache_dir / 'huggingface')
|
|
97
|
+
|
|
98
|
+
model_cache = self.cache_dir / 'models--qdrant--all-MiniLM-L6-v2-onnx'
|
|
99
|
+
|
|
100
|
+
if model_cache.exists():
|
|
101
|
+
logger.info("Model cache found, loading from cache...")
|
|
102
|
+
else:
|
|
103
|
+
logger.info(f"Model cache not found, will download (timeout: {self.download_timeout}s)")
|
|
104
|
+
logger.info("Note: First download may take 1-2 minutes")
|
|
105
|
+
|
|
106
|
+
# Force alternative download if HuggingFace is problematic
|
|
107
|
+
# This uses Qdrant's CDN which is more reliable
|
|
108
|
+
if os.getenv('FASTEMBED_SKIP_HUGGINGFACE', 'true').lower() == 'true':
|
|
109
|
+
os.environ['HF_HUB_OFFLINE'] = '1'
|
|
110
|
+
logger.info("Using alternative download sources (Qdrant CDN)")
|
|
111
|
+
|
|
112
|
+
# Use a thread with timeout for model initialization
|
|
113
|
+
success = False
|
|
114
|
+
error = None
|
|
115
|
+
|
|
116
|
+
def init_model():
|
|
117
|
+
nonlocal success, error
|
|
118
|
+
try:
|
|
119
|
+
from fastembed import TextEmbedding
|
|
120
|
+
# Initialize with optimized settings
|
|
121
|
+
# Note: FastEmbed uses these environment variables internally
|
|
122
|
+
self.model = TextEmbedding(
|
|
123
|
+
model_name=self.embedding_model,
|
|
124
|
+
threads=1 # Single thread per worker to prevent over-subscription
|
|
125
|
+
)
|
|
126
|
+
self.model_type = 'local'
|
|
127
|
+
success = True
|
|
128
|
+
logger.info(f"Successfully initialized local model: {self.embedding_model} with single-thread mode")
|
|
129
|
+
except Exception as e:
|
|
130
|
+
error = e
|
|
131
|
+
logger.error(f"Failed to initialize local model: {e}")
|
|
132
|
+
|
|
133
|
+
# Start initialization in a thread
|
|
134
|
+
thread = threading.Thread(target=init_model)
|
|
135
|
+
thread.daemon = True
|
|
136
|
+
thread.start()
|
|
137
|
+
thread.join(timeout=self.download_timeout)
|
|
138
|
+
|
|
139
|
+
if thread.is_alive():
|
|
140
|
+
logger.error(f"Model initialization timed out after {self.download_timeout}s")
|
|
141
|
+
logger.info("Tip: Set FASTEMBED_SKIP_HUGGINGFACE=true to use alternative download sources")
|
|
142
|
+
# Thread will continue in background but we move on
|
|
143
|
+
return False
|
|
144
|
+
|
|
145
|
+
return success
|
|
146
|
+
|
|
147
|
+
except ImportError:
|
|
148
|
+
logger.error("FastEmbed not installed. Install with: pip install fastembed")
|
|
149
|
+
return False
|
|
150
|
+
except Exception as e:
|
|
151
|
+
logger.error(f"Unexpected error initializing local embeddings: {e}")
|
|
152
|
+
return False
|
|
153
|
+
|
|
154
|
+
def _try_initialize_voyage(self) -> bool:
|
|
155
|
+
"""Try to initialize Voyage AI client."""
|
|
156
|
+
try:
|
|
157
|
+
logger.info("Attempting to initialize Voyage AI...")
|
|
158
|
+
import voyageai
|
|
159
|
+
self.voyage_client = voyageai.Client(api_key=self.voyage_key)
|
|
160
|
+
|
|
161
|
+
# Test the client with a simple embedding
|
|
162
|
+
test_result = self.voyage_client.embed(
|
|
163
|
+
texts=["test"],
|
|
164
|
+
model="voyage-3",
|
|
165
|
+
input_type="document"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
if test_result and test_result.embeddings:
|
|
169
|
+
self.model_type = 'voyage'
|
|
170
|
+
logger.info("Successfully initialized Voyage AI")
|
|
171
|
+
return True
|
|
172
|
+
else:
|
|
173
|
+
logger.error("Voyage AI test embedding failed")
|
|
174
|
+
return False
|
|
175
|
+
|
|
176
|
+
except Exception as e:
|
|
177
|
+
logger.error(f"Failed to initialize Voyage AI: {e}")
|
|
178
|
+
return False
|
|
179
|
+
|
|
180
|
+
def embed(self, texts: Union[str, List[str]], input_type: str = "document") -> Optional[List[List[float]]]:
|
|
181
|
+
"""Generate embeddings using the active model."""
|
|
182
|
+
if not self.model and not self.voyage_client:
|
|
183
|
+
logger.error("No embedding model initialized")
|
|
184
|
+
return None
|
|
185
|
+
|
|
186
|
+
# Ensure texts is a list
|
|
187
|
+
if isinstance(texts, str):
|
|
188
|
+
texts = [texts]
|
|
189
|
+
|
|
190
|
+
try:
|
|
191
|
+
if self.model_type == 'local':
|
|
192
|
+
# FastEmbed returns a generator, convert to list
|
|
193
|
+
embeddings = list(self.model.embed(texts))
|
|
194
|
+
return [emb.tolist() for emb in embeddings]
|
|
195
|
+
|
|
196
|
+
elif self.model_type == 'voyage':
|
|
197
|
+
result = self.voyage_client.embed(
|
|
198
|
+
texts=texts,
|
|
199
|
+
model="voyage-3-lite" if input_type == "query" else "voyage-3",
|
|
200
|
+
input_type=input_type
|
|
201
|
+
)
|
|
202
|
+
return result.embeddings
|
|
203
|
+
|
|
204
|
+
except Exception as e:
|
|
205
|
+
logger.error(f"Error generating embeddings: {e}")
|
|
206
|
+
return None
|
|
207
|
+
|
|
208
|
+
def get_vector_dimension(self) -> int:
|
|
209
|
+
"""Get the dimension of embeddings."""
|
|
210
|
+
if self.model_type == 'local':
|
|
211
|
+
return 384 # all-MiniLM-L6-v2 dimension
|
|
212
|
+
elif self.model_type == 'voyage':
|
|
213
|
+
return 1024 # voyage-3 dimension
|
|
214
|
+
return 0
|
|
215
|
+
|
|
216
|
+
def get_model_info(self) -> dict:
|
|
217
|
+
"""Get information about the active model."""
|
|
218
|
+
return {
|
|
219
|
+
'type': self.model_type,
|
|
220
|
+
'model': self.embedding_model if self.model_type == 'local' else 'voyage-3',
|
|
221
|
+
'dimension': self.get_vector_dimension(),
|
|
222
|
+
'prefer_local': self.prefer_local,
|
|
223
|
+
'has_voyage_key': bool(self.voyage_key)
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# Global instance
|
|
228
|
+
_embedding_manager = None
|
|
229
|
+
|
|
230
|
+
def get_embedding_manager() -> EmbeddingManager:
|
|
231
|
+
"""Get or create the global embedding manager."""
|
|
232
|
+
global _embedding_manager
|
|
233
|
+
if _embedding_manager is None:
|
|
234
|
+
_embedding_manager = EmbeddingManager()
|
|
235
|
+
if not _embedding_manager.initialize():
|
|
236
|
+
raise RuntimeError("Failed to initialize any embedding model")
|
|
237
|
+
return _embedding_manager
|
package/mcp-server/src/server.py
CHANGED
|
@@ -36,37 +36,48 @@ except ImportError:
|
|
|
36
36
|
import voyageai
|
|
37
37
|
from dotenv import load_dotenv
|
|
38
38
|
|
|
39
|
-
# Load environment variables
|
|
39
|
+
# Load environment variables from .env file (fallback only)
|
|
40
40
|
env_path = Path(__file__).parent.parent.parent / '.env'
|
|
41
|
-
load_dotenv(env_path)
|
|
41
|
+
load_dotenv(env_path, override=False) # Don't override process environment
|
|
42
42
|
|
|
43
|
-
# Configuration
|
|
43
|
+
# Configuration - prioritize process environment variables over .env file
|
|
44
44
|
QDRANT_URL = os.getenv('QDRANT_URL', 'http://localhost:6333')
|
|
45
|
-
VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2')
|
|
45
|
+
VOYAGE_API_KEY = os.getenv('VOYAGE_KEY') or os.getenv('VOYAGE_KEY-2') or os.getenv('VOYAGE_KEY_2')
|
|
46
46
|
ENABLE_MEMORY_DECAY = os.getenv('ENABLE_MEMORY_DECAY', 'false').lower() == 'true'
|
|
47
47
|
DECAY_WEIGHT = float(os.getenv('DECAY_WEIGHT', '0.3'))
|
|
48
48
|
DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
|
|
49
49
|
USE_NATIVE_DECAY = os.getenv('USE_NATIVE_DECAY', 'false').lower() == 'true'
|
|
50
50
|
|
|
51
|
-
# Embedding configuration
|
|
52
|
-
|
|
51
|
+
# Embedding configuration - now using lazy initialization
|
|
52
|
+
# CRITICAL: Default changed to 'true' for local embeddings for privacy
|
|
53
|
+
PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'true').lower() == 'true'
|
|
53
54
|
EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
|
|
54
55
|
|
|
55
|
-
#
|
|
56
|
-
|
|
57
|
-
if not PREFER_LOCAL_EMBEDDINGS and VOYAGE_API_KEY:
|
|
58
|
-
voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
|
|
56
|
+
# Import the robust embedding manager
|
|
57
|
+
from .embedding_manager import get_embedding_manager
|
|
59
58
|
|
|
60
|
-
#
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
# Lazy initialization - models will be loaded on first use
|
|
60
|
+
embedding_manager = None
|
|
61
|
+
voyage_client = None # Keep for backward compatibility
|
|
62
|
+
local_embedding_model = None # Keep for backward compatibility
|
|
63
|
+
|
|
64
|
+
def initialize_embeddings():
|
|
65
|
+
"""Initialize embedding models with robust fallback."""
|
|
66
|
+
global embedding_manager, voyage_client, local_embedding_model
|
|
63
67
|
try:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
68
|
+
embedding_manager = get_embedding_manager()
|
|
69
|
+
print(f"[INFO] Embedding manager initialized: {embedding_manager.get_model_info()}")
|
|
70
|
+
|
|
71
|
+
# Set backward compatibility references
|
|
72
|
+
if embedding_manager.model_type == 'voyage':
|
|
73
|
+
voyage_client = embedding_manager.voyage_client
|
|
74
|
+
elif embedding_manager.model_type == 'local':
|
|
75
|
+
local_embedding_model = embedding_manager.model
|
|
76
|
+
|
|
77
|
+
return True
|
|
78
|
+
except Exception as e:
|
|
79
|
+
print(f"[ERROR] Failed to initialize embeddings: {e}")
|
|
80
|
+
return False
|
|
70
81
|
|
|
71
82
|
# Debug environment loading
|
|
72
83
|
print(f"[DEBUG] Environment variables loaded:")
|
|
@@ -88,6 +99,7 @@ class SearchResult(BaseModel):
|
|
|
88
99
|
excerpt: str
|
|
89
100
|
project_name: str
|
|
90
101
|
conversation_id: Optional[str] = None
|
|
102
|
+
base_conversation_id: Optional[str] = None
|
|
91
103
|
collection_name: str
|
|
92
104
|
raw_payload: Optional[Dict[str, Any]] = None # Full Qdrant payload when debug mode enabled
|
|
93
105
|
|
|
@@ -100,6 +112,99 @@ mcp = FastMCP(
|
|
|
100
112
|
|
|
101
113
|
# Create Qdrant client
|
|
102
114
|
qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
|
|
115
|
+
|
|
116
|
+
# Track indexing status (updated periodically)
|
|
117
|
+
indexing_status = {
|
|
118
|
+
"last_check": 0,
|
|
119
|
+
"indexed_conversations": 0,
|
|
120
|
+
"total_conversations": 0,
|
|
121
|
+
"percentage": 100.0,
|
|
122
|
+
"backlog_count": 0,
|
|
123
|
+
"is_checking": False
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
async def update_indexing_status():
|
|
127
|
+
"""Update indexing status by checking JSONL files vs Qdrant collections.
|
|
128
|
+
This is a lightweight check that compares file counts, not full content."""
|
|
129
|
+
global indexing_status
|
|
130
|
+
|
|
131
|
+
# Don't run concurrent checks
|
|
132
|
+
if indexing_status["is_checking"]:
|
|
133
|
+
return
|
|
134
|
+
|
|
135
|
+
# Only check every 5 minutes to avoid overhead
|
|
136
|
+
current_time = time.time()
|
|
137
|
+
if current_time - indexing_status["last_check"] < 300: # 5 minutes
|
|
138
|
+
return
|
|
139
|
+
|
|
140
|
+
indexing_status["is_checking"] = True
|
|
141
|
+
|
|
142
|
+
try:
|
|
143
|
+
# Count total JSONL files
|
|
144
|
+
projects_dir = Path.home() / ".claude" / "projects"
|
|
145
|
+
total_files = 0
|
|
146
|
+
indexed_files = 0
|
|
147
|
+
|
|
148
|
+
if projects_dir.exists():
|
|
149
|
+
# Get all JSONL files
|
|
150
|
+
jsonl_files = list(projects_dir.glob("**/*.jsonl"))
|
|
151
|
+
total_files = len(jsonl_files)
|
|
152
|
+
|
|
153
|
+
# Check imported-files.json to see what's been imported
|
|
154
|
+
# The streaming importer uses imported-files.json with nested structure
|
|
155
|
+
# Try multiple possible locations for the config file
|
|
156
|
+
possible_paths = [
|
|
157
|
+
Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
|
|
158
|
+
Path(__file__).parent.parent.parent / "config" / "imported-files.json",
|
|
159
|
+
Path("/config/imported-files.json") # Docker path if running in container
|
|
160
|
+
]
|
|
161
|
+
|
|
162
|
+
imported_files_path = None
|
|
163
|
+
for path in possible_paths:
|
|
164
|
+
if path.exists():
|
|
165
|
+
imported_files_path = path
|
|
166
|
+
break
|
|
167
|
+
|
|
168
|
+
if imported_files_path and imported_files_path.exists():
|
|
169
|
+
with open(imported_files_path, 'r') as f:
|
|
170
|
+
imported_data = json.load(f)
|
|
171
|
+
# The file has nested structure: {stream_position: {file: position}, imported_files: {file: lines}}
|
|
172
|
+
# Handle new nested structure
|
|
173
|
+
stream_position = imported_data.get("stream_position", {})
|
|
174
|
+
imported_files_list = stream_position.get("imported_files", [])
|
|
175
|
+
file_metadata = stream_position.get("file_metadata", {})
|
|
176
|
+
|
|
177
|
+
# Count files that have been imported
|
|
178
|
+
for file_path in jsonl_files:
|
|
179
|
+
# Try multiple path formats to match Docker's state file
|
|
180
|
+
file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
|
|
181
|
+
# Also try without .claude/projects prefix (Docker mounts directly)
|
|
182
|
+
file_str_alt = file_str.replace("/.claude/projects", "")
|
|
183
|
+
|
|
184
|
+
# Check if file is in imported_files list (fully imported)
|
|
185
|
+
if file_str in imported_files_list or file_str_alt in imported_files_list:
|
|
186
|
+
indexed_files += 1
|
|
187
|
+
# Or if it has metadata with position > 0 (partially imported)
|
|
188
|
+
elif file_str in file_metadata and file_metadata[file_str].get("position", 0) > 0:
|
|
189
|
+
indexed_files += 1
|
|
190
|
+
elif file_str_alt in file_metadata and file_metadata[file_str_alt].get("position", 0) > 0:
|
|
191
|
+
indexed_files += 1
|
|
192
|
+
|
|
193
|
+
# Update status
|
|
194
|
+
indexing_status["last_check"] = current_time
|
|
195
|
+
indexing_status["total_conversations"] = total_files
|
|
196
|
+
indexing_status["indexed_conversations"] = indexed_files
|
|
197
|
+
indexing_status["backlog_count"] = total_files - indexed_files
|
|
198
|
+
|
|
199
|
+
if total_files > 0:
|
|
200
|
+
indexing_status["percentage"] = (indexed_files / total_files) * 100
|
|
201
|
+
else:
|
|
202
|
+
indexing_status["percentage"] = 100.0
|
|
203
|
+
|
|
204
|
+
except Exception as e:
|
|
205
|
+
print(f"[WARNING] Failed to update indexing status: {e}")
|
|
206
|
+
finally:
|
|
207
|
+
indexing_status["is_checking"] = False
|
|
103
208
|
|
|
104
209
|
async def get_all_collections() -> List[str]:
|
|
105
210
|
"""Get all collections (both Voyage and local)."""
|
|
@@ -115,12 +220,23 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
|
|
|
115
220
|
text: Text to embed
|
|
116
221
|
force_type: Force specific embedding type ('local' or 'voyage')
|
|
117
222
|
"""
|
|
118
|
-
|
|
223
|
+
global embedding_manager, voyage_client, local_embedding_model
|
|
224
|
+
|
|
225
|
+
# Initialize on first use
|
|
226
|
+
if embedding_manager is None:
|
|
227
|
+
if not initialize_embeddings():
|
|
228
|
+
raise RuntimeError("Failed to initialize any embedding model. Check logs for details.")
|
|
229
|
+
|
|
230
|
+
# Determine which type to use
|
|
231
|
+
if force_type:
|
|
232
|
+
use_local = force_type == 'local'
|
|
233
|
+
else:
|
|
234
|
+
use_local = embedding_manager.model_type == 'local'
|
|
119
235
|
|
|
120
236
|
if use_local:
|
|
121
237
|
# Use local embeddings
|
|
122
238
|
if not local_embedding_model:
|
|
123
|
-
raise ValueError("Local embedding model not
|
|
239
|
+
raise ValueError("Local embedding model not available")
|
|
124
240
|
|
|
125
241
|
# Run in executor since fastembed is synchronous
|
|
126
242
|
loop = asyncio.get_event_loop()
|
|
@@ -131,7 +247,7 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> Lis
|
|
|
131
247
|
else:
|
|
132
248
|
# Use Voyage AI
|
|
133
249
|
if not voyage_client:
|
|
134
|
-
raise ValueError("Voyage client not
|
|
250
|
+
raise ValueError("Voyage client not available")
|
|
135
251
|
result = voyage_client.embed(
|
|
136
252
|
texts=[text],
|
|
137
253
|
model="voyage-3-large",
|
|
@@ -417,6 +533,7 @@ async def reflect_on_past(
|
|
|
417
533
|
excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
|
|
418
534
|
project_name=point_project,
|
|
419
535
|
conversation_id=point.payload.get('conversation_id'),
|
|
536
|
+
base_conversation_id=point.payload.get('base_conversation_id'),
|
|
420
537
|
collection_name=collection_name,
|
|
421
538
|
raw_payload=point.payload if include_raw else None
|
|
422
539
|
))
|
|
@@ -496,6 +613,7 @@ async def reflect_on_past(
|
|
|
496
613
|
excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
|
|
497
614
|
project_name=point_project,
|
|
498
615
|
conversation_id=point.payload.get('conversation_id'),
|
|
616
|
+
base_conversation_id=point.payload.get('base_conversation_id'),
|
|
499
617
|
collection_name=collection_name,
|
|
500
618
|
raw_payload=point.payload if include_raw else None
|
|
501
619
|
))
|
|
@@ -532,6 +650,7 @@ async def reflect_on_past(
|
|
|
532
650
|
excerpt=(point.payload.get('text', '')[:350] + '...' if len(point.payload.get('text', '')) > 350 else point.payload.get('text', '')),
|
|
533
651
|
project_name=point_project,
|
|
534
652
|
conversation_id=point.payload.get('conversation_id'),
|
|
653
|
+
base_conversation_id=point.payload.get('base_conversation_id'),
|
|
535
654
|
collection_name=collection_name,
|
|
536
655
|
raw_payload=point.payload if include_raw else None
|
|
537
656
|
))
|
|
@@ -552,6 +671,30 @@ async def reflect_on_past(
|
|
|
552
671
|
message="Search complete, processing results"
|
|
553
672
|
)
|
|
554
673
|
|
|
674
|
+
# Apply base_conversation_id boosting before sorting
|
|
675
|
+
timing_info['boost_start'] = time.time()
|
|
676
|
+
|
|
677
|
+
# Group results by base_conversation_id to identify related chunks
|
|
678
|
+
base_conversation_groups = {}
|
|
679
|
+
for result in all_results:
|
|
680
|
+
base_id = result.base_conversation_id
|
|
681
|
+
if base_id:
|
|
682
|
+
if base_id not in base_conversation_groups:
|
|
683
|
+
base_conversation_groups[base_id] = []
|
|
684
|
+
base_conversation_groups[base_id].append(result)
|
|
685
|
+
|
|
686
|
+
# Apply boost to results from base conversations with multiple high-scoring chunks
|
|
687
|
+
base_conversation_boost = 0.1 # Boost factor for base conversation matching
|
|
688
|
+
for base_id, group_results in base_conversation_groups.items():
|
|
689
|
+
if len(group_results) > 1: # Multiple chunks from same base conversation
|
|
690
|
+
avg_score = sum(r.score for r in group_results) / len(group_results)
|
|
691
|
+
if avg_score > 0.8: # Only boost high-quality base conversations
|
|
692
|
+
for result in group_results:
|
|
693
|
+
result.score += base_conversation_boost
|
|
694
|
+
await ctx.debug(f"Boosted result from base_conversation_id {base_id}: {result.score:.3f}")
|
|
695
|
+
|
|
696
|
+
timing_info['boost_end'] = time.time()
|
|
697
|
+
|
|
555
698
|
# Sort by score and limit
|
|
556
699
|
timing_info['sort_start'] = time.time()
|
|
557
700
|
all_results.sort(key=lambda x: x.score, reverse=True)
|
|
@@ -561,12 +704,89 @@ async def reflect_on_past(
|
|
|
561
704
|
if not all_results:
|
|
562
705
|
return f"No conversations found matching '{query}'. Try different keywords or check if conversations have been imported."
|
|
563
706
|
|
|
707
|
+
# Update indexing status before returning results
|
|
708
|
+
await update_indexing_status()
|
|
709
|
+
|
|
564
710
|
# Format results based on response_format
|
|
565
711
|
timing_info['format_start'] = time.time()
|
|
566
712
|
|
|
567
713
|
if response_format == "xml":
|
|
714
|
+
# Add upfront summary for immediate visibility (before collapsible XML)
|
|
715
|
+
upfront_summary = ""
|
|
716
|
+
|
|
717
|
+
# Show indexing status prominently
|
|
718
|
+
if indexing_status["percentage"] < 95.0:
|
|
719
|
+
upfront_summary += f"📊 INDEXING: {indexing_status['indexed_conversations']}/{indexing_status['total_conversations']} conversations ({indexing_status['percentage']:.1f}% complete, {indexing_status['backlog_count']} pending)\n"
|
|
720
|
+
|
|
721
|
+
# Show result summary
|
|
722
|
+
if all_results:
|
|
723
|
+
score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
|
|
724
|
+
upfront_summary += f"🎯 RESULTS: {len(all_results)} matches ({score_info} relevance, top score: {all_results[0].score:.3f})\n"
|
|
725
|
+
|
|
726
|
+
# Show performance
|
|
727
|
+
total_time = time.time() - start_time
|
|
728
|
+
upfront_summary += f"⚡ PERFORMANCE: {int(total_time * 1000)}ms total ({len(collections_to_search)} collections searched)\n"
|
|
729
|
+
else:
|
|
730
|
+
upfront_summary += f"❌ NO RESULTS: No conversations found matching '{query}'\n"
|
|
731
|
+
|
|
568
732
|
# XML format (compact tags for performance)
|
|
569
|
-
result_text = "<search>\n"
|
|
733
|
+
result_text = upfront_summary + "\n<search>\n"
|
|
734
|
+
|
|
735
|
+
# Add indexing status if not fully baselined - put key stats in opening tag for immediate visibility
|
|
736
|
+
if indexing_status["percentage"] < 95.0:
|
|
737
|
+
result_text += f' <info status="indexing" progress="{indexing_status["percentage"]:.1f}%" backlog="{indexing_status["backlog_count"]}">\n'
|
|
738
|
+
result_text += f' <message>📊 Indexing: {indexing_status["indexed_conversations"]}/{indexing_status["total_conversations"]} conversations ({indexing_status["percentage"]:.1f}% complete, {indexing_status["backlog_count"]} pending)</message>\n'
|
|
739
|
+
result_text += f" </info>\n"
|
|
740
|
+
|
|
741
|
+
# Add high-level result summary
|
|
742
|
+
if all_results:
|
|
743
|
+
# Count today's results
|
|
744
|
+
now = datetime.now(timezone.utc)
|
|
745
|
+
today_count = 0
|
|
746
|
+
yesterday_count = 0
|
|
747
|
+
week_count = 0
|
|
748
|
+
|
|
749
|
+
for result in all_results:
|
|
750
|
+
timestamp_clean = result.timestamp.replace('Z', '+00:00') if result.timestamp.endswith('Z') else result.timestamp
|
|
751
|
+
timestamp_dt = datetime.fromisoformat(timestamp_clean)
|
|
752
|
+
if timestamp_dt.tzinfo is None:
|
|
753
|
+
timestamp_dt = timestamp_dt.replace(tzinfo=timezone.utc)
|
|
754
|
+
|
|
755
|
+
days_ago = (now - timestamp_dt).days
|
|
756
|
+
if days_ago == 0:
|
|
757
|
+
today_count += 1
|
|
758
|
+
elif days_ago == 1:
|
|
759
|
+
yesterday_count += 1
|
|
760
|
+
if days_ago <= 7:
|
|
761
|
+
week_count += 1
|
|
762
|
+
|
|
763
|
+
# Compact summary with key info in opening tag
|
|
764
|
+
time_info = ""
|
|
765
|
+
if today_count > 0:
|
|
766
|
+
time_info = f"{today_count} today"
|
|
767
|
+
elif yesterday_count > 0:
|
|
768
|
+
time_info = f"{yesterday_count} yesterday"
|
|
769
|
+
elif week_count > 0:
|
|
770
|
+
time_info = f"{week_count} this week"
|
|
771
|
+
else:
|
|
772
|
+
time_info = "older results"
|
|
773
|
+
|
|
774
|
+
score_info = "high" if all_results[0].score >= 0.85 else "good" if all_results[0].score >= 0.75 else "partial"
|
|
775
|
+
|
|
776
|
+
result_text += f' <summary count="{len(all_results)}" relevance="{score_info}" recency="{time_info}" top-score="{all_results[0].score:.3f}">\n'
|
|
777
|
+
|
|
778
|
+
# Short preview of top result
|
|
779
|
+
top_excerpt = all_results[0].excerpt[:100].strip()
|
|
780
|
+
if '...' not in top_excerpt:
|
|
781
|
+
top_excerpt += "..."
|
|
782
|
+
result_text += f' <preview>{top_excerpt}</preview>\n'
|
|
783
|
+
result_text += f" </summary>\n"
|
|
784
|
+
else:
|
|
785
|
+
result_text += f" <result-summary>\n"
|
|
786
|
+
result_text += f" <headline>No matches found</headline>\n"
|
|
787
|
+
result_text += f" <relevance>No conversations matched your query</relevance>\n"
|
|
788
|
+
result_text += f" </result-summary>\n"
|
|
789
|
+
|
|
570
790
|
result_text += f" <meta>\n"
|
|
571
791
|
result_text += f" <q>{query}</q>\n"
|
|
572
792
|
result_text += f" <scope>{target_project if target_project != 'all' else 'all'}</scope>\n"
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Ultra-fast status checker for Claude Self Reflect indexing progress.
|
|
2
|
+
|
|
3
|
+
This module provides lightweight indexing status without loading heavy MCP dependencies.
|
|
4
|
+
Designed for <20ms execution time to support status bars and shell scripts.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def extract_project_name_from_path(file_path: str) -> str:
|
|
13
|
+
"""Extract project name from JSONL file path.
|
|
14
|
+
|
|
15
|
+
Handles paths like:
|
|
16
|
+
- ~/.claude/projects/-Users-ramakrishnanannaswamy-projects-claude-self-reflect/file.jsonl
|
|
17
|
+
- /logs/-Users-ramakrishnanannaswamy-projects-n8n-builder/file.jsonl
|
|
18
|
+
"""
|
|
19
|
+
# Get the directory name containing the JSONL file
|
|
20
|
+
path_obj = Path(file_path)
|
|
21
|
+
dir_name = path_obj.parent.name
|
|
22
|
+
|
|
23
|
+
# Extract project name from dash-encoded path
|
|
24
|
+
# Format: -Users-username-projects-PROJECT_NAME (PROJECT_NAME can have dashes)
|
|
25
|
+
if dir_name.startswith('-') and 'projects' in dir_name:
|
|
26
|
+
parts = dir_name.split('-')
|
|
27
|
+
# Find 'projects' and take everything after it as the project name
|
|
28
|
+
try:
|
|
29
|
+
projects_idx = parts.index('projects')
|
|
30
|
+
if projects_idx + 1 < len(parts):
|
|
31
|
+
# Join all parts after 'projects' to handle multi-part project names
|
|
32
|
+
# like "claude-self-reflect", "n8n-builder", etc.
|
|
33
|
+
project_parts = parts[projects_idx + 1:]
|
|
34
|
+
return '-'.join(project_parts)
|
|
35
|
+
except ValueError:
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
# Fallback: use the directory name as-is
|
|
39
|
+
return dir_name.lstrip('-')
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_status() -> dict:
|
|
43
|
+
"""Get indexing status with overall stats and per-project breakdown.
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
dict: JSON structure with overall and per-project indexing status
|
|
47
|
+
"""
|
|
48
|
+
projects_dir = Path.home() / ".claude" / "projects"
|
|
49
|
+
project_stats = defaultdict(lambda: {"indexed": 0, "total": 0})
|
|
50
|
+
|
|
51
|
+
# Count total JSONL files per project
|
|
52
|
+
if projects_dir.exists():
|
|
53
|
+
for jsonl_file in projects_dir.glob("**/*.jsonl"):
|
|
54
|
+
project_name = extract_project_name_from_path(str(jsonl_file))
|
|
55
|
+
project_stats[project_name]["total"] += 1
|
|
56
|
+
|
|
57
|
+
# Read imported-files.json to count indexed files per project
|
|
58
|
+
config_paths = [
|
|
59
|
+
Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
|
|
60
|
+
Path(__file__).parent.parent.parent / "config" / "imported-files.json",
|
|
61
|
+
Path("/config/imported-files.json") # Docker path
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
imported_files_path = None
|
|
65
|
+
for path in config_paths:
|
|
66
|
+
if path.exists():
|
|
67
|
+
imported_files_path = path
|
|
68
|
+
break
|
|
69
|
+
|
|
70
|
+
if imported_files_path:
|
|
71
|
+
try:
|
|
72
|
+
with open(imported_files_path, 'r') as f:
|
|
73
|
+
data = json.load(f)
|
|
74
|
+
|
|
75
|
+
# Handle both old and new config file formats
|
|
76
|
+
if "stream_position" in data:
|
|
77
|
+
# New format with stream_position
|
|
78
|
+
stream_pos = data.get("stream_position", {})
|
|
79
|
+
imported_files = stream_pos.get("imported_files", [])
|
|
80
|
+
file_metadata = stream_pos.get("file_metadata", {})
|
|
81
|
+
|
|
82
|
+
# Count fully imported files
|
|
83
|
+
for file_path in imported_files:
|
|
84
|
+
project_name = extract_project_name_from_path(file_path)
|
|
85
|
+
project_stats[project_name]["indexed"] += 1
|
|
86
|
+
|
|
87
|
+
# Count partially imported files (files with position > 0)
|
|
88
|
+
for file_path, metadata in file_metadata.items():
|
|
89
|
+
if isinstance(metadata, dict) and metadata.get("position", 0) > 0:
|
|
90
|
+
# Only count if not already in imported_files
|
|
91
|
+
if file_path not in imported_files:
|
|
92
|
+
project_name = extract_project_name_from_path(file_path)
|
|
93
|
+
project_stats[project_name]["indexed"] += 1
|
|
94
|
+
else:
|
|
95
|
+
# Legacy format with imported_files as top-level object
|
|
96
|
+
imported_files = data.get("imported_files", {})
|
|
97
|
+
|
|
98
|
+
# Count all files in imported_files object (they are all fully imported)
|
|
99
|
+
for file_path in imported_files.keys():
|
|
100
|
+
project_name = extract_project_name_from_path(file_path)
|
|
101
|
+
project_stats[project_name]["indexed"] += 1
|
|
102
|
+
except (json.JSONDecodeError, KeyError, OSError):
|
|
103
|
+
# If config file is corrupted or unreadable, continue with zero indexed counts
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
# Calculate overall stats
|
|
107
|
+
total_all = sum(p["total"] for p in project_stats.values())
|
|
108
|
+
indexed_all = sum(p["indexed"] for p in project_stats.values())
|
|
109
|
+
|
|
110
|
+
# Build response structure
|
|
111
|
+
result = {
|
|
112
|
+
"overall": {
|
|
113
|
+
"percentage": round((indexed_all / total_all * 100) if total_all > 0 else 100.0, 1),
|
|
114
|
+
"indexed": indexed_all,
|
|
115
|
+
"total": total_all,
|
|
116
|
+
"backlog": total_all - indexed_all
|
|
117
|
+
},
|
|
118
|
+
"projects": {}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
# Add per-project stats with percentages
|
|
122
|
+
for project, stats in project_stats.items():
|
|
123
|
+
result["projects"][project] = {
|
|
124
|
+
"percentage": round((stats["indexed"] / stats["total"] * 100) if stats["total"] > 0 else 100.0, 1),
|
|
125
|
+
"indexed": stats["indexed"],
|
|
126
|
+
"total": stats["total"]
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
if __name__ == "__main__":
|
|
133
|
+
# Allow running as standalone script for testing
|
|
134
|
+
import sys
|
|
135
|
+
print(json.dumps(get_status(), indent=2))
|