claude-self-reflect 3.3.1 → 4.0.0
- package/.claude/agents/claude-self-reflect-test.md +107 -8
- package/.claude/agents/quality-fixer.md +314 -0
- package/.claude/agents/reflection-specialist.md +40 -1
- package/mcp-server/run-mcp.sh +20 -6
- package/mcp-server/src/code_reload_tool.py +271 -0
- package/mcp-server/src/embedding_manager.py +60 -26
- package/mcp-server/src/enhanced_tool_registry.py +407 -0
- package/mcp-server/src/mode_switch_tool.py +181 -0
- package/mcp-server/src/parallel_search.py +8 -3
- package/mcp-server/src/project_resolver.py +20 -2
- package/mcp-server/src/reflection_tools.py +50 -8
- package/mcp-server/src/rich_formatting.py +103 -0
- package/mcp-server/src/search_tools.py +90 -37
- package/mcp-server/src/security_patches.py +555 -0
- package/mcp-server/src/server.py +318 -240
- package/mcp-server/src/status.py +13 -8
- package/mcp-server/src/test_quality.py +153 -0
- package/package.json +1 -1
- package/scripts/ast_grep_final_analyzer.py +5 -2
- package/scripts/ast_grep_unified_registry.py +170 -16
- package/scripts/csr-status +190 -45
- package/scripts/import-conversations-unified.py +10 -5
- package/scripts/session_quality_tracker.py +221 -41
package/mcp-server/src/server.py
CHANGED
@@ -11,6 +11,8 @@ import time
 import logging
 import math
 from xml.sax.saxutils import escape
+from collections import defaultdict, Counter
+import aiofiles
 
 from fastmcp import FastMCP, Context
 
@@ -30,6 +32,8 @@ from .temporal_utils import SessionDetector, TemporalParser, WorkSession, group_
 from .temporal_tools import register_temporal_tools
 from .search_tools import register_search_tools
 from .reflection_tools import register_reflection_tools
+from .mode_switch_tool import register_mode_switch_tool
+from .code_reload_tool import register_code_reload_tool
 from pydantic import BaseModel, Field
 from qdrant_client import AsyncQdrantClient, models
 from qdrant_client.models import (
@@ -77,6 +81,9 @@ logging.basicConfig(
         logging.StreamHandler()
     ]
 )
+
+# Setup logger early to avoid NameError
+logger = logging.getLogger(__name__)
 DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
 USE_NATIVE_DECAY = os.getenv('USE_NATIVE_DECAY', 'false').lower() == 'true'
 
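The "setup logger early" hunk is small but meaningful: the diff moves `logger = logging.getLogger(__name__)` from deep in the module (old line 320, removed in a later hunk) up to line 86, immediately after `logging.basicConfig(...)`, and the new comment names the motivation. A minimal stdlib-only sketch of the ordering hazard this avoids:

```python
import logging

logging.basicConfig(level=logging.DEBUG)

# Defining the module logger immediately after basicConfig() means any
# import-time diagnostics below it can log safely. If this assignment
# sat hundreds of lines further down, the first module-level
# logger.debug(...) above it would raise NameError at import time.
logger = logging.getLogger(__name__)

logger.debug("import-time diagnostics are now safe")
```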
@@ -122,10 +129,51 @@ except ImportError:
     DECAY_MANAGER_AVAILABLE = False
     logging.warning("Decay manager module not available")
 
-
-
-
-
+class EmbeddingState:
+    """Manages embedding state without global variables."""
+    def __init__(self):
+        self.embedding_manager = None
+        self.voyage_client = None  # Keep for backward compatibility
+        self.local_embedding_model = None  # Keep for backward compatibility
+        self._initialized = False
+
+    def initialize_embeddings(self):
+        """Initialize embedding models with robust fallback."""
+        if self._initialized:
+            return True
+
+        try:
+            self.embedding_manager = get_embedding_manager()
+            logger.info(f"Embedding manager initialized: {self.embedding_manager.get_model_info()}")
+
+            # Set backward compatibility references
+            if self.embedding_manager.model_type == 'voyage':
+                self.voyage_client = self.embedding_manager.voyage_client
+            elif self.embedding_manager.model_type == 'local':
+                self.local_embedding_model = self.embedding_manager.local_model
+
+            self._initialized = True
+            return True
+        except Exception as e:
+            logger.error(f"Failed to initialize embeddings: {e}")
+            return False
+
+class IndexingState:
+    """Manages indexing status without global variables."""
+    def __init__(self):
+        self.status = {
+            "last_check": 0,
+            "indexed_conversations": 0,
+            "total_conversations": 0,
+            "percentage": 100.0,
+            "backlog_count": 0,
+            "is_checking": False
+        }
+        self.cache = {"result": None, "timestamp": 0}
+
+# Initialize state managers
+embedding_state = EmbeddingState()
+indexing_state = IndexingState()
 
 # Initialize connection pool
 qdrant_pool = None
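This is the headline refactor of 4.0.0: module globals give way to `EmbeddingState` and `IndexingState` objects with lazy, idempotent initialization. A condensed, self-contained sketch of the pattern; the `factory` stand-in replaces the package's real `get_embedding_manager`:

```python
class LazyState:
    """Holds a lazily created resource; initialize() is idempotent."""
    def __init__(self, factory):
        self._factory = factory
        self.resource = None
        self._initialized = False

    def initialize(self) -> bool:
        if self._initialized:          # repeat calls are cheap no-ops
            return True
        try:
            self.resource = self._factory()
            self._initialized = True
            return True
        except Exception:
            return False               # caller decides how to degrade

state = LazyState(factory=dict)        # stub factory for illustration
assert state.initialize() and state.initialize()  # second call short-circuits
```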
@@ -133,38 +181,24 @@ circuit_breaker = None
 
 def initialize_embeddings():
     """Initialize embedding models with robust fallback."""
-
-    try:
-        embedding_manager = get_embedding_manager()
-        print(f"[INFO] Embedding manager initialized: {embedding_manager.get_model_info()}")
-
-        # Set backward compatibility references
-        if embedding_manager.model_type == 'voyage':
-            voyage_client = embedding_manager.voyage_client
-        elif embedding_manager.model_type == 'local':
-            local_embedding_model = embedding_manager.local_model
-
-        return True
-    except Exception as e:
-        print(f"[ERROR] Failed to initialize embeddings: {e}")
-        return False
+    return embedding_state.initialize_embeddings()
 
 # Debug environment loading and startup
 # Debug environment loading and startup
 startup_time = datetime.now(timezone.utc).isoformat()
-
-
-
-
-
-
-
-
-
-
-
-
-
+logger.info(f"MCP Server starting at {startup_time}")
+logger.info(f"Python: {sys.version}")
+logger.info(f"Working directory: {os.getcwd()}")
+logger.info(f"Script location: {__file__}")
+logger.debug("Environment variables loaded:")
+logger.debug(f"QDRANT_URL: {QDRANT_URL}")
+logger.debug(f"ENABLE_MEMORY_DECAY: {ENABLE_MEMORY_DECAY}")
+logger.debug(f"USE_NATIVE_DECAY: {USE_NATIVE_DECAY}")
+logger.debug(f"DECAY_WEIGHT: {DECAY_WEIGHT}")
+logger.debug(f"DECAY_SCALE_DAYS: {DECAY_SCALE_DAYS}")
+logger.debug(f"PREFER_LOCAL_EMBEDDINGS: {PREFER_LOCAL_EMBEDDINGS}")
+logger.debug(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
+logger.debug(f"env_path: {env_path}")
 
 
 class SearchResult(BaseModel):
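Collapsing the old body into a one-line delegation keeps the public `initialize_embeddings()` name for existing callers while eliminating global-variable bookkeeping. Assigning module globals from inside a function is easy to get wrong in Python, which is a common motivation for exactly this kind of state-object refactor; a sketch of the scoping pitfall:

```python
class State:
    client = None

client = None

def broken_init():
    # Without a `global` declaration this binds a *local* name;
    # the module-level `client` is untouched.
    client = object()

def state_based_init(state: State):
    # Mutating an attribute needs no `global` statement, which is
    # what delegating to an EmbeddingState-style object relies on.
    state.client = object()

broken_init()
assert client is None              # the global never changed

s = State()
state_based_init(s)
assert s.client is not None
```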
@@ -205,19 +239,19 @@ if CONNECTION_POOL_AVAILABLE and ENABLE_PARALLEL_SEARCH:
     # Create a wrapper for backward compatibility
     qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
     circuit_breaker = CircuitBreaker(failure_threshold=5, recovery_timeout=60.0)
-
+    logger.info(f"Connection pool initialized with size {POOL_SIZE}")
 else:
     # Fallback to single client
     qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
     qdrant_pool = None
     circuit_breaker = None
-
+    logger.info("Using single Qdrant client (no pooling)")
 
 # Initialize decay manager if available
 decay_manager = None
 if DECAY_MANAGER_AVAILABLE:
     decay_manager = DecayManager()
-
+    logger.info("Decay manager initialized")
 
 # Add MCP Resources for system status
 @mcp.resource("status://import-stats")
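The pooled branch wires in `CircuitBreaker(failure_threshold=5, recovery_timeout=60.0)`, whose parameters suggest the classic breaker contract: trip open after five consecutive failures, then allow a retry only after a 60-second cool-down. A minimal sketch of that contract; this is not the package's actual CircuitBreaker, whose implementation is not shown in this diff:

```python
import time

class MiniCircuitBreaker:
    """Opens after `failure_threshold` consecutive failures; allows
    a retry once `recovery_timeout` seconds have elapsed."""
    def __init__(self, failure_threshold: int = 5, recovery_timeout: float = 60.0):
        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.failures = 0
        self.opened_at = None

    def allow(self) -> bool:
        if self.opened_at is None:
            return True
        return time.monotonic() - self.opened_at >= self.recovery_timeout

    def record_success(self) -> None:
        self.failures, self.opened_at = 0, None

    def record_failure(self) -> None:
        self.failures += 1
        if self.failures >= self.failure_threshold:
            self.opened_at = time.monotonic()

breaker = MiniCircuitBreaker(failure_threshold=2, recovery_timeout=0.1)
breaker.record_failure(); breaker.record_failure()
assert not breaker.allow()          # tripped open
time.sleep(0.15)
assert breaker.allow()              # cool-down elapsed, half-open retry
```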
@@ -275,11 +309,11 @@ async def get_system_health():
 
     # Check embedding configuration
     embedding_info = {}
-    if embedding_manager:
+    if embedding_state.embedding_manager:
         embedding_info = {
-            "model_type": embedding_manager.model_type,
-            "model_name": embedding_manager.model_name,
-            "dimension": embedding_manager.dimension
+            "model_type": embedding_state.embedding_manager.model_type,
+            "model_name": embedding_state.embedding_manager.model_name,
+            "dimension": embedding_state.embedding_manager.dimension
         }
 
     return json.dumps({
@@ -303,21 +337,10 @@ async def get_system_health():
         }
     }, indent=2)
 
-#
-indexing_status = {
-    "last_check": 0,
-    "indexed_conversations": 0,
-    "total_conversations": 0,
-    "percentage": 100.0,
-    "backlog_count": 0,
-    "is_checking": False
-}
+# Legacy support for old variable names
+indexing_status = indexing_state.status
+_indexing_cache = indexing_state.cache
 
-# Cache for indexing status (5-second TTL)
-_indexing_cache = {"result": None, "timestamp": 0}
-
-# Setup logger
-logger = logging.getLogger(__name__)
 logger.info(f"MCP Server starting - Log file: {LOG_FILE}")
 logger.info(f"Configuration: QDRANT_URL={QDRANT_URL}, DECAY={ENABLE_MEMORY_DECAY}, VOYAGE_API_STATUS={'Configured' if VOYAGE_API_KEY else 'Not Configured'}")
 
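The "legacy support" lines work because `indexing_state.status` and `indexing_state.cache` are mutable dicts: the old module-level names become aliases to the same objects, so any code still mutating `indexing_status` or `_indexing_cache` transparently updates the new state object. A quick demonstration of the aliasing:

```python
class IndexingStateDemo:
    def __init__(self):
        self.status = {"percentage": 100.0}

state = IndexingStateDemo()
indexing_status = state.status       # alias, not a copy

indexing_status["percentage"] = 42.0
assert state.status["percentage"] == 42.0  # same dict object
```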
@@ -335,166 +358,218 @@ def normalize_path(path_str: str) -> str:
     p = Path(path_str).expanduser().resolve()
     return str(p).replace('\\', '/')  # Consistent separators for all platforms
 
+
+async def read_json_file(path: Path) -> dict:
+    """Read JSON file from disk."""
+    async with aiofiles.open(path, 'r') as f:
+        content = await f.read()
+    return json.loads(content)
+
+
+async def read_watcher_file(path: Path) -> dict:
+    """Read watcher JSON file from disk."""
+    async with aiofiles.open(path, 'r') as f:
+        content = await f.read()
+    return json.loads(content)
+
+
+async def read_cloud_file(path: Path) -> dict:
+    """Read cloud watcher JSON file from disk."""
+    async with aiofiles.open(path, 'r') as f:
+        content = await f.read()
+    return json.loads(content)
+
+
+async def _load_state_files() -> tuple[set[str], dict[str, dict]]:
+    """Load and merge all state files to get imported file tracking."""
+    all_imported_files = set()
+    file_metadata = {}
+
+    # 1. Check imported-files.json (batch importer)
+    possible_paths = [
+        Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
+        Path(__file__).parent.parent.parent / "config" / "imported-files.json",
+        Path("/config/imported-files.json")  # Docker path if running in container
+    ]
+
+    for path in possible_paths:
+        if path.exists():
+            try:
+                imported_data = await read_json_file(path)
+                imported_files_dict = imported_data.get("imported_files", {})
+                file_metadata.update(imported_data.get("file_metadata", {}))
+                # Normalize paths before adding to set
+                normalized_files = {normalize_path(k) for k in imported_files_dict.keys()}
+                all_imported_files.update(normalized_files)
+            except (json.JSONDecodeError, IOError) as e:
+                logger.debug(f"Failed to read state file {path}: {e}")
+                pass  # Continue if file is corrupted
+
+    # 2. Check csr-watcher.json (streaming watcher - local mode)
+    watcher_paths = [
+        Path.home() / ".claude-self-reflect" / "config" / "csr-watcher.json",
+        Path("/config/csr-watcher.json")  # Docker path
+    ]
+
+    for path in watcher_paths:
+        if path.exists():
+            try:
+                watcher_data = await read_watcher_file(path)
+                watcher_files = watcher_data.get("imported_files", {})
+                # Normalize paths before adding to set
+                normalized_files = {normalize_path(k) for k in watcher_files.keys()}
+                all_imported_files.update(normalized_files)
+                # Add to metadata with normalized paths
+                for file_path, info in watcher_files.items():
+                    normalized = normalize_path(file_path)
+                    if normalized not in file_metadata:
+                        file_metadata[normalized] = {
+                            "position": 1,
+                            "chunks": info.get("chunks", 0)
+                        }
+            except (json.JSONDecodeError, IOError) as e:
+                logger.debug(f"Failed to read watcher state file {path}: {e}")
+                pass  # Continue if file is corrupted
+
+    # 3. Check csr-watcher-cloud.json (streaming watcher - cloud mode)
+    cloud_watcher_path = Path.home() / ".claude-self-reflect" / "config" / "csr-watcher-cloud.json"
+    if cloud_watcher_path.exists():
+        try:
+            cloud_data = await read_cloud_file(cloud_watcher_path)
+            cloud_files = cloud_data.get("imported_files", {})
+            # Normalize paths before adding to set
+            normalized_files = {normalize_path(k) for k in cloud_files.keys()}
+            all_imported_files.update(normalized_files)
+            # Add to metadata with normalized paths
+            for file_path, info in cloud_files.items():
+                normalized = normalize_path(file_path)
+                if normalized not in file_metadata:
+                    file_metadata[normalized] = {
+                        "position": 1,
+                        "chunks": info.get("chunks", 0)
+                    }
+        except (json.JSONDecodeError, IOError) as e:
+            logger.debug(f"Failed to read cloud watcher state file {cloud_watcher_path}: {e}")
+            pass  # Continue if file is corrupted
+
+    return all_imported_files, file_metadata
+
+
+def _is_file_imported(file_path: Path, imported_files_list: list[str], file_metadata: dict[str, dict]) -> bool:
+    """Check if a file has been imported using multiple path matching strategies."""
+    # Normalize the current file path for consistent comparison
+    normalized_file = normalize_path(str(file_path))
+
+    # Try multiple path formats to match Docker's state file
+    file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
+    # Also try without .claude/projects prefix (Docker mounts directly)
+    file_str_alt = file_str.replace("/.claude/projects", "")
+
+    # Normalize alternative paths as well
+    normalized_alt = normalize_path(file_str)
+    normalized_alt2 = normalize_path(file_str_alt)
+
+    # Check if file is in imported_files list (fully imported)
+    if any(path in imported_files_list for path in [normalized_file, normalized_alt, normalized_alt2]):
+        return True
+
+    # Or if it has metadata with position > 0 (partially imported)
+    return any(
+        path in file_metadata and file_metadata[path].get("position", 0) > 0
+        for path in [normalized_file, normalized_alt, normalized_alt2]
+    )
+
+
+def _should_skip_indexing_check(cache: dict, status: dict, current_time: float, cache_ttl: int) -> bool:
+    """Check if indexing status update should be skipped due to cache or rate limiting."""
+    # Check cache first (5-second TTL to prevent performance issues)
+    if cache["result"] and current_time - cache["timestamp"] < cache_ttl:
+        # Use cached result
+        status.update(cache["result"])
+        return True
+
+    # Don't run concurrent checks
+    if status["is_checking"]:
+        return True
+
+    # Check immediately on first call, then every 60 seconds to avoid overhead
+    if status["last_check"] > 0 and current_time - status["last_check"] < 60:  # 1 minute
+        return True
+
+    return False
+
+
+async def _count_indexed_files() -> tuple[int, int]:
+    """Count total JSONL files and how many have been indexed."""
+    projects_dir = Path.home() / ".claude" / "projects"
+    total_files = 0
+    indexed_files = 0
+
+    if projects_dir.exists():
+        # Get all JSONL files
+        jsonl_files = list(projects_dir.glob("**/*.jsonl"))
+        total_files = len(jsonl_files)
+
+        # Load state from all tracking files
+        all_imported_files, file_metadata = await _load_state_files()
+        imported_files_list = list(all_imported_files)
+
+        # Count files that have been imported
+        for file_path in jsonl_files:
+            if _is_file_imported(file_path, imported_files_list, file_metadata):
+                indexed_files += 1
+
+    return total_files, indexed_files
+
+
+def _update_status_metrics(status: dict, cache: dict, current_time: float,
+                           total_files: int, indexed_files: int) -> None:
+    """Update the status metrics and cache."""
+    # Update status
+    status["last_check"] = current_time
+    status["total_conversations"] = total_files
+    status["indexed_conversations"] = indexed_files
+    status["backlog_count"] = total_files - indexed_files
+
+    if total_files > 0:
+        status["percentage"] = (indexed_files / total_files) * 100
+    else:
+        status["percentage"] = 100.0
+
+    # Update cache
+    cache["result"] = status.copy()
+    cache["timestamp"] = current_time
+
+
 async def update_indexing_status(cache_ttl: int = 5):
     """Update indexing status by checking JSONL files vs Qdrant collections.
     This is a lightweight check that compares file counts, not full content.
-
+
     Args:
         cache_ttl: Cache time-to-live in seconds (default: 5)
     """
-
-
-    # Check cache first (5-second TTL to prevent performance issues)
+    status = indexing_state.status
+    cache = indexing_state.cache
     current_time = time.time()
-
-
-
-        return
-
-    # Don't run concurrent checks
-    if indexing_status["is_checking"]:
-        return
-
-    # Check immediately on first call, then every 60 seconds to avoid overhead
-    if indexing_status["last_check"] > 0 and current_time - indexing_status["last_check"] < 60:  # 1 minute
+
+    # Check if we should skip this update
+    if _should_skip_indexing_check(cache, status, current_time, cache_ttl):
         return
-
-
-
+
+    status["is_checking"] = True
+
     try:
-        # Count
-
-
-
-
-
-        # Get all JSONL files
-        jsonl_files = list(projects_dir.glob("**/*.jsonl"))
-        total_files = len(jsonl_files)
-
-        # Check imported-files.json AND watcher state files to see what's been imported
-        # The system uses multiple state files that need to be merged
-        all_imported_files = set()  # Use set to avoid duplicates
-        file_metadata = {}
-
-        # 1. Check imported-files.json (batch importer)
-        possible_paths = [
-            Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
-            Path(__file__).parent.parent.parent / "config" / "imported-files.json",
-            Path("/config/imported-files.json")  # Docker path if running in container
-        ]
-
-        for path in possible_paths:
-            if path.exists():
-                try:
-                    with open(path, 'r') as f:
-                        imported_data = json.load(f)
-                    imported_files_dict = imported_data.get("imported_files", {})
-                    file_metadata.update(imported_data.get("file_metadata", {}))
-                    # Normalize paths before adding to set
-                    normalized_files = {normalize_path(k) for k in imported_files_dict.keys()}
-                    all_imported_files.update(normalized_files)
-                except (json.JSONDecodeError, IOError) as e:
-                    logger.debug(f"Failed to read state file {path}: {e}")
-                    pass  # Continue if file is corrupted
-
-        # 2. Check csr-watcher.json (streaming watcher - local mode)
-        watcher_paths = [
-            Path.home() / ".claude-self-reflect" / "config" / "csr-watcher.json",
-            Path("/config/csr-watcher.json")  # Docker path
-        ]
-
-        for path in watcher_paths:
-            if path.exists():
-                try:
-                    with open(path, 'r') as f:
-                        watcher_data = json.load(f)
-                    watcher_files = watcher_data.get("imported_files", {})
-                    # Normalize paths before adding to set
-                    normalized_files = {normalize_path(k) for k in watcher_files.keys()}
-                    all_imported_files.update(normalized_files)
-                    # Add to metadata with normalized paths
-                    for file_path, info in watcher_files.items():
-                        normalized = normalize_path(file_path)
-                        if normalized not in file_metadata:
-                            file_metadata[normalized] = {
-                                "position": 1,
-                                "chunks": info.get("chunks", 0)
-                            }
-                except (json.JSONDecodeError, IOError) as e:
-                    logger.debug(f"Failed to read watcher state file {path}: {e}")
-                    pass  # Continue if file is corrupted
-
-        # 3. Check csr-watcher-cloud.json (streaming watcher - cloud mode)
-        cloud_watcher_path = Path.home() / ".claude-self-reflect" / "config" / "csr-watcher-cloud.json"
-        if cloud_watcher_path.exists():
-            try:
-                with open(cloud_watcher_path, 'r') as f:
-                    cloud_data = json.load(f)
-                cloud_files = cloud_data.get("imported_files", {})
-                # Normalize paths before adding to set
-                normalized_files = {normalize_path(k) for k in cloud_files.keys()}
-                all_imported_files.update(normalized_files)
-                # Add to metadata with normalized paths
-                for file_path, info in cloud_files.items():
-                    normalized = normalize_path(file_path)
-                    if normalized not in file_metadata:
-                        file_metadata[normalized] = {
-                            "position": 1,
-                            "chunks": info.get("chunks", 0)
-                        }
-            except (json.JSONDecodeError, IOError) as e:
-                logger.debug(f"Failed to read cloud watcher state file {cloud_watcher_path}: {e}")
-                pass  # Continue if file is corrupted
-
-        # Convert set to list for compatibility
-        imported_files_list = list(all_imported_files)
-
-        # Count files that have been imported
-        for file_path in jsonl_files:
-            # Normalize the current file path for consistent comparison
-            normalized_file = normalize_path(str(file_path))
-
-            # Try multiple path formats to match Docker's state file
-            file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
-            # Also try without .claude/projects prefix (Docker mounts directly)
-            file_str_alt = file_str.replace("/.claude/projects", "")
-
-            # Normalize alternative paths as well
-            normalized_alt = normalize_path(file_str)
-            normalized_alt2 = normalize_path(file_str_alt)
-
-            # Check if file is in imported_files list (fully imported)
-            if normalized_file in imported_files_list or normalized_alt in imported_files_list or normalized_alt2 in imported_files_list:
-                indexed_files += 1
-            # Or if it has metadata with position > 0 (partially imported)
-            elif normalized_file in file_metadata and file_metadata[normalized_file].get("position", 0) > 0:
-                indexed_files += 1
-            elif normalized_alt in file_metadata and file_metadata[normalized_alt].get("position", 0) > 0:
-                indexed_files += 1
-            elif normalized_alt2 in file_metadata and file_metadata[normalized_alt2].get("position", 0) > 0:
-                indexed_files += 1
-
-        # Update status
-        indexing_status["last_check"] = current_time
-        indexing_status["total_conversations"] = total_files
-        indexing_status["indexed_conversations"] = indexed_files
-        indexing_status["backlog_count"] = total_files - indexed_files
-
-        if total_files > 0:
-            indexing_status["percentage"] = (indexed_files / total_files) * 100
-        else:
-            indexing_status["percentage"] = 100.0
-
-        # Update cache
-        _indexing_cache["result"] = indexing_status.copy()
-        _indexing_cache["timestamp"] = current_time
-
+        # Count files and their indexing status
+        total_files, indexed_files = await _count_indexed_files()
+
+        # Update all metrics
+        _update_status_metrics(status, cache, current_time, total_files, indexed_files)
+
     except Exception as e:
-
+        logger.warning(f"Failed to update indexing status: {e}")
         logger.error(f"Failed to update indexing status: {e}", exc_info=True)
     finally:
-
+        status["is_checking"] = False
 
 async def get_all_collections() -> List[str]:
     """Get all collections (both Voyage and local)."""
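Beyond moving file I/O to aiofiles, this hunk decomposes a 160-line monolith into focused helpers. The extracted `_should_skip_indexing_check` stacks three cheap guards in order of cost: a short-TTL result cache, a re-entrancy flag, and a 60-second rate limit. The standalone sketch below mirrors that layering with the same dict shapes:

```python
import time

CACHE_TTL = 5      # seconds a cached result stays fresh
MIN_INTERVAL = 60  # seconds between full re-checks

def should_skip(cache: dict, status: dict, now: float) -> bool:
    """Mirror of the guard logic: serve a fresh cached result, refuse
    concurrent checks, and rate-limit full scans to once a minute."""
    if cache["result"] and now - cache["timestamp"] < CACHE_TTL:
        status.update(cache["result"])   # serve the cached snapshot
        return True
    if status["is_checking"]:            # avoid concurrent scans
        return True
    if status["last_check"] > 0 and now - status["last_check"] < MIN_INTERVAL:
        return True
    return False

status = {"is_checking": False, "last_check": 0}
cache = {"result": None, "timestamp": 0}
assert should_skip(cache, status, time.time()) is False  # first call runs
```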
@@ -505,40 +580,38 @@ async def get_all_collections() -> List[str]:
 
 async def generate_embedding(text: str, force_type: Optional[str] = None) -> List[float]:
     """Generate embedding using configured provider or forced type.
-
+
     Args:
         text: Text to embed
        force_type: Force specific embedding type ('local' or 'voyage')
     """
-    global embedding_manager, voyage_client, local_embedding_model
-
     # Initialize on first use
-    if embedding_manager is None:
-        if not initialize_embeddings():
+    if embedding_state.embedding_manager is None:
+        if not embedding_state.initialize_embeddings():
             raise RuntimeError("Failed to initialize any embedding model. Check logs for details.")
-
+
     # Determine which type to use
     if force_type:
         use_local = force_type == 'local'
     else:
-        use_local = embedding_manager.model_type == 'local'
-
+        use_local = embedding_state.embedding_manager.model_type == 'local'
+
     if use_local:
         # Use local embeddings
-        if not local_embedding_model:
+        if not embedding_state.local_embedding_model:
             raise ValueError("Local embedding model not available")
-
+
         # Run in executor since fastembed is synchronous
         loop = asyncio.get_event_loop()
         embeddings = await loop.run_in_executor(
-            None, lambda: list(local_embedding_model.embed([text]))
+            None, lambda: list(embedding_state.local_embedding_model.embed([text]))
        )
         return embeddings[0].tolist()
     else:
         # Use Voyage AI
-        if not voyage_client:
+        if not embedding_state.voyage_client:
             raise ValueError("Voyage client not available")
-        result = voyage_client.embed(
+        result = embedding_state.voyage_client.embed(
             texts=[text],
             model="voyage-3-large",
             input_type="query"
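`generate_embedding` keeps the event loop responsive by pushing fastembed's synchronous `embed()` call onto the default thread-pool executor, as the in-code comment notes. The same shape with a stand-in for the blocking call:

```python
import asyncio

def cpu_bound_embed(texts):
    """Stand-in for a synchronous embedding call (e.g. fastembed)."""
    return [[float(len(t))] for t in texts]

async def embed_async(text: str):
    loop = asyncio.get_event_loop()
    # Offload the blocking call so other coroutines keep running.
    vectors = await loop.run_in_executor(
        None, lambda: cpu_bound_embed([text])
    )
    return vectors[0]

print(asyncio.run(embed_async("hello")))  # -> [5.0]
```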
@@ -547,7 +620,7 @@ async def generate_embedding(text: str, force_type: Optional[str] = None) -> List[float]:
 
 def get_embedding_dimension() -> int:
     """Get the dimension of embeddings based on the provider."""
-    if PREFER_LOCAL_EMBEDDINGS or not voyage_client:
+    if PREFER_LOCAL_EMBEDDINGS or not embedding_state.voyage_client:
         # all-MiniLM-L6-v2 produces 384-dimensional embeddings
         return 384
     else:
@@ -557,8 +630,8 @@ def get_embedding_dimension() -> int:
 def get_collection_suffix() -> str:
     """Get the collection suffix based on embedding provider."""
     # Use embedding_manager's model type if available
-    if embedding_manager and hasattr(embedding_manager, 'model_type'):
-        if embedding_manager.model_type == 'voyage':
+    if embedding_state.embedding_manager and hasattr(embedding_state.embedding_manager, 'model_type'):
+        if embedding_state.embedding_manager.model_type == 'voyage':
             return "_voyage"
         else:
             return "_local"
@@ -571,28 +644,21 @@ def get_collection_suffix() -> str:
 def aggregate_pattern_intelligence(results: List[SearchResult]) -> Dict[str, Any]:
     """Aggregate pattern intelligence across search results."""
 
-    # Initialize counters
-    all_patterns = {}
+    # Initialize counters using efficient data structures
+    all_patterns = Counter()
     all_files = set()
     all_tools = set()
     all_concepts = set()
-    pattern_by_category = {}
-
+    pattern_by_category = defaultdict(lambda: defaultdict(int))
+
     for result in results:
-        # Aggregate code patterns
+        # Aggregate code patterns efficiently using Counter operations
         if result.code_patterns:
             for category, patterns in result.code_patterns.items():
-
-
-
-
-                        pattern_by_category[category][pattern] = 0
-                    pattern_by_category[category][pattern] += 1
-
-                    # Overall pattern count
-                    if pattern not in all_patterns:
-                        all_patterns[pattern] = 0
-                    all_patterns[pattern] += 1
+                # Use Counter for efficient bulk updates
+                pattern_counter = Counter(patterns)
+                pattern_by_category[category].update(pattern_counter)
+                all_patterns.update(pattern_counter)
 
         # Aggregate files
         if result.files_analyzed:
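The `Counter`/`defaultdict` rewrite replaces the old check-init-increment dance with bulk updates, and the follow-up hunk below converts the nested defaultdicts back to plain dicts so the result stays JSON-serializable. Both behaviors in one runnable snippet:

```python
from collections import Counter, defaultdict

patterns = ["retry", "retry", "timeout"]

# Old style: manual presence checks and increments.
manual = {}
for p in patterns:
    if p not in manual:
        manual[p] = 0
    manual[p] += 1

# New style: one bulk update per batch.
counted = Counter()
counted.update(Counter(patterns))
assert dict(counted) == manual == {"retry": 2, "timeout": 1}

# Nested defaultdicts must be converted before JSON serialization:
by_category = defaultdict(lambda: defaultdict(int))
by_category["errors"]["retry"] += 2
plain = {k: dict(v) for k, v in by_category.items()}
assert plain == {"errors": {"retry": 2}}
```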
@@ -625,7 +691,7 @@ def aggregate_pattern_intelligence(results: List[SearchResult]) -> Dict[str, Any]:
         "files_referenced": list(all_files)[:20],  # Limit to top 20
         "tools_used": list(all_tools),
         "concepts_discussed": list(all_concepts)[:15],  # Limit to top 15
-        "pattern_by_category": pattern_by_category,
+        "pattern_by_category": {k: dict(v) for k, v in pattern_by_category.items()},
         "pattern_diversity_score": len(all_patterns) / max(len(results), 1)  # Patterns per result
     }
 
@@ -660,7 +726,7 @@ register_temporal_tools(
     initialize_embeddings,
     normalize_project_name
 )
-
+logger.info("Temporal tools registered")
 
 # Register search tools
 def get_embedding_manager():
@@ -698,6 +764,18 @@ register_reflection_tools(
     normalize_project_name
 )
 
+# Register mode switching tools
+register_mode_switch_tool(
+    mcp,
+    get_embedding_manager
+)
+
+# Register code reload tools
+register_code_reload_tool(
+    mcp,
+    get_embedding_manager
+)
+
 # Run the server
 if __name__ == "__main__":
     import sys
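Both new modules plug in through the same convention as the existing tools: a `register_*(mcp, get_embedding_manager)` function that attaches tools to the FastMCP instance and receives its dependencies explicitly instead of reaching for globals. The diff doesn't show the bodies of mode_switch_tool.py or code_reload_tool.py, so the registrar below is only a hypothetical sketch of the shape such a function takes:

```python
from fastmcp import FastMCP

def register_example_tool(mcp: FastMCP, get_embedding_manager) -> None:
    """Hypothetical registrar following the pattern used above."""
    @mcp.tool()
    async def embedding_mode() -> str:
        # Dependencies arrive via closure instead of module globals.
        manager = get_embedding_manager()
        return getattr(manager, "model_type", "unknown")

mcp = FastMCP("demo")
register_example_tool(mcp, lambda: None)
```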
@@ -715,14 +793,14 @@ if __name__ == "__main__":
             status_copy["last_check"] = datetime.fromtimestamp(status_copy["last_check"]).isoformat()
         else:
             status_copy["last_check"] = None
-
+        logger.info(json.dumps(status_copy, indent=2))
 
     asyncio.run(print_status())
     sys.exit(0)
 
 # Normal MCP server operation
-
-
-
+logger.info("Starting FastMCP server in stdio mode...")
+logger.info(f"Server name: {mcp.name}")
+logger.info("Calling mcp.run()...")
 mcp.run()
-
+logger.info("Server exited normally")