claude-self-reflect 2.7.3 → 2.8.0

@@ -30,8 +30,11 @@ RUN mkdir -p /root/.cache/fastembed && \
  # Set working directory
  WORKDIR /app
 
- # Copy scripts
- COPY scripts/ /scripts/
+ # Copy application scripts
+ COPY scripts/ /app/scripts/
+
+ # Make watcher-loop.sh executable
+ RUN chmod +x /app/scripts/watcher-loop.sh
 
  # Create config directory
  RUN mkdir -p /config
@@ -41,4 +44,4 @@ ENV PYTHONUNBUFFERED=1
  ENV MALLOC_ARENA_MAX=2
 
  # Run the watcher loop
- CMD ["/scripts/watcher-loop.sh"]
+ CMD ["/app/scripts/watcher-loop.sh"]
package/README.md CHANGED
@@ -149,10 +149,17 @@ Here's how your conversations get imported and prioritized:
 
  ![Import Architecture](docs/diagrams/import-architecture.png)
 
- **The system intelligently prioritizes your conversations:**
- - **šŸ”„ HOT** (< 5 minutes): Switches to 2-second intervals for near real-time import
- - **šŸŒ”ļø WARM** (< 24 hours): Normal priority, processed every 60 seconds
- - **ā„ļø COLD** (> 24 hours): Batch processed, max 5 per cycle to prevent blocking
+ **The system intelligently processes your conversations:**
+ - Runs every 60 seconds checking for new conversations
+ - Processes newest conversations first (delta import pattern)
+ - Maintains low memory usage (<50MB) through streaming
+ - Handles up to 5 files per cycle to prevent blocking
+
+ **HOT/WARM/COLD Intelligent Prioritization:**
+ - **šŸ”„ HOT** (< 5 minutes): Switches to 2-second intervals for near real-time import
+ - **šŸŒ”ļø WARM** (< 24 hours): Normal priority with starvation prevention (urgent after a 30-minute wait)
+ - **ā„ļø COLD** (> 24 hours): Batch processed, max 5 per cycle to prevent blocking new content
+ - Files are categorized by age and processed with priority queuing, so the newest content is imported quickly while older files are never starved
 
  ## Using It
 
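To make the README's prioritization concrete, here is a minimal sketch of age-based HOT/WARM/COLD categorization with starvation prevention. This is an illustration written for this diff, not the package's actual watcher code; the thresholds mirror the documented defaults (5 minutes, 24 hours, 30-minute warm wait, 5 cold files per cycle).

```python
# Illustrative sketch only; constant names echo the documented env vars.
HOT_WINDOW_S = 5 * 60          # < 5 min  -> HOT
WARM_WINDOW_S = 24 * 60 * 60   # < 24 hrs -> WARM
MAX_WARM_WAIT_S = 30 * 60      # WARM files become urgent after 30 min
MAX_COLD_FILES = 5             # COLD files are batched, 5 per cycle

def categorize(age_s: float, waited_s: float) -> str:
    """Bucket a file by age; promote WARM files that have waited too long."""
    if age_s < HOT_WINDOW_S:
        return "HOT"           # polled on the fast 2-second interval
    if age_s < WARM_WINDOW_S:
        # Starvation prevention: urgent once it has waited past the cap
        return "WARM-URGENT" if waited_s > MAX_WARM_WAIT_S else "WARM"
    return "COLD"              # batched, at most MAX_COLD_FILES per cycle

print(categorize(age_s=120, waited_s=0))        # HOT
print(categorize(age_s=3_600, waited_s=2_400))  # WARM-URGENT
print(categorize(age_s=172_800, waited_s=0))    # COLD
```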
@@ -42,6 +42,7 @@ services:
      environment:
        - QDRANT_URL=http://qdrant:6333
        - STATE_FILE=/config/imported-files.json
+      - LOGS_DIR=/logs
       - OPENAI_API_KEY=${OPENAI_API_KEY:-}
       - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
       - VOYAGE_KEY=${VOYAGE_KEY:-}
@@ -176,21 +177,29 @@ services:
       - ./scripts:/scripts:ro
     environment:
       - QDRANT_URL=http://qdrant:6333
-     - STATE_FILE=/config/watcher-state.json
+     - STATE_FILE=/config/csr-watcher.json
+     - LOGS_DIR=/logs # Fixed: Point to mounted volume
      - VOYAGE_KEY=${VOYAGE_KEY:-}
      - PREFER_LOCAL_EMBEDDINGS=${PREFER_LOCAL_EMBEDDINGS:-true}
-     - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-15}
-     - MAX_COLD_FILES_PER_CYCLE=${MAX_COLD_FILES_PER_CYCLE:-3}
-     - MAX_MEMORY_MB=${MAX_MEMORY_MB:-300}
-     - WATCH_INTERVAL_SECONDS=${WATCH_INTERVAL_SECONDS:-30}
-     - MAX_FILES_PER_CYCLE=${MAX_FILES_PER_CYCLE:-10}
+     - ENABLE_MEMORY_DECAY=${ENABLE_MEMORY_DECAY:-false}
+     - DECAY_WEIGHT=${DECAY_WEIGHT:-0.3}
+     - DECAY_SCALE_DAYS=${DECAY_SCALE_DAYS:-90}
+     - CHECK_INTERVAL_S=${CHECK_INTERVAL_S:-60}
+     - HOT_CHECK_INTERVAL_S=${HOT_CHECK_INTERVAL_S:-2}
+     - HOT_WINDOW_MINUTES=${HOT_WINDOW_MINUTES:-5}
+     - WARM_WINDOW_HOURS=${WARM_WINDOW_HOURS:-24}
+     - MAX_COLD_FILES=${MAX_COLD_FILES:-5}
+     - MAX_WARM_WAIT_MINUTES=${MAX_WARM_WAIT_MINUTES:-30}
+     - MAX_MESSAGES_PER_CHUNK=${MAX_MESSAGES_PER_CHUNK:-10}
      - MAX_CHUNK_SIZE=${MAX_CHUNK_SIZE:-50} # Messages per chunk for streaming
+     - MEMORY_LIMIT_MB=${MEMORY_LIMIT_MB:-1000}
+     - MEMORY_WARNING_MB=${MEMORY_WARNING_MB:-500}
      - PYTHONUNBUFFERED=1
      - MALLOC_ARENA_MAX=2
-   restart: "no" # Manual start only - prevent system overload
-   profiles: ["safe-watch"] # Requires explicit profile to run
-   mem_limit: 600m # Increased from 400m to handle large files safely
-   memswap_limit: 600m
+   restart: unless-stopped
+   profiles: ["safe-watch", "watch"] # Requires explicit profile to run
+   mem_limit: 1g # Increased to 1GB to match MEMORY_LIMIT_MB
+   memswap_limit: 1g
    cpus: 1.0 # Single CPU core limit
 
    # MCP server for Claude integration
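The 2.7.x tuning knobs are replaced wholesale here. As a rough sketch of how a consumer process would read the new variables (illustrative only; the actual handling lives in the package's streaming watcher, and the defaults simply mirror the compose file above):

```python
import os

# Defaults mirror the compose defaults above; names are the documented env vars.
CHECK_INTERVAL_S = int(os.getenv("CHECK_INTERVAL_S", "60"))          # normal poll
HOT_CHECK_INTERVAL_S = int(os.getenv("HOT_CHECK_INTERVAL_S", "2"))   # fast poll
HOT_WINDOW_MINUTES = int(os.getenv("HOT_WINDOW_MINUTES", "5"))
WARM_WINDOW_HOURS = int(os.getenv("WARM_WINDOW_HOURS", "24"))
MAX_COLD_FILES = int(os.getenv("MAX_COLD_FILES", "5"))
MAX_WARM_WAIT_MINUTES = int(os.getenv("MAX_WARM_WAIT_MINUTES", "30"))
MEMORY_LIMIT_MB = int(os.getenv("MEMORY_LIMIT_MB", "1000"))
MEMORY_WARNING_MB = int(os.getenv("MEMORY_WARNING_MB", "500"))
```

Note that `mem_limit: 1g` in the compose file is deliberately kept in step with `MEMORY_LIMIT_MB=1000`, so the container's hard cap matches the watcher's own self-imposed limit.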
@@ -454,6 +454,26 @@ async function enrichMetadata() {
    }
  }
 
+ async function startWatcher() {
+   console.log('\nšŸ”„ Starting the streaming watcher...');
+   console.log(' • HOT files (<5 min): 2-second processing');
+   console.log(' • WARM files (<24 hrs): Normal priority');
+   console.log(' • COLD files (>24 hrs): Batch processing');
+
+   try {
+     safeExec('docker', ['compose', '--profile', 'watch', 'up', '-d', 'safe-watcher'], {
+       cwd: projectRoot,
+       stdio: 'inherit'
+     });
+     console.log('āœ… Watcher started successfully!');
+     return true;
+   } catch (error) {
+     console.log('āš ļø Could not start watcher automatically');
+     console.log(' You can start it manually with: docker compose --profile watch up -d');
+     return false;
+   }
+ }
+
  async function showFinalInstructions() {
    console.log('\nāœ… Setup complete!');
 
@@ -461,7 +481,7 @@ async function showFinalInstructions() {
    console.log(' • 🌐 Qdrant Dashboard: http://localhost:6333/dashboard/');
    console.log(' • šŸ“Š Status: All services running');
    console.log(' • šŸ” Search: Semantic search with memory decay enabled');
-   console.log(' • šŸš€ Import: Watcher checking every 60 seconds');
+   console.log(' • šŸš€ Watcher: HOT/WARM/COLD prioritization active');
 
    console.log('\nšŸ“‹ Quick Reference Commands:');
    console.log(' • Check status: docker compose ps');
@@ -568,6 +588,9 @@ async function main() {
    // Enrich metadata (new in v2.5.19)
    await enrichMetadata();
 
+   // Start the watcher
+   await startWatcher();
+
    // Show final instructions
    await showFinalInstructions();
 
@@ -9,6 +9,7 @@ import json
  import numpy as np
  import hashlib
  import time
+ import logging
 
  from fastmcp import FastMCP, Context
  from .utils import normalize_project_name
@@ -124,18 +125,48 @@ indexing_status = {
      "is_checking": False
  }
 
- async def update_indexing_status():
+ # Cache for indexing status (5-second TTL)
+ _indexing_cache = {"result": None, "timestamp": 0}
+
+ # Setup logger
+ logger = logging.getLogger(__name__)
+
+ def normalize_path(path_str: str) -> str:
+     """Normalize path for consistent comparison across platforms.
+
+     Args:
+         path_str: Path string to normalize
+
+     Returns:
+         Normalized path string with consistent separators
+     """
+     if not path_str:
+         return path_str
+     p = Path(path_str).expanduser().resolve()
+     return str(p).replace('\\', '/')  # Consistent separators for all platforms
+
+ async def update_indexing_status(cache_ttl: int = 5):
      """Update indexing status by checking JSONL files vs Qdrant collections.
-     This is a lightweight check that compares file counts, not full content."""
-     global indexing_status
+     This is a lightweight check that compares file counts, not full content.
+
+     Args:
+         cache_ttl: Cache time-to-live in seconds (default: 5)
+     """
+     global indexing_status, _indexing_cache
+
+     # Check cache first (5-second TTL to prevent performance issues)
+     current_time = time.time()
+     if _indexing_cache["result"] and current_time - _indexing_cache["timestamp"] < cache_ttl:
+         # Use cached result
+         indexing_status = _indexing_cache["result"].copy()
+         return
 
      # Don't run concurrent checks
      if indexing_status["is_checking"]:
          return
 
-     # Only check every 5 minutes to avoid overhead
-     current_time = time.time()
-     if current_time - indexing_status["last_check"] < 300:  # 5 minutes
+     # Check immediately on first call, then every 60 seconds to avoid overhead
+     if indexing_status["last_check"] > 0 and current_time - indexing_status["last_check"] < 60:  # 1 minute
          return
 
      indexing_status["is_checking"] = True
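A quick note on what `normalize_path` buys: tilde expansion, symlink resolution, and separator normalization collapse equivalent paths to one canonical form, so membership checks against merged state files become reliable. The outputs below are illustrative usage of the function defined above, assuming made-up home directories:

```python
# Hypothetical results of the normalize_path defined above:
normalize_path("~/.claude/projects/a.jsonl")
#   on POSIX (home /home/alice) -> "/home/alice/.claude/projects/a.jsonl"
normalize_path("C:\\Users\\alice\\.claude\\projects\\a.jsonl")
#   on Windows -> "C:/Users/alice/.claude/projects/a.jsonl"  (backslashes normalized)
```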
@@ -151,46 +182,107 @@ async def update_indexing_status():
          jsonl_files = list(projects_dir.glob("**/*.jsonl"))
          total_files = len(jsonl_files)
 
-         # Check imported-files.json to see what's been imported
-         # The streaming importer uses imported-files.json with nested structure
-         # Try multiple possible locations for the config file
+         # Check imported-files.json AND watcher state files to see what's been imported
+         # The system uses multiple state files that need to be merged
+         all_imported_files = set()  # Use set to avoid duplicates
+         file_metadata = {}
+
+         # 1. Check imported-files.json (batch importer)
          possible_paths = [
              Path.home() / ".claude-self-reflect" / "config" / "imported-files.json",
              Path(__file__).parent.parent.parent / "config" / "imported-files.json",
              Path("/config/imported-files.json")  # Docker path if running in container
          ]
 
-         imported_files_path = None
          for path in possible_paths:
              if path.exists():
-                 imported_files_path = path
-                 break
+                 try:
+                     with open(path, 'r') as f:
+                         imported_data = json.load(f)
+                     imported_files_dict = imported_data.get("imported_files", {})
+                     file_metadata.update(imported_data.get("file_metadata", {}))
+                     # Normalize paths before adding to set
+                     normalized_files = {normalize_path(k) for k in imported_files_dict.keys()}
+                     all_imported_files.update(normalized_files)
+                 except (json.JSONDecodeError, IOError) as e:
+                     logger.debug(f"Failed to read state file {path}: {e}")
+                     pass  # Continue if file is corrupted
 
-         if imported_files_path and imported_files_path.exists():
-             with open(imported_files_path, 'r') as f:
-                 imported_data = json.load(f)
-             # The actual structure has imported_files and file_metadata at the top level
-             # NOT nested under stream_position as previously assumed
-             imported_files_dict = imported_data.get("imported_files", {})
-             file_metadata = imported_data.get("file_metadata", {})
-
-             # Convert dict keys to list for compatibility with existing logic
-             imported_files_list = list(imported_files_dict.keys())
+         # 2. Check csr-watcher.json (streaming watcher - local mode)
+         watcher_paths = [
+             Path.home() / ".claude-self-reflect" / "config" / "csr-watcher.json",
+             Path("/config/csr-watcher.json")  # Docker path
+         ]
+
+         for path in watcher_paths:
+             if path.exists():
+                 try:
+                     with open(path, 'r') as f:
+                         watcher_data = json.load(f)
+                     watcher_files = watcher_data.get("imported_files", {})
+                     # Normalize paths before adding to set
+                     normalized_files = {normalize_path(k) for k in watcher_files.keys()}
+                     all_imported_files.update(normalized_files)
+                     # Add to metadata with normalized paths
+                     for file_path, info in watcher_files.items():
+                         normalized = normalize_path(file_path)
+                         if normalized not in file_metadata:
+                             file_metadata[normalized] = {
+                                 "position": 1,
+                                 "chunks": info.get("chunks", 0)
+                             }
+                 except (json.JSONDecodeError, IOError) as e:
+                     logger.debug(f"Failed to read watcher state file {path}: {e}")
+                     pass  # Continue if file is corrupted
+
+         # 3. Check csr-watcher-cloud.json (streaming watcher - cloud mode)
+         cloud_watcher_path = Path.home() / ".claude-self-reflect" / "config" / "csr-watcher-cloud.json"
+         if cloud_watcher_path.exists():
+             try:
+                 with open(cloud_watcher_path, 'r') as f:
+                     cloud_data = json.load(f)
+                 cloud_files = cloud_data.get("imported_files", {})
+                 # Normalize paths before adding to set
+                 normalized_files = {normalize_path(k) for k in cloud_files.keys()}
+                 all_imported_files.update(normalized_files)
+                 # Add to metadata with normalized paths
+                 for file_path, info in cloud_files.items():
+                     normalized = normalize_path(file_path)
+                     if normalized not in file_metadata:
+                         file_metadata[normalized] = {
+                             "position": 1,
+                             "chunks": info.get("chunks", 0)
+                         }
+             except (json.JSONDecodeError, IOError) as e:
+                 logger.debug(f"Failed to read cloud watcher state file {cloud_watcher_path}: {e}")
+                 pass  # Continue if file is corrupted
+
+         # Convert set to list for compatibility
+         imported_files_list = list(all_imported_files)
 
          # Count files that have been imported
          for file_path in jsonl_files:
+             # Normalize the current file path for consistent comparison
+             normalized_file = normalize_path(str(file_path))
+
              # Try multiple path formats to match Docker's state file
              file_str = str(file_path).replace(str(Path.home()), "/logs").replace("\\", "/")
              # Also try without .claude/projects prefix (Docker mounts directly)
              file_str_alt = file_str.replace("/.claude/projects", "")
 
+             # Normalize alternative paths as well
+             normalized_alt = normalize_path(file_str)
+             normalized_alt2 = normalize_path(file_str_alt)
+
              # Check if file is in imported_files list (fully imported)
-             if file_str in imported_files_list or file_str_alt in imported_files_list:
+             if normalized_file in imported_files_list or normalized_alt in imported_files_list or normalized_alt2 in imported_files_list:
                  indexed_files += 1
              # Or if it has metadata with position > 0 (partially imported)
-             elif file_str in file_metadata and file_metadata[file_str].get("position", 0) > 0:
+             elif normalized_file in file_metadata and file_metadata[normalized_file].get("position", 0) > 0:
+                 indexed_files += 1
+             elif normalized_alt in file_metadata and file_metadata[normalized_alt].get("position", 0) > 0:
                  indexed_files += 1
-             elif file_str_alt in file_metadata and file_metadata[file_str_alt].get("position", 0) > 0:
+             elif normalized_alt2 in file_metadata and file_metadata[normalized_alt2].get("position", 0) > 0:
                  indexed_files += 1
 
          # Update status
@@ -203,9 +295,14 @@ async def update_indexing_status():
              indexing_status["percentage"] = (indexed_files / total_files) * 100
          else:
              indexing_status["percentage"] = 100.0
+
+         # Update cache
+         _indexing_cache["result"] = indexing_status.copy()
+         _indexing_cache["timestamp"] = current_time
 
      except Exception as e:
          print(f"[WARNING] Failed to update indexing status: {e}")
+         logger.error(f"Failed to update indexing status: {e}", exc_info=True)
      finally:
          indexing_status["is_checking"] = False
 
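Since three separate state files are now merged, it helps to see the shape being read. The sketch below is an assumed structure inferred from the keys the code above accesses (`imported_files`, `file_metadata`, per-file `position` and `chunks`); the paths and values are invented for illustration:

```python
# Assumed shape of ~/.claude-self-reflect/config/csr-watcher.json
# (and its cloud-mode sibling); values are made up for illustration.
example_watcher_state = {
    "imported_files": {
        "/logs/-Users-alice-projects-myapp/session-1234.jsonl": {
            "chunks": 42,  # chunks already streamed into Qdrant
        },
    },
}

# imported-files.json (batch importer) additionally carries file_metadata:
example_importer_state = {
    "imported_files": {"/logs/-Users-alice-projects-myapp/session-1234.jsonl": {}},
    "file_metadata": {
        "/logs/-Users-alice-projects-myapp/session-1234.jsonl": {
            "position": 1,  # > 0 means at least partially imported
            "chunks": 42,
        },
    },
}
```

Merging through a set of normalized paths means a file counted by the batch importer and the watcher is only tallied once.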
@@ -5,6 +5,7 @@ Designed for <20ms execution time to support status bars and shell scripts.
  """
 
  import json
+ import time
  from pathlib import Path
  from collections import defaultdict
 
@@ -53,11 +54,36 @@ def normalize_file_path(file_path: str) -> str:
      return file_path
 
 
+ def get_watcher_status() -> dict:
+     """Get streaming watcher status if available."""
+     watcher_state_file = Path.home() / ".claude-self-reflect" / "config" / "csr-watcher.json"
+
+     if not watcher_state_file.exists():
+         return {"running": False, "status": "not configured"}
+
+     try:
+         with open(watcher_state_file) as f:
+             state = json.load(f)
+
+         # Check if watcher is active (modified recently)
+         file_age = time.time() - watcher_state_file.stat().st_mtime
+         is_active = file_age < 120  # Active if updated in last 2 minutes
+
+         return {
+             "running": is_active,
+             "files_processed": len(state.get("imported_files", {})),
+             "last_update_seconds": int(file_age),
+             "status": "🟢 active" if is_active else "šŸ”“ inactive"
+         }
+     except (json.JSONDecodeError, OSError):
+         return {"running": False, "status": "error reading state"}
+
+
  def get_status() -> dict:
      """Get indexing status with overall stats and per-project breakdown.
 
      Returns:
-         dict: JSON structure with overall and per-project indexing status
+         dict: JSON structure with overall and per-project indexing status, plus watcher status
      """
      projects_dir = Path.home() / ".claude" / "projects"
      project_stats = defaultdict(lambda: {"indexed": 0, "total": 0})
@@ -154,6 +180,9 @@ def get_status() -> dict:
              "total": stats["total"]
          }
 
+     # Add watcher status
+     result["watcher"] = get_watcher_status()
+
      return result
 
 
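For status-bar integrations, the new `watcher` key rides along with the existing per-project breakdown. A usage sketch of the `get_status` shown above; the output values are invented for illustration:

```python
# Assumes the status module above is importable; values are made up.
status = get_status()
print(status["watcher"])
# -> {'running': True, 'files_processed': 128,
#     'last_update_seconds': 30, 'status': '🟢 active'}
```

Liveness is inferred purely from the state file's mtime (updated within the last 2 minutes), so no IPC with the watcher container is needed, keeping the call within the module's <20ms budget.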
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "claude-self-reflect",
-   "version": "2.7.3",
+   "version": "2.8.0",
    "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
    "keywords": [
      "claude",
@@ -24,7 +24,8 @@ from qdrant_client.models import Filter, FieldCondition, MatchValue
  # Configuration
  QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
  LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
- STATE_FILE = os.getenv("STATE_FILE", "./config/delta-update-state.json")
+ # Use /config path if running in Docker, otherwise use ./config
+ STATE_FILE = os.getenv("STATE_FILE", "/config/delta-update-state.json" if os.path.exists("/config") else "./config/delta-update-state.json")
  PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true"
  DRY_RUN = os.getenv("DRY_RUN", "false").lower() == "true"
  DAYS_TO_UPDATE = int(os.getenv("DAYS_TO_UPDATE", "7"))
@@ -432,7 +433,7 @@ async def main_async():
      logger.info("=== Delta Update Complete ===")
      logger.info(f"Successfully updated: {success_count} conversations")
      logger.info(f"Failed: {failed_count} conversations")
-     logger.info(f"Total conversations in state: {len(state['updated_conversations'])}")
+     logger.info(f"Total conversations in state: {len(state.get('updated_conversations', {}))}")
 
  def main():
      """Entry point."""
@@ -57,7 +57,9 @@ else:
 
  def normalize_project_name(project_name: str) -> str:
      """Normalize project name for consistency."""
-     return project_name.replace("-Users-ramakrishnanannaswamy-projects-", "").replace("-", "_").lower()
+     # For compatibility with delta-metadata-update, just use the project name as-is
+     # This ensures collection names match between import and delta update scripts
+     return project_name
 
  def get_collection_name(project_path: Path) -> str:
      """Generate collection name from project path."""