claude-self-reflect 3.2.3 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.claude/agents/claude-self-reflect-test.md +595 -528
  2. package/.claude/agents/documentation-writer.md +1 -1
  3. package/.claude/agents/qdrant-specialist.md +2 -2
  4. package/.claude/agents/reflection-specialist.md +61 -5
  5. package/.claude/agents/search-optimizer.md +9 -7
  6. package/README.md +16 -9
  7. package/mcp-server/pyproject.toml +1 -1
  8. package/mcp-server/run-mcp.sh +49 -5
  9. package/mcp-server/src/app_context.py +64 -0
  10. package/mcp-server/src/config.py +57 -0
  11. package/mcp-server/src/connection_pool.py +286 -0
  12. package/mcp-server/src/decay_manager.py +106 -0
  13. package/mcp-server/src/embedding_manager.py +64 -40
  14. package/mcp-server/src/embeddings_old.py +141 -0
  15. package/mcp-server/src/models.py +64 -0
  16. package/mcp-server/src/parallel_search.py +371 -0
  17. package/mcp-server/src/project_resolver.py +33 -46
  18. package/mcp-server/src/reflection_tools.py +206 -0
  19. package/mcp-server/src/rich_formatting.py +196 -0
  20. package/mcp-server/src/search_tools.py +826 -0
  21. package/mcp-server/src/server.py +140 -1715
  22. package/mcp-server/src/temporal_design.py +132 -0
  23. package/mcp-server/src/temporal_tools.py +597 -0
  24. package/mcp-server/src/temporal_utils.py +384 -0
  25. package/mcp-server/src/utils.py +150 -67
  26. package/package.json +11 -1
  27. package/scripts/add-timestamp-indexes.py +134 -0
  28. package/scripts/check-collections.py +29 -0
  29. package/scripts/debug-august-parsing.py +76 -0
  30. package/scripts/debug-import-single.py +91 -0
  31. package/scripts/debug-project-resolver.py +82 -0
  32. package/scripts/debug-temporal-tools.py +135 -0
  33. package/scripts/delta-metadata-update.py +547 -0
  34. package/scripts/import-conversations-unified.py +65 -6
  35. package/scripts/importer/utils/project_normalizer.py +22 -9
  36. package/scripts/precompact-hook.sh +33 -0
  37. package/scripts/streaming-watcher.py +1443 -0
  38. package/scripts/utils.py +39 -0
  39. package/shared/__init__.py +5 -0
  40. package/shared/normalization.py +54 -0
package/scripts/streaming-watcher.py (new file, 1443 lines):

#!/usr/bin/env python3
"""
Claude Self-Reflect Production Streaming Watcher v3.0.0
Complete overhaul with all fixes from v2.5.17 plus enhanced monitoring

Key improvements:
1. Production state file: csr-watcher.json (no temp/test names)
2. Comprehensive psutil memory monitoring with detailed metrics
3. Proper state key format handling (full paths)
4. Container-aware configuration for Docker deployments
5. Enhanced error recovery and queue management
6. Real-time progress tracking toward 100% indexing
"""

import asyncio
import json
import os
import time
import hashlib
import re
import gc
import ctypes
import platform
from pathlib import Path
from typing import Dict, List, Optional, Any, Set, Tuple, Generator
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from enum import Enum
import logging
from collections import deque

from qdrant_client import AsyncQdrantClient, models
from qdrant_client.http.exceptions import UnexpectedResponse
from fastembed import TextEmbedding
import psutil

# Import normalize_project_name
import sys
sys.path.insert(0, str(Path(__file__).parent))
from utils import normalize_project_name

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Configuration from environment
@dataclass
class Config:
    """Production configuration with proper defaults."""
    qdrant_url: str = field(default_factory=lambda: os.getenv("QDRANT_URL", "http://localhost:6333"))
    voyage_api_key: Optional[str] = field(default_factory=lambda: os.getenv("VOYAGE_API_KEY"))
    prefer_local_embeddings: bool = field(default_factory=lambda: os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true")
    embedding_model: str = field(default_factory=lambda: os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"))

    logs_dir: Path = field(default_factory=lambda: Path(os.getenv("LOGS_DIR", "~/.claude/projects")).expanduser())

    # Production state file with proper naming
    state_file: Path = field(default_factory=lambda: (
        # Docker/cloud mode: use /config volume
        Path("/config/csr-watcher.json") if os.path.exists("/.dockerenv")
        # Local mode with cloud flag: separate state file
        else Path("~/.claude-self-reflect/config/csr-watcher-cloud.json").expanduser()
        if os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "false" and os.getenv("VOYAGE_API_KEY")
        # Default local mode
        else Path("~/.claude-self-reflect/config/csr-watcher.json").expanduser()
        if os.getenv("STATE_FILE") is None
        # User override
        else Path(os.getenv("STATE_FILE")).expanduser()
    ))

    collection_prefix: str = "conv"
    vector_size: int = 384  # FastEmbed all-MiniLM-L6-v2

    # Production throttling controls (optimized for stability)
    import_frequency: int = field(default_factory=lambda: int(os.getenv("IMPORT_FREQUENCY", "60")))  # Normal cycle
    hot_check_interval_s: int = field(default_factory=lambda: int(os.getenv("HOT_CHECK_INTERVAL_S", "2")))  # HOT file check
    batch_size: int = field(default_factory=lambda: int(os.getenv("BATCH_SIZE", "10")))
    memory_limit_mb: int = field(default_factory=lambda: int(os.getenv("MEMORY_LIMIT_MB", "1024")))  # 1GB default
    memory_warning_mb: int = field(default_factory=lambda: int(os.getenv("MEMORY_WARNING_MB", "500")))  # 500MB warning

    # HOT/WARM/COLD configuration
    hot_window_minutes: int = field(default_factory=lambda: int(os.getenv("HOT_WINDOW_MINUTES", "5")))  # Files < 5 min are HOT
    warm_window_hours: int = field(default_factory=lambda: int(os.getenv("WARM_WINDOW_HOURS", "24")))  # Files < 24 hrs are WARM
    max_cold_files: int = field(default_factory=lambda: int(os.getenv("MAX_COLD_FILES", "5")))  # Max COLD files per cycle
    max_warm_wait_minutes: int = field(default_factory=lambda: int(os.getenv("MAX_WARM_WAIT_MINUTES", "30")))  # Starvation prevention

    # CPU management
    max_cpu_percent_per_core: float = field(default_factory=lambda: float(os.getenv("MAX_CPU_PERCENT_PER_CORE", "50")))
    max_concurrent_embeddings: int = field(default_factory=lambda: int(os.getenv("MAX_CONCURRENT_EMBEDDINGS", "2")))
    max_concurrent_qdrant: int = field(default_factory=lambda: int(os.getenv("MAX_CONCURRENT_QDRANT", "3")))

    # Queue management
    max_queue_size: int = field(default_factory=lambda: int(os.getenv("MAX_QUEUE_SIZE", "100")))
    max_backlog_hours: int = field(default_factory=lambda: int(os.getenv("MAX_BACKLOG_HOURS", "24")))

    # Reliability settings
    qdrant_timeout_s: float = field(default_factory=lambda: float(os.getenv("QDRANT_TIMEOUT", "10")))
    max_retries: int = field(default_factory=lambda: int(os.getenv("MAX_RETRIES", "3")))
    retry_delay_s: float = field(default_factory=lambda: float(os.getenv("RETRY_DELAY", "1")))

    # Collection cache settings
    collection_cache_ttl: int = field(default_factory=lambda: int(os.getenv("COLLECTION_CACHE_TTL", "3600")))
    collection_cache_max_size: int = field(default_factory=lambda: int(os.getenv("COLLECTION_CACHE_MAX_SIZE", "100")))
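
# Annotation (not in the shipped file): how the chained conditionals in
# state_file above resolve, using the default paths defined there:
#   inside Docker (/.dockerenv exists)             -> /config/csr-watcher.json
#   PREFER_LOCAL_EMBEDDINGS=false + VOYAGE_API_KEY -> ~/.claude-self-reflect/config/csr-watcher-cloud.json
#   STATE_FILE unset                               -> ~/.claude-self-reflect/config/csr-watcher.json
#   STATE_FILE set                                 -> $STATE_FILE, user-expanded
# Note the precedence: the Docker and cloud branches are tested first, so a
# STATE_FILE override only takes effect in plain local mode.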


# Check if malloc_trim is available
try:
    libc = ctypes.CDLL("libc.so.6")
    malloc_trim = libc.malloc_trim
    malloc_trim.argtypes = [ctypes.c_size_t]
    malloc_trim.restype = ctypes.c_int
    MALLOC_TRIM_AVAILABLE = True
except Exception:
    MALLOC_TRIM_AVAILABLE = False
    logger.debug("malloc_trim not available on this platform")


def get_effective_cpus() -> float:
    """Get effective CPU count considering cgroup limits."""
    effective_cores_env = os.getenv("EFFECTIVE_CORES")
    if effective_cores_env:
        try:
            return float(effective_cores_env)
        except ValueError:
            pass

    # cgroup v2
    cpu_max = Path("/sys/fs/cgroup/cpu.max")
    # cgroup v1
    cpu_quota = Path("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
    cpu_period = Path("/sys/fs/cgroup/cpu/cpu.cfs_period_us")

    try:
        if cpu_max.exists():
            content = cpu_max.read_text().strip().split()
            if content[0] != "max":
                quota, period = int(content[0]), int(content[1])
                if period > 0:
                    return max(1.0, quota / period)
        elif cpu_quota.exists() and cpu_period.exists():
            quota = int(cpu_quota.read_text())
            period = int(cpu_period.read_text())
            if quota > 0 and period > 0:
                return max(1.0, quota / period)
    except Exception:
        pass

    return float(psutil.cpu_count() or 1)
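
# Annotation (not in the shipped file), a worked example: a container started
# with `docker run --cpus=1.5` exposes "150000 100000" in /sys/fs/cgroup/cpu.max,
# so quota / period = 150000 / 100000 = 1.5 effective cores; with no cgroup
# limit the function falls back to the psutil CPU count.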


def extract_tool_usage_from_conversation(messages: List[Dict]) -> Dict[str, Any]:
    """Extract tool usage metadata from conversation messages."""
    tool_usage = {
        'files_analyzed': [],
        'files_edited': [],
        'tools_used': set()
    }

    for msg in messages:
        content = msg.get('content', '')

        if isinstance(content, str):
            text = content
        elif isinstance(content, list):
            text_parts = []
            for item in content:
                if isinstance(item, str):
                    text_parts.append(item)
                elif isinstance(item, dict):
                    if item.get('type') == 'text':
                        text_parts.append(item.get('text', ''))
                    elif item.get('type') == 'tool_use':
                        tool_name = item.get('name', '')
                        tool_usage['tools_used'].add(tool_name)

                        if 'input' in item:
                            tool_input = item['input']
                            if isinstance(tool_input, dict):
                                if 'file_path' in tool_input:
                                    file_path = tool_input['file_path']
                                    if tool_name in ['Read', 'Grep', 'Glob', 'LS']:
                                        tool_usage['files_analyzed'].append(file_path)
                                    elif tool_name in ['Edit', 'Write', 'MultiEdit']:
                                        tool_usage['files_edited'].append(file_path)

                                if 'files' in tool_input:
                                    files = tool_input['files']
                                    if isinstance(files, list):
                                        tool_usage['files_analyzed'].extend(files)
            text = ' '.join(text_parts)
        else:
            text = str(content) if content else ''

        # Extract file paths from text content
        file_patterns = [
            r'`([/\w\-\.]+\.\w+)`',
            r'File: ([/\w\-\.]+\.\w+)',
            r'(?:^|\s)(/[\w\-\./]+\.\w+)',
            r'(?:^|\s)([\w\-]+\.\w+)',
        ]

        for pattern in file_patterns:
            matches = re.findall(pattern, text[:5000])
            for match in matches[:10]:
                if match and not match.startswith('http'):
                    if any(keyword in text.lower() for keyword in ['edit', 'modify', 'update', 'write', 'create']):
                        tool_usage['files_edited'].append(match)
                    else:
                        tool_usage['files_analyzed'].append(match)

    # Convert sets to lists and deduplicate
    tool_usage['tools_used'] = list(tool_usage['tools_used'])
    tool_usage['files_analyzed'] = list(set(tool_usage['files_analyzed']))[:20]
    tool_usage['files_edited'] = list(set(tool_usage['files_edited']))[:20]

    return tool_usage
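
# Annotation (not in the shipped file), a sketch with a hypothetical message:
# content containing
#   {"type": "tool_use", "name": "Edit", "input": {"file_path": "src/app.py"}}
# yields tools_used=["Edit"] and files_edited=["src/app.py"], while plain-text
# mentions such as `config.yaml` are caught by the regex patterns, bucketed by
# whether edit/modify/update/write/create keywords appear in the text.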


def extract_concepts(text: str, tool_usage: Dict[str, Any]) -> List[str]:
    """Extract development concepts from conversation text."""
    concepts = set()

    text_sample = text[:50000] if len(text) > 50000 else text

    concept_patterns = {
        'docker': r'\b(?:docker|container|compose|dockerfile)\b',
        'testing': r'\b(?:test|testing|unittest|pytest)\b',
        'database': r'\b(?:database|sql|postgres|mysql|mongodb|qdrant)\b',
        'api': r'\b(?:api|rest|graphql|endpoint|mcp)\b',
        'security': r'\b(?:security|auth|authentication)\b',
        'performance': r'\b(?:performance|optimization|cache|memory)\b',
        'debugging': r'\b(?:debug|debugging|error|bug|fix)\b',
        'deployment': r'\b(?:deploy|deployment|ci\/cd)\b',
        'streaming': r'\b(?:stream|streaming|import|watcher)\b',
        'embeddings': r'\b(?:embed|embedding|vector|fastembed|voyage)\b',
    }

    text_lower = text_sample.lower()

    for concept, pattern in concept_patterns.items():
        if re.search(pattern, text_lower, re.IGNORECASE):
            concepts.add(concept)

    # Add concepts based on tools used
    if 'Docker' in tool_usage.get('tools_used', []):
        concepts.add('docker')
    if 'Bash' in tool_usage.get('tools_used', []):
        concepts.add('scripting')

    return list(concepts)[:15]
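
# Annotation (not in the shipped file), a worked example: the text
# "fixed a docker compose bug" matches the docker pattern (via "docker",
# "compose") and the debugging pattern (via "bug"), so extract_concepts
# returns ['docker', 'debugging'] in whatever order the set yields.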


class FreshnessLevel(Enum):
    """File freshness categorization for prioritization."""
    HOT = "HOT"    # < 5 minutes old - near real-time processing
    WARM = "WARM"  # 5 minutes - 24 hours - normal processing
    COLD = "COLD"  # > 24 hours - batch processing
    URGENT_WARM = "URGENT_WARM"  # WARM files waiting > 30 minutes (starvation prevention)


class MemoryMonitor:
    """Enhanced memory monitoring with psutil."""

    def __init__(self, limit_mb: int, warning_mb: int):
        self.process = psutil.Process()
        self.limit_mb = limit_mb
        self.warning_mb = warning_mb
        self.start_memory = self.get_memory_info()
        self.peak_memory = self.start_memory['rss_mb']
        self.cleanup_count = 0
        self.last_warning_time = 0

    def get_memory_info(self) -> Dict[str, float]:
        """Get detailed memory information."""
        mem = self.process.memory_info()

        # Get additional memory metrics
        try:
            mem_full = self.process.memory_full_info()
            uss = mem_full.uss / 1024 / 1024  # Unique set size
            pss = mem_full.pss / 1024 / 1024 if hasattr(mem_full, 'pss') else 0  # Proportional set size
        except Exception:
            uss = 0
            pss = 0

        return {
            'rss_mb': mem.rss / 1024 / 1024,  # Resident set size
            'vms_mb': mem.vms / 1024 / 1024,  # Virtual memory size
            'uss_mb': uss,  # Unique memory
            'pss_mb': pss,  # Proportional memory
            'percent': self.process.memory_percent(),
            'available_mb': psutil.virtual_memory().available / 1024 / 1024
        }

    def check_memory(self) -> Tuple[bool, Dict[str, Any]]:
        """Check memory usage and return (should_cleanup, metrics)."""
        info = self.get_memory_info()
        rss_mb = info['rss_mb']

        # Update peak
        self.peak_memory = max(self.peak_memory, rss_mb)

        # Check thresholds
        should_cleanup = False
        alert_level = "normal"

        if rss_mb > self.limit_mb:
            alert_level = "critical"
            should_cleanup = True
        elif rss_mb > self.limit_mb * 0.85:
            alert_level = "high"
            should_cleanup = True
        elif rss_mb > self.warning_mb:
            alert_level = "warning"
            # Only warn once per minute
            now = time.time()
            if now - self.last_warning_time > 60:
                logger.warning(f"Memory usage {rss_mb:.1f}MB exceeds warning threshold {self.warning_mb}MB")
                self.last_warning_time = now

        return should_cleanup, {
            'current_mb': rss_mb,
            'peak_mb': self.peak_memory,
            'limit_mb': self.limit_mb,
            'warning_mb': self.warning_mb,
            'percent_of_limit': (rss_mb / self.limit_mb * 100) if self.limit_mb > 0 else 0,
            'alert_level': alert_level,
            'cleanup_count': self.cleanup_count,
            'details': info
        }

    async def cleanup(self) -> Dict[str, Any]:
        """Perform memory cleanup and return metrics."""
        before = self.get_memory_info()

        # Force garbage collection
        gc.collect(2)  # Full collection

        # Platform-specific cleanup
        if MALLOC_TRIM_AVAILABLE:
            malloc_trim(0)

        # Give system time to reclaim
        await asyncio.sleep(0.1)

        after = self.get_memory_info()
        self.cleanup_count += 1

        freed = before['rss_mb'] - after['rss_mb']

        if freed > 10:  # Significant cleanup
            logger.info(f"Memory cleanup freed {freed:.1f}MB (before: {before['rss_mb']:.1f}MB, after: {after['rss_mb']:.1f}MB)")

        return {
            'before_mb': before['rss_mb'],
            'after_mb': after['rss_mb'],
            'freed_mb': freed,
            'cleanup_count': self.cleanup_count
        }
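
# Annotation (not in the shipped file): with the shipped defaults (limit
# 1024MB, warning 500MB) the alert ladder works out to >500MB warning
# (logged at most once per minute), >870.4MB (85% of the limit) high, and
# >1024MB critical; the high and critical levels request a cleanup pass.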


class EmbeddingProvider:
    """Base class for embedding providers."""

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        raise NotImplementedError

    async def close(self):
        """Cleanup resources."""
        pass


class FastEmbedProvider(EmbeddingProvider):
    """FastEmbed provider with proper resource management."""

    def __init__(self, model_name: str, max_concurrent: int = 2):
        self.model = TextEmbedding(model_name)
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.semaphore = asyncio.Semaphore(max_concurrent)
        self.vector_size = 384  # all-MiniLM-L6-v2 dimensions
        self.provider_type = 'local'

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings with concurrency control."""
        async with self.semaphore:
            loop = asyncio.get_running_loop()
            embeddings = await loop.run_in_executor(
                self.executor,
                lambda: list(self.model.embed(texts))
            )
            return [embedding.tolist() for embedding in embeddings]

    async def close(self):
        """Shutdown executor properly."""
        if sys.version_info >= (3, 9):
            self.executor.shutdown(wait=True, cancel_futures=True)
        else:
            self.executor.shutdown(wait=True)


class VoyageProvider(EmbeddingProvider):
    """Voyage AI provider for cloud embeddings with retry logic."""

    def __init__(self, api_key: str, model_name: str = "voyage-3", max_concurrent: int = 2):
        self.api_key = api_key
        self.model_name = model_name
        self.vector_size = 1024  # voyage-3 dimension
        self.semaphore = asyncio.Semaphore(max_concurrent)
        self.base_url = "https://api.voyageai.com/v1/embeddings"
        self.session = None
        self.max_retries = 3
        self.retry_delay = 1.0

    async def _ensure_session(self):
        """Ensure aiohttp session exists."""
        if self.session is None:
            import aiohttp
            self.session = aiohttp.ClientSession()

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings using Voyage AI API with retry logic."""
        await self._ensure_session()

        async with self.semaphore:
            for attempt in range(self.max_retries):
                try:
                    import aiohttp
                    headers = {
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json"
                    }

                    payload = {
                        "input": texts,
                        "model": self.model_name,
                        "input_type": "document"  # For document embeddings
                    }

                    async with self.session.post(
                        self.base_url,
                        headers=headers,
                        json=payload,
                        timeout=aiohttp.ClientTimeout(total=30)
                    ) as response:
                        if response.status == 200:
                            data = await response.json()
                            # Voyage returns embeddings in data.data[].embedding
                            embeddings = [item["embedding"] for item in data["data"]]
                            return embeddings
                        elif response.status == 429:  # Rate limit
                            retry_after = int(response.headers.get("Retry-After", 2))
                            logger.warning(f"Rate limited, retrying after {retry_after}s")
                            await asyncio.sleep(retry_after)
                        else:
                            error_text = await response.text()
                            logger.error(f"Voyage API error {response.status}: {error_text}")
                            if attempt < self.max_retries - 1:
                                await asyncio.sleep(self.retry_delay * (2 ** attempt))

                except asyncio.TimeoutError:
                    logger.warning(f"Voyage API timeout (attempt {attempt + 1}/{self.max_retries})")
                    if attempt < self.max_retries - 1:
                        await asyncio.sleep(self.retry_delay * (2 ** attempt))
                except Exception as e:
                    logger.error(f"Voyage API error: {e}")
                    if attempt < self.max_retries - 1:
                        await asyncio.sleep(self.retry_delay * (2 ** attempt))

        raise Exception(f"Failed to get embeddings after {self.max_retries} attempts")

    async def close(self):
        """Close aiohttp session."""
        if self.session:
            await self.session.close()
            self.session = None
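
# Annotation (not in the shipped file): retry cadence. With max_retries=3 and
# retry_delay=1.0, non-rate-limit failures back off retry_delay * 2**attempt,
# i.e. 1s then 2s, before the final attempt; after that the exception above is
# raised. A 429 instead sleeps for the server-supplied Retry-After value.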


class QdrantService:
    """Qdrant service with proper backpressure and retries."""

    def __init__(self, config: Config, embedding_provider: EmbeddingProvider):
        self.config = config
        self.client = AsyncQdrantClient(url=config.qdrant_url)
        self.embedding_provider = embedding_provider
        self._collection_cache: Dict[str, float] = {}
        self.request_semaphore = asyncio.Semaphore(config.max_concurrent_qdrant)

    async def ensure_collection(self, collection_name: str) -> None:
        """Ensure collection exists with TTL cache."""
        now = time.time()

        if collection_name in self._collection_cache:
            if now - self._collection_cache[collection_name] < self.config.collection_cache_ttl:
                return

        if len(self._collection_cache) >= self.config.collection_cache_max_size:
            oldest = min(self._collection_cache.items(), key=lambda x: x[1])
            del self._collection_cache[oldest[0]]

        async with self.request_semaphore:
            try:
                await asyncio.wait_for(
                    self.client.get_collection(collection_name),
                    timeout=self.config.qdrant_timeout_s
                )
                self._collection_cache[collection_name] = now
                logger.debug(f"Collection {collection_name} exists")
            except (UnexpectedResponse, asyncio.TimeoutError):
                # Create collection with correct vector size based on provider
                vector_size = self.embedding_provider.vector_size or self.config.vector_size

                try:
                    await asyncio.wait_for(
                        self.client.create_collection(
                            collection_name=collection_name,
                            vectors_config=models.VectorParams(
                                size=vector_size,
                                distance=models.Distance.COSINE
                            ),
                            optimizers_config=models.OptimizersConfigDiff(
                                indexing_threshold=100
                            )
                        ),
                        timeout=self.config.qdrant_timeout_s
                    )
                    self._collection_cache[collection_name] = now
                    logger.info(f"Created collection {collection_name}")
                except UnexpectedResponse as e:
                    if "already exists" in str(e):
                        self._collection_cache[collection_name] = now
                    else:
                        raise

    async def store_points_with_retry(
        self,
        collection_name: str,
        points: List[models.PointStruct]
    ) -> bool:
        """Store points with retry logic."""
        if not points:
            return True

        for attempt in range(self.config.max_retries):
            try:
                async with self.request_semaphore:
                    # Directly await with timeout to avoid orphaned tasks
                    await asyncio.wait_for(
                        self.client.upsert(
                            collection_name=collection_name,
                            points=points,
                            wait=True
                        ),
                        timeout=self.config.qdrant_timeout_s
                    )
                    logger.debug(f"Stored {len(points)} points in {collection_name}")
                    return True

            except asyncio.TimeoutError:
                # Don't cancel - let it complete in background to avoid race condition
                logger.warning(f"Timeout storing points (attempt {attempt + 1}/{self.config.max_retries})")
                if attempt < self.config.max_retries - 1:
                    await asyncio.sleep(self.config.retry_delay_s * (2 ** attempt))
            except Exception as e:
                logger.error(f"Error storing points: {e}")
                if attempt < self.config.max_retries - 1:
                    await asyncio.sleep(self.config.retry_delay_s)

        return False

    async def close(self):
        """Close client connection."""
        self._collection_cache.clear()
        try:
            await self.client.close()  # Close AsyncQdrantClient connections
        except AttributeError:
            pass  # Older versions might not have close()


class TokenAwareChunker:
    """Memory-efficient streaming chunker."""

    def __init__(self, chunk_size_tokens: int = 400, chunk_overlap_tokens: int = 75):
        self.chunk_size_chars = chunk_size_tokens * 4
        self.chunk_overlap_chars = chunk_overlap_tokens * 4
        logger.info(f"TokenAwareChunker: {chunk_size_tokens} tokens (~{self.chunk_size_chars} chars)")

    def chunk_text_stream(self, text: str) -> Generator[str, None, None]:
        """Stream chunks without holding all in memory."""
        if not text:
            return

        if len(text) <= self.chunk_size_chars:
            yield text
            return

        start = 0
        while start < len(text):
            end = min(start + self.chunk_size_chars, len(text))

            if end < len(text):
                for separator in ['. ', '.\n', '! ', '? ', '\n\n', '\n', ' ']:
                    last_sep = text.rfind(separator, start, end)
                    if last_sep > start + (self.chunk_size_chars // 2):
                        end = last_sep + len(separator)
                        break

            chunk = text[start:end].strip()
            if chunk:
                yield chunk

            if end >= len(text):
                break
            start = max(start + 1, end - self.chunk_overlap_chars)
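
# Annotation (not in the shipped file), a sizing sketch: with the defaults,
# 400 tokens is approximated as 1600 characters with a 300-character overlap,
# and each chunk is trimmed back to the last sentence or newline boundary in
# the second half of the window, so a ~4000-character conversation yields
# roughly three overlapping chunks.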


class CPUMonitor:
    """Non-blocking CPU monitoring with cgroup awareness."""

    def __init__(self, max_cpu_per_core: float):
        self.process = psutil.Process()
        effective_cores = get_effective_cpus()
        self.max_total_cpu = max_cpu_per_core * effective_cores
        logger.info(f"CPU Monitor: {effective_cores:.1f} effective cores, {self.max_total_cpu:.1f}% limit")

        self.process.cpu_percent(interval=None)
        time.sleep(0.01)
        self.last_check = time.time()
        self.last_cpu = self.process.cpu_percent(interval=None)

    def get_cpu_nowait(self) -> float:
        """Get CPU without blocking."""
        now = time.time()
        if now - self.last_check > 1.0:
            val = self.process.cpu_percent(interval=None)
            if val == 0.0 and self.last_cpu == 0.0:
                time.sleep(0.01)
                val = self.process.cpu_percent(interval=None)
            self.last_cpu = val
            self.last_check = now
        return self.last_cpu

    def should_throttle(self) -> bool:
        """Check if we should throttle based on CPU."""
        return self.get_cpu_nowait() > self.max_total_cpu


class QueueManager:
    """Manage file processing queue with priority support and deduplication."""

    def __init__(self, max_size: int, max_age_hours: int):
        self.max_size = max_size
        self.max_age = timedelta(hours=max_age_hours)
        # Queue stores (path, mod_time, freshness_level, priority_score)
        self.queue: deque = deque()
        self._queued: Set[str] = set()  # Track queued files to prevent duplicates
        self.processed_count = 0
        self.deferred_count = 0

    def add_categorized(self, items: List[Tuple[Path, datetime, FreshnessLevel, int]]) -> int:
        """Add categorized files with priority handling."""
        added = 0
        overflow = []

        for file_path, mod_time, level, priority in items:
            key = str(file_path)

            # Skip if already queued
            if key in self._queued:
                continue

            if len(self.queue) >= self.max_size:
                overflow.append((file_path, mod_time))
                continue

            # HOT and URGENT_WARM go to front of queue
            if level in (FreshnessLevel.HOT, FreshnessLevel.URGENT_WARM):
                self.queue.appendleft((file_path, mod_time, level, priority))
            else:
                self.queue.append((file_path, mod_time, level, priority))

            self._queued.add(key)
            added += 1

        if overflow:
            self.deferred_count += len(overflow)
            oldest = min(overflow, key=lambda x: x[1])
            logger.critical(f"QUEUE OVERFLOW: {len(overflow)} files deferred. "
                            f"Oldest: {oldest[0].name} ({(datetime.now() - oldest[1]).total_seconds() / 3600:.1f}h old)")

        return added

    def get_batch(self, batch_size: int) -> List[Tuple[Path, FreshnessLevel]]:
        """Get next batch of files with their freshness levels."""
        batch = []
        now = datetime.now()

        if self.queue:
            oldest_time = self.queue[0][1]
            if now - oldest_time > self.max_age:
                logger.warning(f"BACKLOG: Oldest file is {(now - oldest_time).total_seconds() / 3600:.1f} hours old")

        for _ in range(min(batch_size, len(self.queue))):
            if self.queue:
                file_path, _, level, _ = self.queue.popleft()
                self._queued.discard(str(file_path))
                batch.append((file_path, level))
                self.processed_count += 1

        return batch

    def has_hot_or_urgent(self) -> bool:
        """Check if queue contains HOT or URGENT_WARM files."""
        return any(level in (FreshnessLevel.HOT, FreshnessLevel.URGENT_WARM)
                   for _, _, level, _ in self.queue)

    def get_metrics(self) -> Dict[str, Any]:
        """Get queue metrics."""
        return {
            "queue_size": len(self.queue),
            "processed": self.processed_count,
            "deferred": self.deferred_count,
            "oldest_age_hours": self._get_oldest_age()
        }

    def _get_oldest_age(self) -> float:
        """Get age of oldest item in hours."""
        if not self.queue:
            return 0
        oldest_time = self.queue[0][1]
        return (datetime.now() - oldest_time).total_seconds() / 3600
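
# Annotation (not in the shipped file), an ordering sketch: adding
# [COLD, HOT, WARM] to an empty queue yields HOT, COLD, WARM; HOT and
# URGENT_WARM jump the line via appendleft(), while everything else keeps
# its arrival order at the tail.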


class IndexingProgress:
    """Track progress toward 100% indexing."""

    def __init__(self, logs_dir: Path):
        self.logs_dir = logs_dir
        self.total_files = 0
        self.indexed_files = 0
        self.start_time = time.time()
        self.last_update = time.time()

    def scan_total_files(self) -> int:
        """Count total JSONL files."""
        total = 0
        if self.logs_dir.exists():
            for project_dir in self.logs_dir.iterdir():
                if project_dir.is_dir():
                    total += len(list(project_dir.glob("*.jsonl")))
        self.total_files = total
        return total

    def update(self, indexed_count: int):
        """Update progress."""
        self.indexed_files = indexed_count
        self.last_update = time.time()

    def get_progress(self) -> Dict[str, Any]:
        """Get progress metrics."""
        # Cap percentage at 100% to handle stale state entries
        percent = min(100.0, (self.indexed_files / self.total_files * 100)) if self.total_files > 0 else 0
        elapsed = time.time() - self.start_time
        rate = self.indexed_files / elapsed if elapsed > 0 else 0
        # For the ETA calculation, use remaining files (floored at 0 to avoid negatives)
        remaining = max(0, self.total_files - self.indexed_files)
        eta = remaining / rate if rate > 0 else 0

        return {
            'total_files': self.total_files,
            'indexed_files': min(self.indexed_files, self.total_files),  # Cap at total
            'percent': percent,
            'rate_per_hour': rate * 3600,
            'eta_hours': eta / 3600,
            'elapsed_hours': elapsed / 3600
        }


class StreamingWatcher:
    """Production-ready streaming watcher with comprehensive monitoring."""

    def __init__(self, config: Config):
        self.config = config
        self.state: Dict[str, Any] = {}
        self.embedding_provider = self._create_embedding_provider()
        self.qdrant_service = QdrantService(config, self.embedding_provider)
        self.chunker = TokenAwareChunker()
        self.cpu_monitor = CPUMonitor(config.max_cpu_percent_per_core)
        self.memory_monitor = MemoryMonitor(config.memory_limit_mb, config.memory_warning_mb)
        self.queue_manager = QueueManager(config.max_queue_size, config.max_backlog_hours)
        self.progress = IndexingProgress(config.logs_dir)

        self.stats = {
            "files_processed": 0,
            "chunks_processed": 0,
            "failures": 0,
            "start_time": time.time()
        }

        # Track file wait times for starvation prevention
        self.file_first_seen: Dict[str, float] = {}
        self.current_project: Optional[str] = self._detect_current_project()
        self.last_mode: Optional[str] = None  # Track mode changes for logging

        self.shutdown_event = asyncio.Event()

        logger.info("Streaming Watcher v3.0.0 with HOT/WARM/COLD prioritization")
        logger.info(f"State file: {self.config.state_file}")
        logger.info(f"Memory limits: {config.memory_warning_mb}MB warning, {config.memory_limit_mb}MB limit")
        logger.info(f"HOT window: {config.hot_window_minutes} min, WARM window: {config.warm_window_hours} hrs")

    def _detect_current_project(self) -> Optional[str]:
        """Detect current project from working directory."""
        try:
            cwd = Path.cwd()
            # Check if we're in a claude project directory
            if "/.claude/projects/" in str(cwd):
                # Extract project name from path
                parts = str(cwd).split("/.claude/projects/")
                if len(parts) > 1:
                    project = parts[1].split("/")[0]
                    logger.info(f"Detected current project: {project}")
                    return project
        except Exception as e:
            logger.debug(f"Could not detect current project: {e}")
        return None

    def categorize_freshness(self, file_path: Path) -> Tuple[FreshnessLevel, int]:
        """
        Categorize file freshness for prioritization.
        Returns (FreshnessLevel, priority_score) where lower scores = higher priority.
        """
        now = time.time()
        file_key = str(file_path)

        # Track first seen time atomically
        if file_key not in self.file_first_seen:
            self.file_first_seen[file_key] = now
        first_seen_time = self.file_first_seen[file_key]

        file_age_minutes = (now - file_path.stat().st_mtime) / 60

        # Check if file is from current project
        is_current_project = False
        if self.current_project:
            file_project = normalize_project_name(str(file_path.parent))
            is_current_project = (file_project == self.current_project)

        # Determine base freshness level
        if file_age_minutes < self.config.hot_window_minutes:
            level = FreshnessLevel.HOT
            base_priority = 0  # Highest priority
        elif file_age_minutes < (self.config.warm_window_hours * 60):
            # Check for starvation (WARM files waiting too long)
            wait_minutes = (now - first_seen_time) / 60
            if wait_minutes > self.config.max_warm_wait_minutes:
                level = FreshnessLevel.URGENT_WARM
                base_priority = 1  # Second highest priority
            else:
                level = FreshnessLevel.WARM
                base_priority = 2 if is_current_project else 3
        else:
            level = FreshnessLevel.COLD
            base_priority = 4  # Lowest priority

        # Adjust priority score based on exact age for tie-breaking
        priority_score = base_priority * 10000 + min(file_age_minutes, 9999)

        return level, int(priority_score)
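
    # Annotation (not in the shipped file), worked scores: a 2-minute-old HOT
    # file scores 0*10000 + 2 = 2; a 60-minute WARM file in the current
    # project scores 2*10000 + 60 = 20060; a 3-day-old COLD file scores
    # 4*10000 + 4320 = 44320. Lower scores sort first in find_new_files.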

    def _create_embedding_provider(self) -> EmbeddingProvider:
        """Create embedding provider based on configuration."""
        if not self.config.prefer_local_embeddings and self.config.voyage_api_key:
            logger.info("Using Voyage AI for cloud embeddings")
            return VoyageProvider(
                api_key=self.config.voyage_api_key,
                model_name="voyage-3",  # Latest Voyage model with 1024 dimensions
                max_concurrent=self.config.max_concurrent_embeddings
            )
        else:
            logger.info(f"Using FastEmbed: {self.config.embedding_model}")
            return FastEmbedProvider(
                self.config.embedding_model,
                self.config.max_concurrent_embeddings
            )

    async def load_state(self) -> None:
        """Load persisted state with migration support."""
        if self.config.state_file.exists():
            try:
                with open(self.config.state_file, 'r') as f:
                    self.state = json.load(f)

                # Migrate old state format if needed
                if "imported_files" in self.state:
                    imported_count = len(self.state["imported_files"])
                    logger.info(f"Loaded state with {imported_count} files")

                    # Ensure all entries have full paths as keys
                    migrated = {}
                    for key, value in self.state["imported_files"].items():
                        # Ensure key is a full path
                        if not key.startswith('/'):
                            # Try to reconstruct full path
                            possible_path = self.config.logs_dir / key
                            if possible_path.exists():
                                migrated[str(possible_path)] = value
                            else:
                                migrated[key] = value  # Keep as is
                        else:
                            migrated[key] = value

                    if len(migrated) != len(self.state["imported_files"]):
                        logger.info(f"Migrated state format: {len(self.state['imported_files'])} -> {len(migrated)} entries")
                    self.state["imported_files"] = migrated

            except Exception as e:
                logger.error(f"Error loading state: {e}")
                self.state = {}

        if "imported_files" not in self.state:
            self.state["imported_files"] = {}
        if "high_water_mark" not in self.state:
            self.state["high_water_mark"] = 0

        # Update progress tracker
        self.progress.update(len(self.state["imported_files"]))

    async def save_state(self) -> None:
        """Save state atomically."""
        try:
            self.config.state_file.parent.mkdir(parents=True, exist_ok=True)
            temp_file = self.config.state_file.with_suffix('.tmp')

            with open(temp_file, 'w') as f:
                json.dump(self.state, f, indent=2)
                f.flush()
                os.fsync(f.fileno())

            if platform.system() == 'Windows':
                if self.config.state_file.exists():
                    self.config.state_file.unlink()
                temp_file.rename(self.config.state_file)
            else:
                os.replace(temp_file, self.config.state_file)

            # Directory fsync for stronger guarantees
            try:
                dir_fd = os.open(str(self.config.state_file.parent), os.O_DIRECTORY)
                os.fsync(dir_fd)
                os.close(dir_fd)
            except (AttributeError, OSError):
                pass  # os.O_DIRECTORY is unavailable on some platforms

        except Exception as e:
            logger.error(f"Error saving state: {e}")

    def get_collection_name(self, project_path: str) -> str:
        """Get collection name for project."""
        normalized = normalize_project_name(project_path)
        project_hash = hashlib.md5(normalized.encode()).hexdigest()[:8]
        suffix = "_local" if self.config.prefer_local_embeddings else "_voyage"
        return f"{self.config.collection_prefix}_{project_hash}{suffix}"
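
    # Annotation (not in the shipped file), a naming sketch: the project
    # directory name is normalized, MD5-hashed, and truncated to 8 hex chars,
    # producing names of the form conv_<hash8>_local in FastEmbed mode or
    # conv_<hash8>_voyage when cloud embeddings are enabled.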

    def _extract_message_text(self, content: Any) -> str:
        """Extract text from message content."""
        if isinstance(content, str):
            return content
        elif isinstance(content, list):
            text_parts = []
            for item in content:
                if isinstance(item, str):
                    text_parts.append(item)
                elif isinstance(item, dict):
                    if item.get('type') == 'text':
                        text_parts.append(item.get('text', ''))
            return ' '.join(text_parts)
        return str(content) if content else ''

    async def process_file(self, file_path: Path) -> bool:
        """Process a single file."""
        try:
            # Memory check
            should_cleanup, mem_metrics = self.memory_monitor.check_memory()
            if should_cleanup:
                await self.memory_monitor.cleanup()
                _, mem_metrics = self.memory_monitor.check_memory()
                if mem_metrics['alert_level'] == 'critical':
                    logger.error(f"Memory critical: {mem_metrics['current_mb']:.1f}MB, skipping {file_path}")
                    return False

            project_path = file_path.parent.name  # Use just the project directory name, not the full path
            collection_name = self.get_collection_name(project_path)
            conversation_id = file_path.stem

            logger.info(f"Processing: {file_path.name} (memory: {mem_metrics['current_mb']:.1f}MB)")

            # Read messages (defer collection creation until we know we have content)
            all_messages = []
            with open(file_path, 'r') as f:
                for line in f:
                    if line.strip():
                        try:
                            data = json.loads(line)
                            # Handle 'messages' array (standard format)
                            if 'messages' in data and data['messages']:
                                all_messages.extend(data['messages'])
                            # Handle single 'message' object (must be dict, not string)
                            elif 'message' in data and data['message']:
                                if isinstance(data['message'], dict):
                                    all_messages.append(data['message'])
                                # Skip string messages (like status messages)
                            # Handle direct role/content format
                            elif 'role' in data and 'content' in data:
                                all_messages.append(data)
                        except json.JSONDecodeError:
                            continue

            if not all_messages:
                logger.warning(f"No messages in {file_path}, marking as processed")
                # Mark file as processed with 0 chunks
                self.state["imported_files"][str(file_path)] = {
                    "imported_at": datetime.now().isoformat(),
                    "_parsed_time": datetime.now().timestamp(),
                    "chunks": 0,
                    "collection": collection_name,
                    "empty_file": True
                }
                self.stats["files_processed"] += 1
                return True

            # Extract metadata
            tool_usage = extract_tool_usage_from_conversation(all_messages)

            # Build text
            text_parts = []
            for msg in all_messages:
                role = msg.get('role', 'unknown')
                content = msg.get('content', '')
                text = self._extract_message_text(content)
                if text:
                    text_parts.append(f"{role}: {text}")

            combined_text = "\n\n".join(text_parts)
            if not combined_text.strip():
                logger.warning(f"No textual content in {file_path}, marking as processed")
                # Mark file as processed with 0 chunks (has messages but no extractable text)
                self.state["imported_files"][str(file_path)] = {
                    "imported_at": datetime.now().isoformat(),
                    "_parsed_time": datetime.now().timestamp(),
                    "chunks": 0,
                    "collection": collection_name,
                    "no_text_content": True
                }
                self.stats["files_processed"] += 1
                return True

            concepts = extract_concepts(combined_text, tool_usage)

            # Now we know we have content, ensure collection exists
            await self.qdrant_service.ensure_collection(collection_name)

            # Process chunks
            chunks_processed = 0
            chunk_index = 0

            for chunk_text in self.chunker.chunk_text_stream(combined_text):
                if self.shutdown_event.is_set():
                    return False

                # CPU throttling
                if self.cpu_monitor.should_throttle():
                    await asyncio.sleep(0.5)

                # Generate embedding
                embeddings = None
                for attempt in range(self.config.max_retries):
                    try:
                        embeddings = await self.embedding_provider.embed_documents([chunk_text])
                        # Validate embedding dimensions
                        if embeddings and len(embeddings[0]) != self.embedding_provider.vector_size:
                            logger.error(f"Embedding dimension mismatch: got {len(embeddings[0])}, expected {self.embedding_provider.vector_size} for provider {self.embedding_provider.__class__.__name__}")
                            self.stats["failures"] += 1
                            embeddings = None  # Force retry
                            continue  # Continue retrying, not break
                        break
                    except Exception as e:
                        logger.warning(f"Embed failed (attempt {attempt+1}): {e}")
                        if attempt < self.config.max_retries - 1:
                            await asyncio.sleep(self.config.retry_delay_s * (2 ** attempt))

                if not embeddings:
                    logger.error(f"Failed to embed chunk {chunk_index}")
                    self.stats["failures"] += 1
                    continue

                # Create payload
                payload = {
                    "text": chunk_text[:10000],
                    "conversation_id": conversation_id,
                    "chunk_index": chunk_index,
                    "message_count": len(all_messages),
                    "project": normalize_project_name(project_path),
                    "timestamp": datetime.now().isoformat(),
                    "total_length": len(chunk_text),
                    "chunking_version": "v3",
                    "concepts": concepts,
                    "files_analyzed": tool_usage['files_analyzed'],
                    "files_edited": tool_usage['files_edited'],
                    "tools_used": tool_usage['tools_used']
                }

                # Create point
                point_id_str = hashlib.md5(
                    f"{conversation_id}_{chunk_index}".encode()
                ).hexdigest()[:16]
                point_id = int(point_id_str, 16) % (2**63)

                point = models.PointStruct(
                    id=point_id,
                    vector=embeddings[0],
                    payload=payload
                )

                # Store
                success = await self.qdrant_service.store_points_with_retry(
                    collection_name,
                    [point]
                )

                if not success:
                    logger.error(f"Failed to store chunk {chunk_index}")
                    self.stats["failures"] += 1
                else:
                    chunks_processed += 1

                chunk_index += 1

                # Memory check mid-file
                if chunk_index % 10 == 0:
                    should_cleanup, _ = self.memory_monitor.check_memory()
                    if should_cleanup:
                        await self.memory_monitor.cleanup()

            # Update state - use full path as key
            self.state["imported_files"][str(file_path)] = {
                "imported_at": datetime.now().isoformat(),
                "_parsed_time": datetime.now().timestamp(),
                "chunks": chunks_processed,
                "collection": collection_name
            }

            self.stats["files_processed"] += 1
            self.stats["chunks_processed"] += chunks_processed

            logger.info(f"Completed: {file_path.name} ({chunks_processed} chunks)")
            return True

        except Exception as e:
            logger.error(f"Error processing {file_path}: {e}")
            self.stats["failures"] += 1
            return False

    async def find_new_files(self) -> List[Tuple[Path, FreshnessLevel, int]]:
        """Find new files to process with freshness categorization."""
        if not self.config.logs_dir.exists():
            logger.warning(f"Logs dir not found: {self.config.logs_dir}")
            return []

        categorized_files = []
        high_water_mark = self.state.get("high_water_mark", 0)
        new_high_water = high_water_mark

        try:
            for project_dir in self.config.logs_dir.iterdir():
                if not project_dir.is_dir():
                    continue

                try:
                    for jsonl_file in project_dir.glob("*.jsonl"):
                        file_mtime = jsonl_file.stat().st_mtime
                        new_high_water = max(new_high_water, file_mtime)

                        # Check if already processed (using full path)
                        file_key = str(jsonl_file)
                        if file_key in self.state["imported_files"]:
                            stored = self.state["imported_files"][file_key]
                            if "_parsed_time" in stored:
                                if file_mtime <= stored["_parsed_time"]:
                                    continue
                            elif "imported_at" in stored:
                                import_time = datetime.fromisoformat(stored["imported_at"]).timestamp()
                                stored["_parsed_time"] = import_time
                                if file_mtime <= import_time:
                                    continue

                        # Categorize file freshness (handles first_seen tracking internally)
                        freshness_level, priority_score = self.categorize_freshness(jsonl_file)

                        categorized_files.append((jsonl_file, freshness_level, priority_score))
                except Exception as e:
                    logger.error(f"Error scanning project dir {project_dir}: {e}")

        except Exception as e:
            logger.error(f"Error scanning logs dir: {e}")

        self.state["high_water_mark"] = new_high_water

        # Sort by priority score (lower = higher priority)
        categorized_files.sort(key=lambda x: x[2])

        # Log categorization summary
        if categorized_files:
            hot_count = sum(1 for _, level, _ in categorized_files if level == FreshnessLevel.HOT)
            urgent_count = sum(1 for _, level, _ in categorized_files if level == FreshnessLevel.URGENT_WARM)
            warm_count = sum(1 for _, level, _ in categorized_files if level == FreshnessLevel.WARM)
            cold_count = sum(1 for _, level, _ in categorized_files if level == FreshnessLevel.COLD)

            status_parts = []
            if hot_count: status_parts.append(f"{hot_count} 🔥HOT")
            if urgent_count: status_parts.append(f"{urgent_count} ⚠️URGENT")
            if warm_count: status_parts.append(f"{warm_count} 🌡️WARM")
            if cold_count: status_parts.append(f"{cold_count} ❄️COLD")

            logger.info(f"Found {len(categorized_files)} new files: {', '.join(status_parts)}")

        return categorized_files

    async def run_continuous(self) -> None:
        """Main loop with comprehensive monitoring."""
        logger.info("=" * 60)
        logger.info("Claude Self-Reflect Streaming Watcher v3.0.0")
        logger.info("=" * 60)
        logger.info(f"State file: {self.config.state_file}")
        logger.info(f"Memory: {self.config.memory_warning_mb}MB warning, {self.config.memory_limit_mb}MB limit")
        logger.info(f"CPU limit: {self.cpu_monitor.max_total_cpu:.1f}%")
        logger.info(f"Queue size: {self.config.max_queue_size}")
        logger.info("=" * 60)

        await self.load_state()

        # Initial progress scan
        total_files = self.progress.scan_total_files()
        indexed_files = len(self.state.get("imported_files", {}))
        self.progress.update(indexed_files)

        initial_progress = self.progress.get_progress()
        logger.info(f"Initial progress: {indexed_files}/{total_files} files ({initial_progress['percent']:.1f}%)")

        try:
            cycle_count = 0
            while not self.shutdown_event.is_set():
                try:
                    cycle_count += 1

                    # Find new files with categorization
                    categorized_files = await self.find_new_files()

                    # Determine if we have HOT files (in new files or existing queue)
                    has_hot_files = (any(level == FreshnessLevel.HOT for _, level, _ in categorized_files)
                                     or self.queue_manager.has_hot_or_urgent())

                    # Process files by temperature with proper priority
                    files_to_process = []
                    cold_count = 0

                    for file_path, level, priority in categorized_files:
                        # Limit COLD files per cycle
                        if level == FreshnessLevel.COLD:
                            if cold_count >= self.config.max_cold_files:
                                logger.debug(f"Skipping COLD file {file_path.name} (limit reached)")
                                continue
                            cold_count += 1

                        mod_time = datetime.fromtimestamp(file_path.stat().st_mtime)
                        files_to_process.append((file_path, mod_time, level, priority))

                    if files_to_process:
                        added = self.queue_manager.add_categorized(files_to_process)
                        if added > 0:
                            logger.info(f"Cycle {cycle_count}: Added {added} files to queue")

                    # Process batch
                    batch = self.queue_manager.get_batch(self.config.batch_size)

                    for file_path, level in batch:
                        if self.shutdown_event.is_set():
                            break

                        # Double-check if already imported (defensive)
                        file_key = str(file_path)
                        try:
                            file_mtime = file_path.stat().st_mtime
                        except FileNotFoundError:
                            logger.warning(f"File disappeared: {file_path}")
                            continue

                        imported = self.state["imported_files"].get(file_key)
                        if imported:
                            parsed_time = imported.get("_parsed_time")
                            if not parsed_time and "imported_at" in imported:
                                parsed_time = datetime.fromisoformat(imported["imported_at"]).timestamp()
                            if parsed_time and file_mtime <= parsed_time:
                                logger.debug(f"Skipping already imported: {file_path.name}")
                                continue

                        success = await self.process_file(file_path)

                        if success:
                            # Clean up first_seen tracking to prevent memory leak
                            self.file_first_seen.pop(file_key, None)
                            await self.save_state()
                            self.progress.update(len(self.state["imported_files"]))

                    # Log comprehensive metrics
                    if batch or cycle_count % 6 == 0:  # Every minute if idle
                        queue_metrics = self.queue_manager.get_metrics()
                        progress_metrics = self.progress.get_progress()
                        _, mem_metrics = self.memory_monitor.check_memory()
                        cpu = self.cpu_monitor.get_cpu_nowait()

                        logger.info(
                            f"Progress: {progress_metrics['percent']:.1f}% "
                            f"({progress_metrics['indexed_files']}/{progress_metrics['total_files']}) | "
                            f"Queue: {queue_metrics['queue_size']} | "
                            f"Memory: {mem_metrics['current_mb']:.1f}MB/{mem_metrics['limit_mb']}MB | "
                            f"CPU: {cpu:.1f}% | "
                            f"Processed: {self.stats['files_processed']} | "
                            f"Failures: {self.stats['failures']}"
                        )

                        # Alert on high memory
                        if mem_metrics['alert_level'] in ['warning', 'high', 'critical']:
                            logger.warning(
                                f"Memory {mem_metrics['alert_level'].upper()}: "
                                f"{mem_metrics['current_mb']:.1f}MB "
                                f"({mem_metrics['percent_of_limit']:.1f}% of limit)"
                            )

                        # Progress toward 100%
                        if progress_metrics['percent'] >= 99.9:
                            logger.info("🎉 INDEXING COMPLETE: 100% of files processed!")
                        elif progress_metrics['percent'] >= 90:
                            logger.info(f"📈 Nearing completion: {progress_metrics['percent']:.1f}%")

                        # Backlog alert
                        if queue_metrics['oldest_age_hours'] > self.config.max_backlog_hours:
                            logger.error(
                                f"BACKLOG CRITICAL: Oldest file is "
                                f"{queue_metrics['oldest_age_hours']:.1f} hours old"
                            )

                    # Dynamic interval based on file temperature
                    current_mode = "HOT" if has_hot_files else "NORMAL"

                    if current_mode != self.last_mode:
                        if has_hot_files:
                            logger.info(f"🔥 HOT files detected - switching to {self.config.hot_check_interval_s}s interval")
                        else:
                            logger.info(f"Returning to normal {self.config.import_frequency}s interval")
                        self.last_mode = current_mode

                    wait_time = self.config.hot_check_interval_s if has_hot_files else self.config.import_frequency

                    # Wait with interrupt capability for HOT files
                    try:
                        await asyncio.wait_for(
                            self.shutdown_event.wait(),
                            timeout=wait_time
                        )
                    except asyncio.TimeoutError:
                        pass  # Normal timeout, continue loop

                except Exception as e:
                    logger.error(f"Error in main loop: {e}")
                    await asyncio.sleep(self.config.import_frequency)

        except asyncio.CancelledError:
            logger.info("Main task cancelled")
            raise
        finally:
            logger.info("Shutting down...")
            await self.save_state()
            await self.embedding_provider.close()
            await self.qdrant_service.close()

            # Final metrics
            final_progress = self.progress.get_progress()
            logger.info("=" * 60)
            logger.info("Final Statistics:")
            logger.info(f"Progress: {final_progress['percent']:.1f}% complete")
            logger.info(f"Files processed: {self.stats['files_processed']}")
            logger.info(f"Chunks processed: {self.stats['chunks_processed']}")
            logger.info(f"Failures: {self.stats['failures']}")
            logger.info(f"Memory cleanups: {self.memory_monitor.cleanup_count}")
            logger.info(f"Peak memory: {self.memory_monitor.peak_memory:.1f}MB")
            logger.info("=" * 60)
            logger.info("Shutdown complete")

    async def shutdown(self):
        """Trigger graceful shutdown."""
        logger.info("Shutdown requested")
        self.shutdown_event.set()


async def main():
    """Main entry point."""
    config = Config()
    watcher = StreamingWatcher(config)

    # Setup signal handlers
    import signal

    loop = asyncio.get_running_loop()

    def shutdown_handler():
        logger.info("Received shutdown signal")
        watcher.shutdown_event.set()

    try:
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, shutdown_handler)
    except NotImplementedError:
        # Windows fallback: loop.add_signal_handler raises NotImplementedError there
        def signal_handler(sig, frame):
            logger.info(f"Received signal {sig}")
            watcher.shutdown_event.set()

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

    try:
        await watcher.run_continuous()
    except (KeyboardInterrupt, asyncio.CancelledError):
        await watcher.shutdown()


if __name__ == "__main__":
    asyncio.run(main())
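
For orientation, a minimal sketch of driving the new watcher with the names defined in the file above (the shipped entry point does the equivalent via asyncio.run(main()), plus signal handling):

    config = Config()                      # picks up QDRANT_URL etc. from the environment
    watcher = StreamingWatcher(config)
    asyncio.run(watcher.run_continuous())  # the finally block saves state and closes providers on exit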