claude-self-reflect 3.3.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,10 +35,11 @@ from qdrant_client.http.exceptions import UnexpectedResponse
 from fastembed import TextEmbedding
 import psutil
 
-# Import normalize_project_name
+# Import normalize_project_name and UnifiedStateManager
 import sys
 sys.path.insert(0, str(Path(__file__).parent))
 from utils import normalize_project_name
+from unified_state_manager import UnifiedStateManager
 
 # Configure logging
 logging.basicConfig(
@@ -52,26 +53,14 @@ logger = logging.getLogger(__name__)
 class Config:
     """Production configuration with proper defaults."""
     qdrant_url: str = field(default_factory=lambda: os.getenv("QDRANT_URL", "http://localhost:6333"))
+    qdrant_api_key: Optional[str] = field(default_factory=lambda: os.getenv("QDRANT_API_KEY"))
+    require_tls_for_remote: bool = field(default_factory=lambda: os.getenv("QDRANT_REQUIRE_TLS_FOR_REMOTE", "true").lower() == "true")
     voyage_api_key: Optional[str] = field(default_factory=lambda: os.getenv("VOYAGE_API_KEY"))
     prefer_local_embeddings: bool = field(default_factory=lambda: os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true")
     embedding_model: str = field(default_factory=lambda: os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"))
 
     logs_dir: Path = field(default_factory=lambda: Path(os.getenv("LOGS_DIR", "~/.claude/projects")).expanduser())
 
-    # Production state file with proper naming
-    state_file: Path = field(default_factory=lambda: (
-        # Docker/cloud mode: use /config volume
-        Path("/config/csr-watcher.json") if os.path.exists("/.dockerenv")
-        # Local mode with cloud flag: separate state file
-        else Path("~/.claude-self-reflect/config/csr-watcher-cloud.json").expanduser()
-        if os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "false" and os.getenv("VOYAGE_API_KEY")
-        # Default local mode
-        else Path("~/.claude-self-reflect/config/csr-watcher.json").expanduser()
-        if os.getenv("STATE_FILE") is None
-        # User override
-        else Path(os.getenv("STATE_FILE")).expanduser()
-    ))
-
     collection_prefix: str = "conv"
     vector_size: int = 384 # FastEmbed all-MiniLM-L6-v2
 
@@ -496,7 +485,7 @@ class QdrantService:
         # Initialize with API key if provided
         self.client = AsyncQdrantClient(
             url=config.qdrant_url,
-            api_key=config.qdrant_api_key if hasattr(config, 'qdrant_api_key') else None
+            api_key=config.qdrant_api_key
         )
         self.embedding_provider = embedding_provider
         self._collection_cache: Dict[str, float] = {}
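
With `qdrant_api_key` now a proper Config field, the `hasattr` guard above becomes dead weight and is dropped. The companion `require_tls_for_remote` field is only read from the environment in this diff; where it is enforced is not shown. A minimal sketch of the kind of guard the flag suggests, in which the `validate_qdrant_url` helper is hypothetical and not code from this package:

    # Hypothetical guard implied by require_tls_for_remote; not from this package.
    from urllib.parse import urlparse

    def validate_qdrant_url(url: str, require_tls_for_remote: bool = True) -> None:
        parsed = urlparse(url)
        # Treat anything that is not a loopback host as remote.
        is_remote = parsed.hostname not in {"localhost", "127.0.0.1", "::1"}
        if require_tls_for_remote and is_remote and parsed.scheme != "https":
            raise ValueError(
                f"Refusing plaintext connection to remote Qdrant host {parsed.hostname!r}; "
                "set QDRANT_REQUIRE_TLS_FOR_REMOTE=false to override."
            )

    validate_qdrant_url("http://localhost:6333")       # passes: loopback host
    validate_qdrant_url("https://qdrant.example.com")  # passes: TLS
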
@@ -797,7 +786,7 @@ class StreamingWatcher:
 
     def __init__(self, config: Config):
         self.config = config
-        self.state: Dict[str, Any] = {}
+        self.state_manager = UnifiedStateManager()
         self.embedding_provider = self._create_embedding_provider()
         self.qdrant_service = QdrantService(config, self.embedding_provider)
         self.chunker = TokenAwareChunker()
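
The rest of the diff exercises `self.state_manager` heavily. Reconstructed purely from the call sites that follow, the implied interface looks roughly like this; it is a sketch, not the shipped `unified_state_manager.py`, whose signatures and behavior may differ:

    # Interface sketch assembled only from call sites in this diff; illustrative.
    from pathlib import Path
    from typing import Any, Dict

    class UnifiedStateManager:
        state_file: Path  # logged at startup

        def normalize_path(self, file_path: str) -> str:
            """Canonical key under which an imported file is recorded."""

        def get_status(self) -> Dict[str, Any]:
            """Summary counters; the watcher reads status['indexed_files']."""

        def get_imported_files(self) -> Dict[str, Dict[str, Any]]:
            """Normalized path -> record containing an 'imported_at' ISO timestamp."""

        def add_imported_file(self, file_path: str, chunks: int, importer: str,
                              collection: str, embedding_mode: str, status: str) -> None:
            """Record a successful import."""

        def mark_file_failed(self, file_path: str, reason: str) -> None:
            """Record a failure so the file is eligible for retry."""
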
@@ -805,23 +794,23 @@ class StreamingWatcher:
         self.memory_monitor = MemoryMonitor(config.memory_limit_mb, config.memory_warning_mb)
         self.queue_manager = QueueManager(config.max_queue_size, config.max_backlog_hours)
         self.progress = IndexingProgress(config.logs_dir)
-
+
         self.stats = {
             "files_processed": 0,
             "chunks_processed": 0,
             "failures": 0,
             "start_time": time.time()
         }
-
+
         # Track file wait times for starvation prevention
         self.file_first_seen: Dict[str, float] = {}
         self.current_project: Optional[str] = self._detect_current_project()
         self.last_mode: Optional[str] = None # Track mode changes for logging
-
+
         self.shutdown_event = asyncio.Event()
-
-        logger.info(f"Streaming Watcher v3.0.0 with HOT/WARM/COLD prioritization")
-        logger.info(f"State file: {self.config.state_file}")
+
+        logger.info("Streaming Watcher v3.0.0 with HOT/WARM/COLD prioritization")
+        logger.info(f"State file: {self.state_manager.state_file}")
         logger.info(f"Memory limits: {config.memory_warning_mb}MB warning, {config.memory_limit_mb}MB limit")
         logger.info(f"HOT window: {config.hot_window_minutes} min, WARM window: {config.warm_window_hours} hrs")
 
@@ -901,75 +890,19 @@ class StreamingWatcher:
         )
 
     async def load_state(self) -> None:
-        """Load persisted state with migration support."""
-        if self.config.state_file.exists():
-            try:
-                with open(self.config.state_file, 'r') as f:
-                    self.state = json.load(f)
-
-                # Migrate old state format if needed
-                if "imported_files" in self.state:
-                    imported_count = len(self.state["imported_files"])
-                    logger.info(f"Loaded state with {imported_count} files")
-
-                    # Ensure all entries have full paths as keys
-                    migrated = {}
-                    for key, value in self.state["imported_files"].items():
-                        # Ensure key is a full path
-                        if not key.startswith('/'):
-                            # Try to reconstruct full path
-                            possible_path = self.config.logs_dir / key
-                            if possible_path.exists():
-                                migrated[str(possible_path)] = value
-                            else:
-                                migrated[key] = value # Keep as is
-                        else:
-                            migrated[key] = value
-
-                    if len(migrated) != len(self.state["imported_files"]):
-                        logger.info(f"Migrated state format: {len(self.state['imported_files'])} -> {len(migrated)} entries")
-                        self.state["imported_files"] = migrated
-
-            except Exception as e:
-                logger.error(f"Error loading state: {e}")
-                self.state = {}
-
-        if "imported_files" not in self.state:
-            self.state["imported_files"] = {}
-        if "high_water_mark" not in self.state:
-            self.state["high_water_mark"] = 0
-
-        # Update progress tracker
-        self.progress.update(len(self.state["imported_files"]))
-
-    async def save_state(self) -> None:
-        """Save state atomically."""
+        """Load persisted state using UnifiedStateManager."""
         try:
-            self.config.state_file.parent.mkdir(parents=True, exist_ok=True)
-            temp_file = self.config.state_file.with_suffix('.tmp')
-
-            with open(temp_file, 'w') as f:
-                json.dump(self.state, f, indent=2)
-                f.flush()
-                os.fsync(f.fileno())
-
-            if platform.system() == 'Windows':
-                if self.config.state_file.exists():
-                    self.config.state_file.unlink()
-                temp_file.rename(self.config.state_file)
-            else:
-                os.replace(temp_file, self.config.state_file)
-
-            # Directory fsync for stronger guarantees
-            try:
-                dir_fd = os.open(str(self.config.state_file.parent), os.O_DIRECTORY)
-                os.fsync(dir_fd)
-                os.close(dir_fd)
-            except:
-                pass
-
+            status = self.state_manager.get_status()
+            imported_count = status["indexed_files"]
+            logger.info(f"Loaded state with {imported_count} files")
+
+            # Update progress tracker
+            self.progress.update(imported_count)
         except Exception as e:
-            logger.error(f"Error saving state: {e}")
+            logger.error(f"Error loading state: {e}")
+            # Initialize progress with 0
+            self.progress.update(0)
+
 
     def get_collection_name(self, project_path: str) -> str:
         """Get collection name for project."""
@@ -1092,15 +1025,15 @@ class StreamingWatcher:
                 continue
 
         if not all_messages:
-            logger.warning(f"No messages in {file_path}, marking as processed")
-            # Mark file as processed with 0 chunks
-            self.state["imported_files"][str(file_path)] = {
-                "imported_at": datetime.now().isoformat(),
-                "_parsed_time": datetime.now().timestamp(),
-                "chunks": 0,
-                "collection": collection_name,
-                "empty_file": True
-            }
+            logger.warning(f"No messages in {file_path}, marking as failed")
+            # Mark as failed to enable retry and correct progress
+            try:
+                self.state_manager.mark_file_failed(
+                    str(file_path),
+                    "No messages found in conversation (0 chunks)"
+                )
+            except Exception as e:
+                logger.exception("Failed to update state for %s", file_path)
             self.stats["files_processed"] += 1
             return True
 
@@ -1181,15 +1114,15 @@ class StreamingWatcher:
 
         combined_text = "\n\n".join(text_parts)
         if not combined_text.strip():
-            logger.warning(f"No textual content in {file_path}, marking as processed")
-            # Mark file as processed with 0 chunks (has messages but no extractable text)
-            self.state["imported_files"][str(file_path)] = {
-                "imported_at": datetime.now().isoformat(),
-                "_parsed_time": datetime.now().timestamp(),
-                "chunks": 0,
-                "collection": collection_name,
-                "no_text_content": True
-            }
+            logger.warning(f"No textual content in {file_path}, marking as failed")
+            # Mark as failed to enable retry and correct progress
+            try:
+                self.state_manager.mark_file_failed(
+                    str(file_path),
+                    "No textual content in conversation (0 chunks)"
+                )
+            except Exception as e:
+                logger.exception("Failed to update state for %s", file_path)
             self.stats["files_processed"] += 1
             return True
 
@@ -1280,23 +1213,34 @@ class StreamingWatcher:
             if should_cleanup:
                 await self.memory_monitor.cleanup()
 
-            # Update state - use full path as key
-            self.state["imported_files"][str(file_path)] = {
-                "imported_at": datetime.now().isoformat(),
-                "_parsed_time": datetime.now().timestamp(),
-                "chunks": chunks_processed,
-                "collection": collection_name
-            }
-
+            # Update state using UnifiedStateManager
+            try:
+                self.state_manager.add_imported_file(
+                    file_path=str(file_path),
+                    chunks=chunks_processed,
+                    importer="streaming",
+                    collection=collection_name,
+                    embedding_mode="local" if self.config.prefer_local_embeddings else "cloud",
+                    status="completed"
+                )
+            except Exception as e:
+                logger.error(f"Failed to update state for {file_path}: {e}")
+                return False
+
             self.stats["files_processed"] += 1
             self.stats["chunks_processed"] += chunks_processed
-
+
             logger.info(f"Completed: {file_path.name} ({chunks_processed} chunks)")
             return True
 
         except Exception as e:
             logger.error(f"Error processing {file_path}: {e}")
             self.stats["failures"] += 1
+            # Mark file as failed using UnifiedStateManager
+            try:
+                self.state_manager.mark_file_failed(str(file_path), str(e))
+            except Exception as mark_error:
+                logger.error(f"Failed to mark file as failed: {mark_error}")
             return False
 
     async def find_new_files(self) -> List[Tuple[Path, FreshnessLevel, int]]:
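
The net effect of this hunk: success and failure both flow through the state manager instead of inline dict writes, and a failed state update now fails the file (`return False`) rather than silently losing the record. Condensed, the new happy and sad paths read like this; the block is illustrative only, `state_manager` stands for the watcher's UnifiedStateManager instance, and all values are placeholders:

    # Illustrative only; condensed from the hunk above, error handling trimmed.
    try:
        chunks_processed = 42                         # placeholder count
        state_manager.add_imported_file(
            file_path="/logs/project/session.jsonl",  # placeholder path
            chunks=chunks_processed,
            importer="streaming",
            collection="conv_example",                # placeholder collection
            embedding_mode="local",
            status="completed",
        )
    except Exception as e:
        state_manager.mark_file_failed("/logs/project/session.jsonl", str(e))
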
@@ -1304,47 +1248,51 @@ class StreamingWatcher:
         if not self.config.logs_dir.exists():
             logger.warning(f"Logs dir not found: {self.config.logs_dir}")
             return []
-
+
         categorized_files = []
-        high_water_mark = self.state.get("high_water_mark", 0)
-        new_high_water = high_water_mark
         now = time.time()
-
+
+        # Get imported files from UnifiedStateManager
+        try:
+            imported_files = self.state_manager.get_imported_files()
+        except Exception as e:
+            logger.error(f"Error getting imported files: {e}")
+            imported_files = {}
+
         try:
             for project_dir in self.config.logs_dir.iterdir():
                 if not project_dir.is_dir():
                     continue
-
+
                 try:
                     for jsonl_file in project_dir.glob("*.jsonl"):
                         file_mtime = jsonl_file.stat().st_mtime
-                        new_high_water = max(new_high_water, file_mtime)
-
-                        # Check if already processed (using full path)
-                        file_key = str(jsonl_file)
-                        if file_key in self.state["imported_files"]:
-                            stored = self.state["imported_files"][file_key]
-                            if "_parsed_time" in stored:
-                                if file_mtime <= stored["_parsed_time"]:
-                                    continue
-                            elif "imported_at" in stored:
-                                import_time = datetime.fromisoformat(stored["imported_at"]).timestamp()
-                                stored["_parsed_time"] = import_time
-                                if file_mtime <= import_time:
-                                    continue
-
+
+                        # Check if already processed (using normalized path)
+                        try:
+                            normalized_path = self.state_manager.normalize_path(str(jsonl_file))
+                            if normalized_path in imported_files:
+                                stored = imported_files[normalized_path]
+                                # Check if file was modified after import
+                                import_time_str = stored.get("imported_at")
+                                if import_time_str:
+                                    import_time = datetime.fromisoformat(import_time_str.replace("Z", "+00:00")).timestamp()
+                                    if file_mtime <= import_time:
+                                        continue
+                        except Exception as e:
+                            logger.debug(f"Error checking import status for {jsonl_file}: {e}")
+                            # If we can't check, assume not imported
+
                         # Categorize file freshness (handles first_seen tracking internally)
                         freshness_level, priority_score = self.categorize_freshness(jsonl_file)
-
+
                         categorized_files.append((jsonl_file, freshness_level, priority_score))
                 except Exception as e:
                     logger.error(f"Error scanning project dir {project_dir}: {e}")
-
+
         except Exception as e:
             logger.error(f"Error scanning logs dir: {e}")
 
-        self.state["high_water_mark"] = new_high_water
-
         # Sort by priority score (lower = higher priority)
         categorized_files.sort(key=lambda x: x[2])
 
@@ -1370,7 +1318,7 @@ class StreamingWatcher:
         logger.info("=" * 60)
         logger.info("Claude Self-Reflect Streaming Watcher v3.0.0")
         logger.info("=" * 60)
-        logger.info(f"State file: {self.config.state_file}")
+        logger.info("State manager: UnifiedStateManager")
         logger.info(f"Memory: {self.config.memory_warning_mb}MB warning, {self.config.memory_limit_mb}MB limit")
         logger.info(f"CPU limit: {self.cpu_monitor.max_total_cpu:.1f}%")
         logger.info(f"Queue size: {self.config.max_queue_size}")
@@ -1380,9 +1328,10 @@ class StreamingWatcher:
 
         # Initial progress scan
         total_files = self.progress.scan_total_files()
-        indexed_files = len(self.state.get("imported_files", {}))
+        status = self.state_manager.get_status()
+        indexed_files = status["indexed_files"]
         self.progress.update(indexed_files)
-
+
         initial_progress = self.progress.get_progress()
         logger.info(f"Initial progress: {indexed_files}/{total_files} files ({initial_progress['percent']:.1f}%)")
 
@@ -1433,23 +1382,30 @@ class StreamingWatcher:
                 except FileNotFoundError:
                     logger.warning(f"File disappeared: {file_path}")
                     continue
-
-                imported = self.state["imported_files"].get(file_key)
-                if imported:
-                    parsed_time = imported.get("_parsed_time")
-                    if not parsed_time and "imported_at" in imported:
-                        parsed_time = datetime.fromisoformat(imported["imported_at"]).timestamp()
-                    if parsed_time and file_mtime <= parsed_time:
-                        logger.debug(f"Skipping already imported: {file_path.name}")
-                        continue
-
+
+                # Check if already imported using UnifiedStateManager
+                try:
+                    normalized_path = self.state_manager.normalize_path(file_key)
+                    imported_files = self.state_manager.get_imported_files()
+                    if normalized_path in imported_files:
+                        stored = imported_files[normalized_path]
+                        import_time_str = stored.get("imported_at")
+                        if import_time_str:
+                            import_time = datetime.fromisoformat(import_time_str.replace("Z", "+00:00")).timestamp()
+                            if file_mtime <= import_time:
+                                logger.debug(f"Skipping already imported: {file_path.name}")
+                                continue
+                except Exception as e:
+                    logger.debug(f"Error checking import status: {e}")
+
                 success = await self.process_file(file_path)
-
+
                 if success:
                     # Clean up first_seen tracking to prevent memory leak
                     self.file_first_seen.pop(file_key, None)
-                    await self.save_state()
-                    self.progress.update(len(self.state["imported_files"]))
+                    # Update progress (state is managed by UnifiedStateManager)
+                    status = self.state_manager.get_status()
+                    self.progress.update(status["indexed_files"])
 
                 # Log comprehensive metrics
                 if batch or cycle_count % 6 == 0: # Every minute if idle
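
One detail worth calling out in both new import checks (here and in find_new_files): `datetime.fromisoformat` only accepts a trailing `Z` from Python 3.11 onward, so the `replace("Z", "+00:00")` shim keeps the comparison working on older interpreters. A self-contained illustration, with made-up timestamp values:

    from datetime import datetime

    imported_at = "2024-05-01T12:00:00Z"   # example record value
    # Pre-3.11 fromisoformat rejects the trailing 'Z', hence the shim:
    import_time = datetime.fromisoformat(imported_at.replace("Z", "+00:00")).timestamp()

    file_mtime = 1714564800.0              # st_mtime from Path.stat()
    if file_mtime <= import_time:
        print("skip: unchanged since import")
    else:
        print("re-import: modified after import")
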
@@ -1519,7 +1475,6 @@ class StreamingWatcher:
             raise
         finally:
             logger.info("Shutting down...")
-            await self.save_state()
             await self.embedding_provider.close()
             await self.qdrant_service.close()
 