claude-self-reflect 4.0.0 → 4.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -15,7 +15,7 @@ import fcntl
15
15
  import time
16
16
  import argparse
17
17
  from pathlib import Path
18
- from datetime import datetime
18
+ from datetime import datetime, timezone
19
19
  from typing import List, Dict, Any, Optional, Set
20
20
  import logging
21
21
 
@@ -34,6 +34,9 @@ except ImportError:
34
34
  scripts_dir = Path(__file__).parent
35
35
  sys.path.insert(0, str(scripts_dir))
36
36
 
37
+ # Import UnifiedStateManager
38
+ from unified_state_manager import UnifiedStateManager
39
+
37
40
  from qdrant_client import QdrantClient
38
41
  from qdrant_client.models import PointStruct, Distance, VectorParams
39
42
 
@@ -72,32 +75,15 @@ MAX_FILES_EDITED = 20
72
75
  MAX_TOOLS_USED = 15
73
76
  MAX_CONCEPT_MESSAGES = 50
74
77
 
75
- # Robust cross-platform state file resolution
76
- def get_default_state_file():
77
- """Determine the default state file location with cross-platform support."""
78
- from pathlib import Path
79
-
80
- # Check if we're in Docker (more reliable than just checking /config)
81
- docker_indicators = [
82
- Path("/.dockerenv").exists(), # Docker creates this file
83
- os.path.exists("/config") and os.access("/config", os.W_OK) # Mounted config dir with write access
84
- ]
85
-
86
- if any(docker_indicators):
87
- return "/config/imported-files.json"
88
-
89
- # Use pathlib for cross-platform home directory path
90
- home_state = Path.home() / ".claude-self-reflect" / "config" / "imported-files.json"
91
- return str(home_state)
92
-
93
- # Get state file path with env override support
78
+ # Initialize UnifiedStateManager
79
+ # Support legacy STATE_FILE environment variable
94
80
  env_state = os.getenv("STATE_FILE")
95
81
  if env_state:
96
- # Normalize any user-provided path to absolute
97
82
  from pathlib import Path
98
- STATE_FILE = str(Path(env_state).expanduser().resolve())
83
+ state_file_path = Path(env_state).expanduser().resolve()
84
+ state_manager = UnifiedStateManager(state_file_path)
99
85
  else:
100
- STATE_FILE = get_default_state_file()
86
+ state_manager = UnifiedStateManager() # Uses default location
101
87
  PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true"
102
88
  VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")
103
89
  MAX_CHUNK_SIZE = int(os.getenv("MAX_CHUNK_SIZE", "50")) # Messages per chunk
@@ -686,18 +672,13 @@ def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Pat
686
672
 
687
673
  except Exception as e:
688
674
  logger.error(f"Failed to import {jsonl_file}: {e}")
675
+ # Mark file as failed in state manager
676
+ try:
677
+ state_manager.mark_file_failed(str(jsonl_file), str(e))
678
+ except Exception as state_error:
679
+ logger.warning(f"Could not mark file as failed in state: {state_error}")
689
680
  return 0
690
681
 
691
- def _locked_open(path, mode):
692
- """Open file with exclusive lock for concurrent safety."""
693
- f = open(path, mode)
694
- try:
695
- fcntl.flock(f.fileno(), fcntl.LOCK_EX)
696
- except Exception:
697
- f.close()
698
- raise
699
- return f
700
-
701
682
  def _with_retries(fn, attempts=3, base_sleep=0.5):
702
683
  """Execute function with retries and exponential backoff."""
703
684
  for i in range(attempts):
@@ -709,66 +690,78 @@ def _with_retries(fn, attempts=3, base_sleep=0.5):
709
690
  time.sleep(base_sleep * (2 ** i))
710
691
  logger.debug(f"Retrying after error: {e}")
711
692
 
712
- def load_state() -> dict:
713
- """Load import state with file locking."""
714
- if os.path.exists(STATE_FILE):
715
- try:
716
- with _locked_open(STATE_FILE, 'r') as f:
717
- return json.load(f)
718
- except Exception as e:
719
- logger.warning(f"Failed to load state: {e}")
720
- return {"imported_files": {}}
721
-
722
- def save_state(state: dict):
723
- """Save import state with atomic write."""
724
- # Fix: Handle case where STATE_FILE has no directory component
725
- state_dir = os.path.dirname(STATE_FILE)
726
- if state_dir:
727
- os.makedirs(state_dir, exist_ok=True)
728
-
729
- # Use atomic write with locking to prevent corruption
730
- temp_file = f"{STATE_FILE}.tmp"
731
- with _locked_open(temp_file, 'w') as f:
732
- json.dump(state, f, indent=2)
733
- f.flush()
734
- os.fsync(f.fileno())
735
-
736
- # Atomic rename (on POSIX systems)
737
- os.replace(temp_file, STATE_FILE)
738
-
739
- def should_import_file(file_path: Path, state: dict) -> bool:
740
- """Check if file should be imported."""
741
- file_str = str(file_path)
742
- if file_str in state.get("imported_files", {}):
743
- file_info = state["imported_files"][file_str]
744
- last_modified = file_path.stat().st_mtime
745
-
746
- # Check if file has been modified
747
- if file_info.get("last_modified") != last_modified:
748
- logger.info(f"File modified, will re-import: {file_path.name}")
749
- return True
750
-
751
- # Check for suspiciously low chunk counts (likely failed imports)
752
- chunks = file_info.get("chunks", 0)
753
- file_size_kb = file_path.stat().st_size / 1024
754
-
755
- # Heuristic: Files > 10KB should have more than 2 chunks
756
- if file_size_kb > 10 and chunks <= 2:
757
- logger.warning(f"File has suspiciously low chunks ({chunks}) for size {file_size_kb:.1f}KB, will re-import: {file_path.name}")
758
- return True
759
-
760
- logger.info(f"Skipping unchanged file: {file_path.name}")
761
- return False
762
- return True
763
-
764
- def update_file_state(file_path: Path, state: dict, chunks: int):
765
- """Update state for imported file."""
766
- file_str = str(file_path)
767
- state["imported_files"][file_str] = {
768
- "imported_at": datetime.now().isoformat(),
769
- "last_modified": file_path.stat().st_mtime,
770
- "chunks": chunks
771
- }
693
+ def should_import_file(file_path: Path) -> bool:
694
+ """Check if file should be imported using UnifiedStateManager."""
695
+ try:
696
+ # Get imported files from state manager
697
+ imported_files = state_manager.get_imported_files()
698
+
699
+ # Normalize the file path for comparison
700
+ normalized_path = state_manager.normalize_path(str(file_path))
701
+
702
+ if normalized_path in imported_files:
703
+ file_info = imported_files[normalized_path]
704
+
705
+ # Skip if file failed and has already reached the retry limit
706
+ if file_info.get("status") == "failed" and file_info.get("retry_count", 0) >= 3:
707
+ logger.info(f"Skipping failed file (max retries reached): {file_path.name}")
708
+ return False
709
+
710
+ # Get file modification time for comparison
711
+ last_modified = file_path.stat().st_mtime
712
+ stored_modified = file_info.get("last_modified")
713
+
714
+ # Check if file has been modified (convert stored timestamp to float if needed)
715
+ if stored_modified:
716
+ try:
717
+ # Parse ISO timestamp to float for comparison
718
+ stored_time = datetime.fromisoformat(stored_modified.replace("Z", "+00:00")).timestamp()
719
+ if abs(last_modified - stored_time) > 1: # Allow 1 second tolerance
720
+ logger.info(f"File modified, will re-import: {file_path.name}")
721
+ return True
722
+ except (ValueError, TypeError):
723
+ # If we can't parse the stored time, re-import to be safe
724
+ logger.warning(f"Could not parse stored modification time, will re-import: {file_path.name}")
725
+ return True
726
+
727
+ # Check for suspiciously low chunk counts (likely failed imports)
728
+ chunks = file_info.get("chunks", 0)
729
+ file_size_kb = file_path.stat().st_size / 1024
730
+
731
+ # Heuristic: Files > 10KB should have more than 2 chunks
732
+ if file_size_kb > 10 and chunks <= 2 and file_info.get("status") != "failed":
733
+ logger.warning(f"File has suspiciously low chunks ({chunks}) for size {file_size_kb:.1f}KB, will re-import: {file_path.name}")
734
+ return True
735
+
736
+ # Skip if successfully imported
737
+ if file_info.get("status") == "completed":
738
+ logger.info(f"Skipping successfully imported file: {file_path.name}")
739
+ return False
740
+
741
+ return True
742
+
743
+ except Exception as e:
744
+ logger.warning(f"Error checking import status for {file_path}: {e}")
745
+ return True # Default to importing if we can't check status
746
+
747
+ def update_file_state(file_path: Path, chunks: int, collection_name: str):
748
+ """Update state for imported file using UnifiedStateManager."""
749
+ try:
750
+ # Determine embedding mode from collection suffix
751
+ embedding_mode = "local" if collection_suffix == "local" else "cloud"
752
+
753
+ # Add file to state manager
754
+ state_manager.add_imported_file(
755
+ file_path=str(file_path),
756
+ chunks=chunks,
757
+ importer="streaming",
758
+ collection=collection_name,
759
+ embedding_mode=embedding_mode,
760
+ status="completed"
761
+ )
762
+ logger.debug(f"Updated state for {file_path.name}: {chunks} chunks")
763
+ except Exception as e:
764
+ logger.error(f"Failed to update state for {file_path}: {e}")
772
765
 
773
766
  def main():
774
767
  """Main import function."""
@@ -798,9 +791,9 @@ def main():
798
791
  collection_suffix = "voyage"
799
792
  logger.info("Switched to Voyage AI embeddings (dimension: 1024)")
800
793
 
801
- # Load state
802
- state = load_state()
803
- logger.info(f"Loaded state with {len(state.get('imported_files', {}))} previously imported files")
794
+ # Get status from state manager
795
+ status = state_manager.get_status()
796
+ logger.info(f"Loaded state with {status['indexed_files']} previously imported files")
804
797
 
805
798
  # Find all projects
806
799
  # Use LOGS_DIR env var, or fall back to Claude projects directory, then /logs for Docker
@@ -848,7 +841,7 @@ def main():
848
841
  logger.info(f"Reached limit of {args.limit} files, stopping import")
849
842
  break
850
843
 
851
- if should_import_file(jsonl_file, state):
844
+ if should_import_file(jsonl_file):
852
845
  chunks = stream_import_file(jsonl_file, collection_name, project_dir)
853
846
  files_processed += 1
854
847
  if chunks > 0:
@@ -868,8 +861,7 @@ def main():
868
861
 
869
862
  if actual_count > 0:
870
863
  logger.info(f"Verified {actual_count} points in Qdrant for {conversation_id}")
871
- update_file_state(jsonl_file, state, chunks)
872
- save_state(state)
864
+ update_file_state(jsonl_file, chunks, collection_name)
873
865
  total_imported += 1
874
866
  else:
875
867
  logger.error(f"No points found in Qdrant for {conversation_id} despite {chunks} chunks processed - not marking as imported")
@@ -883,6 +875,11 @@ def main():
883
875
  # Critical fix: Don't mark files with 0 chunks as imported
884
876
  # This allows retry on next run
885
877
  logger.warning(f"File produced 0 chunks, not marking as imported: {jsonl_file.name}")
878
+ # Mark as failed so we don't keep retrying indefinitely
879
+ try:
880
+ state_manager.mark_file_failed(str(jsonl_file), "File produced 0 chunks during import")
881
+ except Exception as state_error:
882
+ logger.warning(f"Could not mark file as failed in state: {state_error}")
886
883
 
887
884
  logger.info(f"Import complete: processed {total_imported} files")
888
885