claude-self-reflect 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/.claude/agents/claude-self-reflect-test.md +110 -66
  2. package/README.md +1 -1
  3. package/installer/setup-wizard.js +4 -2
  4. package/mcp-server/pyproject.toml +1 -0
  5. package/mcp-server/src/server.py +84 -0
  6. package/package.json +2 -1
  7. package/scripts/import-conversations-unified.py +225 -44
  8. package/scripts/importer/__init__.py +25 -0
  9. package/scripts/importer/__main__.py +14 -0
  10. package/scripts/importer/core/__init__.py +25 -0
  11. package/scripts/importer/core/config.py +120 -0
  12. package/scripts/importer/core/exceptions.py +52 -0
  13. package/scripts/importer/core/models.py +184 -0
  14. package/scripts/importer/embeddings/__init__.py +22 -0
  15. package/scripts/importer/embeddings/base.py +141 -0
  16. package/scripts/importer/embeddings/fastembed_provider.py +164 -0
  17. package/scripts/importer/embeddings/validator.py +136 -0
  18. package/scripts/importer/embeddings/voyage_provider.py +251 -0
  19. package/scripts/importer/main.py +393 -0
  20. package/scripts/importer/processors/__init__.py +15 -0
  21. package/scripts/importer/processors/ast_extractor.py +197 -0
  22. package/scripts/importer/processors/chunker.py +157 -0
  23. package/scripts/importer/processors/concept_extractor.py +109 -0
  24. package/scripts/importer/processors/conversation_parser.py +181 -0
  25. package/scripts/importer/processors/tool_extractor.py +165 -0
  26. package/scripts/importer/state/__init__.py +5 -0
  27. package/scripts/importer/state/state_manager.py +190 -0
  28. package/scripts/importer/storage/__init__.py +5 -0
  29. package/scripts/importer/storage/qdrant_storage.py +250 -0
  30. package/scripts/importer/utils/__init__.py +9 -0
  31. package/scripts/importer/utils/logger.py +87 -0
  32. package/scripts/importer/utils/project_normalizer.py +120 -0
@@ -0,0 +1,165 @@
1
+ """Extract tool usage and file references from conversations."""
2
+
3
+ import re
4
+ import logging
5
+ from typing import Dict, Any, Set, List
6
+ from pathlib import Path
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class ToolUsageExtractor:
    """Extract files analyzed, edited, and tools used from conversations.

    Works purely on conversation text with regex heuristics, so results
    are best-effort; output lists are capped to keep payloads small.
    """

    def __init__(self):
        # Patterns for file operations, keyed by operation kind.
        self.file_patterns = {
            'analyzed': [
                re.compile(r'(?:reading|analyzing|examining|looking at|checking)\s+(?:file\s+)?([/\w\-\.]+\.\w+)', re.IGNORECASE),
                re.compile(r'(?:Read|read)\s+([/\w\-\.]+\.\w+)', re.IGNORECASE),
                re.compile(r'(?:in|from)\s+file\s+([/\w\-\.]+\.\w+)', re.IGNORECASE)
            ],
            'edited': [
                re.compile(r'(?:editing|modifying|updating|changing|writing to)\s+(?:file\s+)?([/\w\-\.]+\.\w+)', re.IGNORECASE),
                re.compile(r'(?:Edit|Write)\s+([/\w\-\.]+\.\w+)', re.IGNORECASE),
                re.compile(r'(?:changes to|modified|updated)\s+([/\w\-\.]+\.\w+)', re.IGNORECASE)
            ],
            'created': [
                re.compile(r'(?:creating|created|new file)\s+([/\w\-\.]+\.\w+)', re.IGNORECASE),
                re.compile(r'(?:Write|Create)\s+new\s+file\s+([/\w\-\.]+\.\w+)', re.IGNORECASE)
            ]
        }

        # Tool patterns.
        # FIX: removed the duplicated 'cargo' alternative in the
        # package-manager pattern (was "npm|yarn|pip|cargo|go|cargo").
        self.tool_patterns = [
            re.compile(r'(?:using|running|executing)\s+(\w+)\s+(?:tool|command)', re.IGNORECASE),
            re.compile(r'(?:Tool:|Command:)\s*(\w+)', re.IGNORECASE),
            re.compile(r'```(?:bash|shell|sh)\n([a-z]+)', re.IGNORECASE),
            re.compile(r'\$\s+([a-z]+)\s+', re.IGNORECASE),  # Command line
            re.compile(r'(?:npm|yarn|pip|cargo|go)\s+([\w\-]+)', re.IGNORECASE)
        ]

        # MCP tool pattern: mcp__<server>__<tool>
        self.mcp_pattern = re.compile(r'mcp__([a-zA-Z0-9\-_]+)__([a-zA-Z0-9\-_]+)')

        # Common CLI tools, precompiled once here instead of rebuilding
        # the regex on every extract() call.
        self._common_tool_patterns = {
            tool: re.compile(rf'\b{tool}\b', re.IGNORECASE)
            for tool in (
                'git', 'npm', 'yarn', 'pip', 'python', 'node', 'docker',
                'kubectl', 'aws', 'gcloud', 'az', 'terraform', 'ansible',
                'make', 'gradle', 'maven', 'cargo', 'go', 'rustc'
            )
        }

    def extract(self, text: str) -> Dict[str, Any]:
        """
        Extract tool usage information from text.

        Args:
            text: Raw conversation text to scan.

        Returns:
            Dictionary with files_analyzed, files_edited, files_created,
            files (legacy union of all three), tools_used, file_count,
            and tool_count.
        """
        # File operations, one pass per operation kind
        files_analyzed = self._extract_files(text, 'analyzed')
        files_edited = self._extract_files(text, 'edited')
        files_created = self._extract_files(text, 'created')

        tools_used: Set[str] = set()

        # Extract tools from the generic patterns
        for pattern in self.tool_patterns:
            for match in pattern.findall(text):
                tool = match.lower().strip()
                if self._is_valid_tool(tool):
                    tools_used.add(tool)

        # Extract MCP tools specifically
        for server, tool in self.mcp_pattern.findall(text):
            tools_used.add(f"mcp:{server}:{tool}")

        # Look for common CLI tools by word-boundary search
        for tool, pattern in self._common_tool_patterns.items():
            if pattern.search(text):
                tools_used.add(tool)

        # Combine all files for backward compatibility
        all_files = files_analyzed | files_edited | files_created

        return {
            "files_analyzed": list(files_analyzed)[:50],
            "files_edited": list(files_edited)[:50],
            "files_created": list(files_created)[:50],
            "files": list(all_files)[:50],  # Legacy field
            "tools_used": list(tools_used)[:30],
            "file_count": len(all_files),
            "tool_count": len(tools_used)
        }

    def _extract_files(self, text: str, operation: str) -> Set[str]:
        """Collect normalized filenames matched by the patterns for *operation*."""
        found: Set[str] = set()
        for pattern in self.file_patterns[operation]:
            for match in pattern.findall(text):
                file_path = self._normalize_file_path(match)
                if file_path:
                    found.add(file_path)
        return found

    def _normalize_file_path(self, path: str) -> str:
        """Normalize a candidate path to a bare filename.

        Returns "" when the candidate is too short/long, has no extension,
        or its extension is not in the allow-list.
        """
        # Remove quotes and whitespace
        path = path.strip('\'"` \n')

        # Skip if too short or too long
        if len(path) < 3 or len(path) > 200:
            return ""

        # Must have an extension
        if '.' not in path:
            return ""

        # Keep only the final component of a full path
        if '/' in path:
            path = path.split('/')[-1]

        # Validate extension against known source/config extensions
        valid_extensions = {
            'py', 'js', 'ts', 'jsx', 'tsx', 'java', 'go', 'rs', 'cpp', 'c', 'h',
            'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'html', 'css', 'scss',
            'sql', 'sh', 'bash', 'dockerfile', 'makefile', 'toml', 'ini', 'cfg'
        }

        ext = path.split('.')[-1].lower()
        if ext not in valid_extensions:
            return ""

        return path

    def _is_valid_tool(self, tool: str) -> bool:
        """Check if a lowercased string looks like a plausible tool name."""
        # Skip common English words that slip through the regexes
        skip_words = {
            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
            'this', 'that', 'with', 'from', 'as', 'is', 'was', 'are', 'were'
        }

        if tool in skip_words:
            return False

        # Must be alphanumeric with possible hyphens/underscores
        if not re.match(r'^[a-z0-9\-_]+$', tool):
            return False

        # Reasonable length
        if len(tool) < 2 or len(tool) > 30:
            return False

        return True
@@ -0,0 +1,5 @@
1
+ """State management for import tracking."""
2
+
3
+ from .state_manager import StateManager
4
+
5
+ __all__ = ["StateManager"]
@@ -0,0 +1,190 @@
1
+ """State management with atomic writes."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Dict, Any, Optional, Set
8
+ from datetime import datetime
9
+ import fcntl
10
+ import tempfile
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class StateManager:
    """
    Manage import state with atomic writes and crash recovery.

    Implements the critical STATE_FILE fixes:
    1. Handle empty dirname case
    2. Atomic writes to prevent corruption
    3. File locking for concurrent access

    NOTE(review): this module imports fcntl at top level, which is
    unavailable on Windows and would fail before acquire_lock()'s
    os.name guard can run — confirm the module is only imported on
    POSIX, or make the import conditional.
    """

    def __init__(self, state_file: Path):
        # state_file may be a bare filename; save_state() handles the
        # missing-directory case.
        self.state_file = Path(state_file)
        self.state: Dict[str, Any] = self._load_state()
        self._lock_file = None  # Open file handle while a flock is held

    def _load_state(self) -> Dict[str, Any]:
        """Load state from file, or return a fresh default state.

        A corrupt/unreadable state file is renamed to *.corrupt (best
        effort) so the next run starts clean without destroying evidence.
        """
        if self.state_file.exists():
            try:
                with open(self.state_file, 'r') as f:
                    state = json.load(f)
                logger.debug(f"Loaded state with {len(state.get('processed', []))} processed files")
                return state
            except Exception as e:
                logger.error(f"Failed to load state: {e}")
                # Create backup of corrupted state
                backup = self.state_file.with_suffix('.corrupt')
                try:
                    self.state_file.rename(backup)
                    logger.info(f"Backed up corrupted state to {backup}")
                except OSError:
                    # FIX: narrowed from bare except; backup is best-effort
                    pass

        # Return default state
        return {
            "processed": {},
            "failed": {},
            "last_updated": None,
            "version": "3.0.0"
        }

    def save_state(self) -> None:
        """
        Save state with an atomic write (temp file + rename).

        Implements the critical fix for STATE_FILE directory handling:
        a state file with no directory component writes into '.'.

        Raises:
            Exception: re-raises any failure after logging it.
        """
        try:
            # FIX: Handle case where STATE_FILE has no directory component
            state_dir = self.state_file.parent
            if state_dir and state_dir != Path('.'):
                state_dir.mkdir(parents=True, exist_ok=True)

            # Update timestamp
            self.state["last_updated"] = datetime.now().isoformat()

            # Atomic write: temp file in the SAME directory, then rename,
            # so a crash can never leave a half-written state file.
            temp_fd, temp_path = tempfile.mkstemp(
                dir=str(state_dir) if state_dir else '.',
                prefix='.tmp_state_',
                suffix='.json'
            )

            try:
                # Write to temp file
                with os.fdopen(temp_fd, 'w') as f:
                    json.dump(self.state, f, indent=2)

                # Atomic rename (on POSIX systems)
                Path(temp_path).replace(self.state_file)

                logger.debug("State saved successfully")

            except Exception:
                # Clean up temp file on error, then propagate
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass
                # FIX: bare raise preserves the original traceback
                # (was `raise e`, which re-raised from this frame)
                raise

        except Exception as e:
            logger.error(f"Failed to save state: {e}")
            raise

    def is_processed(self, file_path: Path) -> bool:
        """Check if file has been processed."""
        return str(file_path) in self.state.get("processed", {})

    def mark_processed(self, file_path: Path, points_created: int) -> None:
        """Mark file as processed and persist the state immediately."""
        if "processed" not in self.state:
            self.state["processed"] = {}

        self.state["processed"][str(file_path)] = {
            "timestamp": datetime.now().isoformat(),
            "points_created": points_created
        }

        # Remove from failed if present: success supersedes past failure
        if str(file_path) in self.state.get("failed", {}):
            del self.state["failed"][str(file_path)]

        self.save_state()

    def mark_failed(self, file_path: Path, error: str) -> None:
        """Mark file as failed and persist the state immediately."""
        if "failed" not in self.state:
            self.state["failed"] = {}

        self.state["failed"][str(file_path)] = {
            "timestamp": datetime.now().isoformat(),
            "error": error
        }

        self.save_state()

    def get_processed_files(self) -> Set[str]:
        """Get set of processed file paths."""
        return set(self.state.get("processed", {}).keys())

    def get_failed_files(self) -> Set[str]:
        """Get set of failed file paths."""
        return set(self.state.get("failed", {}).keys())

    def reset(self) -> None:
        """Reset state to empty and persist it."""
        self.state = {
            "processed": {},
            "failed": {},
            "last_updated": datetime.now().isoformat(),
            "version": "3.0.0"
        }
        self.save_state()

    def get_statistics(self) -> Dict[str, Any]:
        """Get import statistics (counts, total points, last update)."""
        processed = self.state.get("processed", {})
        failed = self.state.get("failed", {})

        total_points = sum(
            info.get("points_created", 0)
            for info in processed.values()
        )

        return {
            "processed_count": len(processed),
            "failed_count": len(failed),
            "total_points": total_points,
            "last_updated": self.state.get("last_updated"),
            "version": self.state.get("version")
        }

    def acquire_lock(self) -> bool:
        """Acquire exclusive lock for state file (Unix/Linux only).

        Returns:
            True if the lock was acquired (or locking is unsupported on
            this platform), False if another process holds it.
        """
        if os.name == 'nt':  # Windows
            logger.warning("File locking not implemented for Windows")
            return True  # Allow operation to proceed without locking on Windows
        try:
            lock_path = self.state_file.with_suffix('.lock')
            self._lock_file = open(lock_path, 'w')
            # Non-blocking exclusive lock: raises if already held
            fcntl.flock(self._lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            return True
        except (IOError, OSError):
            return False

    def release_lock(self) -> None:
        """Release state file lock (no-op if none is held)."""
        if self._lock_file:
            try:
                fcntl.flock(self._lock_file.fileno(), fcntl.LOCK_UN)
                self._lock_file.close()
            except (OSError, ValueError):
                # FIX: narrowed from bare except; ValueError covers an
                # already-closed file object
                pass
            self._lock_file = None
@@ -0,0 +1,5 @@
1
+ """Storage backend for vector database operations."""
2
+
3
+ from .qdrant_storage import QdrantStorage
4
+
5
+ __all__ = ["QdrantStorage"]
@@ -0,0 +1,250 @@
1
+ """Qdrant vector database storage implementation."""
2
+
3
+ import logging
4
+ from typing import List, Optional, Dict, Any
5
+ from qdrant_client import QdrantClient
6
+ from qdrant_client.models import (
7
+ Distance,
8
+ VectorParams,
9
+ PointStruct,
10
+ CollectionInfo
11
+ )
12
+
13
+ from ..core import ProcessedPoint
14
+ from ..core.exceptions import StorageError
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class QdrantStorage:
    """
    Qdrant storage backend implementation.

    Handles all interactions with the Qdrant vector database. Call
    initialize() before anything else; mutating operations raise
    StorageError when the client is not initialized, while read-only
    helpers degrade to empty/None results.
    """

    def __init__(self, url: str = "http://localhost:6333", api_key: Optional[str] = None):
        self.url = url
        self.api_key = api_key
        self.client = None  # Set by initialize()
        self._initialized = False

    def initialize(self) -> None:
        """Connect to Qdrant and verify the connection with a test call.

        Raises:
            StorageError: if the client cannot connect.
        """
        try:
            self.client = QdrantClient(
                url=self.url,
                api_key=self.api_key,
                timeout=30
            )

            # Test connection
            self.client.get_collections()
            self._initialized = True
            logger.info(f"Connected to Qdrant at {self.url}")

        except Exception as e:
            # FIX: chain the cause so the underlying error is preserved
            raise StorageError(
                operation="initialize",
                collection="N/A",
                reason=f"Failed to connect to Qdrant: {e}"
            ) from e

    def create_collection(self, name: str, dimension: int) -> bool:
        """
        Create a new collection if it doesn't exist.

        Args:
            name: Collection name
            dimension: Vector dimension

        Returns:
            True if created, False if already exists

        Raises:
            StorageError: if not initialized or creation fails.
        """
        if not self._initialized:
            raise StorageError(
                operation="create_collection",
                collection=name,
                reason="Storage not initialized"
            )

        try:
            if self.collection_exists(name):
                logger.debug(f"Collection {name} already exists")
                return False

            self.client.create_collection(
                collection_name=name,
                vectors_config=VectorParams(
                    size=dimension,
                    distance=Distance.COSINE
                )
            )

            logger.info(f"Created collection {name} with dimension {dimension}")
            return True

        except Exception as e:
            raise StorageError(
                operation="create_collection",
                collection=name,
                reason=str(e)
            ) from e

    def collection_exists(self, name: str) -> bool:
        """Check if a collection exists.

        Raises:
            StorageError: if storage is not initialized.
        """
        if not self._initialized:
            raise StorageError(
                operation="collection_exists",
                collection=name,
                reason="Storage not initialized"
            )

        try:
            collections = self.client.get_collections().collections
            return any(c.name == name for c in collections)
        except Exception as e:
            # Treat lookup failure as "missing" rather than propagating
            logger.error(f"Failed to check collection existence: {e}")
            return False

    def upsert_points(self, collection: str, points: List[ProcessedPoint]) -> int:
        """
        Insert or update points in a collection.

        The collection is created on demand using the dimension of the
        first point's vector.

        Args:
            collection: Collection name
            points: List of points to upsert

        Returns:
            Number of points upserted (0 for an empty list)

        Raises:
            StorageError: if not initialized or the upsert fails.
        """
        if not self._initialized:
            raise StorageError(
                operation="upsert_points",
                collection=collection,
                reason="Storage not initialized"
            )

        if not points:
            return 0

        try:
            # Ensure collection exists with correct dimension
            if not self.collection_exists(collection):
                dimension = len(points[0].vector)
                self.create_collection(collection, dimension)

            # Convert to Qdrant points
            qdrant_points = [
                PointStruct(
                    id=self._generate_point_id(point.id),
                    vector=point.vector,
                    payload=point.payload
                )
                for point in points
            ]

            # Batch upsert (FIX: result was assigned to an unused local)
            self.client.upsert(
                collection_name=collection,
                points=qdrant_points
            )

            logger.debug(f"Upserted {len(points)} points to {collection}")
            return len(points)

        except Exception as e:
            raise StorageError(
                operation="upsert_points",
                collection=collection,
                reason=str(e)
            ) from e

    def get_collection_info(self, name: str) -> Optional[CollectionInfo]:
        """Get information about a collection, or None on any failure."""
        if not self._initialized:
            return None

        try:
            return self.client.get_collection(collection_name=name)
        except Exception as e:
            logger.error(f"Failed to get collection info: {e}")
            return None

    def delete_collection(self, name: str) -> bool:
        """Delete a collection.

        Returns:
            True if deleted, False if it did not exist.

        Raises:
            StorageError: if not initialized or deletion fails.
        """
        if not self._initialized:
            raise StorageError(
                operation="delete_collection",
                collection=name,
                reason="Storage not initialized"
            )

        try:
            if not self.collection_exists(name):
                return False

            self.client.delete_collection(collection_name=name)
            logger.info(f"Deleted collection {name}")
            return True

        except Exception as e:
            raise StorageError(
                operation="delete_collection",
                collection=name,
                reason=str(e)
            ) from e

    def get_collections(self) -> List[str]:
        """Get list of all collection names ([] when unavailable)."""
        if not self._initialized:
            return []

        try:
            collections = self.client.get_collections().collections
            return [c.name for c in collections]
        except Exception as e:
            logger.error(f"Failed to get collections: {e}")
            return []

    def count_points(self, collection: str) -> int:
        """Count points in a collection (0 when unavailable)."""
        if not self._initialized:
            return 0

        try:
            info = self.get_collection_info(collection)
            return info.points_count if info else 0
        except Exception as e:
            logger.error(f"Failed to count points: {e}")
            return 0

    def _generate_point_id(self, string_id: str) -> str:
        """
        Generate a valid point ID from a string.

        Qdrant accepts string IDs directly in newer versions.
        For older versions, we might need to hash to integer.
        """
        # For now, use string IDs directly
        # If needed, can hash: int(hashlib.md5(string_id.encode()).hexdigest()[:16], 16)
        return string_id

    def health_check(self) -> Dict[str, Any]:
        """Check health of Qdrant connection.

        Returns:
            {"healthy": True, "url": ..., "collections_count": ...,
             "total_points": ...} on success, or
            {"healthy": False, "reason": ...} on failure.
        """
        if not self._initialized:
            return {"healthy": False, "reason": "Not initialized"}

        try:
            collections = self.client.get_collections()
            return {
                "healthy": True,
                "url": self.url,
                "collections_count": len(collections.collections),
                # NOTE: issues one count request per collection
                "total_points": sum(
                    self.count_points(c.name)
                    for c in collections.collections
                )
            }
        except Exception as e:
            return {"healthy": False, "reason": str(e)}
@@ -0,0 +1,9 @@
1
+ """Utility functions for the import system."""
2
+
3
+ from .project_normalizer import ProjectNormalizer
4
+ from .logger import setup_logging
5
+
6
+ __all__ = [
7
+ "ProjectNormalizer",
8
+ "setup_logging"
9
+ ]