claude-self-reflect 2.3.5 → 2.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,33 +0,0 @@
- #!/usr/bin/env python3
- """Import only recent conversation files from memento-stack project."""
-
- import os
- import sys
- from datetime import datetime, timedelta
-
- # Get the import script path
- import_script = os.path.join(os.path.dirname(__file__), "import-openai.py")
- project_path = os.path.expanduser("~/.claude/projects/-Users-ramakrishnanannaswamy-memento-stack")
-
- # Get files modified in last 2 days
- cutoff = datetime.now() - timedelta(days=2)
- recent_files = []
-
- for file in os.listdir(project_path):
-     if file.endswith(".jsonl"):
-         file_path = os.path.join(project_path, file)
-         mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
-         if mtime > cutoff:
-             recent_files.append(file)
-
- print(f"Found {len(recent_files)} recent files to import")
-
- # Set environment variable
- # VOYAGE_KEY must be set as environment variable
- if not os.getenv("VOYAGE_KEY"):
-     print("Error: VOYAGE_KEY environment variable not set")
-     print("Please set: export VOYAGE_KEY='your-voyage-api-key'")
-     sys.exit(1)
-
- # Import the whole project (the script will handle individual files)
- os.system(f"python {import_script} {project_path}")
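
Note: the removed file above is a convenience wrapper. It scans one hard-coded project directory for .jsonl transcripts modified in the last two days, aborts unless the VOYAGE_KEY environment variable is set, and then shells out to a sibling import-openai.py for the actual import. Below is a minimal sketch of the same recency filter; it is illustrative only (not part of the package) and swaps os.system for subprocess.run to avoid shell-quoting pitfalls.

#!/usr/bin/env python3
# Sketch only; assumes a sibling import-openai.py, as the removed script did.
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path

project = Path("~/.claude/projects/-Users-ramakrishnanannaswamy-memento-stack").expanduser()
cutoff = datetime.now() - timedelta(days=2)  # same two-day window as the removed script

# Keep only transcripts whose mtime falls inside the window.
recent = [p for p in project.glob("*.jsonl")
          if datetime.fromtimestamp(p.stat().st_mtime) > cutoff]
print(f"Found {len(recent)} recent files to import")

# Delegate to the sibling importer, as the removed script did via os.system.
importer = Path(__file__).parent / "import-openai.py"
subprocess.run([sys.executable, str(importer), str(project)], check=True)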
@@ -1,278 +0,0 @@
- #!/usr/bin/env python3
- """
- Import a single project's conversations to Qdrant.
- This script processes one project at a time to avoid memory issues.
- """
-
- import json
- import os
- import sys
- import glob
- import hashlib
- from datetime import datetime
- from typing import List, Dict, Any
- import logging
- from qdrant_client import QdrantClient
- from qdrant_client.models import VectorParams, Distance, PointStruct
- from sentence_transformers import SentenceTransformer
-
- # Configuration
- QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
- STATE_FILE = os.getenv("STATE_FILE", "./config-isolated/imported-files.json")
- EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
- BATCH_SIZE = 50  # Reduced batch size for memory efficiency
- CHUNK_SIZE = 5  # Messages per chunk
-
- # Set up logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
- logger = logging.getLogger(__name__)
-
- class SingleProjectImporter:
-     def __init__(self, project_path: str):
-         """Initialize importer for a single project."""
-         self.project_path = project_path
-         self.project_name = os.path.basename(project_path)
-         self.client = QdrantClient(url=QDRANT_URL)
-         self.encoder = SentenceTransformer(EMBEDDING_MODEL)
-         self.imported_files = self.load_state()
-
-     def load_state(self) -> Dict[str, List[str]]:
-         """Load import state."""
-         if os.path.exists(STATE_FILE):
-             try:
-                 with open(STATE_FILE, 'r') as f:
-                     data = json.load(f)
-                     return data.get('projects', {})
-             except Exception as e:
-                 logger.error(f"Failed to load state: {e}")
-         return {}
-
-     def save_state(self):
-         """Save import state."""
-         os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-
-         # Load existing state to preserve other projects
-         existing = {}
-         if os.path.exists(STATE_FILE):
-             try:
-                 with open(STATE_FILE, 'r') as f:
-                     existing = json.load(f)
-             except:
-                 pass
-
-         # Update with current project
-         if 'projects' not in existing:
-             existing['projects'] = {}
-         existing['projects'][self.project_name] = self.imported_files.get(self.project_name, [])
-         existing['last_updated'] = datetime.now().isoformat()
-         existing['mode'] = 'isolated'
-
-         with open(STATE_FILE, 'w') as f:
-             json.dump(existing, f, indent=2)
-
-     def get_collection_name(self) -> str:
-         """Get collection name for this project."""
-         project_hash = hashlib.md5(self.project_name.encode()).hexdigest()[:8]
-         return f"conv_{project_hash}"
-
-     def setup_collection(self):
-         """Create or verify collection exists."""
-         collection_name = self.get_collection_name()
-
-         collections = self.client.get_collections().collections
-         exists = any(c.name == collection_name for c in collections)
-
-         if not exists:
-             logger.info(f"Creating collection: {collection_name}")
-             self.client.create_collection(
-                 collection_name=collection_name,
-                 vectors_config=VectorParams(
-                     size=384,  # all-MiniLM-L6-v2 dimension
-                     distance=Distance.COSINE
-                 )
-             )
-         else:
-             logger.info(f"Collection {collection_name} already exists")
-
-         return collection_name
-
-     def process_jsonl_file(self, file_path: str) -> List[Dict[str, Any]]:
-         """Extract messages from a JSONL file."""
-         messages = []
-
-         try:
-             with open(file_path, 'r') as f:
-                 for line_num, line in enumerate(f, 1):
-                     try:
-                         data = json.loads(line.strip())
-
-                         if 'message' in data and data['message']:
-                             msg = data['message']
-                             if 'role' in msg and 'content' in msg:
-                                 content = msg['content']
-                                 if isinstance(content, dict):
-                                     content = content.get('text', json.dumps(content))
-
-                                 messages.append({
-                                     'role': msg['role'],
-                                     'content': content,
-                                     'file_path': file_path,
-                                     'line_number': line_num,
-                                     'timestamp': data.get('timestamp', datetime.now().isoformat())
-                                 })
-                     except json.JSONDecodeError:
-                         continue
-                     except Exception as e:
-                         logger.debug(f"Error processing line {line_num}: {e}")
-
-         except Exception as e:
-             logger.error(f"Failed to read file {file_path}: {e}")
-
-         return messages
-
-     def create_conversation_chunks(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-         """Group messages into conversation chunks."""
-         chunks = []
-
-         for i in range(0, len(messages), CHUNK_SIZE):
-             chunk_messages = messages[i:i + CHUNK_SIZE]
-
-             conversation_text = "\n\n".join([
-                 f"{msg['role'].upper()}: {msg['content'][:500]}..."
-                 if len(msg['content']) > 500 else f"{msg['role'].upper()}: {msg['content']}"
-                 for msg in chunk_messages
-             ])
-
-             conversation_id = os.path.basename(chunk_messages[0]['file_path']).replace('.jsonl', '')
-
-             chunks.append({
-                 'id': hashlib.md5(f"{chunk_messages[0]['file_path']}_{i}".encode()).hexdigest(),
-                 'text': conversation_text,
-                 'metadata': {
-                     'project_id': self.project_name,
-                     'project_name': self.project_name,
-                     'conversation_id': conversation_id,
-                     'chunk_index': i // CHUNK_SIZE,
-                     'message_count': len(chunk_messages),
-                     'start_role': chunk_messages[0]['role'],
-                     'timestamp': chunk_messages[0]['timestamp'],
-                     'file_path': chunk_messages[0]['file_path']
-                 }
-             })
-
-         return chunks
-
-     def import_to_qdrant(self, chunks: List[Dict[str, Any]], collection_name: str):
-         """Import chunks to Qdrant with memory-efficient batching."""
-         if not chunks:
-             return
-
-         # Process in smaller batches to avoid memory issues
-         for batch_start in range(0, len(chunks), BATCH_SIZE):
-             batch_chunks = chunks[batch_start:batch_start + BATCH_SIZE]
-
-             # Generate embeddings for this batch
-             texts = [chunk['text'] for chunk in batch_chunks]
-             embeddings = self.encoder.encode(texts, show_progress_bar=False)
-
-             # Create points
-             points = []
-             for chunk, embedding in zip(batch_chunks, embeddings):
-                 points.append(
-                     PointStruct(
-                         id=chunk['id'],
-                         vector=embedding.tolist(),
-                         payload={
-                             'text': chunk['text'],
-                             **chunk['metadata']
-                         }
-                     )
-                 )
-
-             # Upload to Qdrant
-             self.client.upsert(
-                 collection_name=collection_name,
-                 points=points
-             )
-             logger.info(f"Uploaded batch of {len(points)} points")
-
-             # Clear memory
-             del texts, embeddings, points
-
-     def import_project(self):
-         """Import all conversations for this project."""
-         logger.info(f"Importing project: {self.project_name}")
-
-         # Find all JSONL files
-         pattern = os.path.join(self.project_path, "*.jsonl")
-         all_files = glob.glob(pattern)
-
-         if not all_files:
-             logger.warning(f"No JSONL files found in {self.project_path}")
-             return
-
-         # Get already imported files for this project
-         project_imported = set(self.imported_files.get(self.project_name, []))
-
-         # Convert to relative paths for comparison
-         new_files = []
-         for f in all_files:
-             rel_path = f.replace(os.path.expanduser("~/.claude/projects"), "/logs")
-             if rel_path not in project_imported:
-                 new_files.append((f, rel_path))
-
-         if not new_files:
-             logger.info(f"All files already imported for {self.project_name}")
-             return
-
-         logger.info(f"Found {len(new_files)} new files to import")
-
-         # Setup collection
-         collection_name = self.setup_collection()
-
-         # Process files one by one
-         total_chunks = 0
-         for file_path, rel_path in new_files:
-             logger.info(f"Processing: {os.path.basename(file_path)}")
-
-             # Extract messages
-             messages = self.process_jsonl_file(file_path)
-             if not messages:
-                 logger.warning(f"No messages found in {file_path}")
-                 continue
-
-             # Create chunks
-             chunks = self.create_conversation_chunks(messages)
-
-             # Import to Qdrant
-             self.import_to_qdrant(chunks, collection_name)
-
-             # Update state after each file
-             if self.project_name not in self.imported_files:
-                 self.imported_files[self.project_name] = []
-             self.imported_files[self.project_name].append(rel_path)
-             self.save_state()
-
-             total_chunks += len(chunks)
-             logger.info(f"Imported {len(chunks)} chunks from {os.path.basename(file_path)}")
-
-         # Final summary
-         count = self.client.get_collection(collection_name).points_count
-         logger.info(f"Project complete: {total_chunks} chunks imported, {count} total points in collection")
-
- def main():
-     if len(sys.argv) != 2:
-         print("Usage: python import-single-project.py <project_path>")
-         print("Example: python import-single-project.py ~/.claude/projects/my-project")
-         sys.exit(1)
-
-     project_path = sys.argv[1]
-     if not os.path.exists(project_path):
-         logger.error(f"Project path does not exist: {project_path}")
-         sys.exit(1)
-
-     importer = SingleProjectImporter(project_path)
-     importer.import_project()
-
- if __name__ == "__main__":
-     main()
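
Note: two conventions in the removed importer (its usage string names it import-single-project.py) matter to anyone inspecting existing Qdrant data. Each project gets its own collection, named "conv_" plus the first eight hex digits of an MD5 over the project directory name, and each five-message chunk gets a deterministic point ID, the MD5 of "<file_path>_<start_offset>", so re-importing a file upserts over the same points rather than duplicating them (Qdrant accepts UUIDs in simple, un-hyphenated form, so a 32-character hex digest is a valid point ID). A standalone sketch of both derivations, illustrative only:

# Sketch of the naming scheme used by the removed importer (not a package API).
import hashlib

def collection_name(project_name: str) -> str:
    # "conv_" + first 8 hex chars of MD5(project directory name),
    # matching get_collection_name() in the diff above.
    return f"conv_{hashlib.md5(project_name.encode()).hexdigest()[:8]}"

def chunk_point_id(file_path: str, start_index: int) -> str:
    # Deterministic per (file, chunk offset); upserts are therefore idempotent.
    return hashlib.md5(f"{file_path}_{start_index}".encode()).hexdigest()

# Example values (the second file path is hypothetical):
print(collection_name("-Users-ramakrishnanannaswamy-memento-stack"))  # "conv_" + 8 hex digits
print(chunk_point_id("/logs/my-project/session.jsonl", 0))            # 32 hex digits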
@@ -1,170 +0,0 @@
- #!/usr/bin/env python3
- """
- File system watcher for automatic conversation imports.
- Monitors Claude projects directory for new/modified JSONL files.
- """
-
- import os
- import sys
- import time
- import json
- import logging
- from datetime import datetime, timedelta
- from pathlib import Path
- from typing import Dict, Set
- import subprocess
-
- # Configuration
- WATCH_DIR = os.getenv("WATCH_DIR", "/logs")
- STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
- WATCH_INTERVAL = int(os.getenv("WATCH_INTERVAL", "60"))  # seconds
- IMPORT_DELAY = int(os.getenv("IMPORT_DELAY", "30"))  # Wait before importing new files
- IMPORTER_SCRIPT = "/scripts/import-conversations-unified.py"
-
- # Set up logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(levelname)s - [Watcher] %(message)s'
- )
- logger = logging.getLogger(__name__)
-
- class ImportWatcher:
-     def __init__(self):
-         """Initialize the import watcher."""
-         self.watch_dir = Path(WATCH_DIR)
-         self.state_file = Path(STATE_FILE)
-         self.pending_imports: Dict[str, datetime] = {}
-         self.last_scan = datetime.now()
-
-     def load_imported_files(self) -> Set[str]:
-         """Load set of already imported files."""
-         imported = set()
-
-         if self.state_file.exists():
-             try:
-                 with open(self.state_file, 'r') as f:
-                     state = json.load(f)
-                     for project_files in state.get("projects", {}).values():
-                         imported.update(project_files)
-             except Exception as e:
-                 logger.error(f"Failed to load state: {e}")
-
-         return imported
-
-     def find_new_files(self, imported_files: Set[str]) -> Dict[str, Path]:
-         """Find new or modified JSONL files."""
-         new_files = {}
-
-         for project_dir in self.watch_dir.iterdir():
-             if not project_dir.is_dir():
-                 continue
-
-             for jsonl_file in project_dir.glob("*.jsonl"):
-                 # Convert to relative path for comparison
-                 rel_path = str(jsonl_file).replace(str(self.watch_dir), "/logs")
-
-                 # Check if file is new or modified
-                 if rel_path not in imported_files:
-                     mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime)
-
-                     # Only consider files modified after last scan
-                     if mtime > self.last_scan - timedelta(seconds=WATCH_INTERVAL):
-                         new_files[rel_path] = jsonl_file
-                         logger.info(f"Found new file: {jsonl_file.name} in {project_dir.name}")
-
-         return new_files
-
-     def import_project(self, project_path: Path) -> bool:
-         """Trigger import for a specific project."""
-         try:
-             logger.info(f"Starting import for project: {project_path.name}")
-
-             # Run the streaming importer
-             # Pass only the project directory name, not the full path
-             result = subprocess.run(
-                 ["python", IMPORTER_SCRIPT, "--project", project_path.name],
-                 capture_output=True,
-                 text=True,
-                 timeout=300  # 5 minute timeout
-             )
-
-             if result.returncode == 0:
-                 logger.info(f"Successfully imported project: {project_path.name}")
-                 return True
-             else:
-                 logger.error(f"Import failed for {project_path.name}: {result.stderr}")
-                 return False
-
-         except subprocess.TimeoutExpired:
-             logger.error(f"Import timeout for project: {project_path.name}")
-             return False
-         except Exception as e:
-             logger.error(f"Import error for {project_path.name}: {e}")
-             return False
-
-     def process_pending_imports(self):
-         """Process files that are ready for import."""
-         current_time = datetime.now()
-         projects_to_import = set()
-
-         # Check which files are ready for import
-         for file_path, added_time in list(self.pending_imports.items()):
-             if current_time - added_time >= timedelta(seconds=IMPORT_DELAY):
-                 project_path = Path(file_path).parent
-                 projects_to_import.add(project_path)
-                 del self.pending_imports[file_path]
-
-         # Import each project
-         for project_path in projects_to_import:
-             self.import_project(project_path)
-
-     def run(self):
-         """Main watch loop."""
-         logger.info(f"Starting import watcher on {self.watch_dir}")
-         logger.info(f"Scan interval: {WATCH_INTERVAL}s, Import delay: {IMPORT_DELAY}s")
-
-         # Initial full import
-         logger.info("Running initial full import...")
-         subprocess.run(["python", IMPORTER_SCRIPT], timeout=3600)
-
-         while True:
-             try:
-                 # Load current import state
-                 imported_files = self.load_imported_files()
-
-                 # Find new files
-                 new_files = self.find_new_files(imported_files)
-
-                 # Add new files to pending
-                 for file_path, full_path in new_files.items():
-                     if file_path not in self.pending_imports:
-                         self.pending_imports[file_path] = datetime.now()
-                         logger.info(f"Queued for import: {full_path.name}")
-
-                 # Process pending imports
-                 self.process_pending_imports()
-
-                 # Update last scan time
-                 self.last_scan = datetime.now()
-
-                 # Log status
-                 if self.pending_imports:
-                     logger.info(f"Files pending import: {len(self.pending_imports)}")
-
-                 # Wait for next scan
-                 time.sleep(WATCH_INTERVAL)
-
-             except KeyboardInterrupt:
-                 logger.info("Watcher stopped by user")
-                 break
-             except Exception as e:
-                 logger.error(f"Watcher error: {e}")
-                 time.sleep(WATCH_INTERVAL)
-
- def main():
-     """Main entry point."""
-     watcher = ImportWatcher()
-     watcher.run()
-
- if __name__ == "__main__":
-     main()
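
Note: the removed watcher implements a poll-and-debounce loop rather than OS-level file notifications. Every WATCH_INTERVAL seconds it compares the watch directory against the shared state file, queues unseen .jsonl files with a timestamp, and only triggers a per-project import once a file has waited IMPORT_DELAY seconds, so a transcript that has just appeared gets a grace period before import. A condensed sketch of that debounce core, illustrative only (constants and the /logs layout mirror the removed script):

# Sketch of the watcher's poll-and-debounce loop (not part of the package).
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Set

WATCH_INTERVAL = 60  # seconds between directory scans
IMPORT_DELAY = 30    # grace period before a queued file is imported
pending: Dict[Path, datetime] = {}

def scan(watch_dir: Path) -> None:
    # Queue unseen transcripts; the real watcher also consults the state file.
    for path in watch_dir.glob("*/*.jsonl"):
        pending.setdefault(path, datetime.now())

def ready_projects() -> Set[Path]:
    # Promote files that have waited out the delay; imports run per project dir.
    now = datetime.now()
    due = [p for p, queued in pending.items()
           if now - queued >= timedelta(seconds=IMPORT_DELAY)]
    for p in due:
        del pending[p]
    return {p.parent for p in due}

while True:
    scan(Path("/logs"))
    for project in ready_projects():
        print(f"would import {project.name}")  # the real loop invokes the importer here
    time.sleep(WATCH_INTERVAL)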