claude-self-reflect 2.3.5 → 2.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -2
- package/package.json +1 -1
- package/scripts/import-conversations-isolated.py +0 -311
- package/scripts/import-conversations-voyage-streaming.py +0 -368
- package/scripts/import-conversations-voyage.py +0 -430
- package/scripts/import-conversations.py +0 -240
- package/scripts/import-current-conversation.py +0 -39
- package/scripts/import-live-conversation.py +0 -154
- package/scripts/import-openai-enhanced.py +0 -867
- package/scripts/import-recent-only.py +0 -33
- package/scripts/import-single-project.py +0 -278
- package/scripts/import-watcher.py +0 -170
package/README.md
CHANGED
@@ -4,10 +4,19 @@ Claude forgets everything. This fixes that.
 
 ## What You Get
 
-Ask Claude about past conversations. Get actual answers.
+Ask Claude about past conversations. Get actual answers. Local-first with no cloud dependencies, but cloud-enhanced search available when you need it.
 
 **Before**: "I don't have access to previous conversations"
-**After**:
+**After**:
+```
+⏺ reflection-specialist(Search FastEmbed vs cloud embedding decision)
+  ⎿ Done (3 tool uses · 8.2k tokens · 12.4s)
+
+"Found it! Yesterday we decided on FastEmbed for local mode - better privacy,
+no API calls, 384-dimensional embeddings. Works offline too."
+```
+
+The reflection specialist is a specialized sub-agent that Claude automatically spawns when you ask about past conversations. It searches your conversation history in its own isolated context, keeping your main chat clean and focused.
 
 Your conversations become searchable. Your decisions stay remembered. Your context persists.
 
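As a rough illustration of what that reflection search does against the stored data: a minimal sketch assuming the qdrant-client and sentence-transformers packages, with the collection layout (384-dimensional all-MiniLM-L6-v2 vectors, cosine distance, a `text` payload field) taken from the importer removed below. The query string and collection name are illustrative, not code shipped in this package.

```python
# Minimal sketch of a semantic lookup over imported conversations.
# Assumes a Qdrant collection created like the (now removed) importer
# below: 384-dim all-MiniLM-L6-v2 vectors, cosine distance, `text` payload.
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

client = QdrantClient(url="http://localhost:6333")
encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

query = "FastEmbed vs cloud embedding decision"  # illustrative query
hits = client.search(
    collection_name="conversations",  # or a per-project conv_<hash> collection
    query_vector=encoder.encode(query).tolist(),
    limit=3,
)
for hit in hits:
    print(f"{hit.score:.3f}", hit.payload["text"][:120])
```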
package/package.json
CHANGED
-  "version": "2.3.5",
+  "version": "2.3.7",

package/scripts/import-conversations-isolated.py
DELETED
@@ -1,311 +0,0 @@
-#!/usr/bin/env python3
-"""
-Import Claude conversation logs with project isolation support.
-Each project gets its own collection for complete isolation.
-"""
-
-import json
-import os
-import glob
-import hashlib
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Set
-import logging
-from qdrant_client import QdrantClient
-from qdrant_client.models import (
-    VectorParams, Distance, PointStruct,
-    Filter, FieldCondition, MatchValue
-)
-from sentence_transformers import SentenceTransformer
-
-# Configuration
-QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
-LOGS_DIR = os.getenv("LOGS_DIR", "/logs")
-STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
-EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-BATCH_SIZE = int(os.getenv("BATCH_SIZE", "100"))
-ISOLATION_MODE = os.getenv("ISOLATION_MODE", "isolated")  # isolated, shared, hybrid
-
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-
-class ProjectAwareImporter:
-    def __init__(self):
-        """Initialize the importer with Qdrant client and embedding model."""
-        self.client = QdrantClient(url=QDRANT_URL)
-        self.encoder = SentenceTransformer(EMBEDDING_MODEL)
-        self.imported_files = self.load_state()
-        self.project_collections: Set[str] = set()
-
-    def load_state(self) -> Dict[str, Set[str]]:
-        """Load the set of already imported files per project."""
-        if os.path.exists(STATE_FILE):
-            try:
-                with open(STATE_FILE, 'r') as f:
-                    data = json.load(f)
-                # Convert to per-project tracking
-                if isinstance(data.get('files'), list):
-                    # Legacy format - convert to new format
-                    return {'_legacy': set(data['files'])}
-                else:
-                    # New format with per-project tracking
-                    return {k: set(v) for k, v in data.get('projects', {}).items()}
-            except Exception as e:
-                logger.error(f"Failed to load state: {e}")
-        return {}
-
-    def save_state(self):
-        """Save the set of imported files per project."""
-        os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-        with open(STATE_FILE, 'w') as f:
-            json.dump({
-                'projects': {k: list(v) for k, v in self.imported_files.items()},
-                'last_updated': datetime.now().isoformat(),
-                'mode': ISOLATION_MODE
-            }, f, indent=2)
-
-    def get_collection_name(self, project_name: str) -> str:
-        """Get collection name based on isolation mode."""
-        if ISOLATION_MODE == "isolated":
-            # Create project-specific collection name
-            project_hash = hashlib.md5(project_name.encode()).hexdigest()[:8]
-            return f"conv_{project_hash}"
-        else:
-            # Shared collection mode
-            return "conversations"
-
-    def setup_collection(self, project_name: str):
-        """Create or update the Qdrant collection for a project."""
-        collection_name = self.get_collection_name(project_name)
-
-        # Skip if already set up in this session
-        if collection_name in self.project_collections:
-            return collection_name
-
-        collections = self.client.get_collections().collections
-        exists = any(c.name == collection_name for c in collections)
-
-        if not exists:
-            logger.info(f"Creating collection: {collection_name} for project: {project_name}")
-            self.client.create_collection(
-                collection_name=collection_name,
-                vectors_config=VectorParams(
-                    size=384,  # all-MiniLM-L6-v2 dimension
-                    distance=Distance.COSINE
-                )
-            )
-        else:
-            logger.info(f"Collection {collection_name} already exists for project: {project_name}")
-
-        self.project_collections.add(collection_name)
-        return collection_name
-
-    def extract_project_name(self, file_path: str) -> str:
-        """Extract project name from file path."""
-        # Expected path: /logs/<project-name>/<conversation-id>.jsonl
-        parts = file_path.split('/')
-        if len(parts) >= 3 and parts[-2] != 'logs':
-            return parts[-2]
-        return 'unknown'
-
-    def process_jsonl_file(self, file_path: str) -> List[Dict[str, Any]]:
-        """Extract messages from a JSONL file."""
-        messages = []
-
-        try:
-            with open(file_path, 'r') as f:
-                for line_num, line in enumerate(f, 1):
-                    try:
-                        data = json.loads(line.strip())
-
-                        # Extract message if present
-                        if 'message' in data and data['message']:
-                            msg = data['message']
-                            if 'role' in msg and 'content' in msg:
-                                # Handle content that might be an object
-                                content = msg['content']
-                                if isinstance(content, dict):
-                                    content = content.get('text', json.dumps(content))
-
-                                # Create message document
-                                messages.append({
-                                    'role': msg['role'],
-                                    'content': content,
-                                    'file_path': file_path,
-                                    'line_number': line_num,
-                                    'timestamp': data.get('timestamp', datetime.now().isoformat())
-                                })
-                    except json.JSONDecodeError:
-                        logger.warning(f"Failed to parse line {line_num} in {file_path}")
-                    except Exception as e:
-                        logger.error(f"Error processing line {line_num} in {file_path}: {e}")
-
-        except Exception as e:
-            logger.error(f"Failed to read file {file_path}: {e}")
-
-        return messages
-
-    def create_conversation_chunks(self, messages: List[Dict[str, Any]], chunk_size: int = 5) -> List[Dict[str, Any]]:
-        """Group messages into conversation chunks for better context."""
-        chunks = []
-
-        for i in range(0, len(messages), chunk_size):
-            chunk_messages = messages[i:i + chunk_size]
-
-            # Create a conversation summary
-            conversation_text = "\n\n".join([
-                f"{msg['role'].upper()}: {msg['content'][:500]}..."
-                if len(msg['content']) > 500 else f"{msg['role'].upper()}: {msg['content']}"
-                for msg in chunk_messages
-            ])
-
-            # Extract metadata
-            project_id = self.extract_project_name(chunk_messages[0]['file_path'])
-            conversation_id = os.path.basename(chunk_messages[0]['file_path']).replace('.jsonl', '')
-
-            chunks.append({
-                'id': hashlib.md5(f"{chunk_messages[0]['file_path']}_{i}".encode()).hexdigest(),
-                'text': conversation_text,
-                'metadata': {
-                    'project_id': project_id,
-                    'project_name': project_id,  # Add both for compatibility
-                    'conversation_id': conversation_id,
-                    'chunk_index': i // chunk_size,
-                    'message_count': len(chunk_messages),
-                    'start_role': chunk_messages[0]['role'],
-                    'timestamp': chunk_messages[0]['timestamp'],
-                    'file_path': chunk_messages[0]['file_path']
-                }
-            })
-
-        return chunks
-
-    def import_to_qdrant(self, chunks: List[Dict[str, Any]], collection_name: str):
-        """Import conversation chunks to a specific Qdrant collection."""
-        if not chunks:
-            return
-
-        # Generate embeddings
-        texts = [chunk['text'] for chunk in chunks]
-        embeddings = self.encoder.encode(texts, show_progress_bar=True)
-
-        # Create points for Qdrant
-        points = []
-        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-            points.append(
-                PointStruct(
-                    id=chunk['id'],
-                    vector=embedding.tolist(),
-                    payload={
-                        'text': chunk['text'],
-                        **chunk['metadata']
-                    }
-                )
-            )
-
-        # Upload to Qdrant in batches
-        for i in range(0, len(points), BATCH_SIZE):
-            batch = points[i:i + BATCH_SIZE]
-            self.client.upsert(
-                collection_name=collection_name,
-                points=batch
-            )
-            logger.info(f"Uploaded batch of {len(batch)} points to {collection_name}")
-
-    def find_recent_files(self, days: int = 30) -> List[str]:
-        """Find JSONL files modified in the last N days."""
-        cutoff_time = datetime.now() - timedelta(days=days)
-        pattern = os.path.join(LOGS_DIR, "**", "*.jsonl")
-
-        recent_files = []
-        for file_path in glob.glob(pattern, recursive=True):
-            try:
-                mtime = os.path.getmtime(file_path)
-                if datetime.fromtimestamp(mtime) >= cutoff_time:
-                    recent_files.append(file_path)
-            except Exception as e:
-                logger.error(f"Error checking file {file_path}: {e}")
-
-        return recent_files
-
-    def run(self):
-        """Main import process with project isolation."""
-        logger.info(f"Starting conversation import to Qdrant (mode: {ISOLATION_MODE})")
-
-        # Find files to import
-        all_files = self.find_recent_files()
-        logger.info(f"Found {len(all_files)} total files")
-
-        # Group files by project
-        files_by_project: Dict[str, List[str]] = {}
-        for file_path in all_files:
-            project_name = self.extract_project_name(file_path)
-            if project_name not in files_by_project:
-                files_by_project[project_name] = []
-            files_by_project[project_name].append(file_path)
-
-        logger.info(f"Found {len(files_by_project)} projects to process")
-
-        total_chunks = 0
-        for project_name, project_files in files_by_project.items():
-            logger.info(f"\nProcessing project: {project_name}")
-
-            # Get imported files for this project
-            project_imported = self.imported_files.get(project_name, set())
-            new_files = [f for f in project_files if f not in project_imported]
-
-            if not new_files:
-                logger.info(f"No new files for project {project_name}")
-                continue
-
-            logger.info(f"Found {len(new_files)} new files for project {project_name}")
-
-            # Setup collection for this project
-            collection_name = self.setup_collection(project_name)
-
-            project_chunks = 0
-            for file_path in new_files:
-                logger.info(f"Processing: {file_path}")
-
-                # Extract messages
-                messages = self.process_jsonl_file(file_path)
-                if not messages:
-                    logger.warning(f"No messages found in {file_path}")
-                    continue
-
-                # Create conversation chunks
-                chunks = self.create_conversation_chunks(messages)
-
-                # Import to project-specific collection
-                self.import_to_qdrant(chunks, collection_name)
-
-                # Mark file as imported for this project
-                if project_name not in self.imported_files:
-                    self.imported_files[project_name] = set()
-                self.imported_files[project_name].add(file_path)
-                self.save_state()
-
-                project_chunks += len(chunks)
-                logger.info(f"Imported {len(chunks)} chunks from {file_path}")
-
-            total_chunks += project_chunks
-            logger.info(f"Project {project_name} complete: {project_chunks} chunks imported")
-
-        logger.info(f"\nImport complete: {total_chunks} total chunks imported")
-
-        # Show collection summary
-        logger.info("\nCollection summary:")
-        collections = self.client.get_collections().collections
-        for collection in collections:
-            if collection.name.startswith('conv_') or collection.name == 'conversations':
-                count = self.client.get_collection(collection.name).points_count
-                logger.info(f"  {collection.name}: {count} points")
-
-def main():
-    """Entry point for the importer."""
-    importer = ProjectAwareImporter()
-    importer.run()
-
-if __name__ == "__main__":
-    main()
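For reference, the isolation scheme removed above maps each project to a deterministic collection name. A standalone sketch of that same derivation (hashing rule copied from `get_collection_name`; the sample project name is illustrative):

```python
# Same naming rule as the removed importer's get_collection_name():
# an 8-character MD5 prefix gives each project a stable collection
# name in "isolated" mode; "shared" mode pools everything.
import hashlib

def collection_name(project_name: str, isolation_mode: str = "isolated") -> str:
    if isolation_mode == "isolated":
        return f"conv_{hashlib.md5(project_name.encode()).hexdigest()[:8]}"
    return "conversations"

print(collection_name("my-project"))            # conv_ + first 8 hex chars of the MD5
print(collection_name("my-project", "shared"))  # conversations
```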
|