claude-self-reflect 2.3.6 → 2.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/installer/setup-wizard.js +63 -23
- package/package.json +2 -2
- package/scripts/import-conversations-isolated.py +0 -311
- package/scripts/import-conversations-voyage-streaming.py +0 -368
- package/scripts/import-conversations-voyage.py +0 -430
- package/scripts/import-conversations.py +0 -240
- package/scripts/import-current-conversation.py +0 -39
- package/scripts/import-live-conversation.py +0 -154
- package/scripts/import-openai-enhanced.py +0 -867
- package/scripts/import-recent-only.py +0 -33
- package/scripts/import-single-project.py +0 -278
- package/scripts/import-watcher.py +0 -170
package/README.md
CHANGED
@@ -4,7 +4,7 @@ Claude forgets everything. This fixes that.
 
 ## What You Get
 
-Ask Claude about past conversations. Get actual answers.
+Ask Claude about past conversations. Get actual answers. **100% local by default** - your conversations never leave your machine. Cloud-enhanced search available when you need it.
 
 **Before**: "I don't have access to previous conversations"
 **After**:
@@ -143,6 +143,21 @@ Built by developers tired of re-explaining context every conversation.
 - Python 3.10+
 - 5 minutes for setup
 
+## Upgrading from Earlier Versions
+
+**v2.3.7+ includes major improvements:**
+- **Privacy First**: Local embeddings by default - your data never leaves your machine
+- **Smarter Setup**: Handles existing installations gracefully
+- **Better Security**: Automated vulnerability scanning
+
+**To upgrade:**
+```bash
+npm update -g claude-self-reflect
+claude-self-reflect setup # Re-run setup, it handles everything
+```
+
+The setup wizard now detects and fixes common upgrade issues automatically. Your existing conversations remain searchable.
+
 ## Advanced Setup
 
 Want to customize? See [Configuration Guide](docs/installation-guide.md).

package/installer/setup-wizard.js
CHANGED
@@ -245,15 +245,48 @@ async function setupPythonEnvironment() {
   // Check if venv already exists
   const venvPath = join(mcpPath, 'venv');
   let venvExists = false;
+  let venvHealthy = false;
+  let needsInstall = false;
+
   try {
     await fs.access(venvPath);
     venvExists = true;
-
+
+    // Check if existing venv is healthy
+    const venvPython = process.platform === 'win32'
+      ? join(venvPath, 'Scripts', 'python.exe')
+      : join(venvPath, 'bin', 'python');
+
+    try {
+      // Try to run python --version to verify venv is functional
+      safeExec(venvPython, ['--version'], { stdio: 'pipe' });
+
+      // Also check if MCP dependencies are installed
+      try {
+        safeExec(venvPython, ['-c', 'import fastmcp, qdrant_client'], { stdio: 'pipe' });
+        venvHealthy = true;
+        console.log('✅ Virtual environment already exists and is healthy');
+      } catch {
+        // Dependencies not installed, need to install them
+        console.log('⚠️ Virtual environment exists but missing dependencies');
+        needsInstall = true;
+        venvHealthy = true; // venv itself is healthy, just needs deps
+      }
+    } catch (healthError) {
+      console.log('⚠️ Existing virtual environment is corrupted');
+      console.log(' Removing and recreating...');
+
+      // Remove broken venv
+      const { rmSync } = await import('fs');
+      rmSync(venvPath, { recursive: true, force: true });
+      venvExists = false;
+      venvHealthy = false;
+    }
   } catch {
     // venv doesn't exist, create it
   }
 
-  if (!venvExists) {
+  if (!venvExists || !venvHealthy) {
     // Create virtual environment
     console.log('Creating virtual environment...');
     const pythonCmd = process.env.PYTHON_PATH || 'python3';
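
The new logic distinguishes three states: no venv at all, a venv whose interpreter is broken (remove and recreate), and a venv that runs but lacks the MCP dependencies (keep it, reinstall the packages). A minimal standalone sketch of that probe, using Node's built-in child_process instead of the wizard's safeExec wrapper:

```js
// Sketch only: probe a venv the same way the new wizard code does.
const { spawnSync } = require('child_process');
const { join } = require('path');

function probeVenv(venvPath) {
  const python = process.platform === 'win32'
    ? join(venvPath, 'Scripts', 'python.exe')
    : join(venvPath, 'bin', 'python');

  // Interpreter check: a missing or non-zero exit status means the venv is corrupted.
  const version = spawnSync(python, ['--version'], { stdio: 'pipe' });
  if (version.status !== 0) return { healthy: false, needsInstall: true };

  // Dependency check: can the MCP server's imports be resolved?
  const deps = spawnSync(python, ['-c', 'import fastmcp, qdrant_client'], { stdio: 'pipe' });
  return { healthy: true, needsInstall: deps.status !== 0 };
}

console.log(probeVenv(join(process.cwd(), 'venv')));
```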
@@ -265,6 +298,7 @@ async function setupPythonEnvironment() {
         stdio: 'inherit'
       });
       if (result.error) throw result.error;
+      needsInstall = true; // Mark that we need to install dependencies
     } catch (venvError) {
       console.log('⚠️ Failed to create venv with python3, trying python...');
       try {
@@ -274,6 +308,7 @@ async function setupPythonEnvironment() {
           stdio: 'inherit'
         });
         if (result.error) throw result.error;
+        needsInstall = true; // Mark that we need to install dependencies
       } catch {
         console.log('❌ Failed to create virtual environment');
         console.log('📚 Fix: Install python3-venv package');
@@ -285,7 +320,6 @@ async function setupPythonEnvironment() {
   }
 
   // Setup paths for virtual environment
-  console.log('Setting up pip in virtual environment...');
   const venvPython = process.platform === 'win32'
     ? join(mcpPath, 'venv', 'Scripts', 'python.exe')
     : join(mcpPath, 'venv', 'bin', 'python');
@@ -293,15 +327,19 @@ async function setupPythonEnvironment() {
     ? join(mcpPath, 'venv', 'Scripts', 'pip.exe')
     : join(mcpPath, 'venv', 'bin', 'pip');
 
-  //
-  … (removed lines 297-304 of the old file; content not captured in this diff view)
+  // Only install dependencies if we just created a new venv
+  if (needsInstall) {
+    console.log('Setting up pip in virtual environment...');
+
+    // First, try to install certifi to help with SSL issues
+    console.log('Installing certificate handler...');
+    try {
+      safeExec(venvPip, [
+        'install', '--trusted-host', 'pypi.org', '--trusted-host', 'files.pythonhosted.org', 'certifi'
+      ], { cwd: mcpPath, stdio: 'pipe' });
+    } catch {
+      // Continue even if certifi fails
+    }
 
   // Upgrade pip and install wheel first
   try {
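
The certifi step is deliberately best-effort: the --trusted-host flags let pip reach the PyPI hosts even when the local certificate store is misconfigured, and a failure is swallowed so setup can continue. A standalone sketch of that step (the mcpPath below is a stand-in for the wizard's MCP server directory):

```js
// Sketch only: best-effort certifi install through the venv's pip.
const { spawnSync } = require('child_process');
const { join } = require('path');

const mcpPath = process.cwd(); // stand-in path; the wizard uses its own MCP server location
const venvPip = process.platform === 'win32'
  ? join(mcpPath, 'venv', 'Scripts', 'pip.exe')
  : join(mcpPath, 'venv', 'bin', 'pip');

const result = spawnSync(venvPip, [
  'install',
  '--trusted-host', 'pypi.org',
  '--trusted-host', 'files.pythonhosted.org',
  'certifi',
], { cwd: mcpPath, stdio: 'pipe' });

if (result.status !== 0) {
  // Mirror the wizard's behaviour: continue even if certifi fails.
  console.log('certifi install failed; continuing anyway');
}
```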
@@ -369,9 +407,9 @@ async function setupPythonEnvironment() {
       }
     }
 
-    … (removed lines 372-374 of the old file; content not captured in this diff view)
+    // Install script dependencies
+    console.log('Installing import script dependencies...');
+    try {
       safeExec(venvPip, [
         'install', '-r', join(scriptsPath, 'requirements.txt')
       ], { cwd: mcpPath, stdio: 'inherit' });
@@ -387,6 +425,7 @@ async function setupPythonEnvironment() {
       console.log(' You may need to install them manually later');
     }
   }
+  } // End of needsInstall block
 
   console.log('✅ Python environment setup complete');
   return true;
@@ -679,7 +718,7 @@ async function importConversations() {
 
   try {
     const pythonCmd = process.env.PYTHON_PATH || 'python3';
-    const importScript = join(projectRoot, 'scripts', 'import-conversations-
+    const importScript = join(projectRoot, 'scripts', 'import-conversations-unified.py');
 
     // Use the venv Python directly - platform specific
     let venvPython;
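
This one-line change goes hand in hand with the ten import-*.py scripts deleted in this release (see the file list at the top): the wizard now points at a single import-conversations-unified.py. A sketch of the invocation, with a hypothetical venv location since the wizard's actual mcpPath is not shown in this hunk:

```js
// Sketch only: run the unified importer through the venv's Python.
const { spawnSync } = require('child_process');
const { join } = require('path');

function runUnifiedImport(projectRoot, mcpPath) {
  const importScript = join(projectRoot, 'scripts', 'import-conversations-unified.py');
  const venvPython = process.platform === 'win32'
    ? join(mcpPath, 'venv', 'Scripts', 'python.exe') // layout matches setupPythonEnvironment()
    : join(mcpPath, 'venv', 'bin', 'python');
  return spawnSync(venvPython, [importScript], { stdio: 'inherit' });
}
```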
@@ -961,12 +1000,13 @@ async function showSystemDashboard() {
 async function setupWatcher() {
   console.log('\n⚙️ Setting up Continuous Import (Watcher)...');
 
-
+  // Check if Docker compose file exists
+  const dockerComposeFile = join(projectRoot, 'docker-compose.yaml');
 
-  // Check if watcher
+  // Check if Docker watcher is available
   try {
-    await fs.access(
-    console.log('✅
+    await fs.access(dockerComposeFile);
+    console.log('✅ Docker-based watcher available');
 
     // Watcher works with both local and cloud embeddings
     console.log(localMode ? '🏠 Watcher will use local embeddings' : '🌐 Watcher will use Voyage AI embeddings');
@@ -1079,15 +1119,15 @@ async function setupWatcher() {
       // Fallback to manual Python execution
       console.log('\n📝 To enable the watcher, run:');
       console.log(' cd claude-self-reflect');
-      console.log('
-      console.log(' python scripts/import-watcher.py &');
+      console.log(' docker compose --profile watch up -d');
     }
   } else {
     console.log('\n📝 You can enable the watcher later by running:');
     console.log(' docker compose --profile watch up -d');
   }
 } catch {
-    console.log('⚠️
+    console.log('⚠️ Docker compose not found');
+    console.log(' The watcher requires Docker to run continuously');
 }
}
 
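
The watcher path now assumes Docker end to end: the wizard checks for docker-compose.yaml and, in every branch, steers the user to the compose watch profile rather than the removed import-watcher.py. A sketch of that check plus the start command the wizard prints, assuming Docker Compose v2 is on PATH:

```js
// Sketch only: verify the compose file exists, then start the watch profile.
const { promises: fs } = require('fs');
const { join } = require('path');
const { spawnSync } = require('child_process');

async function startWatcher(projectRoot) {
  const composeFile = join(projectRoot, 'docker-compose.yaml');
  try {
    await fs.access(composeFile); // Docker-based watcher available
  } catch {
    console.log('⚠️ Docker compose not found; the watcher requires Docker to run continuously');
    return false;
  }
  // Same command the wizard prints for manual use.
  const result = spawnSync('docker', ['compose', '--profile', 'watch', 'up', '-d'],
    { cwd: projectRoot, stdio: 'inherit' });
  return result.status === 0;
}

startWatcher(process.cwd()).then(ok => process.exit(ok ? 0 : 1));
```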
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "claude-self-reflect",
-  "version": "2.3.6",
+  "version": "2.3.8",
   "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
   "keywords": [
     "claude",
@@ -47,4 +47,4 @@
     "access": "public",
     "registry": "https://registry.npmjs.org/"
   }
-}
+}

package/scripts/import-conversations-isolated.py
REMOVED
@@ -1,311 +0,0 @@
-#!/usr/bin/env python3
-"""
-Import Claude conversation logs with project isolation support.
-Each project gets its own collection for complete isolation.
-"""
-
-import json
-import os
-import glob
-import hashlib
-from datetime import datetime, timedelta
-from typing import List, Dict, Any, Set
-import logging
-from qdrant_client import QdrantClient
-from qdrant_client.models import (
-    VectorParams, Distance, PointStruct,
-    Filter, FieldCondition, MatchValue
-)
-from sentence_transformers import SentenceTransformer
-
-# Configuration
-QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
-LOGS_DIR = os.getenv("LOGS_DIR", "/logs")
-STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
-EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-BATCH_SIZE = int(os.getenv("BATCH_SIZE", "100"))
-ISOLATION_MODE = os.getenv("ISOLATION_MODE", "isolated") # isolated, shared, hybrid
-
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-
-class ProjectAwareImporter:
-    def __init__(self):
-        """Initialize the importer with Qdrant client and embedding model."""
-        self.client = QdrantClient(url=QDRANT_URL)
-        self.encoder = SentenceTransformer(EMBEDDING_MODEL)
-        self.imported_files = self.load_state()
-        self.project_collections: Set[str] = set()
-
-    def load_state(self) -> Dict[str, Set[str]]:
-        """Load the set of already imported files per project."""
-        if os.path.exists(STATE_FILE):
-            try:
-                with open(STATE_FILE, 'r') as f:
-                    data = json.load(f)
-                    # Convert to per-project tracking
-                    if isinstance(data.get('files'), list):
-                        # Legacy format - convert to new format
-                        return {'_legacy': set(data['files'])}
-                    else:
-                        # New format with per-project tracking
-                        return {k: set(v) for k, v in data.get('projects', {}).items()}
-            except Exception as e:
-                logger.error(f"Failed to load state: {e}")
-        return {}
-
-    def save_state(self):
-        """Save the set of imported files per project."""
-        os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-        with open(STATE_FILE, 'w') as f:
-            json.dump({
-                'projects': {k: list(v) for k, v in self.imported_files.items()},
-                'last_updated': datetime.now().isoformat(),
-                'mode': ISOLATION_MODE
-            }, f, indent=2)
-
-    def get_collection_name(self, project_name: str) -> str:
-        """Get collection name based on isolation mode."""
-        if ISOLATION_MODE == "isolated":
-            # Create project-specific collection name
-            project_hash = hashlib.md5(project_name.encode()).hexdigest()[:8]
-            return f"conv_{project_hash}"
-        else:
-            # Shared collection mode
-            return "conversations"
-
-    def setup_collection(self, project_name: str):
-        """Create or update the Qdrant collection for a project."""
-        collection_name = self.get_collection_name(project_name)
-
-        # Skip if already set up in this session
-        if collection_name in self.project_collections:
-            return collection_name
-
-        collections = self.client.get_collections().collections
-        exists = any(c.name == collection_name for c in collections)
-
-        if not exists:
-            logger.info(f"Creating collection: {collection_name} for project: {project_name}")
-            self.client.create_collection(
-                collection_name=collection_name,
-                vectors_config=VectorParams(
-                    size=384, # all-MiniLM-L6-v2 dimension
-                    distance=Distance.COSINE
-                )
-            )
-        else:
-            logger.info(f"Collection {collection_name} already exists for project: {project_name}")
-
-        self.project_collections.add(collection_name)
-        return collection_name
-
-    def extract_project_name(self, file_path: str) -> str:
-        """Extract project name from file path."""
-        # Expected path: /logs/<project-name>/<conversation-id>.jsonl
-        parts = file_path.split('/')
-        if len(parts) >= 3 and parts[-2] != 'logs':
-            return parts[-2]
-        return 'unknown'
-
-    def process_jsonl_file(self, file_path: str) -> List[Dict[str, Any]]:
-        """Extract messages from a JSONL file."""
-        messages = []
-
-        try:
-            with open(file_path, 'r') as f:
-                for line_num, line in enumerate(f, 1):
-                    try:
-                        data = json.loads(line.strip())
-
-                        # Extract message if present
-                        if 'message' in data and data['message']:
-                            msg = data['message']
-                            if 'role' in msg and 'content' in msg:
-                                # Handle content that might be an object
-                                content = msg['content']
-                                if isinstance(content, dict):
-                                    content = content.get('text', json.dumps(content))
-
-                                # Create message document
-                                messages.append({
-                                    'role': msg['role'],
-                                    'content': content,
-                                    'file_path': file_path,
-                                    'line_number': line_num,
-                                    'timestamp': data.get('timestamp', datetime.now().isoformat())
-                                })
-                    except json.JSONDecodeError:
-                        logger.warning(f"Failed to parse line {line_num} in {file_path}")
-                    except Exception as e:
-                        logger.error(f"Error processing line {line_num} in {file_path}: {e}")
-
-        except Exception as e:
-            logger.error(f"Failed to read file {file_path}: {e}")
-
-        return messages
-
-    def create_conversation_chunks(self, messages: List[Dict[str, Any]], chunk_size: int = 5) -> List[Dict[str, Any]]:
-        """Group messages into conversation chunks for better context."""
-        chunks = []
-
-        for i in range(0, len(messages), chunk_size):
-            chunk_messages = messages[i:i + chunk_size]
-
-            # Create a conversation summary
-            conversation_text = "\n\n".join([
-                f"{msg['role'].upper()}: {msg['content'][:500]}..."
-                if len(msg['content']) > 500 else f"{msg['role'].upper()}: {msg['content']}"
-                for msg in chunk_messages
-            ])
-
-            # Extract metadata
-            project_id = self.extract_project_name(chunk_messages[0]['file_path'])
-            conversation_id = os.path.basename(chunk_messages[0]['file_path']).replace('.jsonl', '')
-
-            chunks.append({
-                'id': hashlib.md5(f"{chunk_messages[0]['file_path']}_{i}".encode()).hexdigest(),
-                'text': conversation_text,
-                'metadata': {
-                    'project_id': project_id,
-                    'project_name': project_id, # Add both for compatibility
-                    'conversation_id': conversation_id,
-                    'chunk_index': i // chunk_size,
-                    'message_count': len(chunk_messages),
-                    'start_role': chunk_messages[0]['role'],
-                    'timestamp': chunk_messages[0]['timestamp'],
-                    'file_path': chunk_messages[0]['file_path']
-                }
-            })
-
-        return chunks
-
-    def import_to_qdrant(self, chunks: List[Dict[str, Any]], collection_name: str):
-        """Import conversation chunks to a specific Qdrant collection."""
-        if not chunks:
-            return
-
-        # Generate embeddings
-        texts = [chunk['text'] for chunk in chunks]
-        embeddings = self.encoder.encode(texts, show_progress_bar=True)
-
-        # Create points for Qdrant
-        points = []
-        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-            points.append(
-                PointStruct(
-                    id=chunk['id'],
-                    vector=embedding.tolist(),
-                    payload={
-                        'text': chunk['text'],
-                        **chunk['metadata']
-                    }
-                )
-            )
-
-        # Upload to Qdrant in batches
-        for i in range(0, len(points), BATCH_SIZE):
-            batch = points[i:i + BATCH_SIZE]
-            self.client.upsert(
-                collection_name=collection_name,
-                points=batch
-            )
-            logger.info(f"Uploaded batch of {len(batch)} points to {collection_name}")
-
-    def find_recent_files(self, days: int = 30) -> List[str]:
-        """Find JSONL files modified in the last N days."""
-        cutoff_time = datetime.now() - timedelta(days=days)
-        pattern = os.path.join(LOGS_DIR, "**", "*.jsonl")
-
-        recent_files = []
-        for file_path in glob.glob(pattern, recursive=True):
-            try:
-                mtime = os.path.getmtime(file_path)
-                if datetime.fromtimestamp(mtime) >= cutoff_time:
-                    recent_files.append(file_path)
-            except Exception as e:
-                logger.error(f"Error checking file {file_path}: {e}")
-
-        return recent_files
-
-    def run(self):
-        """Main import process with project isolation."""
-        logger.info(f"Starting conversation import to Qdrant (mode: {ISOLATION_MODE})")
-
-        # Find files to import
-        all_files = self.find_recent_files()
-        logger.info(f"Found {len(all_files)} total files")
-
-        # Group files by project
-        files_by_project: Dict[str, List[str]] = {}
-        for file_path in all_files:
-            project_name = self.extract_project_name(file_path)
-            if project_name not in files_by_project:
-                files_by_project[project_name] = []
-            files_by_project[project_name].append(file_path)
-
-        logger.info(f"Found {len(files_by_project)} projects to process")
-
-        total_chunks = 0
-        for project_name, project_files in files_by_project.items():
-            logger.info(f"\nProcessing project: {project_name}")
-
-            # Get imported files for this project
-            project_imported = self.imported_files.get(project_name, set())
-            new_files = [f for f in project_files if f not in project_imported]
-
-            if not new_files:
-                logger.info(f"No new files for project {project_name}")
-                continue
-
-            logger.info(f"Found {len(new_files)} new files for project {project_name}")
-
-            # Setup collection for this project
-            collection_name = self.setup_collection(project_name)
-
-            project_chunks = 0
-            for file_path in new_files:
-                logger.info(f"Processing: {file_path}")
-
-                # Extract messages
-                messages = self.process_jsonl_file(file_path)
-                if not messages:
-                    logger.warning(f"No messages found in {file_path}")
-                    continue
-
-                # Create conversation chunks
-                chunks = self.create_conversation_chunks(messages)
-
-                # Import to project-specific collection
-                self.import_to_qdrant(chunks, collection_name)
-
-                # Mark file as imported for this project
-                if project_name not in self.imported_files:
-                    self.imported_files[project_name] = set()
-                self.imported_files[project_name].add(file_path)
-                self.save_state()
-
-                project_chunks += len(chunks)
-                logger.info(f"Imported {len(chunks)} chunks from {file_path}")
-
-            total_chunks += project_chunks
-            logger.info(f"Project {project_name} complete: {project_chunks} chunks imported")
-
-        logger.info(f"\nImport complete: {total_chunks} total chunks imported")
-
-        # Show collection summary
-        logger.info("\nCollection summary:")
-        collections = self.client.get_collections().collections
-        for collection in collections:
-            if collection.name.startswith('conv_') or collection.name == 'conversations':
-                count = self.client.get_collection(collection.name).points_count
-                logger.info(f" {collection.name}: {count} points")
-
-def main():
-    """Entry point for the importer."""
-    importer = ProjectAwareImporter()
-    importer.run()
-
-if __name__ == "__main__":
-    main()