claude-self-reflect 2.3.6 → 2.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,430 +0,0 @@
- #!/usr/bin/env python3
- """
- Import Claude conversation logs from JSONL files into Qdrant vector database using Voyage AI embeddings.
- Clean implementation with 32k token context window.
- """
-
- import json
- import os
- import glob
- import time
- import hashlib
- from datetime import datetime
- from typing import List, Dict, Any, Optional
- import logging
- from qdrant_client import QdrantClient
- from qdrant_client.models import VectorParams, Distance, PointStruct
- import requests
- import backoff
-
- # Configuration
- QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
- LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
- STATE_FILE = os.getenv("STATE_FILE", os.path.expanduser("~/.claude-self-reflect/imported-files.json"))
- VOYAGE_API_KEY = os.getenv("VOYAGE_KEY-2") or os.getenv("VOYAGE_KEY")
- BATCH_SIZE = int(os.getenv("BATCH_SIZE", "50")) # Voyage supports batch embedding
- CHUNK_SIZE = int(os.getenv("CHUNK_SIZE", "10")) # Can use larger chunks with 32k token limit
- RATE_LIMIT_DELAY = 0.1 # 100ms between requests for faster imports
- EMBEDDING_MODEL = "voyage-3.5-lite"
- EMBEDDING_DIMENSIONS = 1024 # Voyage default dimensions
- VOYAGE_API_URL = "https://api.voyageai.com/v1/embeddings"
-
- # Set up logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(levelname)s - %(message)s'
- )
- logger = logging.getLogger(__name__)
-
- class VoyageConversationImporter:
-     def __init__(self):
-         """Initialize the importer with Qdrant and Voyage AI."""
-         if not VOYAGE_API_KEY:
-             raise ValueError("VOYAGE_KEY environment variable not set")
-
-         self.qdrant_client = QdrantClient(url=QDRANT_URL, timeout=60)
-         self.voyage_headers = {
-             "Authorization": f"Bearer {VOYAGE_API_KEY}",
-             "Content-Type": "application/json"
-         }
-         self.state = self._load_state()
-         self.total_imported = 0
-         self.total_errors = 0
-
-     def _load_state(self) -> Dict[str, Any]:
-         """Load or initialize state."""
-         if os.path.exists(STATE_FILE):
-             try:
-                 with open(STATE_FILE, 'r') as f:
-                     data = json.load(f)
-                     # Handle old format (files list) vs new format (projects dict)
-                     if 'files' in data and 'projects' not in data:
-                         # Convert old format to new format
-                         projects = {}
-                         for file_path in data.get('files', []):
-                             # Extract project name from file path
-                             parts = file_path.split('/')
-                             if len(parts) >= 3:
-                                 project_name = parts[2]
-                                 if project_name not in projects:
-                                     projects[project_name] = []
-                                 projects[project_name].append(file_path)
-                         return {
-                             "projects": projects,
-                             "last_updated": data.get('lastUpdated'),
-                             "total_imported": len(data.get('files', []))
-                         }
-                     # New format
-                     return data
-             except Exception as e:
-                 logger.error(f"Failed to load state: {e}")
-
-         return {
-             "projects": {},
-             "last_updated": None,
-             "total_imported": 0
-         }
-
-     def _save_state(self):
-         """Save current state to disk."""
-         try:
-             os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-             self.state["last_updated"] = datetime.now().isoformat()
-             self.state["total_imported"] = self.total_imported
-
-             with open(STATE_FILE, 'w') as f:
-                 json.dump(self.state, f, indent=2)
-         except Exception as e:
-             logger.error(f"Failed to save state: {e}")
-
-     def _get_collection_name(self, project_name: str) -> str:
-         """Generate collection name for project with Voyage suffix."""
-         project_hash = hashlib.md5(project_name.encode()).hexdigest()[:8]
-         return f"conv_{project_hash}_voyage"
-
-     def _ensure_collection(self, collection_name: str):
-         """Ensure the collection exists with the correct configuration for Voyage embeddings."""
-         collections = [col.name for col in self.qdrant_client.get_collections().collections]
-
-         if collection_name not in collections:
-             logger.info(f"Creating collection: {collection_name} with {EMBEDDING_DIMENSIONS} dimensions")
-             self.qdrant_client.create_collection(
-                 collection_name=collection_name,
-                 vectors_config=VectorParams(
-                     size=EMBEDDING_DIMENSIONS,
-                     distance=Distance.COSINE
-                 )
-             )
-         else:
-             # Verify dimensions
-             info = self.qdrant_client.get_collection(collection_name)
-             if info.config.params.vectors.size != EMBEDDING_DIMENSIONS:
-                 logger.error(f"Collection {collection_name} has wrong dimensions: {info.config.params.vectors.size}")
-                 raise ValueError(f"Dimension mismatch in collection {collection_name}")
-
-     @backoff.on_exception(
-         backoff.expo,
-         Exception,
-         max_tries=5,
-         on_backoff=lambda details: logger.warning(f"Backing off {details['wait']}s after {details['tries']} tries")
-     )
-     def _generate_embeddings(self, texts: List[str]) -> List[List[float]]:
-         """Generate embeddings using Voyage AI API with retry logic."""
-         try:
-             response = requests.post(
-                 VOYAGE_API_URL,
-                 headers=self.voyage_headers,
-                 json={
-                     "input": texts,
-                     "model": EMBEDDING_MODEL,
-                     "input_type": "document" # For document embeddings
-                 }
-             )
-
-             if response.status_code != 200:
-                 raise Exception(f"Voyage API error: {response.status_code} - {response.text}")
-
-             data = response.json()
-             return [item["embedding"] for item in data["data"]]
-         except Exception as e:
-             logger.error(f"Voyage API error: {e}")
-             raise
-
-     def _process_jsonl_file(self, file_path: str) -> List[Dict[str, Any]]:
-         """Extract messages from a JSONL file."""
-         messages = []
-
-         try:
-             with open(file_path, 'r', encoding='utf-8') as f:
-                 for line_num, line in enumerate(f, 1):
-                     line = line.strip()
-                     if not line:
-                         continue
-
-                     try:
-                         data = json.loads(line)
-
-                         # Extract message if present
-                         if 'message' in data and data['message']:
-                             msg = data['message']
-                             if msg.get('role') and msg.get('content'):
-                                 content = msg['content']
-                                 if isinstance(content, dict):
-                                     content = content.get('text', json.dumps(content))
-
-                                 messages.append({
-                                     'role': msg['role'],
-                                     'content': content,
-                                     'file_path': file_path,
-                                     'line_number': line_num,
-                                     'timestamp': data.get('timestamp', datetime.now().isoformat())
-                                 })
-                     except json.JSONDecodeError:
-                         logger.debug(f"Skipping invalid JSON at line {line_num}")
-                     except Exception as e:
-                         logger.error(f"Error processing line {line_num}: {e}")
-
-         except Exception as e:
-             logger.error(f"Failed to read file {file_path}: {e}")
-
-         return messages
-
-     def _create_conversation_chunks(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-         """Group messages into conversation chunks for better context."""
-         chunks = []
-
-         for i in range(0, len(messages), CHUNK_SIZE):
-             chunk_messages = messages[i:i + CHUNK_SIZE]
-
-             # Create conversation text - Voyage supports 32k tokens
-             # Rough estimate: ~4 chars per token, so ~128k chars max
-             # We'll use 100k chars to be safe
-             conversation_parts = []
-             total_chars = 0
-             max_chars = 100000 # Much larger limit with Voyage!
-
-             for msg in chunk_messages:
-                 role = msg['role'].upper()
-                 content = msg['content']
-
-                 # Only truncate extremely long messages
-                 if len(content) > 20000:
-                     # Keep first 15000 and last 5000 chars
-                     content = content[:15000] + "\n\n[... truncated ...]\n\n" + content[-5000:]
-
-                 part = f"{role}: {content}"
-
-                 # Check if adding this would exceed limit
-                 if total_chars + len(part) > max_chars:
-                     # For the last message, try to fit what we can
-                     remaining = max_chars - total_chars
-                     if remaining > 1000: # Only add if we can fit meaningful content
-                         part = f"{role}: {content[:remaining-100]}..."
-                         conversation_parts.append(part)
-                     break
-
-                 conversation_parts.append(part)
-                 total_chars += len(part) + 2 # +2 for newlines
-
-             conversation_text = "\n\n".join(conversation_parts)
-
-             # Extract metadata
-             project_name = os.path.basename(os.path.dirname(chunk_messages[0]['file_path']))
-             conversation_id = os.path.basename(chunk_messages[0]['file_path']).replace('.jsonl', '')
-
-             # Generate unique ID
-             chunk_id = hashlib.md5(
-                 f"{project_name}_{conversation_id}_{i}".encode()
-             ).hexdigest()
-
-             chunks.append({
-                 'id': chunk_id,
-                 'text': conversation_text,
-                 'metadata': {
-                     'project': project_name,
-                     'conversation_id': conversation_id,
-                     'chunk_index': i // CHUNK_SIZE,
-                     'message_count': len(chunk_messages),
-                     'start_role': chunk_messages[0]['role'],
-                     'timestamp': chunk_messages[0]['timestamp'],
-                     'file': os.path.basename(chunk_messages[0]['file_path'])
-                 }
-             })
-
-         return chunks
-
-     def _import_chunks_to_qdrant(self, chunks: List[Dict[str, Any]], collection_name: str):
-         """Import conversation chunks to Qdrant with batched Voyage embeddings."""
-         if not chunks:
-             return
-
-         # Process in batches
-         for i in range(0, len(chunks), BATCH_SIZE):
-             batch = chunks[i:i + BATCH_SIZE]
-             texts = [chunk['text'] for chunk in batch]
-
-             try:
-                 # Generate embeddings
-                 embeddings = self._generate_embeddings(texts)
-
-                 # Create points
-                 points = []
-                 for chunk, embedding in zip(batch, embeddings):
-                     point = PointStruct(
-                         id=chunk['id'],
-                         vector=embedding,
-                         payload={
-                             'text': chunk['text'][:2000], # Limit text size
-                             **chunk['metadata']
-                         }
-                     )
-                     points.append(point)
-
-                 # Upload to Qdrant
-                 self.qdrant_client.upsert(
-                     collection_name=collection_name,
-                     points=points
-                 )
-
-                 self.total_imported += len(points)
-                 logger.info(f"Imported batch of {len(points)} chunks (total: {self.total_imported})")
-
-                 # Brief delay between batches to stay within the Voyage API rate limit
-                 if i + BATCH_SIZE < len(chunks) and i % 100 == 0: # Only delay every 100 chunks
-                     logger.info(f"Waiting {RATE_LIMIT_DELAY}s for rate limit...")
-                     time.sleep(RATE_LIMIT_DELAY)
-
-             except Exception as e:
-                 logger.error(f"Failed to import batch: {e}")
-                 self.total_errors += 1
-                 # Continue with next batch instead of failing completely
-
-     def import_project(self, project_path: str) -> int:
-         """Import all JSONL files in a project directory."""
-         project_name = os.path.basename(project_path)
-         collection_name = self._get_collection_name(project_name)
-
-         logger.info(f"📁 Importing project: {project_name} to collection: {collection_name}")
-
-         # Ensure collection exists
-         self._ensure_collection(collection_name)
-
-         # Get list of JSONL files
-         jsonl_files = []
-         for file in os.listdir(project_path):
-             if file.endswith('.jsonl'):
-                 file_path = os.path.join(project_path, file)
-
-                 # Skip already imported files
-                 if (project_name in self.state["projects"] and
-                     file_path in self.state["projects"][project_name]):
-                     logger.debug(f"Skipping already imported: {file}")
-                     continue
-
-                 jsonl_files.append(file_path)
-
-         if not jsonl_files:
-             logger.info(f"No new files to import for {project_name}")
-             return 0
-
-         project_total = 0
-         for file_path in sorted(jsonl_files):
-             logger.info(f"Processing: {os.path.basename(file_path)}")
-
-             # Extract messages
-             messages = self._process_jsonl_file(file_path)
-             if not messages:
-                 logger.warning(f"No messages found in {file_path}")
-                 continue
-
-             # Create chunks
-             chunks = self._create_conversation_chunks(messages)
-
-             # Import to Qdrant
-             self._import_chunks_to_qdrant(chunks, collection_name)
-
-             # Mark file as imported
-             if project_name not in self.state["projects"]:
-                 self.state["projects"][project_name] = []
-             self.state["projects"][project_name].append(file_path)
-
-             project_total += len(chunks)
-
-             # Save state after each file
-             self._save_state()
-
-         logger.info(f"✅ Imported {project_total} chunks from {len(jsonl_files)} files")
-         return project_total
-
-     def import_all(self):
-         """Import all Claude projects."""
-         projects_dir = LOGS_DIR
-
-         if not os.path.exists(projects_dir):
-             logger.error(f"Claude projects directory not found: {projects_dir}")
-             logger.error("This usually means Claude Code hasn't created any projects yet.")
-             logger.error("Please open Claude Code and create a conversation first.")
-             return
-
-         # Get list of projects
-         projects = [
-             d for d in os.listdir(projects_dir)
-             if os.path.isdir(os.path.join(projects_dir, d)) and not d.startswith('.')
-         ]
-
-         logger.info(f"Found {len(projects)} projects to import")
-
-         # Import each project
-         start_time = time.time()
-         for idx, project_name in enumerate(sorted(projects), 1):
-             project_path = os.path.join(projects_dir, project_name)
-
-             try:
-                 logger.info(f"\n[{idx}/{len(projects)}] Processing: {project_name}")
-                 count = self.import_project(project_path)
-
-                 # Log progress
-                 imported_projects = len(self.state["projects"])
-                 progress = (imported_projects / len(projects)) * 100
-                 logger.info(
-                     f"Progress: {imported_projects}/{len(projects)} projects "
-                     f"({progress:.1f}%), Total chunks: {self.total_imported}"
-                 )
-
-             except Exception as e:
-                 logger.error(f"Failed to import project {project_name}: {e}")
-                 self.total_errors += 1
-                 continue
-
-         # Final summary
-         elapsed_time = time.time() - start_time
-         logger.info("=" * 60)
-         logger.info(f"Import completed in {elapsed_time:.1f} seconds!")
-         logger.info(f"Projects imported: {len(self.state['projects'])}/{len(projects)}")
-         logger.info(f"Total chunks: {self.total_imported}")
-         logger.info(f"Total errors: {self.total_errors}")
-
-         # Show collection summary
-         logger.info("\nCollection summary:")
-         for col in self.qdrant_client.get_collections().collections:
-             if col.name.endswith("_voyage"):
-                 info = self.qdrant_client.get_collection(col.name)
-                 logger.info(f" {col.name}: {info.points_count} points")
-
- def main():
-     """Main entry point."""
-     importer = VoyageConversationImporter()
-
-     if len(os.sys.argv) > 1:
-         # Import specific project
-         project_path = os.sys.argv[1]
-         if os.path.exists(project_path):
-             importer.import_project(project_path)
-         else:
-             logger.error(f"Project path not found: {project_path}")
-     else:
-         # Import all projects
-         importer.import_all()
-
- if __name__ == "__main__":
-     main()
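
For reference, the collections this removed script writes (named conv_<md5-prefix>_voyage, 1024-dimensional cosine vectors) can be queried back with the same Voyage endpoint, model, and naming scheme shown above. The sketch below is illustrative only and not part of the package: it assumes VOYAGE_KEY is set and Qdrant is reachable at the default URL, and the helper name search_project is made up for the example.

# Illustrative only: query a collection created by the removed Voyage importer.
# Assumptions (not from the package): Qdrant at localhost:6333, VOYAGE_KEY exported,
# and at least one project already imported with the script above.
import hashlib
import os

import requests
from qdrant_client import QdrantClient

QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
VOYAGE_API_URL = "https://api.voyageai.com/v1/embeddings"

def search_project(project_name: str, query: str, limit: int = 5):
    # Same collection-naming scheme as _get_collection_name() above.
    collection = f"conv_{hashlib.md5(project_name.encode()).hexdigest()[:8]}_voyage"

    # Embed the query with the same model used at import time; "query" is the
    # retrieval-side input_type counterpart to the "document" type used above.
    resp = requests.post(
        VOYAGE_API_URL,
        headers={
            "Authorization": f"Bearer {os.environ['VOYAGE_KEY']}",
            "Content-Type": "application/json",
        },
        json={"input": [query], "model": "voyage-3.5-lite", "input_type": "query"},
    )
    resp.raise_for_status()
    vector = resp.json()["data"][0]["embedding"]

    # Plain qdrant-client search; the payload carries the chunk text and metadata.
    client = QdrantClient(url=QDRANT_URL)
    return client.search(collection_name=collection, query_vector=vector, limit=limit)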
@@ -1,240 +0,0 @@
- #!/usr/bin/env python3
- """
- Import Claude conversation logs from JSONL files into Qdrant vector database.
- Simplified version focusing on semantic search without complex entity extraction.
- """
-
- import json
- import os
- import glob
- from datetime import datetime, timedelta
- from typing import List, Dict, Any
- import logging
- from qdrant_client import QdrantClient
- from qdrant_client.models import (
-     VectorParams, Distance, PointStruct,
-     Filter, FieldCondition, MatchValue
- )
- from sentence_transformers import SentenceTransformer
- import hashlib
-
- # Configuration
- QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
- COLLECTION_NAME = os.getenv("COLLECTION_NAME", "conversations")
- LOGS_DIR = os.getenv("LOGS_DIR", "/logs")
- STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
- EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
- BATCH_SIZE = int(os.getenv("BATCH_SIZE", "100"))
-
- # Set up logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
- logger = logging.getLogger(__name__)
-
- class ConversationImporter:
-     def __init__(self):
-         """Initialize the importer with Qdrant client and embedding model."""
-         self.client = QdrantClient(url=QDRANT_URL)
-         self.encoder = SentenceTransformer(EMBEDDING_MODEL)
-         self.imported_files = self.load_state()
-
-     def load_state(self) -> set:
-         """Load the set of already imported files."""
-         if os.path.exists(STATE_FILE):
-             try:
-                 with open(STATE_FILE, 'r') as f:
-                     data = json.load(f)
-                     return set(data.get('files', []))
-             except Exception as e:
-                 logger.error(f"Failed to load state: {e}")
-         return set()
-
-     def save_state(self):
-         """Save the set of imported files."""
-         os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-         with open(STATE_FILE, 'w') as f:
-             json.dump({
-                 'files': list(self.imported_files),
-                 'last_updated': datetime.now().isoformat()
-             }, f)
-
-     def setup_collection(self):
-         """Create or update the Qdrant collection."""
-         collections = self.client.get_collections().collections
-         exists = any(c.name == COLLECTION_NAME for c in collections)
-
-         if not exists:
-             logger.info(f"Creating collection: {COLLECTION_NAME}")
-             self.client.create_collection(
-                 collection_name=COLLECTION_NAME,
-                 vectors_config=VectorParams(
-                     size=384, # all-MiniLM-L6-v2 dimension
-                     distance=Distance.COSINE
-                 )
-             )
-         else:
-             logger.info(f"Collection {COLLECTION_NAME} already exists")
-
-     def process_jsonl_file(self, file_path: str) -> List[Dict[str, Any]]:
-         """Extract messages from a JSONL file."""
-         messages = []
-
-         try:
-             with open(file_path, 'r') as f:
-                 for line_num, line in enumerate(f, 1):
-                     try:
-                         data = json.loads(line.strip())
-
-                         # Extract message if present
-                         if 'message' in data and data['message']:
-                             msg = data['message']
-                             if 'role' in msg and 'content' in msg:
-                                 # Handle content that might be an object
-                                 content = msg['content']
-                                 if isinstance(content, dict):
-                                     content = content.get('text', json.dumps(content))
-
-                                 # Create message document
-                                 messages.append({
-                                     'role': msg['role'],
-                                     'content': content,
-                                     'file_path': file_path,
-                                     'line_number': line_num,
-                                     'timestamp': data.get('timestamp', datetime.now().isoformat())
-                                 })
-                     except json.JSONDecodeError:
-                         logger.warning(f"Failed to parse line {line_num} in {file_path}")
-                     except Exception as e:
-                         logger.error(f"Error processing line {line_num} in {file_path}: {e}")
-
-         except Exception as e:
-             logger.error(f"Failed to read file {file_path}: {e}")
-
-         return messages
-
-     def create_conversation_chunks(self, messages: List[Dict[str, Any]], chunk_size: int = 5) -> List[Dict[str, Any]]:
-         """Group messages into conversation chunks for better context."""
-         chunks = []
-
-         for i in range(0, len(messages), chunk_size):
-             chunk_messages = messages[i:i + chunk_size]
-
-             # Create a conversation summary
-             conversation_text = "\n\n".join([
-                 f"{msg['role'].upper()}: {msg['content'][:500]}..."
-                 if len(msg['content']) > 500 else f"{msg['role'].upper()}: {msg['content']}"
-                 for msg in chunk_messages
-             ])
-
-             # Extract metadata
-             project_id = os.path.basename(os.path.dirname(os.path.dirname(chunk_messages[0]['file_path'])))
-             conversation_id = os.path.basename(chunk_messages[0]['file_path']).replace('.jsonl', '')
-
-             chunks.append({
-                 'id': hashlib.md5(f"{chunk_messages[0]['file_path']}_{i}".encode()).hexdigest(),
-                 'text': conversation_text,
-                 'metadata': {
-                     'project_id': project_id,
-                     'conversation_id': conversation_id,
-                     'chunk_index': i // chunk_size,
-                     'message_count': len(chunk_messages),
-                     'start_role': chunk_messages[0]['role'],
-                     'timestamp': chunk_messages[0]['timestamp'],
-                     'file_path': chunk_messages[0]['file_path']
-                 }
-             })
-
-         return chunks
-
-     def import_to_qdrant(self, chunks: List[Dict[str, Any]]):
-         """Import conversation chunks to Qdrant."""
-         if not chunks:
-             return
-
-         # Generate embeddings
-         texts = [chunk['text'] for chunk in chunks]
-         embeddings = self.encoder.encode(texts, show_progress_bar=True)
-
-         # Create points for Qdrant
-         points = []
-         for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-             points.append(
-                 PointStruct(
-                     id=chunk['id'],
-                     vector=embedding.tolist(),
-                     payload={
-                         'text': chunk['text'],
-                         **chunk['metadata']
-                     }
-                 )
-             )
-
-         # Upload to Qdrant in batches
-         for i in range(0, len(points), BATCH_SIZE):
-             batch = points[i:i + BATCH_SIZE]
-             self.client.upsert(
-                 collection_name=COLLECTION_NAME,
-                 points=batch
-             )
-             logger.info(f"Uploaded batch of {len(batch)} points")
-
-     def find_recent_files(self, days: int = 30) -> List[str]:
-         """Find JSONL files modified in the last N days."""
-         cutoff_time = datetime.now() - timedelta(days=days)
-         pattern = os.path.join(LOGS_DIR, "**", "*.jsonl")
-
-         recent_files = []
-         for file_path in glob.glob(pattern, recursive=True):
-             try:
-                 mtime = os.path.getmtime(file_path)
-                 if datetime.fromtimestamp(mtime) >= cutoff_time:
-                     recent_files.append(file_path)
-             except Exception as e:
-                 logger.error(f"Error checking file {file_path}: {e}")
-
-         return recent_files
-
-     def run(self):
-         """Main import process."""
-         logger.info("Starting conversation import to Qdrant")
-
-         # Setup collection
-         self.setup_collection()
-
-         # Find files to import
-         all_files = self.find_recent_files()
-         new_files = [f for f in all_files if f not in self.imported_files]
-
-         logger.info(f"Found {len(all_files)} total files, {len(new_files)} new files to import")
-
-         total_chunks = 0
-         for file_path in new_files:
-             logger.info(f"Processing: {file_path}")
-
-             # Extract messages
-             messages = self.process_jsonl_file(file_path)
-             if not messages:
-                 logger.warning(f"No messages found in {file_path}")
-                 continue
-
-             # Create conversation chunks
-             chunks = self.create_conversation_chunks(messages)
-
-             # Import to Qdrant
-             self.import_to_qdrant(chunks)
-
-             # Mark file as imported
-             self.imported_files.add(file_path)
-             self.save_state()
-
-             total_chunks += len(chunks)
-             logger.info(f"Imported {len(chunks)} chunks from {file_path}")
-
-         logger.info(f"Import complete: {total_chunks} total chunks imported from {len(new_files)} files")
-
- def main():
-     """Entry point for the importer."""
-     importer = ConversationImporter()
-     importer.run()
-
- if __name__ == "__main__":
-     main()
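
Likewise, the "conversations" collection created by this simpler removed importer (384-dimensional all-MiniLM-L6-v2 vectors) can be queried back with the same local model. Again a minimal, illustrative sketch rather than shipped code: it assumes Qdrant at the default URL and the model available locally, and search_conversations is a hypothetical helper name.

# Illustrative only: query the "conversations" collection written by the removed
# MiniLM importer. Assumptions (not from the package): Qdrant at localhost:6333
# and the sentence-transformers model downloadable or cached.
import os

from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "conversations")

def search_conversations(query: str, limit: int = 5):
    # Encode the query with the same 384-dimensional model used at import time.
    encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    vector = encoder.encode(query).tolist()

    # Cosine search over the chunk vectors; payloads hold text and metadata.
    client = QdrantClient(url=QDRANT_URL)
    return client.search(collection_name=COLLECTION_NAME, query_vector=vector, limit=limit)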