claude-self-reflect 2.7.3 → 2.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,6 +42,7 @@ services:
     environment:
       - QDRANT_URL=http://qdrant:6333
       - STATE_FILE=/config/imported-files.json
+      - LOGS_DIR=/logs
       - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
      - VOYAGE_KEY=${VOYAGE_KEY:-}
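The new LOGS_DIR variable tells the importer running inside the container where the mounted conversation logs live. As a minimal sketch (not part of the package) of how a consuming script resolves it, mirroring the defaults used by the scripts later in this diff:

    import os
    from pathlib import Path

    # Inside the container LOGS_DIR=/logs (set above); outside Docker the scripts
    # fall back to the host-side Claude projects directory.
    logs_dir = Path(os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects")))
    print(f"Importing conversations from: {logs_dir}")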
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "claude-self-reflect",
-  "version": "2.7.3",
+  "version": "2.7.4",
   "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
   "keywords": [
     "claude",
@@ -24,7 +24,8 @@ from qdrant_client.models import Filter, FieldCondition, MatchValue
 # Configuration
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
-STATE_FILE = os.getenv("STATE_FILE", "./config/delta-update-state.json")
+# Use /config path if running in Docker, otherwise use ./config
+STATE_FILE = os.getenv("STATE_FILE", "/config/delta-update-state.json" if os.path.exists("/config") else "./config/delta-update-state.json")
 PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true"
 DRY_RUN = os.getenv("DRY_RUN", "false").lower() == "true"
 DAYS_TO_UPDATE = int(os.getenv("DAYS_TO_UPDATE", "7"))
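The new default prefers the mounted /config volume when it exists (i.e. when running in the Docker container) and otherwise keeps the local ./config path. A minimal sketch of the same fallback as a reusable helper; the helper name and wrapping are illustrative, not part of the package:

    import os

    def resolve_state_file(filename: str = "delta-update-state.json") -> str:
        """Honour STATE_FILE if set, else prefer the mounted /config volume,
        else fall back to ./config (same logic as the changed line above)."""
        explicit = os.getenv("STATE_FILE")
        if explicit:
            return explicit
        base = "/config" if os.path.exists("/config") else "./config"
        return os.path.join(base, filename)

    print(resolve_state_file())  # e.g. "./config/delta-update-state.json" outside Docker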
@@ -432,7 +433,7 @@ async def main_async():
     logger.info("=== Delta Update Complete ===")
     logger.info(f"Successfully updated: {success_count} conversations")
     logger.info(f"Failed: {failed_count} conversations")
-    logger.info(f"Total conversations in state: {len(state['updated_conversations'])}")
+    logger.info(f"Total conversations in state: {len(state.get('updated_conversations', {}))}")
 
 def main():
     """Entry point."""
@@ -0,0 +1,374 @@
+#!/usr/bin/env python3
+"""
+Streaming importer with true line-by-line processing to prevent OOM.
+Processes JSONL files without loading entire file into memory.
+"""
+
+import json
+import os
+import sys
+import hashlib
+import gc
+from pathlib import Path
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+import logging
+
+# Add the project root to the Python path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from qdrant_client import QdrantClient
+from qdrant_client.models import PointStruct, Distance, VectorParams
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Environment variables
+QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
+STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
+PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true"
+VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")
+MAX_CHUNK_SIZE = int(os.getenv("MAX_CHUNK_SIZE", "50"))  # Messages per chunk
+
+# Initialize Qdrant client
+client = QdrantClient(url=QDRANT_URL)
+
+# Initialize embedding provider
+embedding_provider = None
+embedding_dimension = None
+
+if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
+    logger.info("Using local embeddings (fastembed)")
+    from fastembed import TextEmbedding
+    embedding_provider = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    embedding_dimension = 384
+    collection_suffix = "local"
+else:
+    logger.info("Using Voyage AI embeddings")
+    import voyageai
+    embedding_provider = voyageai.Client(api_key=VOYAGE_API_KEY)
+    embedding_dimension = 1024
+    collection_suffix = "voyage"
+
+def normalize_project_name(project_name: str) -> str:
+    """Normalize project name for consistency."""
+    return project_name.replace("-Users-ramakrishnanannaswamy-projects-", "").replace("-", "_").lower()
+
+def get_collection_name(project_path: Path) -> str:
+    """Generate collection name from project path."""
+    normalized = normalize_project_name(project_path.name)
+    name_hash = hashlib.md5(normalized.encode()).hexdigest()[:8]
+    return f"conv_{name_hash}_{collection_suffix}"
+
+def ensure_collection(collection_name: str):
+    """Ensure collection exists with correct configuration."""
+    collections = client.get_collections().collections
+    if not any(c.name == collection_name for c in collections):
+        logger.info(f"Creating collection: {collection_name}")
+        client.create_collection(
+            collection_name=collection_name,
+            vectors_config=VectorParams(size=embedding_dimension, distance=Distance.COSINE)
+        )
+
+def generate_embeddings(texts: List[str]) -> List[List[float]]:
+    """Generate embeddings for texts."""
+    if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
+        embeddings = list(embedding_provider.passage_embed(texts))
+        return [emb.tolist() if hasattr(emb, 'tolist') else emb for emb in embeddings]
+    else:
+        response = embedding_provider.embed(texts, model="voyage-3")
+        return response.embeddings
+
+def process_and_upload_chunk(messages: List[Dict[str, Any]], chunk_index: int,
+                             conversation_id: str, created_at: str,
+                             metadata: Dict[str, Any], collection_name: str,
+                             project_path: Path) -> int:
+    """Process and immediately upload a single chunk."""
+    if not messages:
+        return 0
+
+    # Extract text content
+    texts = []
+    for msg in messages:
+        role = msg.get("role", "unknown")
+        content = msg.get("content", "")
+        if content:
+            texts.append(f"{role.upper()}: {content}")
+
+    if not texts:
+        return 0
+
+    chunk_text = "\n".join(texts)
+
+    try:
+        # Generate embedding
+        embeddings = generate_embeddings([chunk_text])
+
+        # Create point ID
+        point_id = hashlib.md5(
+            f"{conversation_id}_{chunk_index}".encode()
+        ).hexdigest()[:16]
+
+        # Create payload
+        payload = {
+            "text": chunk_text,
+            "conversation_id": conversation_id,
+            "chunk_index": chunk_index,
+            "timestamp": created_at,
+            "project": normalize_project_name(project_path.name),
+            "start_role": messages[0].get("role", "unknown") if messages else "unknown",
+            "message_count": len(messages)
+        }
+
+        # Add metadata
+        if metadata:
+            payload.update(metadata)
+
+        # Create point
+        point = PointStruct(
+            id=int(point_id, 16) % (2**63),
+            vector=embeddings[0],
+            payload=payload
+        )
+
+        # Upload immediately
+        client.upsert(
+            collection_name=collection_name,
+            points=[point],
+            wait=True
+        )
+
+        return 1
+
+    except Exception as e:
+        logger.error(f"Error processing chunk {chunk_index}: {e}")
+        return 0
+
+def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str]:
+    """Extract metadata in a single pass, return metadata and first timestamp."""
+    metadata = {
+        "files_analyzed": [],
+        "files_edited": [],
+        "tools_used": [],
+        "concepts": []
+    }
+
+    first_timestamp = None
+
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                if not line.strip():
+                    continue
+
+                try:
+                    data = json.loads(line)
+
+                    # Get timestamp from first valid entry
+                    if first_timestamp is None and 'timestamp' in data:
+                        first_timestamp = data.get('timestamp')
+
+                    # Extract tool usage from messages
+                    if 'message' in data and data['message']:
+                        msg = data['message']
+                        if msg.get('content'):
+                            content = msg['content']
+                            if isinstance(content, list):
+                                for item in content:
+                                    if isinstance(item, dict) and item.get('type') == 'tool_use':
+                                        tool_name = item.get('name', '')
+                                        if tool_name and tool_name not in metadata['tools_used']:
+                                            metadata['tools_used'].append(tool_name)
+
+                                        # Extract file references
+                                        if 'input' in item:
+                                            input_data = item['input']
+                                            if isinstance(input_data, dict):
+                                                if 'file_path' in input_data:
+                                                    file_ref = input_data['file_path']
+                                                    if file_ref not in metadata['files_analyzed']:
+                                                        metadata['files_analyzed'].append(file_ref)
+                                                if 'path' in input_data:
+                                                    file_ref = input_data['path']
+                                                    if file_ref not in metadata['files_analyzed']:
+                                                        metadata['files_analyzed'].append(file_ref)
+
+                except json.JSONDecodeError:
+                    continue
+                except Exception:
+                    continue
+
+    except Exception as e:
+        logger.warning(f"Error extracting metadata: {e}")
+
+    return metadata, first_timestamp or datetime.now().isoformat()
+
+def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
+    """Stream import a single JSONL file without loading it into memory."""
+    logger.info(f"Streaming import of {jsonl_file.name}")
+
+    # Extract metadata in first pass (lightweight)
+    metadata, created_at = extract_metadata_single_pass(str(jsonl_file))
+
+    # Stream messages and process in chunks
+    chunk_buffer = []
+    chunk_index = 0
+    total_chunks = 0
+    conversation_id = jsonl_file.stem
+
+    try:
+        with open(jsonl_file, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f, 1):
+                line = line.strip()
+                if not line:
+                    continue
+
+                try:
+                    data = json.loads(line)
+
+                    # Skip non-message lines
+                    if data.get('type') == 'summary':
+                        continue
+
+                    # Extract message if present
+                    if 'message' in data and data['message']:
+                        msg = data['message']
+                        if msg.get('role') and msg.get('content'):
+                            # Extract content
+                            content = msg['content']
+                            if isinstance(content, list):
+                                text_parts = []
+                                for item in content:
+                                    if isinstance(item, dict) and item.get('type') == 'text':
+                                        text_parts.append(item.get('text', ''))
+                                    elif isinstance(item, str):
+                                        text_parts.append(item)
+                                content = '\n'.join(text_parts)
+
+                            if content:
+                                chunk_buffer.append({
+                                    'role': msg['role'],
+                                    'content': content
+                                })
+
+                    # Process chunk when buffer reaches MAX_CHUNK_SIZE
+                    if len(chunk_buffer) >= MAX_CHUNK_SIZE:
+                        chunks = process_and_upload_chunk(
+                            chunk_buffer, chunk_index, conversation_id,
+                            created_at, metadata, collection_name, project_path
+                        )
+                        total_chunks += chunks
+                        chunk_buffer = []
+                        chunk_index += 1
+
+                        # Force garbage collection after each chunk
+                        gc.collect()
+
+                        # Log progress
+                        if chunk_index % 10 == 0:
+                            logger.info(f"Processed {chunk_index} chunks from {jsonl_file.name}")
+
+                except json.JSONDecodeError:
+                    logger.debug(f"Skipping invalid JSON at line {line_num}")
+                except Exception as e:
+                    logger.debug(f"Error processing line {line_num}: {e}")
+
+        # Process remaining messages
+        if chunk_buffer:
+            chunks = process_and_upload_chunk(
+                chunk_buffer, chunk_index, conversation_id,
+                created_at, metadata, collection_name, project_path
+            )
+            total_chunks += chunks
+
+        logger.info(f"Imported {total_chunks} chunks from {jsonl_file.name}")
+        return total_chunks
+
+    except Exception as e:
+        logger.error(f"Failed to import {jsonl_file}: {e}")
+        return 0
+
+def load_state() -> dict:
+    """Load import state."""
+    if os.path.exists(STATE_FILE):
+        try:
+            with open(STATE_FILE, 'r') as f:
+                return json.load(f)
+        except:
+            pass
+    return {"imported_files": {}}
+
+def save_state(state: dict):
+    """Save import state."""
+    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+    with open(STATE_FILE, 'w') as f:
+        json.dump(state, f, indent=2)
+
+def should_import_file(file_path: Path, state: dict) -> bool:
+    """Check if file should be imported."""
+    file_str = str(file_path)
+    if file_str in state.get("imported_files", {}):
+        file_info = state["imported_files"][file_str]
+        last_modified = file_path.stat().st_mtime
+        if file_info.get("last_modified") == last_modified:
+            logger.info(f"Skipping unchanged file: {file_path.name}")
+            return False
+    return True
+
+def update_file_state(file_path: Path, state: dict, chunks: int):
+    """Update state for imported file."""
+    file_str = str(file_path)
+    state["imported_files"][file_str] = {
+        "imported_at": datetime.now().isoformat(),
+        "last_modified": file_path.stat().st_mtime,
+        "chunks": chunks
+    }
+
+def main():
+    """Main import function."""
+    # Load state
+    state = load_state()
+    logger.info(f"Loaded state with {len(state.get('imported_files', {}))} previously imported files")
+
+    # Find all projects
+    logs_dir = Path(os.getenv("LOGS_DIR", "/logs"))
+    project_dirs = [d for d in logs_dir.iterdir() if d.is_dir()]
+    logger.info(f"Found {len(project_dirs)} projects to import")
+
+    total_imported = 0
+
+    for project_dir in project_dirs:
+        # Get collection name
+        collection_name = get_collection_name(project_dir)
+        logger.info(f"Importing project: {project_dir.name} -> {collection_name}")
+
+        # Ensure collection exists
+        ensure_collection(collection_name)
+
+        # Find JSONL files
+        jsonl_files = sorted(project_dir.glob("*.jsonl"))
+
+        # Limit files per cycle if specified
+        max_files = int(os.getenv("MAX_FILES_PER_CYCLE", "1000"))
+        jsonl_files = jsonl_files[:max_files]
+
+        for jsonl_file in jsonl_files:
+            if should_import_file(jsonl_file, state):
+                chunks = stream_import_file(jsonl_file, collection_name, project_dir)
+                if chunks > 0:
+                    update_file_state(jsonl_file, state, chunks)
+                    save_state(state)
+                    total_imported += 1
+
+                # Force GC after each file
+                gc.collect()
+
+    logger.info(f"Import complete: processed {total_imported} files")
+
+if __name__ == "__main__":
+    main()
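The memory bound in this importer comes from buffering at most MAX_CHUNK_SIZE messages before each embed-and-upsert, then clearing the buffer. The following is an illustrative, self-contained sketch of just that buffering pattern (the chunked helper and the fake messages are not part of the package; the real script interleaves this with embedding and Qdrant upserts):

    from typing import Dict, Iterator, List

    def chunked(messages: Iterator[Dict[str, str]], size: int = 50) -> Iterator[List[Dict[str, str]]]:
        """Yield lists of at most `size` messages so only one chunk is held in memory."""
        buffer: List[Dict[str, str]] = []
        for msg in messages:
            buffer.append(msg)
            if len(buffer) >= size:
                yield buffer
                buffer = []
        if buffer:  # flush the remainder, as the importer does after the file loop
            yield buffer

    fake_messages = ({"role": "user", "content": f"message {i}"} for i in range(120))
    print([len(chunk) for chunk in chunked(fake_messages, 50)])  # [50, 50, 20]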
@@ -57,7 +57,9 @@ else:
 
 def normalize_project_name(project_name: str) -> str:
     """Normalize project name for consistency."""
-    return project_name.replace("-Users-ramakrishnanannaswamy-projects-", "").replace("-", "_").lower()
+    # For compatibility with delta-metadata-update, just use the project name as-is
+    # This ensures collection names match between import and delta update scripts
+    return project_name
 
 def get_collection_name(project_path: Path) -> str:
     """Generate collection name from project path."""
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+Quick import script for current project's latest conversations.
+Designed for PreCompact hook integration - targets <10 second imports.
+"""
+
+import os
+import sys
+import json
+import subprocess
+from datetime import datetime, timedelta
+from pathlib import Path
+import logging
+
+# Configuration
+LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
+STATE_FILE = os.getenv("STATE_FILE", os.path.expanduser("~/.claude-self-reflect-state.json"))
+HOURS_BACK = int(os.getenv("IMPORT_HOURS_BACK", "2"))  # Only import last 2 hours by default
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+def load_state():
+    """Load import state from file."""
+    if os.path.exists(STATE_FILE):
+        try:
+            with open(STATE_FILE, 'r') as f:
+                return json.load(f)
+        except:
+            return {}
+    return {}
+
+def save_state(state):
+    """Save import state to file."""
+    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+    with open(STATE_FILE, 'w') as f:
+        json.dump(state, f, indent=2)
+
+def get_project_from_cwd():
+    """Detect project from current working directory."""
+    cwd = os.getcwd()
+    # Convert path to project name format used in logs
+    # Claude logs use format: -Users-username-path-to-project
+    project_name = cwd.replace('/', '-')
+    # Keep the leading dash as that's how Claude stores it
+    if not project_name.startswith('-'):
+        project_name = '-' + project_name
+    return project_name
+
+def get_recent_files(project_path: Path, hours_back: int):
+    """Get JSONL files modified in the last N hours."""
+    cutoff_time = datetime.now() - timedelta(hours=hours_back)
+    recent_files = []
+
+    for jsonl_file in project_path.glob("*.jsonl"):
+        mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime)
+        if mtime > cutoff_time:
+            recent_files.append(jsonl_file)
+
+    return sorted(recent_files, key=lambda f: f.stat().st_mtime, reverse=True)
+
+def main():
+    """Main quick import function."""
+    start_time = datetime.now()
+
+    # Detect current project
+    project_name = get_project_from_cwd()
+    project_path = Path(LOGS_DIR) / project_name
+
+    if not project_path.exists():
+        logger.warning(f"Project logs not found: {project_path}")
+        logger.info("Make sure you're in a project directory with Claude conversations.")
+        return
+
+    logger.info(f"Quick importing latest conversations for: {project_name}")
+
+    # Get recent files
+    recent_files = get_recent_files(project_path, HOURS_BACK)
+    logger.info(f"Found {len(recent_files)} files modified in last {HOURS_BACK} hours")
+
+    if not recent_files:
+        logger.info("No recent conversations to import")
+        return
+
+    # For now, just call the unified importer with the specific project
+    # This is a temporary solution until we implement incremental imports
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    unified_script = os.path.join(script_dir, "import-conversations-unified.py")
+
+    # Set environment to only process this project
+    env = os.environ.copy()
+    env['LOGS_DIR'] = str(project_path.parent)
+    env['IMPORT_PROJECT'] = project_name
+
+    try:
+        # Run the unified importer for just this project
+        result = subprocess.run(
+            [sys.executable, unified_script],
+            env=env,
+            capture_output=True,
+            text=True,
+            timeout=60  # 60 second timeout
+        )
+
+        if result.returncode == 0:
+            logger.info("Quick import completed successfully")
+        else:
+            logger.error(f"Import failed: {result.stderr}")
+
+    except subprocess.TimeoutExpired:
+        logger.warning("Import timed out after 60 seconds")
+    except Exception as e:
+        logger.error(f"Error during import: {e}")
+
+    # Report timing
+    elapsed = (datetime.now() - start_time).total_seconds()
+    logger.info(f"Quick import completed in {elapsed:.1f} seconds")
+
+if __name__ == "__main__":
+    main()
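The project is inferred from the working directory by replacing path separators with dashes, which is how Claude names its per-project log folders. A quick illustration of the mapping performed by get_project_from_cwd above, applied to a hypothetical path instead of os.getcwd():

    def project_name_for(path: str) -> str:
        # Same transformation as get_project_from_cwd, but for an explicit path.
        name = path.replace('/', '-')
        return name if name.startswith('-') else '-' + name

    print(project_name_for("/Users/alice/projects/myapp"))  # -Users-alice-projects-myapp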
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+Import old format JSONL files from Claude conversations.
+These files have a different structure with type/summary fields instead of messages.
+"""
+
+import json
+import sys
+from pathlib import Path
+import hashlib
+import uuid
+from datetime import datetime
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+from fastembed import TextEmbedding
+import logging
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+def import_old_format_project(project_dir: Path, project_path: str = None):
+    """Import old format JSONL files from a project directory."""
+
+    # Initialize
+    client = QdrantClient(url='http://localhost:6333')
+    model = TextEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2', max_length=512)
+
+    # Determine project path from directory name if not provided
+    if not project_path:
+        # Convert -Users-username-projects-projectname back to path
+        dir_name = project_dir.name
+        project_path = '/' + dir_name.strip('-').replace('-', '/')
+
+    # Create collection name
+    project_hash = hashlib.md5(project_path.encode()).hexdigest()[:8]
+    collection_name = f'conv_{project_hash}_local'
+
+    logger.info(f'Project: {project_path}')
+    logger.info(f'Collection: {collection_name}')
+
+    # Create collection if needed
+    try:
+        client.get_collection(collection_name)
+        logger.info('Collection exists')
+    except:
+        client.create_collection(
+            collection_name=collection_name,
+            vectors_config=VectorParams(size=384, distance=Distance.COSINE)
+        )
+        logger.info('Created collection')
+
+    # Process all JSONL files
+    jsonl_files = list(project_dir.glob('*.jsonl'))
+    logger.info(f'Found {len(jsonl_files)} files to import')
+
+    total_points = 0
+    for file_path in jsonl_files:
+        logger.info(f'Processing {file_path.name}...')
+        points_batch = []
+
+        with open(file_path, 'r', encoding='utf-8') as f:
+            conversation_text = []
+            file_timestamp = file_path.stat().st_mtime
+
+            for line_num, line in enumerate(f, 1):
+                try:
+                    data = json.loads(line)
+                    msg_type = data.get('type', '')
+
+                    # Extract text content based on type
+                    content = None
+                    if msg_type == 'summary' and data.get('summary'):
+                        content = f"[Conversation Summary] {data['summary']}"
+                    elif msg_type == 'user' and data.get('summary'):
+                        content = f"User: {data['summary']}"
+                    elif msg_type == 'assistant' and data.get('summary'):
+                        content = f"Assistant: {data['summary']}"
+                    elif msg_type in ['user', 'assistant']:
+                        # Try to get content from other fields
+                        if 'content' in data:
+                            content = f"{msg_type.title()}: {data['content']}"
+                        elif 'text' in data:
+                            content = f"{msg_type.title()}: {data['text']}"
+
+                    if content:
+                        conversation_text.append(content)
+
+                    # Create chunks every 5 messages or at end
+                    if len(conversation_text) >= 5:
+                        chunk_text = '\n\n'.join(conversation_text)
+                        if chunk_text.strip():
+                            # Generate embedding
+                            embedding = list(model.embed([chunk_text[:2000]]))[0]  # Limit to 2000 chars
+
+                            point = PointStruct(
+                                id=str(uuid.uuid4()),
+                                vector=embedding.tolist(),
+                                payload={
+                                    'content': chunk_text[:1000],  # Store first 1000 chars
+                                    'full_content': chunk_text[:4000],  # Store more for context
+                                    'project_path': project_path,
+                                    'file_path': str(file_path),
+                                    'file_name': file_path.name,
+                                    'conversation_id': file_path.stem,
+                                    'chunk_index': len(points_batch),
+                                    'timestamp': file_timestamp,
+                                    'type': 'conversation_chunk'
+                                }
+                            )
+                            points_batch.append(point)
+                            conversation_text = []
+
+                except json.JSONDecodeError:
+                    logger.warning(f'Invalid JSON at line {line_num} in {file_path.name}')
+                except Exception as e:
+                    logger.warning(f'Error processing line {line_num}: {e}')
+
+            # Handle remaining text
+            if conversation_text:
+                chunk_text = '\n\n'.join(conversation_text)
+                if chunk_text.strip():
+                    embedding = list(model.embed([chunk_text[:2000]]))[0]
+
+                    point = PointStruct(
+                        id=str(uuid.uuid4()),
+                        vector=embedding.tolist(),
+                        payload={
+                            'content': chunk_text[:1000],
+                            'full_content': chunk_text[:4000],
+                            'project_path': project_path,
+                            'file_path': str(file_path),
+                            'file_name': file_path.name,
+                            'conversation_id': file_path.stem,
+                            'chunk_index': len(points_batch),
+                            'timestamp': file_timestamp,
+                            'type': 'conversation_chunk'
+                        }
+                    )
+                    points_batch.append(point)
+
+        # Upload batch
+        if points_batch:
+            client.upsert(collection_name=collection_name, points=points_batch)
+            logger.info(f' Uploaded {len(points_batch)} chunks from {file_path.name}')
+            total_points += len(points_batch)
+
+    # Verify
+    info = client.get_collection(collection_name)
+    logger.info(f'\nImport complete!')
+    logger.info(f'Collection {collection_name} now has {info.points_count} points')
+    logger.info(f'Added {total_points} new points in this import')
+
+    return collection_name, total_points
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python import-old-format.py <project-directory> [project-path]")
+        print("Example: python import-old-format.py ~/.claude/projects/-Users-me-projects-myapp /Users/me/projects/myapp")
+        sys.exit(1)
+
+    project_dir = Path(sys.argv[1]).expanduser()
+    project_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+    if not project_dir.exists():
+        print(f"Error: Directory {project_dir} does not exist")
+        sys.exit(1)
+
+    import_old_format_project(project_dir, project_path)
+
+if __name__ == "__main__":
+    main()
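When no explicit project path is given, the script reconstructs it by re-inserting slashes into the log directory name. A short illustration of that mapping (paths are hypothetical); note that a hyphen inside a real project name is also turned into a slash, which is presumably why the optional [project-path] argument exists:

    def path_from_dir_name(dir_name: str) -> str:
        # Same reconstruction as in import_old_format_project above.
        return '/' + dir_name.strip('-').replace('-', '/')

    print(path_from_dir_name("-Users-alice-projects-myapp"))   # /Users/alice/projects/myapp
    print(path_from_dir_name("-Users-alice-projects-my-app"))  # /Users/alice/projects/my/app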