claude-self-reflect 2.3.2 → 2.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/reflection-specialist.md +56 -40
- package/README.md +34 -10
- package/installer/setup-wizard.js +187 -108
- package/mcp-server/pyproject.toml +6 -5
- package/mcp-server/src/server.py +112 -25
- package/package.json +1 -1
- package/scripts/import-conversations-unified.py +269 -0
- package/scripts/import-recent-only.py +5 -1
- package/scripts/import-watcher.py +1 -1
package/mcp-server/src/server.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Claude Reflect MCP Server with Memory Decay."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import asyncio
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any, Optional, List, Dict, Union
|
|
6
7
|
from datetime import datetime
|
|
@@ -30,8 +31,25 @@ DECAY_WEIGHT = float(os.getenv('DECAY_WEIGHT', '0.3'))
|
|
|
30
31
|
DECAY_SCALE_DAYS = float(os.getenv('DECAY_SCALE_DAYS', '90'))
|
|
31
32
|
USE_NATIVE_DECAY = os.getenv('USE_NATIVE_DECAY', 'false').lower() == 'true'
|
|
32
33
|
|
|
33
|
-
#
|
|
34
|
-
|
|
34
|
+
# Embedding configuration
|
|
35
|
+
PREFER_LOCAL_EMBEDDINGS = os.getenv('PREFER_LOCAL_EMBEDDINGS', 'false').lower() == 'true'
|
|
36
|
+
EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
|
|
37
|
+
|
|
38
|
+
# Initialize Voyage AI client (only if not using local embeddings)
|
|
39
|
+
voyage_client = None
|
|
40
|
+
if not PREFER_LOCAL_EMBEDDINGS and VOYAGE_API_KEY:
|
|
41
|
+
voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
|
|
42
|
+
|
|
43
|
+
# Initialize local embedding model if needed
|
|
44
|
+
local_embedding_model = None
|
|
45
|
+
if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
|
|
46
|
+
try:
|
|
47
|
+
from fastembed import TextEmbedding
|
|
48
|
+
local_embedding_model = TextEmbedding(model_name=EMBEDDING_MODEL)
|
|
49
|
+
print(f"[DEBUG] Initialized local embedding model: {EMBEDDING_MODEL}")
|
|
50
|
+
except ImportError:
|
|
51
|
+
print("[ERROR] FastEmbed not available. Install with: pip install fastembed")
|
|
52
|
+
raise
|
|
35
53
|
|
|
36
54
|
# Debug environment loading
|
|
37
55
|
print(f"[DEBUG] Environment variables loaded:")
|
|
@@ -39,6 +57,8 @@ print(f"[DEBUG] ENABLE_MEMORY_DECAY: {ENABLE_MEMORY_DECAY}")
|
|
|
39
57
|
print(f"[DEBUG] USE_NATIVE_DECAY: {USE_NATIVE_DECAY}")
|
|
40
58
|
print(f"[DEBUG] DECAY_WEIGHT: {DECAY_WEIGHT}")
|
|
41
59
|
print(f"[DEBUG] DECAY_SCALE_DAYS: {DECAY_SCALE_DAYS}")
|
|
60
|
+
print(f"[DEBUG] PREFER_LOCAL_EMBEDDINGS: {PREFER_LOCAL_EMBEDDINGS}")
|
|
61
|
+
print(f"[DEBUG] EMBEDDING_MODEL: {EMBEDDING_MODEL}")
|
|
42
62
|
print(f"[DEBUG] env_path: {env_path}")
|
|
43
63
|
|
|
44
64
|
|
|
@@ -63,22 +83,50 @@ mcp = FastMCP(
|
|
|
63
83
|
# Create Qdrant client
|
|
64
84
|
qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
|
|
65
85
|
|
|
66
|
-
async def
|
|
67
|
-
"""Get all Voyage
|
|
86
|
+
async def get_all_collections() -> List[str]:
    """Return the names of every Qdrant collection this server searches.

    A collection qualifies when its name ends in ``_voyage`` or ``_local``
    or when it starts with ``reflections``.
    """
    response = await qdrant_client.get_collections()
    searchable: List[str] = []
    for collection in response.collections:
        name = collection.name
        if name.endswith(('_voyage', '_local')) or name.startswith('reflections'):
            searchable.append(name)
    return searchable
|
|
70
92
|
|
|
71
93
|
async def generate_embedding(text: str) -> List[float]:
    """Generate a query embedding for ``text`` with the configured provider.

    The local model is used when ``PREFER_LOCAL_EMBEDDINGS`` is set or when no
    Voyage client was created; otherwise Voyage AI (``voyage-3-large``,
    ``input_type="query"``) is used.

    Args:
        text: The text to embed.

    Returns:
        The embedding vector for ``text``.

    Raises:
        ValueError: If the local provider is selected but the local model
            was never initialized.
    """
    use_local = PREFER_LOCAL_EMBEDDINGS or not voyage_client
    if use_local and not local_embedding_model:
        raise ValueError("Local embedding model not initialized")

    # Both providers expose synchronous APIs. Run them in the default executor
    # so a slow model call or HTTP round-trip does not stall the event loop.
    # get_running_loop() is the supported way to obtain the loop in a coroutine.
    loop = asyncio.get_running_loop()

    if use_local:
        vectors = await loop.run_in_executor(
            None, lambda: list(local_embedding_model.embed([text]))
        )
        return vectors[0].tolist()

    result = await loop.run_in_executor(
        None,
        lambda: voyage_client.embed(
            texts=[text],
            model="voyage-3-large",
            input_type="query"
        ),
    )
    return result.embeddings[0]
|
|
114
|
+
|
|
115
|
+
def get_embedding_dimension() -> int:
    """Return the vector size matching the active embedding provider.

    Returns 384 when local embeddings are in use and 1024 when Voyage AI
    (voyage-3-large) is in use.
    """
    use_local = PREFER_LOCAL_EMBEDDINGS or not voyage_client
    # NOTE(review): the local size is fixed at 384 (all-MiniLM-L6-v2) and does
    # not consult EMBEDDING_MODEL - confirm any non-default model is also 384-d.
    return 384 if use_local else 1024
|
|
123
|
+
|
|
124
|
+
def get_collection_suffix() -> str:
    """Return the collection-name suffix for the active embedding provider.

    ``"_local"`` is returned when local embeddings are preferred or no Voyage
    client exists; ``"_voyage"`` otherwise.
    """
    use_local = PREFER_LOCAL_EMBEDDINGS or not voyage_client
    return "_local" if use_local else "_voyage"
|
|
82
130
|
|
|
83
131
|
# Register tools
|
|
84
132
|
@mcp.tool()
|
|
@@ -115,17 +163,18 @@ async def reflect_on_past(
|
|
|
115
163
|
# Generate embedding
|
|
116
164
|
query_embedding = await generate_embedding(query)
|
|
117
165
|
|
|
118
|
-
# Get all
|
|
119
|
-
|
|
120
|
-
if not
|
|
166
|
+
# Get all collections
|
|
167
|
+
all_collections = await get_all_collections()
|
|
168
|
+
if not all_collections:
|
|
121
169
|
return "No conversation collections found. Please import conversations first."
|
|
122
170
|
|
|
123
|
-
await ctx.debug(f"Searching across {len(
|
|
171
|
+
await ctx.debug(f"Searching across {len(all_collections)} collections")
|
|
172
|
+
await ctx.debug(f"Using {'local' if PREFER_LOCAL_EMBEDDINGS or not voyage_client else 'Voyage AI'} embeddings")
|
|
124
173
|
|
|
125
174
|
all_results = []
|
|
126
175
|
|
|
127
176
|
# Search each collection
|
|
128
|
-
for collection_name in
|
|
177
|
+
for collection_name in all_collections:
|
|
129
178
|
try:
|
|
130
179
|
if should_use_decay and USE_NATIVE_DECAY:
|
|
131
180
|
# Use native Qdrant decay
|
|
@@ -179,7 +228,7 @@ async def reflect_on_past(
|
|
|
179
228
|
timestamp=point.payload.get('timestamp', datetime.now().isoformat()),
|
|
180
229
|
role=point.payload.get('start_role', point.payload.get('role', 'unknown')),
|
|
181
230
|
excerpt=(point.payload.get('text', '')[:500] + '...'),
|
|
182
|
-
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '')),
|
|
231
|
+
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '').replace('_local', '')),
|
|
183
232
|
conversation_id=point.payload.get('conversation_id'),
|
|
184
233
|
collection_name=collection_name
|
|
185
234
|
))
|
|
@@ -240,7 +289,7 @@ async def reflect_on_past(
|
|
|
240
289
|
timestamp=point.payload.get('timestamp', datetime.now().isoformat()),
|
|
241
290
|
role=point.payload.get('start_role', point.payload.get('role', 'unknown')),
|
|
242
291
|
excerpt=(point.payload.get('text', '')[:500] + '...'),
|
|
243
|
-
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '')),
|
|
292
|
+
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '').replace('_local', '')),
|
|
244
293
|
conversation_id=point.payload.get('conversation_id'),
|
|
245
294
|
collection_name=collection_name
|
|
246
295
|
))
|
|
@@ -261,7 +310,7 @@ async def reflect_on_past(
|
|
|
261
310
|
timestamp=point.payload.get('timestamp', datetime.now().isoformat()),
|
|
262
311
|
role=point.payload.get('start_role', point.payload.get('role', 'unknown')),
|
|
263
312
|
excerpt=(point.payload.get('text', '')[:500] + '...'),
|
|
264
|
-
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '')),
|
|
313
|
+
project_name=point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '').replace('_local', '')),
|
|
265
314
|
conversation_id=point.payload.get('conversation_id'),
|
|
266
315
|
collection_name=collection_name
|
|
267
316
|
))
|
|
@@ -302,8 +351,46 @@ async def store_reflection(
|
|
|
302
351
|
"""Store an important insight or reflection for future reference."""
|
|
303
352
|
|
|
304
353
|
try:
|
|
305
|
-
#
|
|
306
|
-
|
|
354
|
+
# Create reflections collection name
|
|
355
|
+
collection_name = f"reflections{get_collection_suffix()}"
|
|
356
|
+
|
|
357
|
+
# Ensure collection exists
|
|
358
|
+
try:
|
|
359
|
+
collection_info = await qdrant_client.get_collection(collection_name)
|
|
360
|
+
except:
|
|
361
|
+
# Create collection if it doesn't exist
|
|
362
|
+
await qdrant_client.create_collection(
|
|
363
|
+
collection_name=collection_name,
|
|
364
|
+
vectors_config=VectorParams(
|
|
365
|
+
size=get_embedding_dimension(),
|
|
366
|
+
distance=Distance.COSINE
|
|
367
|
+
)
|
|
368
|
+
)
|
|
369
|
+
await ctx.debug(f"Created reflections collection: {collection_name}")
|
|
370
|
+
|
|
371
|
+
# Generate embedding for the reflection
|
|
372
|
+
embedding = await generate_embedding(content)
|
|
373
|
+
|
|
374
|
+
# Create point with metadata
|
|
375
|
+
point_id = datetime.now().timestamp()
|
|
376
|
+
point = PointStruct(
|
|
377
|
+
id=int(point_id),
|
|
378
|
+
vector=embedding,
|
|
379
|
+
payload={
|
|
380
|
+
"text": content,
|
|
381
|
+
"tags": tags,
|
|
382
|
+
"timestamp": datetime.now().isoformat(),
|
|
383
|
+
"type": "reflection",
|
|
384
|
+
"role": "user_reflection"
|
|
385
|
+
}
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
# Store in Qdrant
|
|
389
|
+
await qdrant_client.upsert(
|
|
390
|
+
collection_name=collection_name,
|
|
391
|
+
points=[point]
|
|
392
|
+
)
|
|
393
|
+
|
|
307
394
|
tags_str = ', '.join(tags) if tags else 'none'
|
|
308
395
|
return f"Reflection stored successfully with tags: {tags_str}"
|
|
309
396
|
|
package/package.json
CHANGED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Unified import script that supports both local and Voyage AI embeddings.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
import json
|
|
9
|
+
import glob
|
|
10
|
+
import hashlib
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from typing import List, Dict, Any
|
|
13
|
+
import logging
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from qdrant_client import QdrantClient
|
|
17
|
+
from qdrant_client.models import (
|
|
18
|
+
VectorParams, Distance, PointStruct,
|
|
19
|
+
Filter, FieldCondition, MatchValue
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
# Runtime configuration (every value can be overridden via the environment)
# ---------------------------------------------------------------------------
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
LOGS_DIR = os.getenv("LOGS_DIR", "/logs")
STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "100"))
PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "false").lower() == "true"
VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")

# Logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Embedding provider selection
# ---------------------------------------------------------------------------
# Voyage AI is used only when an API key is present and local embeddings were
# not explicitly requested; in every other case FastEmbed runs locally.
embedding_provider = None
embedding_dimension = None
collection_suffix = None

if VOYAGE_API_KEY and not PREFER_LOCAL_EMBEDDINGS:
    logger.info("Using Voyage AI embeddings")
    import voyageai
    voyage_client = voyageai.Client(api_key=VOYAGE_API_KEY)
    embedding_dimension = 1024
    collection_suffix = "_voyage"
else:
    logger.info("Using local embeddings (fastembed)")
    from fastembed import TextEmbedding
    embedding_provider = TextEmbedding("sentence-transformers/all-MiniLM-L6-v2")
    embedding_dimension = 384
    collection_suffix = "_local"

# Qdrant connection shared by the import functions below
client = QdrantClient(url=QDRANT_URL)
|
|
56
|
+
|
|
57
|
+
def generate_embeddings(texts: List[str]) -> List[List[float]]:
    """Generate document embeddings for a list of texts.

    Uses the local FastEmbed provider when ``PREFER_LOCAL_EMBEDDINGS`` is set
    or no Voyage API key is configured; otherwise calls Voyage AI
    (``voyage-3-large``, ``input_type="document"``).

    Args:
        texts: The texts to embed.

    Returns:
        One embedding vector per input text, in input order. An empty input
        yields an empty list without contacting any provider.
    """
    # Nothing to embed: skip the provider round-trip entirely.
    if not texts:
        return []

    if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
        # FastEmbed yields numpy arrays; convert them to plain lists.
        return [vector.tolist() for vector in embedding_provider.passage_embed(texts)]

    response = voyage_client.embed(
        texts=texts,
        model="voyage-3-large",
        input_type="document"
    )
    return response.embeddings
|
|
71
|
+
|
|
72
|
+
def _flatten_content(content: Any) -> Any:
    """Collapse structured (list) message content into one space-joined string.

    Plain strings and ``{"type": "text", ...}`` items contribute their text;
    every other item is ignored. Non-list content is returned unchanged.
    """
    if not isinstance(content, list):
        return content

    parts = []
    for item in content:
        if isinstance(item, str):
            parts.append(item)
        elif isinstance(item, dict) and item.get("type") == "text":
            # A text item may carry ``"text": None``; treat it as empty so the
            # join below cannot fail on a non-string.
            parts.append(str(item.get("text") or ""))
    return " ".join(parts)


def chunk_conversation(messages: List[Dict[str, Any]], chunk_size: int = 10) -> List[Dict[str, Any]]:
    """Split a conversation into chunks of at most ``chunk_size`` messages.

    Args:
        messages: Message dicts with ``role`` and ``content`` keys. ``content``
            may be a string or a list of strings / ``{"type": "text"}`` items.
        chunk_size: Maximum number of messages per chunk; must be >= 1.

    Returns:
        One dict per non-empty chunk with the keys ``text`` (``"ROLE: content"``
        lines joined by newlines), ``messages`` (the raw messages of the
        chunk), ``chunk_index`` (position of the chunk in the conversation)
        and ``start_role`` (role of the chunk's first message). Chunks whose
        messages carry no text are omitted, so indexes may have gaps.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    chunks = []
    for start in range(0, len(messages), chunk_size):
        window = messages[start:start + chunk_size]

        lines = []
        for msg in window:
            # A missing or null role falls back to "unknown" instead of crashing.
            role = str(msg.get("role") or "unknown")
            content = _flatten_content(msg.get("content", ""))
            if content:
                lines.append(f"{role.upper()}: {content}")

        if not lines:
            continue

        chunks.append({
            "text": "\n".join(lines),
            "messages": window,
            "chunk_index": start // chunk_size,
            "start_role": window[0].get("role") or "unknown",
        })

    return chunks
|
|
107
|
+
|
|
108
|
+
def _extract_message(data: Dict[str, Any]):
    """Return a ``{'role', 'content'}`` dict for one JSONL record, or ``None``.

    Summary records, records without a message, and messages lacking a role or
    any text content all yield ``None``.
    """
    # Skip non-message lines (summaries, etc.)
    if data.get('type') == 'summary':
        return None

    msg = data.get('message')
    if not msg or not (msg.get('role') and msg.get('content')):
        return None

    content = msg['content']
    if isinstance(content, list):
        # Structured content: keep plain strings and "text" items only.
        text_parts = []
        for item in content:
            if isinstance(item, dict) and item.get('type') == 'text':
                text_parts.append(item.get('text', ''))
            elif isinstance(item, str):
                text_parts.append(item)
        content = '\n'.join(text_parts)

    if not content:
        return None
    return {'role': msg['role'], 'content': content}


def _read_conversation(jsonl_file: Path) -> tuple:
    """Parse one JSONL transcript into ``(messages, created_at)``.

    ``created_at`` is the first non-null ``timestamp`` value found in the file
    (``None`` when there is none). Lines that are blank, not valid JSON, or
    that fail while being processed are skipped and logged.
    """
    messages = []
    created_at = None

    with open(jsonl_file, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            try:
                data = json.loads(line)

                # Timestamp is taken before the summary check on purpose: any
                # record type may supply it.
                if created_at is None and 'timestamp' in data:
                    created_at = data.get('timestamp')

                message = _extract_message(data)
                if message is not None:
                    messages.append(message)
            except json.JSONDecodeError:
                logger.debug(f"Skipping invalid JSON at line {line_num}")
            except Exception as e:
                logger.error(f"Error processing line {line_num}: {e}")

    return messages, created_at


def _chunk_point_id(conversation_id: str, chunk_index: int) -> int:
    """Derive a deterministic integer point ID below 2**63 for a chunk."""
    digest = hashlib.md5(f"{conversation_id}_{chunk_index}".encode()).hexdigest()[:16]
    return int(digest, 16) % (2**63)


def import_project(project_path: Path, collection_name: str) -> int:
    """Import every ``*.jsonl`` conversation of a project into Qdrant.

    The target collection is created (cosine distance, ``embedding_dimension``
    wide) when it does not exist yet. Each file is parsed, chunked, embedded
    and upserted in batches of ``BATCH_SIZE``. A failure in one file is logged
    with its traceback and does not stop the remaining files.

    Args:
        project_path: Directory containing the project's JSONL transcripts.
        collection_name: Name of the Qdrant collection to write to.

    Returns:
        The number of chunks successfully upserted across all files.
    """
    jsonl_files = list(project_path.glob("*.jsonl"))

    if not jsonl_files:
        logger.warning(f"No JSONL files found in {project_path}")
        return 0

    # Create the collection on first use.
    existing = {c.name for c in client.get_collections().collections}
    if collection_name not in existing:
        logger.info(f"Creating collection: {collection_name}")
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(
                size=embedding_dimension,
                distance=Distance.COSINE
            )
        )

    total_chunks = 0

    for jsonl_file in jsonl_files:
        logger.info(f"Processing file: {jsonl_file.name}")
        try:
            messages, created_at = _read_conversation(jsonl_file)
            if not messages:
                continue

            if created_at is None:
                created_at = datetime.now().isoformat()
            conversation_id = jsonl_file.stem

            chunks = chunk_conversation(messages)
            if not chunks:
                continue

            for batch_start in range(0, len(chunks), BATCH_SIZE):
                batch = chunks[batch_start:batch_start + BATCH_SIZE]
                embeddings = generate_embeddings([chunk["text"] for chunk in batch])

                # zip() would silently drop chunks on a short response.
                if len(embeddings) != len(batch):
                    raise RuntimeError(
                        f"Expected {len(batch)} embeddings, got {len(embeddings)}"
                    )

                points = [
                    PointStruct(
                        id=_chunk_point_id(conversation_id, chunk["chunk_index"]),
                        vector=embedding,
                        payload={
                            "text": chunk["text"],
                            "conversation_id": conversation_id,
                            "chunk_index": chunk["chunk_index"],
                            "timestamp": created_at,
                            "project": project_path.name,
                            "start_role": chunk["start_role"]
                        }
                    )
                    for chunk, embedding in zip(batch, embeddings)
                ]

                client.upsert(
                    collection_name=collection_name,
                    points=points
                )

                # Counted per batch so earlier batches still count if a later one fails.
                total_chunks += len(points)

            logger.info(f"Imported {len(chunks)} chunks from {jsonl_file.name}")

        except Exception as e:
            # logger.exception records the traceback along with the message.
            logger.exception(f"Failed to import {jsonl_file}: {e}")

    return total_chunks
|
|
237
|
+
|
|
238
|
+
def main():
    """Import every project directory found under ``LOGS_DIR``.

    Each sub-directory is treated as one project and imported into its own
    collection, named ``conv_<first 8 hex chars of md5(dir name)><suffix>``.
    Returns early, after logging, when the logs directory is missing or holds
    no project directories.
    """
    logs_root = Path(LOGS_DIR)
    if not logs_root.exists():
        logger.error(f"Logs directory not found: {LOGS_DIR}")
        return

    # Every sub-directory of the logs root is one project.
    project_dirs = [entry for entry in logs_root.iterdir() if entry.is_dir()]
    if not project_dirs:
        logger.warning("No project directories found")
        return

    logger.info(f"Found {len(project_dirs)} projects to import")

    total_imported = 0
    for project_dir in project_dirs:
        name_digest = hashlib.md5(project_dir.name.encode()).hexdigest()[:8]
        collection_name = f"conv_{name_digest}{collection_suffix}"

        logger.info(f"Importing project: {project_dir.name} -> {collection_name}")
        imported = import_project(project_dir, collection_name)
        total_imported += imported
        logger.info(f"Imported {imported} chunks from {project_dir.name}")

    logger.info(f"Import complete! Total chunks imported: {total_imported}")


if __name__ == "__main__":
    main()
|
|
@@ -23,7 +23,11 @@ for file in os.listdir(project_path):
|
|
|
23
23
|
print(f"Found {len(recent_files)} recent files to import")
|
|
24
24
|
|
|
25
25
|
# Set environment variable
|
|
26
|
-
|
|
26
|
+
# VOYAGE_KEY must be set as environment variable
|
|
27
|
+
if not os.getenv("VOYAGE_KEY"):
|
|
28
|
+
print("Error: VOYAGE_KEY environment variable not set")
|
|
29
|
+
print("Please set: export VOYAGE_KEY='your-voyage-api-key'")
|
|
30
|
+
sys.exit(1)
|
|
27
31
|
|
|
28
32
|
# Import the whole project (the script will handle individual files)
|
|
29
33
|
os.system(f"python {import_script} {project_path}")
|
|
@@ -19,7 +19,7 @@ WATCH_DIR = os.getenv("WATCH_DIR", "/logs")
|
|
|
19
19
|
STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
|
|
20
20
|
WATCH_INTERVAL = int(os.getenv("WATCH_INTERVAL", "60")) # seconds
|
|
21
21
|
IMPORT_DELAY = int(os.getenv("IMPORT_DELAY", "30")) # Wait before importing new files
|
|
22
|
-
IMPORTER_SCRIPT = "/scripts/import-conversations-
|
|
22
|
+
IMPORTER_SCRIPT = "/scripts/import-conversations-unified.py"
|
|
23
23
|
|
|
24
24
|
# Set up logging
|
|
25
25
|
logging.basicConfig(
|