claude-self-reflect 2.7.2 → 2.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/mcp-server/src/server.py
CHANGED

@@ -381,13 +381,19 @@ async def reflect_on_past(
             if c.startswith(f"conv_{project_hash}_")
         ]
 
+        # Always include reflections collections when searching a specific project
+        reflections_collections = [c for c in all_collections if c.startswith('reflections')]
+
         if not project_collections:
             # Fall back to searching all collections but filtering by project metadata
             await ctx.debug(f"No collections found for project {target_project}, will filter by metadata")
             collections_to_search = all_collections
         else:
             await ctx.debug(f"Found {len(project_collections)} collections for project {target_project}")
-            collections_to_search = project_collections
+            # Include both project collections and reflections
+            collections_to_search = project_collections + reflections_collections
+            # Remove duplicates
+            collections_to_search = list(set(collections_to_search))
     else:
         collections_to_search = all_collections
 
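
In effect, a project-scoped search now always folds the global reflections collections into its candidate set. A minimal sketch of that selection logic, with hypothetical collection names:

# Sketch of the selection logic above; collection names are hypothetical.
all_collections = ["conv_ab12cd34_local", "conv_ef56ab78_local", "reflections_local"]
project_hash = "ab12cd34"

project_collections = [c for c in all_collections if c.startswith(f"conv_{project_hash}_")]
reflections_collections = [c for c in all_collections if c.startswith("reflections")]

# Project hits plus reflections, deduplicated (set() does not preserve order).
collections_to_search = list(set(project_collections + reflections_collections))
print(collections_to_search)  # e.g. ['reflections_local', 'conv_ab12cd34_local']
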
@@ -524,12 +530,26 @@ async def reflect_on_past(
                # Check project filter if we're searching all collections but want specific project
                point_project = point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '').replace('_local', ''))
 
+                # Special handling for reflections - they're global by default but can have project context
+                is_reflection_collection = collection_name.startswith('reflections')
+
                # Handle project matching - check if the target project name appears at the end of the stored project path
-                if target_project != 'all' and not project_collections:
+                if target_project != 'all' and not project_collections and not is_reflection_collection:
                    # The stored project name is like "-Users-username-projects-ShopifyMCPMockShop"
                    # We want to match just "ShopifyMCPMockShop"
                    if not point_project.endswith(f"-{target_project}") and point_project != target_project:
                        continue  # Skip results from other projects
+
+                # For reflections with project context, optionally filter by project
+                if is_reflection_collection and target_project != 'all' and 'project' in point.payload:
+                    # Only filter if the reflection has project metadata
+                    reflection_project = point.payload.get('project', '')
+                    if reflection_project and not (
+                        reflection_project == target_project or
+                        reflection_project.endswith(f"/{target_project}") or
+                        reflection_project.endswith(f"-{target_project}")
+                    ):
+                        continue  # Skip reflections from other projects
 
                all_results.append(SearchResult(
                    id=str(point.id),

@@ -604,12 +624,26 @@ async def reflect_on_past(
                # Check project filter if we're searching all collections but want specific project
                point_project = point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '').replace('_local', ''))
 
+                # Special handling for reflections - they're global by default but can have project context
+                is_reflection_collection = collection_name.startswith('reflections')
+
                # Handle project matching - check if the target project name appears at the end of the stored project path
-                if target_project != 'all' and not project_collections:
+                if target_project != 'all' and not project_collections and not is_reflection_collection:
                    # The stored project name is like "-Users-username-projects-ShopifyMCPMockShop"
                    # We want to match just "ShopifyMCPMockShop"
                    if not point_project.endswith(f"-{target_project}") and point_project != target_project:
                        continue  # Skip results from other projects
+
+                # For reflections with project context, optionally filter by project
+                if is_reflection_collection and target_project != 'all' and 'project' in point.payload:
+                    # Only filter if the reflection has project metadata
+                    reflection_project = point.payload.get('project', '')
+                    if reflection_project and not (
+                        reflection_project == target_project or
+                        reflection_project.endswith(f"/{target_project}") or
+                        reflection_project.endswith(f"-{target_project}")
+                    ):
+                        continue  # Skip reflections from other projects
 
                all_results.append(SearchResult(
                    id=str(point.id),

@@ -641,13 +675,27 @@ async def reflect_on_past(
                # Check project filter if we're searching all collections but want specific project
                point_project = point.payload.get('project', collection_name.replace('conv_', '').replace('_voyage', '').replace('_local', ''))
 
+                # Special handling for reflections - they're global by default but can have project context
+                is_reflection_collection = collection_name.startswith('reflections')
+
                # Handle project matching - check if the target project name appears at the end of the stored project path
-                if target_project != 'all' and not project_collections:
+                if target_project != 'all' and not project_collections and not is_reflection_collection:
                    # The stored project name is like "-Users-username-projects-ShopifyMCPMockShop"
                    # We want to match just "ShopifyMCPMockShop"
                    if not point_project.endswith(f"-{target_project}") and point_project != target_project:
                        continue  # Skip results from other projects
 
+                # For reflections with project context, optionally filter by project
+                if is_reflection_collection and target_project != 'all' and 'project' in point.payload:
+                    # Only filter if the reflection has project metadata
+                    reflection_project = point.payload.get('project', '')
+                    if reflection_project and not (
+                        reflection_project == target_project or
+                        reflection_project.endswith(f"/{target_project}") or
+                        reflection_project.endswith(f"-{target_project}")
+                    ):
+                        continue  # Skip reflections from other projects
+
                # BOOST V2 CHUNKS: Apply score boost for v2 chunks (better quality)
                original_score = point.score
                final_score = original_score
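
The same reflection guard is added in all three places above where search results are filtered. Isolated as a predicate, the rule reads: a reflection with no project metadata stays globally visible, while a tagged reflection must match the target project exactly or end in "/<target>" or "-<target>". A runnable sketch of that rule (payloads and names are illustrative):

def reflection_passes(payload: dict, target_project: str) -> bool:
    """Sketch of the reflection filter above: reflections without project
    metadata stay global; tagged ones must match the target project."""
    if target_project == 'all' or 'project' not in payload:
        return True
    reflection_project = payload.get('project', '')
    if not reflection_project:
        return True  # empty metadata is treated as global
    return (
        reflection_project == target_project or
        reflection_project.endswith(f"/{target_project}") or
        reflection_project.endswith(f"-{target_project}")
    )

assert reflection_passes({}, "ShopifyMCPMockShop")                                      # untagged: kept
assert reflection_passes({"project": "Connectiva-App/connectiva-ai"}, "connectiva-ai")  # "/" suffix: kept
assert not reflection_passes({"project": "other-app"}, "connectiva-ai")                 # mismatch: skipped

The "-" suffix form covers Claude's dash-flattened log paths (like the "-Users-username-projects-..." names in the comments above), while the "/" form covers the nested names produced by store_reflection below.
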
@@ -932,6 +980,25 @@ async def store_reflection(
        # Create reflections collection name
        collection_name = f"reflections{get_collection_suffix()}"
 
+        # Get current project context
+        cwd = os.environ.get('MCP_CLIENT_CWD', os.getcwd())
+        project_path = Path(cwd)
+
+        # Extract project name from path
+        project_name = None
+        path_parts = project_path.parts
+        if 'projects' in path_parts:
+            idx = path_parts.index('projects')
+            if idx + 1 < len(path_parts):
+                # Get all parts after 'projects' to form the project name
+                # This handles cases like projects/Connectiva-App/connectiva-ai
+                project_parts = path_parts[idx + 1:]
+                project_name = '/'.join(project_parts)
+
+        # If no project detected, use the last directory name
+        if not project_name:
+            project_name = project_path.name
+
        # Ensure collection exists
        try:
            collection_info = await qdrant_client.get_collection(collection_name)
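
store_reflection now derives a project name by locating a 'projects' segment in the client working directory and joining every path part after it, so nested projects keep their full relative name; otherwise it falls back to the last directory. A standalone sketch of the same extraction (paths are examples):

from pathlib import Path

def project_name_from(cwd: str) -> str:
    """Sketch of the extraction above: join all path parts after 'projects',
    falling back to the last directory name."""
    parts = Path(cwd).parts
    if 'projects' in parts:
        idx = parts.index('projects')
        if idx + 1 < len(parts):
            return '/'.join(parts[idx + 1:])
    return Path(cwd).name

print(project_name_from('/Users/alice/projects/Connectiva-App/connectiva-ai'))  # Connectiva-App/connectiva-ai
print(project_name_from('/tmp/scratch'))                                        # scratch

So a reflection written from /Users/alice/projects/Connectiva-App/connectiva-ai is tagged 'Connectiva-App/connectiva-ai', which the "/"-suffix rule above then matches when searching from the nested project.
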
@@ -949,7 +1016,7 @@ async def store_reflection(
        # Generate embedding for the reflection
        embedding = await generate_embedding(content)
 
-        # Create point with metadata
+        # Create point with metadata including project context
        point_id = datetime.now().timestamp()
        point = PointStruct(
            id=int(point_id),

@@ -959,7 +1026,9 @@
                "tags": tags,
                "timestamp": datetime.now().isoformat(),
                "type": "reflection",
-                "role": "user_reflection"
+                "role": "user_reflection",
+                "project": project_name,  # Add project context
+                "project_path": str(project_path)  # Add full path for reference
            }
        )
 
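
Net effect of the last two hunks: every reflection stored by 2.7.3 carries its originating project. Roughly, the payload written to Qdrant looks like this; only the last two keys are new in the diff, and the content field name and all values here are assumptions for the sketch:

# Illustrative reflection payload; "content" is an assumed field name, values are examples.
payload = {
    "content": "Prefer streaming imports for large JSONL files.",  # assumed; not shown in this hunk
    "tags": ["imports", "memory"],
    "timestamp": "2025-01-15T10:30:00",
    "type": "reflection",
    "role": "user_reflection",
    "project": "Connectiva-App/connectiva-ai",                             # new in 2.7.3
    "project_path": "/Users/alice/projects/Connectiva-App/connectiva-ai"   # new in 2.7.3
}
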
package/package.json
CHANGED

@@ -1,374 +0,0 @@
-#!/usr/bin/env python3
-"""
-Streaming importer with true line-by-line processing to prevent OOM.
-Processes JSONL files without loading entire file into memory.
-"""
-
-import json
-import os
-import sys
-import hashlib
-import gc
-from pathlib import Path
-from datetime import datetime
-from typing import List, Dict, Any, Optional
-import logging
-
-# Add the project root to the Python path
-project_root = Path(__file__).parent.parent
-sys.path.insert(0, str(project_root))
-
-from qdrant_client import QdrantClient
-from qdrant_client.models import PointStruct, Distance, VectorParams
-
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
-# Environment variables
-QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
-STATE_FILE = os.getenv("STATE_FILE", "/config/imported-files.json")
-PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "true").lower() == "true"
-VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")
-MAX_CHUNK_SIZE = int(os.getenv("MAX_CHUNK_SIZE", "50"))  # Messages per chunk
-
-# Initialize Qdrant client
-client = QdrantClient(url=QDRANT_URL)
-
-# Initialize embedding provider
-embedding_provider = None
-embedding_dimension = None
-
-if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
-    logger.info("Using local embeddings (fastembed)")
-    from fastembed import TextEmbedding
-    embedding_provider = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    embedding_dimension = 384
-    collection_suffix = "local"
-else:
-    logger.info("Using Voyage AI embeddings")
-    import voyageai
-    embedding_provider = voyageai.Client(api_key=VOYAGE_API_KEY)
-    embedding_dimension = 1024
-    collection_suffix = "voyage"
-
-def normalize_project_name(project_name: str) -> str:
-    """Normalize project name for consistency."""
-    return project_name.replace("-Users-ramakrishnanannaswamy-projects-", "").replace("-", "_").lower()
-
-def get_collection_name(project_path: Path) -> str:
-    """Generate collection name from project path."""
-    normalized = normalize_project_name(project_path.name)
-    name_hash = hashlib.md5(normalized.encode()).hexdigest()[:8]
-    return f"conv_{name_hash}_{collection_suffix}"
-
-def ensure_collection(collection_name: str):
-    """Ensure collection exists with correct configuration."""
-    collections = client.get_collections().collections
-    if not any(c.name == collection_name for c in collections):
-        logger.info(f"Creating collection: {collection_name}")
-        client.create_collection(
-            collection_name=collection_name,
-            vectors_config=VectorParams(size=embedding_dimension, distance=Distance.COSINE)
-        )
-
-def generate_embeddings(texts: List[str]) -> List[List[float]]:
-    """Generate embeddings for texts."""
-    if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
-        embeddings = list(embedding_provider.passage_embed(texts))
-        return [emb.tolist() if hasattr(emb, 'tolist') else emb for emb in embeddings]
-    else:
-        response = embedding_provider.embed(texts, model="voyage-3")
-        return response.embeddings
-
-def process_and_upload_chunk(messages: List[Dict[str, Any]], chunk_index: int,
-                             conversation_id: str, created_at: str,
-                             metadata: Dict[str, Any], collection_name: str,
-                             project_path: Path) -> int:
-    """Process and immediately upload a single chunk."""
-    if not messages:
-        return 0
-
-    # Extract text content
-    texts = []
-    for msg in messages:
-        role = msg.get("role", "unknown")
-        content = msg.get("content", "")
-        if content:
-            texts.append(f"{role.upper()}: {content}")
-
-    if not texts:
-        return 0
-
-    chunk_text = "\n".join(texts)
-
-    try:
-        # Generate embedding
-        embeddings = generate_embeddings([chunk_text])
-
-        # Create point ID
-        point_id = hashlib.md5(
-            f"{conversation_id}_{chunk_index}".encode()
-        ).hexdigest()[:16]
-
-        # Create payload
-        payload = {
-            "text": chunk_text,
-            "conversation_id": conversation_id,
-            "chunk_index": chunk_index,
-            "timestamp": created_at,
-            "project": normalize_project_name(project_path.name),
-            "start_role": messages[0].get("role", "unknown") if messages else "unknown",
-            "message_count": len(messages)
-        }
-
-        # Add metadata
-        if metadata:
-            payload.update(metadata)
-
-        # Create point
-        point = PointStruct(
-            id=int(point_id, 16) % (2**63),
-            vector=embeddings[0],
-            payload=payload
-        )
-
-        # Upload immediately
-        client.upsert(
-            collection_name=collection_name,
-            points=[point],
-            wait=True
-        )
-
-        return 1
-
-    except Exception as e:
-        logger.error(f"Error processing chunk {chunk_index}: {e}")
-        return 0
-
-def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str]:
-    """Extract metadata in a single pass, return metadata and first timestamp."""
-    metadata = {
-        "files_analyzed": [],
-        "files_edited": [],
-        "tools_used": [],
-        "concepts": []
-    }
-
-    first_timestamp = None
-
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            for line in f:
-                if not line.strip():
-                    continue
-
-                try:
-                    data = json.loads(line)
-
-                    # Get timestamp from first valid entry
-                    if first_timestamp is None and 'timestamp' in data:
-                        first_timestamp = data.get('timestamp')
-
-                    # Extract tool usage from messages
-                    if 'message' in data and data['message']:
-                        msg = data['message']
-                        if msg.get('content'):
-                            content = msg['content']
-                            if isinstance(content, list):
-                                for item in content:
-                                    if isinstance(item, dict) and item.get('type') == 'tool_use':
-                                        tool_name = item.get('name', '')
-                                        if tool_name and tool_name not in metadata['tools_used']:
-                                            metadata['tools_used'].append(tool_name)
-
-                                        # Extract file references
-                                        if 'input' in item:
-                                            input_data = item['input']
-                                            if isinstance(input_data, dict):
-                                                if 'file_path' in input_data:
-                                                    file_ref = input_data['file_path']
-                                                    if file_ref not in metadata['files_analyzed']:
-                                                        metadata['files_analyzed'].append(file_ref)
-                                                if 'path' in input_data:
-                                                    file_ref = input_data['path']
-                                                    if file_ref not in metadata['files_analyzed']:
-                                                        metadata['files_analyzed'].append(file_ref)
-
-                except json.JSONDecodeError:
-                    continue
-                except Exception:
-                    continue
-
-    except Exception as e:
-        logger.warning(f"Error extracting metadata: {e}")
-
-    return metadata, first_timestamp or datetime.now().isoformat()
-
-def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
-    """Stream import a single JSONL file without loading it into memory."""
-    logger.info(f"Streaming import of {jsonl_file.name}")
-
-    # Extract metadata in first pass (lightweight)
-    metadata, created_at = extract_metadata_single_pass(str(jsonl_file))
-
-    # Stream messages and process in chunks
-    chunk_buffer = []
-    chunk_index = 0
-    total_chunks = 0
-    conversation_id = jsonl_file.stem
-
-    try:
-        with open(jsonl_file, 'r', encoding='utf-8') as f:
-            for line_num, line in enumerate(f, 1):
-                line = line.strip()
-                if not line:
-                    continue
-
-                try:
-                    data = json.loads(line)
-
-                    # Skip non-message lines
-                    if data.get('type') == 'summary':
-                        continue
-
-                    # Extract message if present
-                    if 'message' in data and data['message']:
-                        msg = data['message']
-                        if msg.get('role') and msg.get('content'):
-                            # Extract content
-                            content = msg['content']
-                            if isinstance(content, list):
-                                text_parts = []
-                                for item in content:
-                                    if isinstance(item, dict) and item.get('type') == 'text':
-                                        text_parts.append(item.get('text', ''))
-                                    elif isinstance(item, str):
-                                        text_parts.append(item)
-                                content = '\n'.join(text_parts)
-
-                            if content:
-                                chunk_buffer.append({
-                                    'role': msg['role'],
-                                    'content': content
-                                })
-
-                    # Process chunk when buffer reaches MAX_CHUNK_SIZE
-                    if len(chunk_buffer) >= MAX_CHUNK_SIZE:
-                        chunks = process_and_upload_chunk(
-                            chunk_buffer, chunk_index, conversation_id,
-                            created_at, metadata, collection_name, project_path
-                        )
-                        total_chunks += chunks
-                        chunk_buffer = []
-                        chunk_index += 1
-
-                        # Force garbage collection after each chunk
-                        gc.collect()
-
-                        # Log progress
-                        if chunk_index % 10 == 0:
-                            logger.info(f"Processed {chunk_index} chunks from {jsonl_file.name}")
-
-                except json.JSONDecodeError:
-                    logger.debug(f"Skipping invalid JSON at line {line_num}")
-                except Exception as e:
-                    logger.debug(f"Error processing line {line_num}: {e}")
-
-        # Process remaining messages
-        if chunk_buffer:
-            chunks = process_and_upload_chunk(
-                chunk_buffer, chunk_index, conversation_id,
-                created_at, metadata, collection_name, project_path
-            )
-            total_chunks += chunks
-
-        logger.info(f"Imported {total_chunks} chunks from {jsonl_file.name}")
-        return total_chunks
-
-    except Exception as e:
-        logger.error(f"Failed to import {jsonl_file}: {e}")
-        return 0
-
-def load_state() -> dict:
-    """Load import state."""
-    if os.path.exists(STATE_FILE):
-        try:
-            with open(STATE_FILE, 'r') as f:
-                return json.load(f)
-        except:
-            pass
-    return {"imported_files": {}}
-
-def save_state(state: dict):
-    """Save import state."""
-    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-    with open(STATE_FILE, 'w') as f:
-        json.dump(state, f, indent=2)
-
-def should_import_file(file_path: Path, state: dict) -> bool:
-    """Check if file should be imported."""
-    file_str = str(file_path)
-    if file_str in state.get("imported_files", {}):
-        file_info = state["imported_files"][file_str]
-        last_modified = file_path.stat().st_mtime
-        if file_info.get("last_modified") == last_modified:
-            logger.info(f"Skipping unchanged file: {file_path.name}")
-            return False
-    return True
-
-def update_file_state(file_path: Path, state: dict, chunks: int):
-    """Update state for imported file."""
-    file_str = str(file_path)
-    state["imported_files"][file_str] = {
-        "imported_at": datetime.now().isoformat(),
-        "last_modified": file_path.stat().st_mtime,
-        "chunks": chunks
-    }
-
-def main():
-    """Main import function."""
-    # Load state
-    state = load_state()
-    logger.info(f"Loaded state with {len(state.get('imported_files', {}))} previously imported files")
-
-    # Find all projects
-    logs_dir = Path(os.getenv("LOGS_DIR", "/logs"))
-    project_dirs = [d for d in logs_dir.iterdir() if d.is_dir()]
-    logger.info(f"Found {len(project_dirs)} projects to import")
-
-    total_imported = 0
-
-    for project_dir in project_dirs:
-        # Get collection name
-        collection_name = get_collection_name(project_dir)
-        logger.info(f"Importing project: {project_dir.name} -> {collection_name}")
-
-        # Ensure collection exists
-        ensure_collection(collection_name)
-
-        # Find JSONL files
-        jsonl_files = sorted(project_dir.glob("*.jsonl"))
-
-        # Limit files per cycle if specified
-        max_files = int(os.getenv("MAX_FILES_PER_CYCLE", "1000"))
-        jsonl_files = jsonl_files[:max_files]
-
-        for jsonl_file in jsonl_files:
-            if should_import_file(jsonl_file, state):
-                chunks = stream_import_file(jsonl_file, collection_name, project_dir)
-                if chunks > 0:
-                    update_file_state(jsonl_file, state, chunks)
-                    save_state(state)
-                    total_imported += 1
-
-                # Force GC after each file
-                gc.collect()
-
-    logger.info(f"Import complete: processed {total_imported} files")
-
-if __name__ == "__main__":
-    main()
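
The removed streaming importer's central memory-safety idea was to buffer at most MAX_CHUNK_SIZE messages, flush them, then collect garbage before reading on. Reduced to its skeleton (a sketch of the pattern, not the removed script itself):

import gc

MAX_CHUNK_SIZE = 50  # messages per chunk, as in the removed importer

def stream_chunks(lines, flush):
    """Sketch of the removed importer's buffering loop: never hold more
    than one chunk of messages in memory at a time."""
    buffer = []
    for line in lines:
        buffer.append(line)
        if len(buffer) >= MAX_CHUNK_SIZE:
            flush(buffer)
            buffer = []
            gc.collect()  # release chunk memory before reading further
    if buffer:
        flush(buffer)  # flush the final partial chunk
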
package/scripts/import-latest.py
DELETED

@@ -1,124 +0,0 @@
-#!/usr/bin/env python3
-"""
-Quick import script for current project's latest conversations.
-Designed for PreCompact hook integration - targets <10 second imports.
-"""
-
-import os
-import sys
-import json
-import subprocess
-from datetime import datetime, timedelta
-from pathlib import Path
-import logging
-
-# Configuration
-LOGS_DIR = os.getenv("LOGS_DIR", os.path.expanduser("~/.claude/projects"))
-STATE_FILE = os.getenv("STATE_FILE", os.path.expanduser("~/.claude-self-reflect-state.json"))
-HOURS_BACK = int(os.getenv("IMPORT_HOURS_BACK", "2"))  # Only import last 2 hours by default
-
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
-def load_state():
-    """Load import state from file."""
-    if os.path.exists(STATE_FILE):
-        try:
-            with open(STATE_FILE, 'r') as f:
-                return json.load(f)
-        except:
-            return {}
-    return {}
-
-def save_state(state):
-    """Save import state to file."""
-    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
-    with open(STATE_FILE, 'w') as f:
-        json.dump(state, f, indent=2)
-
-def get_project_from_cwd():
-    """Detect project from current working directory."""
-    cwd = os.getcwd()
-    # Convert path to project name format used in logs
-    # Claude logs use format: -Users-username-path-to-project
-    project_name = cwd.replace('/', '-')
-    # Keep the leading dash as that's how Claude stores it
-    if not project_name.startswith('-'):
-        project_name = '-' + project_name
-    return project_name
-
-def get_recent_files(project_path: Path, hours_back: int):
-    """Get JSONL files modified in the last N hours."""
-    cutoff_time = datetime.now() - timedelta(hours=hours_back)
-    recent_files = []
-
-    for jsonl_file in project_path.glob("*.jsonl"):
-        mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime)
-        if mtime > cutoff_time:
-            recent_files.append(jsonl_file)
-
-    return sorted(recent_files, key=lambda f: f.stat().st_mtime, reverse=True)
-
-def main():
-    """Main quick import function."""
-    start_time = datetime.now()
-
-    # Detect current project
-    project_name = get_project_from_cwd()
-    project_path = Path(LOGS_DIR) / project_name
-
-    if not project_path.exists():
-        logger.warning(f"Project logs not found: {project_path}")
-        logger.info("Make sure you're in a project directory with Claude conversations.")
-        return
-
-    logger.info(f"Quick importing latest conversations for: {project_name}")
-
-    # Get recent files
-    recent_files = get_recent_files(project_path, HOURS_BACK)
-    logger.info(f"Found {len(recent_files)} files modified in last {HOURS_BACK} hours")
-
-    if not recent_files:
-        logger.info("No recent conversations to import")
-        return
-
-    # For now, just call the unified importer with the specific project
-    # This is a temporary solution until we implement incremental imports
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    unified_script = os.path.join(script_dir, "import-conversations-unified.py")
-
-    # Set environment to only process this project
-    env = os.environ.copy()
-    env['LOGS_DIR'] = str(project_path.parent)
-    env['IMPORT_PROJECT'] = project_name
-
-    try:
-        # Run the unified importer for just this project
-        result = subprocess.run(
-            [sys.executable, unified_script],
-            env=env,
-            capture_output=True,
-            text=True,
-            timeout=60  # 60 second timeout
-        )
-
-        if result.returncode == 0:
-            logger.info("Quick import completed successfully")
-        else:
-            logger.error(f"Import failed: {result.stderr}")
-
-    except subprocess.TimeoutExpired:
-        logger.warning("Import timed out after 60 seconds")
-    except Exception as e:
-        logger.error(f"Error during import: {e}")
-
-    # Report timing
-    elapsed = (datetime.now() - start_time).total_seconds()
-    logger.info(f"Quick import completed in {elapsed:.1f} seconds")
-
-if __name__ == "__main__":
-    main()