claude-self-reflect 7.1.10 → 7.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -78
- package/docs/design/GRADER_PROMPT.md +81 -0
- package/docs/design/batch_ground_truth_generator.py +496 -0
- package/docs/design/batch_import_all_projects.py +477 -0
- package/docs/design/batch_import_v3.py +278 -0
- package/docs/design/conversation-analyzer/SKILL.md +133 -0
- package/docs/design/conversation-analyzer/SKILL_V2.md +218 -0
- package/docs/design/conversation-analyzer/extract_structured.py +186 -0
- package/docs/design/extract_events_v3.py +533 -0
- package/docs/design/import_existing_batch.py +188 -0
- package/docs/design/recover_all_batches.py +297 -0
- package/docs/design/recover_batch_results.py +287 -0
- package/package.json +4 -1
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
Recover batch results and complete the Qdrant import.

Retrieves narratives from completed batches and imports to Qdrant.
"""

import os
import sys
import json
from pathlib import Path
from dotenv import load_dotenv
import time

# Load ANTHROPIC_API_KEY / QDRANT_URL from a local .env file, if present.
load_dotenv()

# Make the repository root importable (this file lives in docs/design/,
# three levels below the root), so `docs.design.*` imports resolve later.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import anthropic
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct

# Import FastEmbed
try:
    from fastembed import TextEmbedding
    FASTEMBED_AVAILABLE = True
except ImportError:
    FASTEMBED_AVAILABLE = False
    print("⚠️ FastEmbed not available")
    # Embeddings are mandatory for this recovery script, so abort immediately.
    sys.exit(1)


# Batch IDs from dashboard (in chronological order)
# Each entry: the Anthropic batch id, the project the batch belongs to, and
# the expected conversation count. NOTE(review): 'count' appears to be
# informational only — it is never validated against the retrieved results.
COMPLETED_BATCHES = [
    {
        'batch_id': 'msgbatch_01ATPhpjCw1gqPisHUgoPnab',
        'project': 'address-book-fix',
        'count': 1
    },
    {
        'batch_id': 'msgbatch_016g8zHtH7or7DtJu3ZzAczS',
        'project': 'anukruti',
        'count': 5
    },
    {
        'batch_id': 'msgbatch_01QCwhFw9DYDJ8uPjYsHg8Xu',
        'project': 'buyindian',
        'count': 5
    },
    {
        'batch_id': 'msgbatch_01WVbb5X2xYwuzzgEdqVicZJ',
        'project': 'procsolve-website', # or cc-enhance
        'count': 2
    },
    {
        'batch_id': 'msgbatch_01EemyvChmnShYAuJix7m1As',
        'project': 'claude-self-reflect',
        'count': 36
    }
]
|
|
63
|
+
def get_embedding(text: str, embedding_model) -> list:
    """Embed *text* with the given FastEmbed model and return a plain list.

    The model embeds a single-item batch; the first (and only) vector is
    converted from its array form to a Python list of floats.
    """
    vectors = [vec for vec in embedding_model.embed([text])]
    first = vectors[0]
    return first.tolist()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def retrieve_batch_narratives(client: anthropic.Anthropic, batch_id: str):
    """Fetch the results of one completed Anthropic message batch.

    Returns a tuple ``(narratives, total_cost)`` where *narratives* maps each
    result's custom_id to its concatenated narrative text and *total_cost* is
    the USD cost derived from token usage ($3 / $15 per million input /
    output tokens — Sonnet pricing hard-coded here). Any failure is reported
    and an empty dict with zero cost is returned (best-effort recovery).
    """

    print(f"\n🔄 Retrieving batch {batch_id}...")

    narratives = {}
    total_cost = 0.0
    total_input = 0
    total_output = 0

    try:
        # Stream each per-conversation result from the batch.
        for result_item in client.messages.batches.results(batch_id):
            conv_id = result_item.custom_id
            outcome = result_item.result

            if outcome.type != "succeeded":
                print(f" ❌ Error for {conv_id}: {outcome.error}")
                continue

            message = outcome.message

            # Concatenate every text block into one narrative string.
            narratives[conv_id] = "".join(
                block.text for block in message.content if hasattr(block, 'text')
            )

            # Accumulate token usage and the derived dollar cost.
            usage = message.usage
            total_input += usage.input_tokens
            total_output += usage.output_tokens
            total_cost += (usage.input_tokens * 3 + usage.output_tokens * 15) / 1_000_000

        print(f" ✅ Retrieved {len(narratives)} narratives")
        print(f" 📊 Tokens: {total_input} input, {total_output} output")
        print(f" 💰 Cost: ${total_cost:.4f}")

        return narratives, total_cost

    except Exception as e:
        print(f" ❌ Failed to retrieve batch: {e}")
        return {}, 0.0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def load_conversation_data(project_dir: Path):
    """Load V3 extraction results and metadata for a project.

    Parameters
    ----------
    project_dir : Path
        Directory holding one ``*.jsonl`` transcript per conversation.

    Returns
    -------
    dict
        Maps conversation id (the JSONL file stem) to the value returned by
        ``extract_events_v3`` for that conversation.
    """

    conversations = {}

    # Import metadata extraction functions. The source file has dashes in
    # its name so it cannot be imported with a normal import statement —
    # load it via importlib from src/runtime/delta-metadata-update.py.
    import importlib.util
    delta_metadata_path = Path(__file__).parent.parent.parent / "src" / "runtime" / "delta-metadata-update.py"
    spec = importlib.util.spec_from_file_location("delta_metadata_update", delta_metadata_path)
    delta_metadata_update = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(delta_metadata_update)
    extract_tool_usage_from_jsonl = delta_metadata_update.extract_tool_usage_from_jsonl
    extract_concepts = delta_metadata_update.extract_concepts

    from docs.design.extract_events_v3 import extract_events_v3

    for jsonl_file in project_dir.glob("*.jsonl"):
        conv_id = jsonl_file.stem

        # Extract tool-usage metadata straight from the raw JSONL file.
        tool_usage = extract_tool_usage_from_jsonl(str(jsonl_file))

        # Read messages for V3 extraction, collecting the plain text of
        # every message (string content and 'text' items in list content).
        messages = []
        # FIX: accumulate text pieces in a list and join once instead of
        # repeated string += (which is quadratic on long transcripts).
        text_parts = []
        # FIX: transcripts are JSON, hence UTF-8 — make the encoding
        # explicit instead of relying on the platform default.
        with open(jsonl_file, encoding="utf-8") as f:
            for line in f:
                if line.strip():
                    msg = json.loads(line)
                    messages.append(msg)

                    if 'message' in msg and msg['message']:
                        content = msg['message'].get('content', '')
                        if isinstance(content, str):
                            text_parts.append(content + "\n")
                        elif isinstance(content, list):
                            for item in content:
                                if isinstance(item, dict) and item.get('text'):
                                    text_parts.append(item['text'] + "\n")

        conversation_text = "".join(text_parts)

        # Extract concepts — only the first 10k characters are considered.
        concepts = extract_concepts(conversation_text[:10000], tool_usage)

        # Build metadata dict
        metadata = {
            'tool_usage': tool_usage,
            'concepts': concepts
        }

        # V3 extraction WITH metadata
        result = extract_events_v3(messages, metadata=metadata)

        conversations[conv_id] = result

    return conversations
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def main():
    """Recover and import batch results.

    Pipeline: (1) pull narratives from every batch in COMPLETED_BATCHES,
    (2) re-run local V3 extraction per project to rebuild payload metadata,
    (3) embed each narrative and upsert the points into the existing
    Qdrant collection.
    """

    print(f"\n{'='*80}")
    print(f"BATCH RESULTS RECOVERY & QDRANT IMPORT")
    print(f"{'='*80}\n")

    # Initialize clients
    print("🔧 Initializing clients...")
    anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    qdrant_client = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333"))
    # NOTE(review): this model produces 384-dim vectors — the target
    # collection must have been created with a matching vector size.
    embedding_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
    print(" ✅ Clients initialized")

    # Collection should already exist
    collection_name = "v3_all_projects"

    # Retrieve all batch results
    all_narratives = {}
    grand_total_cost = 0.0

    for batch_info in COMPLETED_BATCHES:
        narratives, cost = retrieve_batch_narratives(
            anthropic_client,
            batch_info['batch_id']
        )

        # Tag narratives with project
        for conv_id, narrative in narratives.items():
            all_narratives[conv_id] = {
                'narrative': narrative,
                'project': batch_info['project']
            }

        grand_total_cost += cost

    print(f"\n📊 Total narratives retrieved: {len(all_narratives)}")
    print(f"💰 Total cost: ${grand_total_cost:.4f}")

    # Load conversation data and create points
    print(f"\n🔄 Loading conversation data and creating points...")

    projects_dir = Path.home() / ".claude/projects"
    all_points = []

    for batch_info in COMPLETED_BATCHES:
        project_name = batch_info['project']

        # Find project directory
        # NOTE(review): the glob may match more than one directory whose
        # name ends with project_name; the first match wins.
        project_dirs = list(projects_dir.glob(f"*{project_name}"))
        if not project_dirs:
            print(f" ⚠️ Project directory not found for {project_name}")
            continue

        project_dir = project_dirs[0]
        print(f"\n Processing {project_name}...")

        # Load conversation data
        conversations = load_conversation_data(project_dir)

        # Create points
        for conv_id, result in conversations.items():
            # Conversations present on disk but absent from the batch
            # output are skipped rather than imported without a narrative.
            if conv_id not in all_narratives:
                print(f" ⚠️ No narrative for {conv_id}")
                continue

            narrative = all_narratives[conv_id]['narrative']

            # Generate embedding
            embedding = get_embedding(narrative, embedding_model)

            # Create point
            # NOTE(review): Qdrant point ids must be UUIDs or unsigned ints;
            # conv_id (a JSONL file stem) is presumably a UUID — confirm.
            point = PointStruct(
                id=conv_id,
                vector=embedding,
                payload={
                    "conversation_id": conv_id,
                    "project": project_name,
                    "narrative": narrative,
                    "search_index": result['search_index'],
                    "context_cache": result['context_cache'],
                    "signature": result['signature'],
                    # Import time, not the conversation's own timestamp.
                    "timestamp": time.time(),
                    "extraction_stats": result['stats']
                }
            )

            all_points.append(point)
            print(f" ✅ {conv_id}")

    # Import to Qdrant
    print(f"\n🔄 Importing {len(all_points)} points to Qdrant...")

    # Upsert in chunks of 100 to keep request sizes bounded.
    batch_size = 100
    for i in range(0, len(all_points), batch_size):
        batch = all_points[i:i+batch_size]
        qdrant_client.upsert(
            collection_name=collection_name,
            points=batch
        )
        print(f" ✅ Imported batch {i//batch_size + 1}: {len(batch)} points")

    # Verify
    collection_info = qdrant_client.get_collection(collection_name)
    print(f"\n✅ RECOVERY COMPLETE!")
    print(f" Collection: {collection_name}")
    print(f" Total points: {collection_info.points_count}")
    print(f" Total cost: ${grand_total_cost:.4f}")
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
# Script entry point: run the recovery pipeline when executed directly.
if __name__ == "__main__":
    main()
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-self-reflect",
|
|
3
|
-
"version": "7.1.10",
|
|
3
|
+
"version": "7.1.11",
|
|
4
4
|
"description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"claude",
|
|
@@ -53,6 +53,9 @@
|
|
|
53
53
|
"config/qdrant-config.yaml",
|
|
54
54
|
"docker-compose.yaml",
|
|
55
55
|
"Dockerfile.*",
|
|
56
|
+
"docs/design/**/*.py",
|
|
57
|
+
"docs/design/**/*.md",
|
|
58
|
+
"docs/design/conversation-analyzer/**",
|
|
56
59
|
".env.example",
|
|
57
60
|
"README.md",
|
|
58
61
|
"LICENSE"
|