claude-self-reflect 7.1.9 → 7.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,188 @@
+ #!/usr/bin/env python3
+ """
+ Import existing batch results to Qdrant.
+ Batch ID: msgbatch_01QGo1y5maCUgqR7WWE1z2aT (27 conversations)
+ """
+
+ import os
+ import sys
+ import json
+ import re
+ from pathlib import Path
+ from dotenv import load_dotenv
+ from datetime import datetime
+
+ load_dotenv()
+
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+ import anthropic
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import PointStruct
+
+ # Import FastEmbed
+ from fastembed import TextEmbedding
+
+ def get_embedding(text: str, embedding_model) -> list:
+     """Generate embedding for text."""
+     embeddings = list(embedding_model.embed([text]))
+     return embeddings[0].tolist()
+
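+ # Note: all-MiniLM-L6-v2 produces 384-dimensional vectors, so the target
+ # collection must have been created with size=384. A minimal setup sketch in
+ # case it does not exist yet (hypothetical; the script assumes it already does):
+ #
+ #     from qdrant_client.models import Distance, VectorParams
+ #     QdrantClient(url='http://localhost:6333').create_collection(
+ #         collection_name='v3_all_projects',
+ #         vectors_config=VectorParams(size=384, distance=Distance.COSINE),
+ #     )
+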
+ def fix_json_response(content: str) -> str:
+     """Fix Claude's backtick-based JSON responses."""
+     # Try to extract JSON from a markdown code fence if present
+     if '```json' in content:
+         json_start = content.find('```json') + 7
+         json_end = content.find('```', json_start)
+         content = content[json_start:json_end].strip()
+     elif '```' in content:
+         json_start = content.find('```') + 3
+         json_end = content.find('```', json_start)
+         content = content[json_start:json_end].strip()
+
+     # Fix invalid JSON: replace backticks with escaped quotes for field values.
+     # Pattern: "field": `value` -> "field": "value with escaped newlines".
+     # A named replacer avoids reusing quote characters inside an f-string
+     # expression, which is a syntax error before Python 3.12.
+     def _escape_value(match):
+         value = match.group(1).replace('\r', '').replace('\n', '\\n').replace('"', '\\"')
+         return f': "{value}"'
+
+     content = re.sub(r':\s*`([^`]*)`', _escape_value, content, flags=re.DOTALL)
+
+     return content
+
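+ # Illustrative round-trip of the repair above (sample input is made up):
+ #
+ #     fix_json_response('```json\n{"narrative": `line one\nline two`}\n```')
+ #     -> '{"narrative": "line one\\nline two"}'   (now valid for json.loads)
+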
+ def main():
+     print("=" * 70)
+     print("IMPORT EXISTING BATCH RESULTS")
+     print("=" * 70)
+     print("Batch ID: msgbatch_01QGo1y5maCUgqR7WWE1z2aT")
+     print("Target: v3_all_projects collection")
+     print()
+
+     # Initialize clients
+     anthropic_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
+     qdrant_client = QdrantClient(url='http://localhost:6333')
+     embedding_model = TextEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2')
+
+     # Get TIER 1 conversation mapping
+     print("šŸ“Š Loading TIER 1 conversation mapping...")
+     collections = qdrant_client.get_collections().collections
+     conv_cols = [c for c in collections if c.name.startswith('conv_') and c.name.endswith('_local')]
+
+     conversations = []
+     for col in conv_cols:
+         # Strip the 'conv_' prefix and '_local' suffix to recover the base ID
+         base_id = col.name[5:-6]
+         results = qdrant_client.scroll(
+             collection_name=col.name,
+             limit=1,
+             with_payload=True
+         )
+         if results[0]:
+             first_payload = results[0][0].payload
+             conversation_id = first_payload.get('conversation_id', base_id)
+             project = first_payload.get('project_name', 'unknown')
+             conversations.append({
+                 'conversation_id': conversation_id,
+                 'project': project,
+                 'collection_name': col.name
+             })
+
+     print(f"āœ… Loaded {len(conversations)} conversation mappings")
+
+     # Retrieve batch results
+     print("\nšŸ“„ Retrieving batch results...")
+     results = []
+     for result in anthropic_client.beta.messages.batches.results('msgbatch_01QGo1y5maCUgqR7WWE1z2aT'):
+         if result.result.type == 'succeeded':
+             results.append(result)
+
+     print(f"āœ… Retrieved {len(results)} successful results")
+
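+     # Note: batch results are fetched through the SDK's beta namespace here;
+     # the recovery script later in this release performs the same retrieval
+     # through the non-beta client.messages.batches path.
+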
+     # Process results
+     print("\nšŸ“¦ Processing narratives...")
+     points_to_add = []
+     processed_count = 0
+     failed_ids = []
+
+     for result in results:
+         custom_id = result.custom_id
+         try:
+             response_content = result.result.message.content[0].text
+             response_content = fix_json_response(response_content)
+             narrative_data = json.loads(response_content)
+
+             # Get original conversation data; custom_id is expected to look
+             # like 'conv_1', 'conv_2', ... (a 1-based index into the mapping)
+             conv_idx = int(custom_id.split('_')[1]) - 1
+             conv = conversations[conv_idx]
+
+             # Create point
+             search_text = narrative_data.get('search_index', narrative_data['narrative'][:1000])
+             embedding = get_embedding(search_text, embedding_model)
+
+             payload = {
+                 'conversation_id': conv['conversation_id'],
+                 'project': conv['project'],
+                 'narrative': narrative_data['narrative'],
+                 'search_index': narrative_data.get('search_index', ''),
+                 'timestamp': datetime.now().timestamp(),
+                 'source': 'tier1_migration',
+                 'original_collection': conv['collection_name']
+             }
+
+             if 'metadata' in narrative_data:
+                 metadata = narrative_data['metadata']
+                 payload['signature'] = {
+                     'tools_used': metadata.get('tools_used', []),
+                     'concepts': metadata.get('concepts', []),
+                     'files_modified': metadata.get('files_modified', []),
+                     'completion_status': 'migrated'
+                 }
+
+             point = PointStruct(
+                 id=conv['conversation_id'],  # Use the conversation UUID directly
+                 vector=embedding,
+                 payload=payload
+             )
+
+             points_to_add.append(point)
+             processed_count += 1
+
+             if processed_count % 10 == 0:
+                 print(f"  Processed {processed_count}/{len(results)} narratives...")
+
+         except Exception as e:
+             failed_ids.append((custom_id, str(e)))
+             print(f"  āš ļø Error processing {custom_id}: {e}")
+
+     # Add to Qdrant
+     if points_to_add:
+         print(f"\nšŸ“¤ Adding {len(points_to_add)} points to v3_all_projects...")
+         qdrant_client.upsert(
+             collection_name='v3_all_projects',
+             points=points_to_add
+         )
+         print(f"āœ… Added {len(points_to_add)} narratives to Qdrant!")
+
+     # Summary
+     print("\n" + "=" * 70)
+     print("IMPORT COMPLETE!")
+     print("=" * 70)
+     print(f"āœ… Successfully processed: {processed_count}/{len(results)}")
+     print(f"āŒ Failed: {len(failed_ids)}/{len(results)}")
+
+     if failed_ids:
+         print("\nFailed IDs:")
+         for custom_id, error in failed_ids:
+             print(f"  - {custom_id}: {error[:50]}...")
+
+     # Check final collection size
+     collection_info = qdrant_client.get_collection('v3_all_projects')
+     print(f"\nšŸ“Š v3_all_projects now has {collection_info.points_count} narratives")
+     print(f"  (was 54, added {processed_count}, now {collection_info.points_count})")
+     print()
+     print("šŸŽÆ Test with MCP tools (no restart needed):")
+     print("  csr_reflect_on_past('OpenGraph procsolve website')")
+
+
+ if __name__ == '__main__':
+     main()
@@ -0,0 +1,297 @@
+ #!/usr/bin/env python3
+ """
+ Recover ALL batch results from the dashboard and complete the Qdrant import.
+
+ Retrieves narratives from all 8 completed batches shown in the dashboard.
+ """
+
+ import os
+ import sys
+ import json
+ from pathlib import Path
+ from dotenv import load_dotenv
+ import time
+
+ load_dotenv()
+
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+ import anthropic
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import PointStruct
+
+ # Import FastEmbed
+ try:
+     from fastembed import TextEmbedding
+     FASTEMBED_AVAILABLE = True
+ except ImportError:
+     FASTEMBED_AVAILABLE = False
+     print("āš ļø FastEmbed not available")
+     sys.exit(1)
+
+
+ # ALL batch IDs from the dashboard (complete list)
+ ALL_BATCHES = [
+     'msgbatch_012GH6kVL74ihT3NFFHbrYHZ',  # 1 request (mystery - thegatehouse?)
+     'msgbatch_01DMoYp2egP7Wz2Xa8Lv7cNc',  # 1 request (address-book-fix run 2)
+     'msgbatch_01Prq1G5CbfjjDdyGezKUGzH',  # 5 requests (anukruti run 2)
+     'msgbatch_01ATPhpjCw1gqPisHUgoPnab',  # 1 request (address-book-fix)
+     'msgbatch_016g8zHtH7or7DtJu3ZzAczS',  # 5 requests (anukruti)
+     'msgbatch_01QCwhFw9DYDJ8uPjYsHg8Xu',  # 5 requests (buyindian)
+     'msgbatch_01WVbb5X2xYwuzzgEdqVicZJ',  # 2 requests (procsolve-website or cc-enhance)
+     'msgbatch_01EemyvChmnShYAuJix7m1As',  # 36 requests (claude-self-reflect)
+ ]
+
+
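+ # The IDs above were copied from the dashboard by hand. As a sketch, recent
+ # batches can also be enumerated via the SDK (hypothetical usage; the exposed
+ # fields can vary by SDK version):
+ #
+ #     client = anthropic.Anthropic()
+ #     for batch in client.messages.batches.list(limit=20):
+ #         print(batch.id, batch.processing_status)
+
+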
+ def get_embedding(text: str, embedding_model) -> list:
+     """Generate embedding for text."""
+     embeddings = list(embedding_model.embed([text]))
+     return embeddings[0].tolist()
+
+
+ def retrieve_batch_narratives(client: anthropic.Anthropic, batch_id: str):
+     """Retrieve narratives from a completed batch."""
+
+     print(f"\nšŸ”„ Retrieving batch {batch_id}...")
+
+     try:
+         # Get batch results
+         results_response = client.messages.batches.results(batch_id)
+
+         narratives = {}
+         total_cost = 0.0
+         total_input = 0
+         total_output = 0
+
+         for result_item in results_response:
+             conv_id = result_item.custom_id
+
+             if result_item.result.type == "succeeded":
+                 message = result_item.result.message
+
+                 # Extract narrative text from every content block
+                 narrative = ""
+                 for block in message.content:
+                     if hasattr(block, 'text'):
+                         narrative += block.text
+
+                 narratives[conv_id] = narrative
+
+                 # Track usage at $3/MTok input, $15/MTok output
+                 input_tokens = message.usage.input_tokens
+                 output_tokens = message.usage.output_tokens
+                 cost = (input_tokens * 3 + output_tokens * 15) / 1_000_000
+
+                 total_input += input_tokens
+                 total_output += output_tokens
+                 total_cost += cost
+             else:
+                 print(f"  āŒ Error for {conv_id}: {result_item.result.error}")
+
+         print(f"  āœ… Retrieved {len(narratives)} narratives")
+         print(f"  šŸ“Š Tokens: {total_input} input, {total_output} output")
+         print(f"  šŸ’° Cost: ${total_cost:.4f}")
+
+         return narratives, total_cost
+
+     except Exception as e:
+         print(f"  āŒ Failed to retrieve batch: {e}")
+         return {}, 0.0
+
+
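+ # Note: the $3/$15 per-MTok rates used above are standard (non-batch) Sonnet
+ # prices; the Message Batches API is billed at a discounted rate, so the
+ # printed cost is best read as an upper-bound estimate.
+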
+ def load_conversation_data(projects_dir: Path):
+     """Load V3 extraction results and metadata for ALL projects."""
+
+     conversations = {}
+
+     # Import metadata extraction helpers; the module's file name contains
+     # hyphens, so it is loaded via importlib rather than a normal import
+     import importlib.util
+     delta_metadata_path = Path(__file__).parent.parent.parent / "src" / "runtime" / "delta-metadata-update.py"
+     spec = importlib.util.spec_from_file_location("delta_metadata_update", delta_metadata_path)
+     delta_metadata_update = importlib.util.module_from_spec(spec)
+     spec.loader.exec_module(delta_metadata_update)
+     extract_tool_usage_from_jsonl = delta_metadata_update.extract_tool_usage_from_jsonl
+     extract_concepts = delta_metadata_update.extract_concepts
+
+     from docs.design.extract_events_v3 import extract_events_v3
+
+     # Scan ALL project directories
+     for project_dir in projects_dir.iterdir():
+         if not project_dir.is_dir() or project_dir.name.startswith('.'):
+             continue
+
+         jsonl_files = list(project_dir.glob("*.jsonl"))
+         if not jsonl_files:
+             continue
+
+         # Extract the project name from the flattened directory name,
+         # e.g. '-Users-me-projects-myproj' -> 'myproj' (illustrative)
+         parts = project_dir.name.split('-projects-')
+         project_name = parts[-1] if len(parts) > 1 else project_dir.name
+
+         print(f"\nšŸ“‚ Loading {project_name}...")
+
+         for jsonl_file in jsonl_files:
+             conv_id = jsonl_file.stem
+
+             # Extract metadata FIRST
+             tool_usage = extract_tool_usage_from_jsonl(str(jsonl_file))
+
+             # Read messages for V3 extraction
+             messages = []
+             conversation_text = ""
+             with open(jsonl_file) as f:
+                 for line in f:
+                     if line.strip():
+                         msg = json.loads(line)
+                         messages.append(msg)
+
+                         if 'message' in msg and msg['message']:
+                             content = msg['message'].get('content', '')
+                             if isinstance(content, str):
+                                 conversation_text += content + "\n"
+                             elif isinstance(content, list):
+                                 for item in content:
+                                     if isinstance(item, dict) and item.get('text'):
+                                         conversation_text += item['text'] + "\n"
+
+             # Extract concepts (only the first 10k characters are scanned)
+             concepts = extract_concepts(conversation_text[:10000], tool_usage)
+
+             # Build metadata dict
+             metadata = {
+                 'tool_usage': tool_usage,
+                 'concepts': concepts
+             }
+
+             # V3 extraction WITH metadata
+             result = extract_events_v3(messages, metadata=metadata)
+
+             conversations[conv_id] = {
+                 'result': result,
+                 'project': project_name
+             }
+
+             print(f"  āœ… {conv_id[:8]}... ({project_name})")
+
+     return conversations
+
+
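+ # Note: load_conversation_data() walks every project under ~/.claude/projects
+ # and re-runs V3 extraction for each conversation, so it can take a while on
+ # large histories.
+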
+ def main():
+     """Recover and import ALL batch results."""
+
+     print(f"\n{'='*80}")
+     print("COMPLETE BATCH RECOVERY & QDRANT IMPORT")
+     print(f"{'='*80}\n")
+
+     # Initialize clients
+     print("šŸ”§ Initializing clients...")
+     anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+     qdrant_client = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333"))
+     embedding_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
+     print("  āœ… Clients initialized")
+
+     # Collection name
+     collection_name = "v3_all_projects"
+
+     # Retrieve ALL batch results
+     print(f"\nšŸ”„ Retrieving {len(ALL_BATCHES)} batches...")
+     all_narratives = {}
+     grand_total_cost = 0.0
+
+     for batch_id in ALL_BATCHES:
+         narratives, cost = retrieve_batch_narratives(anthropic_client, batch_id)
+
+         # Add narratives (with dedupe)
+         for conv_id, narrative in narratives.items():
+             if conv_id not in all_narratives:
+                 all_narratives[conv_id] = narrative
+             else:
+                 print(f"  āš ļø Duplicate {conv_id[:8]}... (skipping)")
+
+         grand_total_cost += cost
+
+     print(f"\nšŸ“Š Total unique narratives retrieved: {len(all_narratives)}")
+     print(f"šŸ’° Total cost: ${grand_total_cost:.4f}")
+
+     # Load conversation data and create points
+     print(f"\nšŸ”„ Loading ALL conversation data...")
+
+     projects_dir = Path.home() / ".claude/projects"
+     conversations = load_conversation_data(projects_dir)
+
+     print(f"\nāœ… Loaded {len(conversations)} conversations from disk")
+
+     # Match narratives to conversations and create points
+     print(f"\nšŸ”„ Creating points...")
+     all_points = []
+
+     for conv_id, conv_data in conversations.items():
+         if conv_id not in all_narratives:
+             print(f"  āš ļø No narrative for {conv_id[:8]}... ({conv_data['project']})")
+             continue
+
+         narrative = all_narratives[conv_id]
+         result = conv_data['result']
+         project = conv_data['project']
+
+         # Generate embedding
+         embedding = get_embedding(narrative, embedding_model)
+
+         # Create point
+         point = PointStruct(
+             id=conv_id,
+             vector=embedding,
+             payload={
+                 "conversation_id": conv_id,
+                 "project": project,
+                 "narrative": narrative,
+                 "search_index": result['search_index'],
+                 "context_cache": result['context_cache'],
+                 "signature": result['signature'],
+                 "timestamp": time.time(),
+                 "extraction_stats": result['stats']
+             }
+         )
+
+         all_points.append(point)
+         print(f"  āœ… {conv_id[:8]}... ({project})")
+
+     # Import to Qdrant (upsert to avoid duplicates)
+     print(f"\nšŸ”„ Importing {len(all_points)} points to Qdrant...")
+
+     batch_size = 100
+     for i in range(0, len(all_points), batch_size):
+         batch = all_points[i:i+batch_size]
+         qdrant_client.upsert(
+             collection_name=collection_name,
+             points=batch
+         )
+         print(f"  āœ… Imported batch {i//batch_size + 1}: {len(batch)} points")
+
+     # Verify
+     collection_info = qdrant_client.get_collection(collection_name)
+     print("\nāœ… COMPLETE RECOVERY DONE!")
+     print(f"  Collection: {collection_name}")
+     print(f"  Total points: {collection_info.points_count}")
+     print(f"  Total cost: ${grand_total_cost:.4f}")
+
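+     # The breakdown below samples at most the first 100 points via scroll().
+     # For exact per-project totals, Qdrant's count API is an option (sketch
+     # only; 'project' is the payload field written above, the value is one of
+     # the projects named in ALL_BATCHES and purely illustrative):
+     #
+     #     from qdrant_client import models
+     #     qdrant_client.count(
+     #         collection_name=collection_name,
+     #         count_filter=models.Filter(must=[models.FieldCondition(
+     #             key="project", match=models.MatchValue(value="anukruti"))]),
+     #         exact=True,
+     #     )
+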
+     # Show projects breakdown
+     from collections import defaultdict
+     results = qdrant_client.scroll(
+         collection_name=collection_name,
+         limit=100,
+         with_payload=['project'],
+         with_vectors=False
+     )
+
+     projects = defaultdict(int)
+     for point in results[0]:
+         projects[point.payload.get('project', 'unknown')] += 1
+
+     print("\nšŸ“Š Final breakdown by project:")
+     for project, count in sorted(projects.items()):
+         print(f"  • {project}: {count} conversations")
+
+
+ if __name__ == "__main__":
+     main()