claude-self-reflect 7.1.9 → 7.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -78
- package/docs/design/GRADER_PROMPT.md +81 -0
- package/docs/design/batch_ground_truth_generator.py +496 -0
- package/docs/design/batch_import_all_projects.py +477 -0
- package/docs/design/batch_import_v3.py +278 -0
- package/docs/design/conversation-analyzer/SKILL.md +133 -0
- package/docs/design/conversation-analyzer/SKILL_V2.md +218 -0
- package/docs/design/conversation-analyzer/extract_structured.py +186 -0
- package/docs/design/extract_events_v3.py +533 -0
- package/docs/design/import_existing_batch.py +188 -0
- package/docs/design/recover_all_batches.py +297 -0
- package/docs/design/recover_batch_results.py +287 -0
- package/package.json +5 -1
- package/scripts/ralph/backup_and_restore.sh +309 -0
- package/scripts/ralph/install_hooks.sh +244 -0
- package/scripts/ralph/test_with_rollback.sh +195 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Import existing batch results to Qdrant.
|
|
4
|
+
Batch ID: msgbatch_01QGo1y5maCUgqR7WWE1z2aT (27 conversations)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
|
|
15
|
+
load_dotenv()
|
|
16
|
+
|
|
17
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
18
|
+
|
|
19
|
+
import anthropic
|
|
20
|
+
from qdrant_client import QdrantClient
|
|
21
|
+
from qdrant_client.models import PointStruct
|
|
22
|
+
|
|
23
|
+
# Import FastEmbed
|
|
24
|
+
from fastembed import TextEmbedding
|
|
25
|
+
|
|
26
|
+
def get_embedding(text: str, embedding_model) -> list:
    """Return the embedding vector for *text* as a plain Python list.

    The model's ``embed`` method yields one vector per input string; we
    embed a single-element batch and unwrap its only result.
    """
    first_vector = next(iter(embedding_model.embed([text])))
    return first_vector.tolist()
|
|
30
|
+
|
|
31
|
+
def fix_json_response(content: str) -> str:
    """Normalize a Claude text response into parseable JSON.

    Two repairs are applied:
      1. If the response is wrapped in a markdown code fence (```json ... ```
         or a plain ``` ... ``` fence), only the fenced body is kept.
      2. Backtick-quoted field values (``"field": `value` ``), which are not
         valid JSON, are rewritten as double-quoted strings with embedded
         quotes escaped, newlines turned into ``\\n`` escapes, and carriage
         returns dropped.

    Args:
        content: Raw text content of a Claude message.

    Returns:
        The repaired JSON string; the caller still runs ``json.loads`` on it.
    """
    # Extract the body of a markdown code fence, if present.  Guard against
    # an unterminated fence: previously a missing closing ``` made find()
    # return -1, which silently chopped the last character off the payload.
    for fence in ('```json', '```'):
        start = content.find(fence)
        if start != -1:
            start += len(fence)
            end = content.find('```', start)
            if end == -1:
                end = len(content)
            content = content[start:end].strip()
            break

    def _as_json_string(match):
        # Convert `value` to "value".  NOTE: the original used an f-string
        # that reused its own quote character inside the expression, which
        # is a SyntaxError before Python 3.12 (PEP 701); a plain function
        # avoids that and is clearer anyway.
        value = match.group(1)
        value = value.replace('\n', '\\n')
        value = value.replace('\r', '')
        value = value.replace('"', '\\"')
        return ': "%s"' % value

    # Pattern: "field": `value` -> "field": "escaped value"
    content = re.sub(r':\s*`([^`]*)`', _as_json_string, content, flags=re.DOTALL)

    return content
|
|
53
|
+
|
|
54
|
+
def main():
    """Re-import the results of one already-completed Anthropic message batch
    into the ``v3_all_projects`` Qdrant collection.

    Pipeline:
      1. Build a conversation-id/project mapping from the legacy per-
         conversation ``conv_*_local`` Qdrant collections (TIER 1).
      2. Stream the batch's succeeded results from the Anthropic API.
      3. Parse each narrative JSON, embed its search text, and upsert one
         point per conversation.

    Requires ANTHROPIC_API_KEY in the environment and Qdrant on
    localhost:6333.  Per-item failures are collected and reported, not fatal.
    """
    print("=" * 70)
    print("IMPORT EXISTING BATCH RESULTS")
    print("=" * 70)
    print(f"Batch ID: msgbatch_01QGo1y5maCUgqR7WWE1z2aT")
    print(f"Target: v3_all_projects collection")
    print()

    # Initialize clients
    anthropic_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
    qdrant_client = QdrantClient(url='http://localhost:6333')
    embedding_model = TextEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2')

    # Get TIER 1 conversation mapping: one legacy collection per
    # conversation, named conv_<id>_local.
    print("š Loading TIER 1 conversation mapping...")
    collections = qdrant_client.get_collections().collections
    conv_cols = [c for c in collections if c.name.startswith('conv_') and c.name.endswith('_local')]

    conversations = []
    for col in conv_cols:
        # Strip the 'conv_' prefix and '_local' suffix to recover the id;
        # used only as a fallback when the payload lacks conversation_id.
        base_id = col.name[5:-6]
        # One point is enough — conversation-level fields are repeated on
        # every point in these legacy collections.
        results = qdrant_client.scroll(
            collection_name=col.name,
            limit=1,
            with_payload=True
        )
        if results[0]:
            first_payload = results[0][0].payload
            conversation_id = first_payload.get('conversation_id', base_id)
            project = first_payload.get('project_name', 'unknown')
            conversations.append({
                'conversation_id': conversation_id,
                'project': project,
                'collection_name': col.name
            })

    print(f"ā Loaded {len(conversations)} conversation mappings")

    # Retrieve batch results; only succeeded items are kept, errored /
    # expired items are silently dropped here (failures surface later as
    # missing narratives).
    print("\nš„ Retrieving batch results...")
    results = []
    for result in anthropic_client.beta.messages.batches.results('msgbatch_01QGo1y5maCUgqR7WWE1z2aT'):
        if result.result.type == 'succeeded':
            results.append(result)

    print(f"ā Retrieved {len(results)} successful results")

    # Process results
    print("\nš¦ Processing narratives...")
    points_to_add = []
    processed_count = 0
    failed_ids = []

    for result in results:
        custom_id = result.custom_id
        try:
            response_content = result.result.message.content[0].text
            response_content = fix_json_response(response_content)
            narrative_data = json.loads(response_content)

            # Get original conversation data.  Assumes custom_id is of the
            # form '<prefix>_<N>' with N 1-based and matching the order of
            # the `conversations` list built above — TODO confirm this
            # matches how the batch requests were generated.
            conv_idx = int(custom_id.split('_')[1]) - 1
            conv = conversations[conv_idx]

            # Embed the dedicated search_index if the model provided one,
            # else fall back to the first 1000 chars of the narrative.
            search_text = narrative_data.get('search_index', narrative_data['narrative'][:1000])
            embedding = get_embedding(search_text, embedding_model)

            payload = {
                'conversation_id': conv['conversation_id'],
                'project': conv['project'],
                'narrative': narrative_data['narrative'],
                'search_index': narrative_data.get('search_index', ''),
                'timestamp': datetime.now().timestamp(),
                'source': 'tier1_migration',
                'original_collection': conv['collection_name']
            }

            # Optional model-supplied metadata becomes the point's
            # 'signature'; status is fixed to 'migrated' for this path.
            if 'metadata' in narrative_data:
                metadata = narrative_data['metadata']
                payload['signature'] = {
                    'tools_used': metadata.get('tools_used', []),
                    'concepts': metadata.get('concepts', []),
                    'files_modified': metadata.get('files_modified', []),
                    'completion_status': 'migrated'
                }

            point = PointStruct(
                id=conv['conversation_id'],  # Use UUID directly
                vector=embedding,
                payload=payload
            )

            points_to_add.append(point)
            processed_count += 1

            if processed_count % 10 == 0:
                print(f"  Processed {processed_count}/{len(results)} narratives...")

        except Exception as e:
            # Broad catch is deliberate: one malformed narrative must not
            # abort the whole import.  Failures are reported in the summary.
            failed_ids.append((custom_id, str(e)))
            print(f"  ā ļø Error processing {custom_id}: {e}")

    # Add to Qdrant — upsert, so re-running the script is idempotent per id.
    if points_to_add:
        print(f"\nš¤ Adding {len(points_to_add)} points to v3_all_projects...")
        qdrant_client.upsert(
            collection_name='v3_all_projects',
            points=points_to_add
        )
        print(f"ā Added {len(points_to_add)} narratives to Qdrant!")

    # Summary
    print("\n" + "=" * 70)
    print("IMPORT COMPLETE!")
    print("=" * 70)
    print(f"ā Successfully processed: {processed_count}/{len(results)}")
    print(f"ā Failed: {len(failed_ids)}/{len(results)}")

    if failed_ids:
        print("\nFailed IDs:")
        for custom_id, error in failed_ids:
            print(f"  - {custom_id}: {error[:50]}...")

    # Check final collection size
    collection_info = qdrant_client.get_collection('v3_all_projects')
    print(f"\nš v3_all_projects now has {collection_info.points_count} narratives")
    print(f"   (was 54, added {processed_count}, now {collection_info.points_count})")
    print()
    print("šÆ Test with MCP tools (no restart needed):")
    print("   csr_reflect_on_past('OpenGraph procsolve website')")


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Recover ALL batch results from dashboard and complete Qdrant import.
|
|
4
|
+
|
|
5
|
+
Retrieves narratives from all 8 completed batches shown in dashboard.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
import time
|
|
14
|
+
|
|
15
|
+
load_dotenv()
|
|
16
|
+
|
|
17
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
18
|
+
|
|
19
|
+
import anthropic
|
|
20
|
+
from qdrant_client import QdrantClient
|
|
21
|
+
from qdrant_client.models import PointStruct
|
|
22
|
+
|
|
23
|
+
# Import FastEmbed
|
|
24
|
+
try:
|
|
25
|
+
from fastembed import TextEmbedding
|
|
26
|
+
FASTEMBED_AVAILABLE = True
|
|
27
|
+
except ImportError:
|
|
28
|
+
FASTEMBED_AVAILABLE = False
|
|
29
|
+
print("ā ļø FastEmbed not available")
|
|
30
|
+
sys.exit(1)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ALL Batch IDs from dashboard (complete list)
|
|
34
|
+
# Batch ids to recover.  Request counts and probable source projects were
# read off the Anthropic console dashboard; the per-line comments are
# informed guesses, not ground truth.
ALL_BATCHES = [
    'msgbatch_012GH6kVL74ihT3NFFHbrYHZ',  # 1 request (mystery - thegatehouse?)
    'msgbatch_01DMoYp2egP7Wz2Xa8Lv7cNc',  # 1 request (address-book-fix run 2)
    'msgbatch_01Prq1G5CbfjjDdyGezKUGzH',  # 5 requests (anukruti run 2)
    'msgbatch_01ATPhpjCw1gqPisHUgoPnab',  # 1 request (address-book-fix)
    'msgbatch_016g8zHtH7or7DtJu3ZzAczS',  # 5 requests (anukruti)
    'msgbatch_01QCwhFw9DYDJ8uPjYsHg8Xu',  # 5 requests (buyindian)
    'msgbatch_01WVbb5X2xYwuzzgEdqVicZJ',  # 2 requests (procsolve-website or cc-enhance)
    'msgbatch_01EemyvChmnShYAuJix7m1As',  # 36 requests (claude-self-reflect)
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_embedding(text: str, embedding_model) -> list:
    """Embed *text* with the given FastEmbed model and return a list.

    ``embed`` accepts a batch; a one-element batch is passed and the single
    resulting vector is converted from its array type to a Python list.
    """
    batch_result = embedding_model.embed([text])
    vector = list(batch_result)[0]
    return vector.tolist()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def retrieve_batch_narratives(client: anthropic.Anthropic, batch_id: str):
    """Fetch every succeeded narrative from one completed message batch.

    Streams the batch's result items, concatenates the text blocks of each
    successful message, and tallies token usage priced at $3/MTok input and
    $15/MTok output.

    Args:
        client: Authenticated Anthropic API client.
        batch_id: Id of a completed message batch.

    Returns:
        ``(narratives, total_cost)`` where *narratives* maps custom_id to
        narrative text.  Any retrieval failure yields ``({}, 0.0)``.
    """

    print(f"\nš Retrieving batch {batch_id}...")

    try:
        # Get batch results (a streaming iterator of per-request items)
        result_items = client.messages.batches.results(batch_id)

        narratives = {}
        spent = 0.0
        tokens_in = 0
        tokens_out = 0

        for item in result_items:
            conv_id = item.custom_id

            if item.result.type == "succeeded":
                message = item.result.message

                # Concatenate every text block into one narrative string.
                narratives[conv_id] = "".join(
                    block.text for block in message.content if hasattr(block, 'text')
                )

                # Track usage and running cost for this batch.
                usage = message.usage
                tokens_in += usage.input_tokens
                tokens_out += usage.output_tokens
                spent += (usage.input_tokens * 3 + usage.output_tokens * 15) / 1_000_000
            else:
                print(f" ā Error for {conv_id}: {item.result.error}")

        print(f" ā Retrieved {len(narratives)} narratives")
        print(f" š Tokens: {tokens_in} input, {tokens_out} output")
        print(f" š° Cost: ${spent:.4f}")

        return narratives, spent

    except Exception as e:
        # Best-effort: a batch that cannot be fetched contributes nothing
        # rather than aborting the whole recovery run.
        print(f" ā Failed to retrieve batch: {e}")
        return {}, 0.0
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def load_conversation_data(projects_dir: Path):
    """Load V3 extraction results and metadata for ALL projects.

    Walks every project directory under *projects_dir*, re-runs local
    metadata extraction (tool usage + concepts) on each conversation JSONL
    transcript, and performs the V3 event extraction, so the caller can pair
    each conversation with its recovered narrative.

    Args:
        projects_dir: Root directory holding per-project folders of
            ``*.jsonl`` conversation transcripts (``~/.claude/projects``).

    Returns:
        dict mapping conversation id (the JSONL file stem) to
        ``{'result': <extract_events_v3 output>, 'project': <name>}``.
    """

    conversations = {}

    # Import metadata extraction functions.  The module's file name contains
    # hyphens, so it cannot be imported with a normal import statement —
    # load it from its path via importlib instead.
    import importlib.util
    delta_metadata_path = Path(__file__).parent.parent.parent / "src" / "runtime" / "delta-metadata-update.py"
    spec = importlib.util.spec_from_file_location("delta_metadata_update", delta_metadata_path)
    delta_metadata_update = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(delta_metadata_update)
    extract_tool_usage_from_jsonl = delta_metadata_update.extract_tool_usage_from_jsonl
    extract_concepts = delta_metadata_update.extract_concepts

    from docs.design.extract_events_v3 import extract_events_v3

    # Scan ALL project directories (hidden dirs and non-dirs are skipped).
    for project_dir in projects_dir.iterdir():
        if not project_dir.is_dir() or project_dir.name.startswith('.'):
            continue

        jsonl_files = list(project_dir.glob("*.jsonl"))
        if not jsonl_files:
            continue

        # Extract project name: directory names appear to encode the full
        # path with '-projects-' as a separator; keep the trailing part.
        parts = project_dir.name.split('-projects-')
        project_name = parts[-1] if len(parts) > 1 else project_dir.name

        print(f"\nš Loading {project_name}...")

        for jsonl_file in jsonl_files:
            conv_id = jsonl_file.stem

            # Extract metadata FIRST (tool usage feeds concept extraction).
            tool_usage = extract_tool_usage_from_jsonl(str(jsonl_file))

            # Read messages for V3 extraction, flattening each message's
            # content (plain string or list of content blocks) into one
            # text blob for concept extraction.
            messages = []
            conversation_text = ""
            with open(jsonl_file) as f:
                for line in f:
                    if line.strip():
                        msg = json.loads(line)
                        messages.append(msg)

                        if 'message' in msg and msg['message']:
                            content = msg['message'].get('content', '')
                            if isinstance(content, str):
                                conversation_text += content + "\n"
                            elif isinstance(content, list):
                                for item in content:
                                    if isinstance(item, dict) and item.get('text'):
                                        conversation_text += item['text'] + "\n"

            # Extract concepts — only the first 10k chars are scanned.
            concepts = extract_concepts(conversation_text[:10000], tool_usage)

            # Build metadata dict
            metadata = {
                'tool_usage': tool_usage,
                'concepts': concepts
            }

            # V3 extraction WITH metadata
            result = extract_events_v3(messages, metadata=metadata)

            conversations[conv_id] = {
                'result': result,
                'project': project_name
            }

            print(f" ā {conv_id[:8]}... ({project_name})")

    return conversations
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def main():
    """Recover and import ALL batch results.

    Pipeline:
      1. Retrieve narratives from every batch id in ``ALL_BATCHES``,
         deduplicating by custom_id (first batch seen wins).
      2. Re-run local extraction over all on-disk conversations.
      3. Embed each matched narrative and upsert one point per conversation
         into the ``v3_all_projects`` Qdrant collection.

    Requires ANTHROPIC_API_KEY (and optionally QDRANT_URL) in the
    environment.
    """

    print(f"\n{'='*80}")
    print(f"COMPLETE BATCH RECOVERY & QDRANT IMPORT")
    print(f"{'='*80}\n")

    # Initialize clients
    print("š§ Initializing clients...")
    anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    qdrant_client = QdrantClient(url=os.getenv("QDRANT_URL", "http://localhost:6333"))
    embedding_model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
    print(" ā Clients initialized")

    # Collection name
    collection_name = "v3_all_projects"

    # Retrieve ALL batch results
    print(f"\nš Retrieving {len(ALL_BATCHES)} batches...")
    all_narratives = {}
    grand_total_cost = 0.0

    for batch_id in ALL_BATCHES:
        narratives, cost = retrieve_batch_narratives(anthropic_client, batch_id)

        # Add narratives (with dedupe) — a custom_id seen in an earlier
        # batch wins; later duplicates are skipped.
        for conv_id, narrative in narratives.items():
            if conv_id not in all_narratives:
                all_narratives[conv_id] = narrative
            else:
                print(f" ā ļø Duplicate {conv_id[:8]}... (skipping)")

        grand_total_cost += cost

    print(f"\nš Total unique narratives retrieved: {len(all_narratives)}")
    print(f"š° Total cost: ${grand_total_cost:.4f}")

    # Load conversation data and create points
    print(f"\nš Loading ALL conversation data...")

    projects_dir = Path.home() / ".claude/projects"
    conversations = load_conversation_data(projects_dir)

    print(f"\nā Loaded {len(conversations)} conversations from disk")

    # Match narratives to conversations and create points.  Conversations
    # with no recovered narrative are reported and skipped.
    print(f"\nš Creating points...")
    all_points = []

    for conv_id, conv_data in conversations.items():
        if conv_id not in all_narratives:
            print(f" ā ļø No narrative for {conv_id[:8]}... ({conv_data['project']})")
            continue

        narrative = all_narratives[conv_id]
        result = conv_data['result']
        project = conv_data['project']

        # Generate embedding from the full narrative text.
        embedding = get_embedding(narrative, embedding_model)

        # Create point — the point id is the conversation id, so re-running
        # this script upserts rather than duplicates.
        point = PointStruct(
            id=conv_id,
            vector=embedding,
            payload={
                "conversation_id": conv_id,
                "project": project,
                "narrative": narrative,
                "search_index": result['search_index'],
                "context_cache": result['context_cache'],
                "signature": result['signature'],
                "timestamp": time.time(),
                "extraction_stats": result['stats']
            }
        )

        all_points.append(point)
        print(f" ā {conv_id[:8]}... ({project})")

    # Import to Qdrant (upsert to avoid duplicates), 100 points at a time.
    print(f"\nš Importing {len(all_points)} points to Qdrant...")

    batch_size = 100
    for i in range(0, len(all_points), batch_size):
        batch = all_points[i:i+batch_size]
        qdrant_client.upsert(
            collection_name=collection_name,
            points=batch
        )
        print(f" ā Imported batch {i//batch_size + 1}: {len(batch)} points")

    # Verify
    collection_info = qdrant_client.get_collection(collection_name)
    print(f"\nā COMPLETE RECOVERY DONE!")
    print(f" Collection: {collection_name}")
    print(f" Total points: {collection_info.points_count}")
    print(f" Total cost: ${grand_total_cost:.4f}")

    # Show projects breakdown.  NOTE(review): scroll with limit=100 samples
    # only the first page, so the breakdown undercounts once the collection
    # exceeds 100 points.
    from collections import defaultdict
    results = qdrant_client.scroll(
        collection_name=collection_name,
        limit=100,
        with_payload=['project'],
        with_vectors=False
    )

    projects = defaultdict(int)
    for point in results[0]:
        projects[point.payload.get('project', 'unknown')] += 1

    print(f"\nš Final breakdown by project:")
    for project, count in sorted(projects.items()):
        print(f" ⢠{project}: {count} conversations")


if __name__ == "__main__":
    main()
|