claude-self-reflect 6.0.5 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +34 -0
- package/Dockerfile.batch-monitor +36 -0
- package/Dockerfile.batch-watcher +38 -0
- package/README.md +130 -29
- package/docker-compose.yaml +105 -15
- package/installer/setup-wizard-docker.js +108 -2
- package/package.json +1 -1
- package/src/runtime/batch_monitor.py +300 -0
- package/src/runtime/batch_watcher.py +455 -0
- package/src/runtime/config.py +61 -0
- package/src/runtime/qdrant_connection.py +73 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Batch Monitor Service - Monitors Anthropic Batch API jobs and triggers downstream processes.
|
|
4
|
+
|
|
5
|
+
This service:
|
|
6
|
+
1. Monitors active batch jobs (narratives and evaluations)
|
|
7
|
+
2. Retrieves completed results
|
|
8
|
+
3. Triggers ground truth generation after narratives complete
|
|
9
|
+
4. Manages batch lifecycle
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import fcntl
|
|
18
|
+
import tempfile
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Dict, List, Optional
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
from dotenv import load_dotenv
|
|
23
|
+
|
|
24
|
+
# Add project root to path
|
|
25
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
26
|
+
|
|
27
|
+
load_dotenv()
|
|
28
|
+
|
|
29
|
+
import anthropic
|
|
30
|
+
from qdrant_client import QdrantClient
|
|
31
|
+
|
|
32
|
+
# Import centralized config and utilities
|
|
33
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
|
34
|
+
from config import (
|
|
35
|
+
CSR_BATCH_STATE_DIR,
|
|
36
|
+
QDRANT_URL,
|
|
37
|
+
QDRANT_API_KEY
|
|
38
|
+
)
|
|
39
|
+
from qdrant_connection import connect_to_qdrant_with_retry
|
|
40
|
+
|
|
41
|
+
# Configure root logging once at import time; all runtime services share
# this format so interleaved container logs stay readable.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used throughout this service.
logger = logging.getLogger(__name__)
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class BatchMonitor:
    """Monitor and manage Anthropic Batch API jobs.

    Tracks two batch families (narrative generation and evaluation) in JSON
    state files under ``CSR_BATCH_STATE_DIR``, polls the Anthropic Batch API
    for completion, and chains an evaluation batch off each finished
    narrative batch.
    """

    def __init__(self):
        """Initialize the API client, Qdrant connection, and state file paths.

        Raises:
            ValueError: If ANTHROPIC_API_KEY is not set in the environment.
        """
        # Validate API key is configured before constructing the client.
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError(
                "ANTHROPIC_API_KEY environment variable required for batch automation. "
                "Get your key at: https://console.anthropic.com/settings/keys"
            )
        self.client = anthropic.Anthropic(api_key=api_key)

        # Initialize Qdrant with retry logic (URL/key come from config.py).
        self.qdrant = connect_to_qdrant_with_retry(
            url=QDRANT_URL,
            api_key=QDRANT_API_KEY if QDRANT_API_KEY else None
        )

        # Use centralized config for the state directory.
        self.state_dir = CSR_BATCH_STATE_DIR

        # One state file per batch family.
        self.narrative_state = self.state_dir / "narrative_batches.json"
        self.eval_state = self.state_dir / "eval_batches.json"

    @staticmethod
    def _utcnow_iso() -> str:
        """Return the current UTC time as an ISO-8601 string.

        Uses timezone-aware ``datetime.now(timezone.utc)`` because
        ``datetime.utcnow()`` is deprecated since Python 3.12 and yields a
        naive datetime.
        """
        from datetime import timezone
        return datetime.now(timezone.utc).isoformat()

    def load_batch_state(self, state_file: Path) -> Dict:
        """Load batch state from *state_file* under a shared (read) lock.

        Returns:
            The parsed state dict, or an empty skeleton
            ``{"active": [], "completed": [], "failed": []}`` when the file
            does not exist yet.
        """
        if not state_file.exists():
            return {"active": [], "completed": [], "failed": []}

        with open(state_file, 'r', encoding='utf-8') as f:
            # Shared lock: multiple readers may hold it concurrently.
            fcntl.flock(f.fileno(), fcntl.LOCK_SH)
            try:
                return json.load(f)
            finally:
                fcntl.flock(f.fileno(), fcntl.LOCK_UN)

    def save_batch_state(self, state_file: Path, state: Dict):
        """Atomically persist *state* to *state_file*.

        Writes to a temp file in the same directory (flushed and fsync'd),
        then swaps it in with ``os.replace`` so readers never observe a
        partially written file.
        """
        state_file.parent.mkdir(parents=True, exist_ok=True)

        with tempfile.NamedTemporaryFile('w', delete=False, dir=state_file.parent, encoding='utf-8') as tmp:
            fcntl.flock(tmp.fileno(), fcntl.LOCK_EX)
            try:
                json.dump(state, tmp, indent=2)
                tmp.flush()
                os.fsync(tmp.fileno())  # ensure bytes hit disk before the rename
            finally:
                fcntl.flock(tmp.fileno(), fcntl.LOCK_UN)
            temp_name = tmp.name

        # Atomic replace: concurrent readers see either the old or new file.
        os.replace(temp_name, state_file)

    def _register_batch(self, state_file: Path, batch_id: str, batch_type: str, metadata: Dict):
        """Append a newly submitted batch to *state_file*'s active list.

        NOTE(review): this load-modify-save sequence is not atomic across
        processes; concurrent registrations could drop an entry. Confirm
        that only one process registers batches per state file.
        """
        state = self.load_batch_state(state_file)
        state["active"].append({
            "batch_id": batch_id,
            "type": batch_type,
            "submitted_at": self._utcnow_iso(),
            "metadata": metadata
        })
        self.save_batch_state(state_file, state)

    def register_narrative_batch(self, batch_id: str, metadata: Dict):
        """Register a new narrative generation batch."""
        self._register_batch(self.narrative_state, batch_id, "narrative", metadata)
        logger.info(f"Registered narrative batch: {batch_id}")

    def register_eval_batch(self, batch_id: str, metadata: Dict):
        """Register a new evaluation batch."""
        self._register_batch(self.eval_state, batch_id, "evaluation", metadata)
        logger.info(f"Registered evaluation batch: {batch_id}")

    def check_active_batches(self):
        """Check status of all active batches in both families."""
        # Check narrative batches
        narrative_state = self.load_batch_state(self.narrative_state)
        self._process_batches(narrative_state, self.narrative_state, "narrative")

        # Check evaluation batches
        eval_state = self.load_batch_state(self.eval_state)
        self._process_batches(eval_state, self.eval_state, "evaluation")

    def _process_batches(self, state: Dict, state_file: Path, batch_type: str):
        """Poll every active batch of one family and update its lifecycle.

        Batches whose processing has ended move to ``completed`` (any
        successes) or ``failed`` (zero successes); batches that error during
        the status check remain active so they are retried next cycle.
        """
        still_active = []

        for batch_info in state["active"]:
            batch_id = batch_info["batch_id"]

            try:
                # Check batch status via the Anthropic Batch API.
                batch = self.client.messages.batches.retrieve(batch_id)

                if batch.processing_status == "ended":
                    if batch.request_counts.succeeded > 0:
                        logger.info(f"✅ {batch_type.title()} batch completed: {batch_id} ({batch.request_counts.succeeded} succeeded)")

                        # Kick off the type-specific completion handler.
                        if batch_type == "narrative":
                            self._handle_completed_narrative_batch(batch_id, batch_info)
                        else:
                            self._handle_completed_eval_batch(batch_id, batch_info)

                        # Move to completed with final counts attached.
                        batch_info["completed_at"] = self._utcnow_iso()
                        batch_info["succeeded"] = batch.request_counts.succeeded
                        batch_info["failed"] = batch.request_counts.errored
                        state["completed"].append(batch_info)
                    else:
                        # Ended with zero successes: treat as failed.
                        logger.error(f"❌ {batch_type.title()} batch failed: {batch_id}")
                        batch_info["failed_at"] = self._utcnow_iso()
                        state["failed"].append(batch_info)
                else:
                    # Still processing — keep in the active list.
                    logger.info(f"⏳ {batch_type.title()} batch in progress: {batch_id} ({batch.request_counts.processing} processing)")
                    still_active.append(batch_info)

            except Exception as e:
                logger.error(f"Error checking batch {batch_id}: {e}")
                still_active.append(batch_info)  # Keep in active list for retry

        # Persist the updated lifecycle state.
        state["active"] = still_active
        self.save_batch_state(state_file, state)

    def _handle_completed_narrative_batch(self, batch_id: str, batch_info: Dict):
        """Handle a completed narrative batch by chaining an evaluation batch.

        Narrative results themselves are imported elsewhere
        (batch_import_all_projects.py); this method fetches the affected
        conversations from Qdrant and submits a follow-up evaluation batch.
        Errors are logged but never propagated, so one bad batch cannot
        stall the monitoring loop.
        """
        logger.info(f"📝 Processing completed narrative batch: {batch_id}")

        try:
            conversation_ids = batch_info.get("metadata", {}).get("conversation_ids", [])
            if not conversation_ids:
                return

            logger.info(f"🎯 Triggering evaluation generation for {len(conversation_ids)} conversations")

            # Lazy import: the generator lives outside the runtime package.
            sys.path.insert(0, str(Path(__file__).parent.parent.parent / "docs" / "design"))
            from batch_ground_truth_generator import BatchGroundTruthGenerator

            generator = BatchGroundTruthGenerator()

            # Fetch these specific narratives via the Qdrant scroll API.
            import requests
            url = f"{generator.qdrant_url}/collections/{generator.collection_name}/points/scroll"
            payload = {
                'limit': len(conversation_ids),
                'with_payload': True,
                'with_vector': False,
                'filter': {
                    'must': [
                        {
                            'key': 'conversation_id',
                            'match': {
                                'any': conversation_ids
                            }
                        }
                    ]
                }
            }

            response = requests.post(url, json=payload, timeout=10)
            response.raise_for_status()
            data = response.json()
            points = data.get('result', {}).get('points', [])

            if points:
                # Create and submit the evaluation batch for these narratives.
                requests_file = generator.create_batch_requests(
                    points,
                    output_file=f'auto_eval_{batch_id[:8]}.jsonl'
                )

                eval_batch_id = generator.submit_batch(requests_file)

                # Track the new batch so later monitoring cycles pick it up.
                self.register_eval_batch(eval_batch_id, {
                    "triggered_by": batch_id,
                    "conversation_ids": conversation_ids,
                    "auto_generated": True
                })

                logger.info(f"✅ Evaluation batch submitted: {eval_batch_id}")

        except Exception as e:
            logger.error(f"Error handling narrative batch completion: {e}", exc_info=True)

    def _handle_completed_eval_batch(self, batch_id: str, batch_info: Dict):
        """Log completion of an evaluation batch.

        Results are already pushed to Qdrant by batch_ground_truth_generator.py,
        so there is nothing left to do beyond logging.
        """
        logger.info(f"📊 Evaluation batch completed: {batch_id}")
        logger.info(f"✅ {batch_info.get('metadata', {}).get('conversation_count', 'N/A')} evaluations stored in Qdrant")

    def run_once(self):
        """Run one monitoring cycle."""
        logger.info("🔍 Checking active batches...")
        self.check_active_batches()

    def run_forever(self, interval: int = 60):
        """Run the monitoring loop until interrupted.

        Args:
            interval: Seconds to sleep between cycles (also after an error).
        """
        logger.info(f"🚀 Batch monitor started (checking every {interval}s)")

        while True:
            try:
                self.run_once()
                time.sleep(interval)
            except KeyboardInterrupt:
                logger.info("👋 Batch monitor stopped")
                break
            except Exception as e:
                # Log and keep looping: the monitor must survive transient errors.
                logger.error(f"Error in monitoring loop: {e}", exc_info=True)
                time.sleep(interval)
279
|
+
|
|
280
|
+
|
|
281
|
+
def main():
    """CLI entry point: parse arguments and start the batch monitor."""
    import argparse

    arg_parser = argparse.ArgumentParser(description="Batch Monitor Service")
    arg_parser.add_argument("--once", action="store_true", help="Run once and exit")
    arg_parser.add_argument("--interval", type=int, default=60, help="Check interval in seconds")
    opts = arg_parser.parse_args()

    service = BatchMonitor()

    # --once performs a single polling pass; otherwise loop indefinitely.
    if opts.once:
        service.run_once()
        return
    service.run_forever(interval=opts.interval)


if __name__ == "__main__":
    main()
|