claude-self-reflect 6.0.5 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +34 -0
- package/Dockerfile.batch-monitor +36 -0
- package/Dockerfile.batch-watcher +38 -0
- package/README.md +130 -29
- package/docker-compose.yaml +105 -15
- package/installer/setup-wizard-docker.js +108 -2
- package/package.json +1 -1
- package/src/runtime/batch_monitor.py +300 -0
- package/src/runtime/batch_watcher.py +455 -0
- package/src/runtime/config.py +61 -0
- package/src/runtime/qdrant_connection.py +73 -0
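
The new batch runtime is tuned almost entirely through environment variables, which are read once at import time by `package/src/runtime/config.py` (shown below) and presumably documented in the new `package/.env.example`. As a hedged illustration only — the variable names come from config.py, the values here are arbitrary — overriding the two batch triggers might look like this:

```python
# Hypothetical override of the batch triggers consumed by src/runtime/config.py
# (7.0.0 defaults: 10 queued files OR 30 minutes since the last batch).
import os

os.environ["BATCH_SIZE_TRIGGER"] = "5"            # batch after 5 queued conversations
os.environ["BATCH_TIME_TRIGGER_MINUTES"] = "15"   # ...or 15 minutes after the last batch

# config.py evaluates int(os.getenv(...)) at import time, so these must be set
# before the runtime modules (batch_watcher, batch_monitor) are imported.
```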

package/src/runtime/batch_watcher.py
@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+"""
+Batch-Aware Watcher Service - Enhanced version of streaming-watcher.py
+Integrates with Anthropic Batch API for narrative generation.
+
+This service:
+1. Watches for new conversation JSONL files
+2. Accumulates conversations in a batch queue
+3. Triggers batch narrative generation every 10 files OR every 30 minutes
+4. Registers batches with batch_monitor for automated evaluation
+5. Maintains hot/warm/cold priority system for responsiveness
+"""
+
+import os
+import sys
+import time
+import json
+import logging
+import fcntl
+from pathlib import Path
+from typing import Dict, List, Set
+from datetime import datetime, timedelta
+from dataclasses import dataclass, field
+from collections import deque
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+# Import existing infrastructure
+sys.path.insert(0, str(Path(__file__).parent))
+from unified_state_manager import UnifiedStateManager
+from utils import normalize_project_name
+
+# Import batch monitor and centralized config
+sys.path.insert(0, str(Path(__file__).parent))
+from batch_monitor import BatchMonitor
+from config import (
+    CSR_CONFIG_DIR,
+    CSR_BATCH_QUEUE_DIR,
+    CLAUDE_PROJECTS_DIR,
+    BATCH_SIZE_TRIGGER,
+    BATCH_TIME_TRIGGER_MINUTES,
+    HOT_WINDOW_MINUTES,
+    WARM_WINDOW_HOURS,
+    MAX_COLD_FILES,
+    HOT_CHECK_INTERVAL_S,
+    NORMAL_CHECK_INTERVAL_S,
+    SUBPROCESS_TIMEOUT_SECONDS
+)
+
+# Path to batch scripts
+BATCH_IMPORT_SCRIPT = Path(__file__).parent.parent.parent / "docs" / "design" / "batch_import_all_projects.py"
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BatchWatcherConfig:
+    """Configuration for batch-aware watcher."""
+
+    # Directories (from centralized config)
+    logs_dir: Path = field(default_factory=lambda: CLAUDE_PROJECTS_DIR)
+    queue_dir: Path = field(default_factory=lambda: CSR_BATCH_QUEUE_DIR)
+
+    # Batch triggers (from centralized config)
+    batch_size_trigger: int = field(default_factory=lambda: BATCH_SIZE_TRIGGER)
+    batch_time_trigger_minutes: int = field(default_factory=lambda: BATCH_TIME_TRIGGER_MINUTES)
+
+    # Priority thresholds (from centralized config)
+    hot_window_minutes: int = field(default_factory=lambda: HOT_WINDOW_MINUTES)
+    warm_window_hours: int = field(default_factory=lambda: WARM_WINDOW_HOURS)
+    max_cold_files: int = field(default_factory=lambda: MAX_COLD_FILES)
+
+    # Timing (from centralized config)
+    hot_check_interval_s: int = field(default_factory=lambda: HOT_CHECK_INTERVAL_S)
+    normal_check_interval_s: int = field(default_factory=lambda: NORMAL_CHECK_INTERVAL_S)
+
+    # State files (from centralized config)
+    state_file: Path = field(default_factory=lambda: CSR_CONFIG_DIR / "batch-watcher.json")
+    queue_state_file: Path = field(default_factory=lambda: CSR_BATCH_QUEUE_DIR / "queue-state.json")
+
+
+class BatchQueue:
+    """Manages the queue of conversations waiting for batch processing."""
+
+    def __init__(self, config: BatchWatcherConfig):
+        self.config = config
+        self.config.queue_dir.mkdir(parents=True, exist_ok=True)
+        self.queue_state_file = config.queue_state_file
+
+        # Load existing queue and last batch time
+        queue_state = self._load_queue_state()
+        self.queue = queue_state.get('queued_files', [])
+
+        # Restore last_batch_time from state, or use current time if not available
+        last_batch_str = queue_state.get('last_batch_time')
+        if last_batch_str:
+            try:
+                self.last_batch_time = datetime.fromisoformat(last_batch_str)
+            except (ValueError, TypeError):
+                self.last_batch_time = datetime.now()
+        else:
+            self.last_batch_time = datetime.now()
+
+    def _load_queue_state(self) -> Dict:
+        """Load queue state from file with file locking."""
+        if not self.queue_state_file.exists():
+            return {"queued_files": [], "last_batch_time": None}
+
+        try:
+            with open(self.queue_state_file, 'r', encoding='utf-8') as f:
+                # Acquire shared lock for reading
+                fcntl.flock(f.fileno(), fcntl.LOCK_SH)
+                try:
+                    data = json.load(f)
+                    return data
+                finally:
+                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+        except FileNotFoundError:
+            return {"queued_files": [], "last_batch_time": None}
+        except Exception as e:
+            logger.error(f"Error loading queue: {e}")
+            return {"queued_files": [], "last_batch_time": None}
+
+    def _save_queue(self):
+        """Save queue state to file with exclusive file locking."""
+        self.queue_state_file.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            # Use atomic write: write to temp file, then rename
+            temp_file = self.queue_state_file.with_suffix('.tmp')
+
+            with open(temp_file, 'w', encoding='utf-8') as f:
+                # Acquire exclusive lock for writing
+                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+                try:
+                    json.dump({
+                        "queued_files": self.queue,
+                        "last_batch_time": self.last_batch_time.isoformat(),
+                        "queue_size": len(self.queue)
+                    }, f, indent=2)
+                    f.flush()
+                    os.fsync(f.fileno())
+                finally:
+                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+
+            # Atomic rename (POSIX guarantees atomicity)
+            temp_file.replace(self.queue_state_file)
+
+        except Exception as e:
+            logger.error(f"Error saving queue: {e}")
+            # Clean up temp file if it exists
+            if temp_file.exists():
+                temp_file.unlink()
+
+    def add(self, file_path: str, project: str):
+        """Add a conversation to the queue."""
+        entry = {
+            "file_path": file_path,
+            "project": project,
+            "queued_at": datetime.now().isoformat()
+        }
+
+        # Avoid duplicates
+        if not any(q["file_path"] == file_path for q in self.queue):
+            self.queue.append(entry)
+            self._save_queue()
+            logger.info(f"📝 Queued for batch: {file_path} (queue size: {len(self.queue)})")
+
+    def should_trigger_batch(self) -> bool:
+        """Check if batch should be triggered."""
+        # Size trigger
+        if len(self.queue) >= self.config.batch_size_trigger:
+            logger.info(f"🎯 Batch size trigger: {len(self.queue)} >= {self.config.batch_size_trigger}")
+            return True
+
+        # Time trigger (and queue not empty)
+        if len(self.queue) > 0:
+            time_since_last = datetime.now() - self.last_batch_time
+            if time_since_last > timedelta(minutes=self.config.batch_time_trigger_minutes):
+                logger.info(f"⏰ Batch time trigger: {time_since_last.total_seconds()/60:.1f} min >= {self.config.batch_time_trigger_minutes} min")
+                return True
+
+        return False
+
+    def get_batch(self) -> List[Dict]:
+        """Get all queued files and clear the queue."""
+        batch = self.queue.copy()
+        self.queue = []
+        self.last_batch_time = datetime.now()
+        self._save_queue()
+        return batch
+
+    def size(self) -> int:
+        """Get current queue size."""
+        return len(self.queue)
+
+
+class BatchWatcher:
+    """Enhanced watcher that integrates with Batch API."""
+
+    def __init__(self, config: BatchWatcherConfig):
+        self.config = config
+        self.state_manager = UnifiedStateManager(str(config.state_file))
+        self.batch_queue = BatchQueue(config)
+        self.batch_monitor = BatchMonitor()
+
+        logger.info(f"🚀 Batch Watcher initialized")
+        logger.info(f"   Watching: {config.logs_dir}")
+        logger.info(f"   Batch triggers: {config.batch_size_trigger} files OR {config.batch_time_trigger_minutes} min")
+        logger.info(f"   Queue state: {config.queue_state_file}")
+
+    def _discover_files(self) -> List[tuple]:
+        """Discover all JSONL conversation files with priority."""
+        files = []
+        now = datetime.now()
+
+        for project_dir in self.config.logs_dir.iterdir():
+            if not project_dir.is_dir():
+                continue
+
+            project_name = project_dir.name
+
+            # Find JSONL files in project
+            jsonl_files = list(project_dir.glob("*.jsonl"))
+
+            for file_path in jsonl_files:
+                try:
+                    stat = file_path.stat()
+                    mtime = datetime.fromtimestamp(stat.st_mtime)
+                    age = now - mtime
+
+                    # Calculate priority
+                    if age < timedelta(minutes=self.config.hot_window_minutes):
+                        priority = "HOT"
+                    elif age < timedelta(hours=self.config.warm_window_hours):
+                        priority = "WARM"
+                    else:
+                        priority = "COLD"
+
+                    files.append((str(file_path), project_name, priority, age))
+
+                except Exception as e:
+                    logger.warning(f"Error checking {file_path}: {e}")
+
+        # Sort by priority (HOT first, then WARM, then COLD by age)
+        priority_order = {"HOT": 0, "WARM": 1, "COLD": 2}
+        files.sort(key=lambda x: (priority_order[x[2]], x[3]))
+
+        return files
+
+    def _process_file(self, file_path: str, project: str, priority: str) -> bool:
+        """Process a single file - add to queue or trigger batch."""
+        # Check if already processed
+        imported_files = self.state_manager.get_imported_files()
+        normalized_path = self.state_manager.normalize_path(file_path)
+
+        if normalized_path in imported_files:
+            return False
+
+        # HOT files: Add to queue immediately
+        if priority == "HOT":
+            logger.info(f"🔥 HOT file detected: {Path(file_path).name}")
+            self.batch_queue.add(file_path, project)
+
+            # Mark as queued (not processed yet, but in queue)
+            # Don't mark as processed until batch completes
+            return True
+
+        # WARM/COLD files: Add to queue
+        else:
+            self.batch_queue.add(file_path, project)
+            return True
+
+    def _trigger_batch(self):
+        """Trigger batch narrative generation."""
+        batch_files = self.batch_queue.get_batch()
+
+        if not batch_files:
+            return
+
+        logger.info(f"\n{'='*60}")
+        logger.info(f"🚀 TRIGGERING BATCH NARRATIVE GENERATION")
+        logger.info(f"   Files: {len(batch_files)}")
+        logger.info(f"{'='*60}\n")
+
+        try:
+            import subprocess
+
+            # Run batch import script with configurable timeout
+            result = subprocess.run(
+                [sys.executable, str(BATCH_IMPORT_SCRIPT)],
+                capture_output=True,
+                text=True,
+                timeout=SUBPROCESS_TIMEOUT_SECONDS,  # Default: 1800s (30 min)
+                check=True
+            )
+
+            logger.info("\n✅ Batch triggered successfully")
+            logger.info("   Output:\n%s", result.stdout)
+
+            # Mark files as processed
+            for entry in batch_files:
+                self.state_manager.add_imported_file(
+                    file_path=entry["file_path"],
+                    chunks=0,  # Will be updated by batch import
+                    metadata={"batch_queued": True}
+                )
+
+        except subprocess.CalledProcessError as cpe:
+            logger.error("❌ Batch import failed (rc=%s)", cpe.returncode)
+            logger.error("   Stdout: %s", cpe.stdout)
+            logger.error("   Stderr: %s", cpe.stderr)
+
+            # Re-queue failed files
+            for entry in batch_files:
+                self.batch_queue.add(entry["file_path"], entry["project"])
+
+        except Exception as e:
+            logger.error("❌ Error triggering batch: %s", e, exc_info=True)
+
+            # Re-queue failed files
+            for entry in batch_files:
+                self.batch_queue.add(entry["file_path"], entry["project"])
+
+    def _hot_cycle(self):
+        """Fast cycle to check for HOT files only."""
+        files = self._discover_files()
+
+        hot_files = [f for f in files if f[2] == "HOT"]
+
+        if hot_files:
+            logger.info(f"🔥 {len(hot_files)} HOT files detected")
+
+            for file_path, project, priority, age in hot_files:
+                self._process_file(file_path, project, priority)
+
+            # Check if batch should trigger
+            if self.batch_queue.should_trigger_batch():
+                self._trigger_batch()
+
+    def _normal_cycle(self):
+        """Normal cycle to process all files."""
+        files = self._discover_files()
+
+        logger.info(f"\n📊 Scan results:")
+        hot = sum(1 for f in files if f[2] == "HOT")
+        warm = sum(1 for f in files if f[2] == "WARM")
+        cold = sum(1 for f in files if f[2] == "COLD")
+        logger.info(f"   🔥 HOT: {hot}, 🌤️ WARM: {warm}, ❄️ COLD: {cold}")
+        logger.info(f"   📝 Queue size: {self.batch_queue.size()}")
+
+        # Process files by priority
+        processed = 0
+
+        # HOT files (all of them)
+        for file_path, project, priority, age in files:
+            if priority == "HOT":
+                if self._process_file(file_path, project, priority):
+                    processed += 1
+
+        # WARM files (all of them)
+        for file_path, project, priority, age in files:
+            if priority == "WARM":
+                if self._process_file(file_path, project, priority):
+                    processed += 1
+
+        # COLD files (limited)
+        cold_processed = 0
+        for file_path, project, priority, age in files:
+            if priority == "COLD" and cold_processed < self.config.max_cold_files:
+                if self._process_file(file_path, project, priority):
+                    processed += 1
+                    cold_processed += 1
+
+        if processed > 0:
+            logger.info(f"   ✅ Queued {processed} new files")
+
+        # Check if batch should trigger
+        if self.batch_queue.should_trigger_batch():
+            self._trigger_batch()
+
+    def run_once(self):
+        """Run one monitoring cycle."""
+        self._normal_cycle()
+
+        # Also check batch monitor
+        self.batch_monitor.run_once()
+
+    def run_forever(self):
+        """Run watcher forever with hot/normal cycles."""
+        logger.info(f"\n{'='*60}")
+        logger.info(f"🚀 BATCH WATCHER STARTED")
+        logger.info(f"{'='*60}")
+        logger.info(f"HOT check: every {self.config.hot_check_interval_s}s")
+        logger.info(f"Normal cycle: every {self.config.normal_check_interval_s}s")
+        logger.info(f"{'='*60}\n")
+
+        last_normal_cycle = time.time()
+
+        while True:
+            try:
+                # HOT cycle (every 2 seconds)
+                self._hot_cycle()
+
+                # Normal cycle (every 60 seconds)
+                now = time.time()
+                if now - last_normal_cycle >= self.config.normal_check_interval_s:
+                    self._normal_cycle()
+                    last_normal_cycle = now
+
+                # Sleep until next HOT check
+                time.sleep(self.config.hot_check_interval_s)
+
+            except KeyboardInterrupt:
+                logger.info("\n👋 Batch watcher stopped")
+                break
+            except Exception as e:
+                logger.error(f"Error in watcher loop: {e}", exc_info=True)
+                time.sleep(self.config.hot_check_interval_s)
+
+
+def main():
+    """Main entry point."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Batch-Aware Watcher Service")
+    parser.add_argument("--once", action="store_true", help="Run once and exit")
+    parser.add_argument("--batch-size", type=int, default=10, help="Batch size trigger")
+    parser.add_argument("--batch-time", type=int, default=30, help="Batch time trigger (minutes)")
+
+    args = parser.parse_args()
+
+    config = BatchWatcherConfig(
+        batch_size_trigger=args.batch_size,
+        batch_time_trigger_minutes=args.batch_time
+    )
+
+    watcher = BatchWatcher(config)
+
+    if args.once:
+        watcher.run_once()
+    else:
+        watcher.run_forever()
+
+
+if __name__ == "__main__":
+    main()
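
The queue that `BatchQueue` persists between runs lives in `queue-state.json` under the batch queue directory. A hypothetical payload — field names taken from `add()` and `_save_queue()` above, values purely illustrative — might look like:

```python
# Illustrative queue-state.json contents (written atomically by _save_queue()):
queue_state = {
    "queued_files": [
        {
            "file_path": "/home/user/.claude/projects/my-project/abc123.jsonl",  # hypothetical path
            "project": "my-project",
            "queued_at": "2025-01-01T12:00:00.000000",
        }
    ],
    "last_batch_time": "2025-01-01T11:45:00.000000",
    "queue_size": 1,
}
```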
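
For driving the watcher, `main()` exposes `--once`, `--batch-size`, and `--batch-time`; the same can be done programmatically. A minimal sketch, assuming the runtime modules and their dependencies (`unified_state_manager`, `batch_monitor`, a reachable Qdrant) are importable from `src/runtime/`:

```python
# Sketch only: mirrors what main() does for the CLI flags.
from batch_watcher import BatchWatcher, BatchWatcherConfig

config = BatchWatcherConfig(batch_size_trigger=10, batch_time_trigger_minutes=30)
watcher = BatchWatcher(config)

watcher.run_once()       # one normal scan cycle plus one batch_monitor pass
# watcher.run_forever()  # hot scan every 2s, full scan every 60s (config defaults)
```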

package/src/runtime/config.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""
+Centralized configuration for Claude Self-Reflect runtime.
+All paths and environment variables are defined here.
+"""
+
+import os
+from pathlib import Path
+from typing import Optional
+
+
+def get_env_path(env_var: str, default: str) -> Path:
+    """Get a path from environment variable, expanding ~ and making absolute."""
+    path_str = os.getenv(env_var, default)
+    return Path(path_str).expanduser().resolve()
+
+
+# Core directories
+CSR_HOME = get_env_path("CSR_HOME", "~/.claude-self-reflect")
+CSR_CONFIG_DIR = get_env_path("CSR_CONFIG_DIR", f"{CSR_HOME}/config")
+CSR_BATCH_STATE_DIR = get_env_path("CSR_BATCH_STATE_DIR", f"{CSR_HOME}/batch_state")
+CSR_BATCH_QUEUE_DIR = get_env_path("CSR_BATCH_QUEUE_DIR", f"{CSR_HOME}/batch_queue")
+
+# Claude projects directory
+CLAUDE_PROJECTS_DIR = get_env_path("CLAUDE_PROJECTS_DIR", "~/.claude/projects")
+
+# Qdrant configuration
+QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
+QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "")
+
+# Batch watcher configuration
+BATCH_SIZE_TRIGGER = int(os.getenv("BATCH_SIZE_TRIGGER", "10"))
+BATCH_TIME_TRIGGER_MINUTES = int(os.getenv("BATCH_TIME_TRIGGER_MINUTES", "30"))
+
+# Watcher timing
+HOT_WINDOW_MINUTES = int(os.getenv("HOT_WINDOW_MINUTES", "5"))
+WARM_WINDOW_HOURS = int(os.getenv("WARM_WINDOW_HOURS", "24"))
+MAX_COLD_FILES = int(os.getenv("MAX_COLD_FILES", "5"))
+
+HOT_CHECK_INTERVAL_S = int(os.getenv("HOT_CHECK_INTERVAL_S", "2"))
+NORMAL_CHECK_INTERVAL_S = int(os.getenv("NORMAL_CHECK_INTERVAL_S", "60"))
+
+# Subprocess timeout (30 minutes for batch operations)
+SUBPROCESS_TIMEOUT_SECONDS = int(os.getenv("SUBPROCESS_TIMEOUT_SECONDS", "1800"))
+
+
+def ensure_directories():
+    """Ensure all required directories exist."""
+    directories = [
+        CSR_HOME,
+        CSR_CONFIG_DIR,
+        CSR_BATCH_STATE_DIR,
+        CSR_BATCH_QUEUE_DIR,
+    ]
+
+    for directory in directories:
+        directory.mkdir(parents=True, exist_ok=True)
+
+
+# Create directories on import
+ensure_directories()
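
Every path in the new config module goes through `get_env_path()`: the environment value (or the default) has `~` expanded and is made absolute. A small sketch of that rule, with a hypothetical override:

```python
# Reproduces the resolution rule in get_env_path(); the CSR_HOME value is hypothetical.
import os
from pathlib import Path

os.environ["CSR_HOME"] = "~/custom-csr"
csr_home = Path(os.getenv("CSR_HOME", "~/.claude-self-reflect")).expanduser().resolve()
print(csr_home)  # e.g. /home/<user>/custom-csr
```

Note that the derived defaults (`CSR_CONFIG_DIR`, `CSR_BATCH_STATE_DIR`, `CSR_BATCH_QUEUE_DIR`) are built from the already-resolved `CSR_HOME`, and `ensure_directories()` runs at import, so importing `config` creates the directory tree as a side effect.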

package/src/runtime/qdrant_connection.py
@@ -0,0 +1,73 @@
+"""
+Qdrant connection utilities with retry logic.
+"""
+
+import time
+import logging
+from typing import Optional
+from qdrant_client import QdrantClient
+
+logger = logging.getLogger(__name__)
+
+
+def connect_to_qdrant_with_retry(
+    url: str,
+    api_key: Optional[str] = None,
+    max_retries: int = 5,
+    initial_delay: float = 1.0
+) -> QdrantClient:
+    """
+    Connect to Qdrant with exponential backoff retry logic.
+
+    Args:
+        url: Qdrant URL
+        api_key: Optional API key for authentication
+        max_retries: Maximum number of retry attempts (default: 5)
+        initial_delay: Initial delay in seconds, doubles each retry (default: 1.0)
+
+    Returns:
+        Connected QdrantClient instance
+
+    Raises:
+        Exception: If all retries fail
+
+    Example:
+        >>> client = connect_to_qdrant_with_retry(
+        ...     url="http://localhost:6333",
+        ...     api_key="optional-api-key"
+        ... )
+        ✅ Connected to Qdrant at http://localhost:6333
+    """
+    delay = initial_delay
+
+    for attempt in range(max_retries):
+        try:
+            # Initialize client
+            if api_key:
+                client = QdrantClient(url=url, api_key=api_key)
+            else:
+                client = QdrantClient(url=url)
+
+            # Test connection by fetching collections
+            client.get_collections()
+
+        except Exception as e:
+            if attempt < max_retries - 1:
+                logger.warning(
+                    f"⚠️ Qdrant connection attempt {attempt + 1}/{max_retries} failed: {e}"
+                )
+                logger.info(f"   Retrying in {delay}s...")
+                time.sleep(delay)
+                delay *= 2  # Exponential backoff
+            else:
+                logger.exception(
+                    f"Failed to connect to Qdrant after {max_retries} attempts"
+                )
+                raise
+        else:
+            # Connection successful
+            logger.info(f"✅ Connected to Qdrant at {url}")
+            return client
+
+    # Should never reach here due to raise in except block
+    raise Exception(f"Failed to connect to Qdrant at {url} after {max_retries} attempts")
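
A minimal usage sketch for the retry helper, wiring it to the values exported by `config.py`; it assumes a Qdrant instance is actually reachable at `QDRANT_URL`:

```python
# Sketch only: connect with retries using the centralized configuration.
from config import QDRANT_URL, QDRANT_API_KEY
from qdrant_connection import connect_to_qdrant_with_retry

client = connect_to_qdrant_with_retry(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY or None,  # empty string in config means "no API key"
    max_retries=5,
    initial_delay=1.0,
)
print(len(client.get_collections().collections), "collections available")
```

Each failed attempt doubles the delay (1s, 2s, 4s, ...), so with the defaults the helper gives up after roughly 15 seconds of cumulative waiting.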