claude-self-reflect 6.0.5 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,455 @@
+ #!/usr/bin/env python3
+ """
+ Batch-Aware Watcher Service - Enhanced version of streaming-watcher.py
+ Integrates with the Anthropic Batch API for narrative generation.
+
+ This service:
+ 1. Watches for new conversation JSONL files
+ 2. Accumulates conversations in a batch queue
+ 3. Triggers batch narrative generation every 10 files OR every 30 minutes
+ 4. Registers batches with batch_monitor for automated evaluation
+ 5. Maintains a hot/warm/cold priority system for responsiveness
+ """
+
+ import os
+ import sys
+ import time
+ import json
+ import logging
+ import fcntl
+ import subprocess
+ from pathlib import Path
+ from typing import Dict, List
+ from datetime import datetime, timedelta
+ from dataclasses import dataclass, field
+
+ # Add project root to path
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ # Import existing infrastructure
+ sys.path.insert(0, str(Path(__file__).parent))
+ from unified_state_manager import UnifiedStateManager
+
+ # Import batch monitor and centralized config
+ from batch_monitor import BatchMonitor
+ from config import (
+     CSR_CONFIG_DIR,
+     CSR_BATCH_QUEUE_DIR,
+     CLAUDE_PROJECTS_DIR,
+     BATCH_SIZE_TRIGGER,
+     BATCH_TIME_TRIGGER_MINUTES,
+     HOT_WINDOW_MINUTES,
+     WARM_WINDOW_HOURS,
+     MAX_COLD_FILES,
+     HOT_CHECK_INTERVAL_S,
+     NORMAL_CHECK_INTERVAL_S,
+     SUBPROCESS_TIMEOUT_SECONDS
+ )
+
+ # Path to batch scripts
+ BATCH_IMPORT_SCRIPT = Path(__file__).parent.parent.parent / "docs" / "design" / "batch_import_all_projects.py"
+
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class BatchWatcherConfig:
+     """Configuration for batch-aware watcher."""
+
+     # Directories (from centralized config)
+     logs_dir: Path = field(default_factory=lambda: CLAUDE_PROJECTS_DIR)
+     queue_dir: Path = field(default_factory=lambda: CSR_BATCH_QUEUE_DIR)
+
+     # Batch triggers (from centralized config)
+     batch_size_trigger: int = field(default_factory=lambda: BATCH_SIZE_TRIGGER)
+     batch_time_trigger_minutes: int = field(default_factory=lambda: BATCH_TIME_TRIGGER_MINUTES)
+
+     # Priority thresholds (from centralized config)
+     hot_window_minutes: int = field(default_factory=lambda: HOT_WINDOW_MINUTES)
+     warm_window_hours: int = field(default_factory=lambda: WARM_WINDOW_HOURS)
+     max_cold_files: int = field(default_factory=lambda: MAX_COLD_FILES)
+
+     # Timing (from centralized config)
+     hot_check_interval_s: int = field(default_factory=lambda: HOT_CHECK_INTERVAL_S)
+     normal_check_interval_s: int = field(default_factory=lambda: NORMAL_CHECK_INTERVAL_S)
+
+     # State files (from centralized config)
+     state_file: Path = field(default_factory=lambda: CSR_CONFIG_DIR / "batch-watcher.json")
+     queue_state_file: Path = field(default_factory=lambda: CSR_BATCH_QUEUE_DIR / "queue-state.json")
+
+
+ class BatchQueue:
+     """Manages the queue of conversations waiting for batch processing."""
+
+     def __init__(self, config: BatchWatcherConfig):
+         self.config = config
+         self.config.queue_dir.mkdir(parents=True, exist_ok=True)
+         self.queue_state_file = config.queue_state_file
+
+         # Load existing queue and last batch time
+         queue_state = self._load_queue_state()
+         self.queue = queue_state.get('queued_files', [])
+
+         # Restore last_batch_time from state, or use current time if not available
+         last_batch_str = queue_state.get('last_batch_time')
+         if last_batch_str:
+             try:
+                 self.last_batch_time = datetime.fromisoformat(last_batch_str)
+             except (ValueError, TypeError):
+                 self.last_batch_time = datetime.now()
+         else:
+             self.last_batch_time = datetime.now()
+
+     def _load_queue_state(self) -> Dict:
+         """Load queue state from file with file locking."""
+         if not self.queue_state_file.exists():
+             return {"queued_files": [], "last_batch_time": None}
+
+         try:
+             with open(self.queue_state_file, 'r', encoding='utf-8') as f:
+                 # Acquire shared lock for reading
+                 fcntl.flock(f.fileno(), fcntl.LOCK_SH)
+                 try:
+                     data = json.load(f)
+                     return data
+                 finally:
+                     fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+         except FileNotFoundError:
+             return {"queued_files": [], "last_batch_time": None}
+         except Exception as e:
+             logger.error(f"Error loading queue: {e}")
+             return {"queued_files": [], "last_batch_time": None}
+
+     def _save_queue(self):
+         """Save queue state to file with exclusive file locking."""
+         self.queue_state_file.parent.mkdir(parents=True, exist_ok=True)
+
+         # Use atomic write: write to temp file, then rename.
+         # Defined before the try block so the cleanup path can reference it.
+         temp_file = self.queue_state_file.with_suffix('.tmp')
+
+         try:
+             with open(temp_file, 'w', encoding='utf-8') as f:
+                 # Acquire exclusive lock for writing
+                 fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+                 try:
+                     json.dump({
+                         "queued_files": self.queue,
+                         "last_batch_time": self.last_batch_time.isoformat(),
+                         "queue_size": len(self.queue)
+                     }, f, indent=2)
+                     f.flush()
+                     os.fsync(f.fileno())
+                 finally:
+                     fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+
+             # Atomic rename (POSIX guarantees atomicity)
+             temp_file.replace(self.queue_state_file)
+
+         except Exception as e:
+             logger.error(f"Error saving queue: {e}")
+             # Clean up temp file if it exists
+             if temp_file.exists():
+                 temp_file.unlink()
+
+     def add(self, file_path: str, project: str):
+         """Add a conversation to the queue."""
+         entry = {
+             "file_path": file_path,
+             "project": project,
+             "queued_at": datetime.now().isoformat()
+         }
+
+         # Avoid duplicates
+         if not any(q["file_path"] == file_path for q in self.queue):
+             self.queue.append(entry)
+             self._save_queue()
+             logger.info(f"📝 Queued for batch: {file_path} (queue size: {len(self.queue)})")
+
+     def should_trigger_batch(self) -> bool:
+         """Check if batch should be triggered."""
+         # Size trigger
+         if len(self.queue) >= self.config.batch_size_trigger:
+             logger.info(f"🎯 Batch size trigger: {len(self.queue)} >= {self.config.batch_size_trigger}")
+             return True
+
+         # Time trigger (and queue not empty)
+         if len(self.queue) > 0:
+             time_since_last = datetime.now() - self.last_batch_time
+             if time_since_last > timedelta(minutes=self.config.batch_time_trigger_minutes):
+                 logger.info(f"⏰ Batch time trigger: {time_since_last.total_seconds()/60:.1f} min > {self.config.batch_time_trigger_minutes} min")
+                 return True
+
+         return False
+
+     def get_batch(self) -> List[Dict]:
+         """Get all queued files and clear the queue."""
+         batch = self.queue.copy()
+         self.queue = []
+         self.last_batch_time = datetime.now()
+         self._save_queue()
+         return batch
+
+     def size(self) -> int:
+         """Get current queue size."""
+         return len(self.queue)
+
+
+ class BatchWatcher:
+     """Enhanced watcher that integrates with the Batch API."""
+
+     def __init__(self, config: BatchWatcherConfig):
+         self.config = config
+         self.state_manager = UnifiedStateManager(str(config.state_file))
+         self.batch_queue = BatchQueue(config)
+         self.batch_monitor = BatchMonitor()
+
+         logger.info("🚀 Batch Watcher initialized")
+         logger.info(f"   Watching: {config.logs_dir}")
+         logger.info(f"   Batch triggers: {config.batch_size_trigger} files OR {config.batch_time_trigger_minutes} min")
+         logger.info(f"   Queue state: {config.queue_state_file}")
+
+     def _discover_files(self) -> List[tuple]:
+         """Discover all JSONL conversation files with priority."""
+         files = []
+         now = datetime.now()
+
+         for project_dir in self.config.logs_dir.iterdir():
+             if not project_dir.is_dir():
+                 continue
+
+             project_name = project_dir.name
+
+             # Find JSONL files in project
+             jsonl_files = list(project_dir.glob("*.jsonl"))
+
+             for file_path in jsonl_files:
+                 try:
+                     stat = file_path.stat()
+                     mtime = datetime.fromtimestamp(stat.st_mtime)
+                     age = now - mtime
+
+                     # Calculate priority
+                     if age < timedelta(minutes=self.config.hot_window_minutes):
+                         priority = "HOT"
+                     elif age < timedelta(hours=self.config.warm_window_hours):
+                         priority = "WARM"
+                     else:
+                         priority = "COLD"
+
+                     files.append((str(file_path), project_name, priority, age))
+
+                 except Exception as e:
+                     logger.warning(f"Error checking {file_path}: {e}")
+
+         # Sort by priority (HOT first, then WARM, then COLD by age)
+         priority_order = {"HOT": 0, "WARM": 1, "COLD": 2}
+         files.sort(key=lambda x: (priority_order[x[2]], x[3]))
+
+         return files
+
+     def _process_file(self, file_path: str, project: str, priority: str) -> bool:
+         """Process a single file - add to queue or trigger batch."""
+         # Check if already processed
+         imported_files = self.state_manager.get_imported_files()
+         normalized_path = self.state_manager.normalize_path(file_path)
+
+         if normalized_path in imported_files:
+             return False
+
+         # HOT files get an extra log line for visibility
+         if priority == "HOT":
+             logger.info(f"🔥 HOT file detected: {Path(file_path).name}")
+
+         # Queue the file regardless of priority; it is not marked as
+         # processed until the batch completes
+         self.batch_queue.add(file_path, project)
+         return True
+
+     def _trigger_batch(self):
+         """Trigger batch narrative generation."""
+         batch_files = self.batch_queue.get_batch()
+
+         if not batch_files:
+             return
+
+         logger.info(f"\n{'='*60}")
+         logger.info("🚀 TRIGGERING BATCH NARRATIVE GENERATION")
+         logger.info(f"   Files: {len(batch_files)}")
+         logger.info(f"{'='*60}\n")
+
+         try:
+             # Run batch import script with configurable timeout
+             result = subprocess.run(
+                 [sys.executable, str(BATCH_IMPORT_SCRIPT)],
+                 capture_output=True,
+                 text=True,
+                 timeout=SUBPROCESS_TIMEOUT_SECONDS,  # Default: 1800s (30 min)
+                 check=True
+             )
+
+             logger.info("\n✅ Batch triggered successfully")
+             logger.info("   Output:\n%s", result.stdout)
+
+             # Mark files as processed
+             for entry in batch_files:
+                 self.state_manager.add_imported_file(
+                     file_path=entry["file_path"],
+                     chunks=0,  # Will be updated by batch import
+                     metadata={"batch_queued": True}
+                 )
+
+         except subprocess.CalledProcessError as cpe:
+             logger.error("❌ Batch import failed (rc=%s)", cpe.returncode)
+             logger.error("   Stdout: %s", cpe.stdout)
+             logger.error("   Stderr: %s", cpe.stderr)
+
+             # Re-queue failed files
+             for entry in batch_files:
+                 self.batch_queue.add(entry["file_path"], entry["project"])
+
+         except Exception as e:
+             logger.error("❌ Error triggering batch: %s", e, exc_info=True)
+
+             # Re-queue failed files
+             for entry in batch_files:
+                 self.batch_queue.add(entry["file_path"], entry["project"])
+
+     def _hot_cycle(self):
+         """Fast cycle to check for HOT files only."""
+         files = self._discover_files()
+
+         hot_files = [f for f in files if f[2] == "HOT"]
+
+         if hot_files:
+             logger.info(f"🔥 {len(hot_files)} HOT files detected")
+
+         for file_path, project, priority, age in hot_files:
+             self._process_file(file_path, project, priority)
+
+         # Check if batch should trigger
+         if self.batch_queue.should_trigger_batch():
+             self._trigger_batch()
+
+     def _normal_cycle(self):
+         """Normal cycle to process all files."""
+         files = self._discover_files()
+
+         logger.info("\n📊 Scan results:")
+         hot = sum(1 for f in files if f[2] == "HOT")
+         warm = sum(1 for f in files if f[2] == "WARM")
+         cold = sum(1 for f in files if f[2] == "COLD")
+         logger.info(f"   🔥 HOT: {hot}, 🌤️ WARM: {warm}, ❄️ COLD: {cold}")
+         logger.info(f"   📝 Queue size: {self.batch_queue.size()}")
+
+         # Process files by priority
+         processed = 0
+
+         # HOT files (all of them)
+         for file_path, project, priority, age in files:
+             if priority == "HOT":
+                 if self._process_file(file_path, project, priority):
+                     processed += 1
+
+         # WARM files (all of them)
+         for file_path, project, priority, age in files:
+             if priority == "WARM":
+                 if self._process_file(file_path, project, priority):
+                     processed += 1
+
+         # COLD files (limited)
+         cold_processed = 0
+         for file_path, project, priority, age in files:
+             if priority == "COLD" and cold_processed < self.config.max_cold_files:
+                 if self._process_file(file_path, project, priority):
+                     processed += 1
+                     cold_processed += 1
+
+         if processed > 0:
+             logger.info(f"   ✅ Queued {processed} new files")
+
+         # Check if batch should trigger
+         if self.batch_queue.should_trigger_batch():
+             self._trigger_batch()
+
+     def run_once(self):
+         """Run one monitoring cycle."""
+         self._normal_cycle()
+
+         # Also check batch monitor
+         self.batch_monitor.run_once()
+
+     def run_forever(self):
+         """Run watcher forever with hot/normal cycles."""
+         logger.info(f"\n{'='*60}")
+         logger.info("🚀 BATCH WATCHER STARTED")
+         logger.info(f"{'='*60}")
+         logger.info(f"HOT check: every {self.config.hot_check_interval_s}s")
+         logger.info(f"Normal cycle: every {self.config.normal_check_interval_s}s")
+         logger.info(f"{'='*60}\n")
+
+         last_normal_cycle = time.time()
+
+         while True:
+             try:
+                 # HOT cycle runs every hot_check_interval_s seconds (default: 2)
+                 self._hot_cycle()
+
+                 # Normal cycle runs every normal_check_interval_s seconds (default: 60)
+                 now = time.time()
+                 if now - last_normal_cycle >= self.config.normal_check_interval_s:
+                     self._normal_cycle()
+                     last_normal_cycle = now
+
+                 # Sleep until next HOT check
+                 time.sleep(self.config.hot_check_interval_s)
+
+             except KeyboardInterrupt:
+                 logger.info("\n👋 Batch watcher stopped")
+                 break
+             except Exception as e:
+                 logger.error(f"Error in watcher loop: {e}", exc_info=True)
+                 time.sleep(self.config.hot_check_interval_s)
+
+
+ def main():
+     """Main entry point."""
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Batch-Aware Watcher Service")
+     parser.add_argument("--once", action="store_true", help="Run once and exit")
+     # Defaults come from the centralized config so environment overrides
+     # (BATCH_SIZE_TRIGGER, BATCH_TIME_TRIGGER_MINUTES) are respected
+     parser.add_argument("--batch-size", type=int, default=BATCH_SIZE_TRIGGER, help="Batch size trigger")
+     parser.add_argument("--batch-time", type=int, default=BATCH_TIME_TRIGGER_MINUTES, help="Batch time trigger (minutes)")
+
+     args = parser.parse_args()
+
+     config = BatchWatcherConfig(
+         batch_size_trigger=args.batch_size,
+         batch_time_trigger_minutes=args.batch_time
+     )
+
+     watcher = BatchWatcher(config)
+
+     if args.once:
+         watcher.run_once()
+     else:
+         watcher.run_forever()
+
+
+ if __name__ == "__main__":
+     main()
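
For orientation, a minimal driver sketch for the watcher above. It assumes the file ships as batch_watcher.py (the diff does not name it) and that its sibling modules (unified_state_manager, batch_monitor, config) are importable; the trigger values are illustrative, not recommended settings.

    from batch_watcher import BatchWatcherConfig, BatchWatcher  # module name assumed

    # Tighter triggers than the defaults (10 files / 30 min), for demonstration only
    config = BatchWatcherConfig(batch_size_trigger=5, batch_time_trigger_minutes=15)

    watcher = BatchWatcher(config)
    watcher.run_once()        # one scan/queue/monitor pass, same as the --once flag
    # watcher.run_forever()   # blocking hot/normal loop; Ctrl+C to stop
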
@@ -0,0 +1,61 @@
+ #!/usr/bin/env python3
+ """
+ Centralized configuration for Claude Self-Reflect runtime.
+ All paths and environment variables are defined here.
+ """
+
+ import os
+ from pathlib import Path
+
+
+ def get_env_path(env_var: str, default: str) -> Path:
+     """Get a path from an environment variable, expanding ~ and making it absolute."""
+     path_str = os.getenv(env_var, default)
+     return Path(path_str).expanduser().resolve()
+
+
+ # Core directories
+ CSR_HOME = get_env_path("CSR_HOME", "~/.claude-self-reflect")
+ CSR_CONFIG_DIR = get_env_path("CSR_CONFIG_DIR", f"{CSR_HOME}/config")
+ CSR_BATCH_STATE_DIR = get_env_path("CSR_BATCH_STATE_DIR", f"{CSR_HOME}/batch_state")
+ CSR_BATCH_QUEUE_DIR = get_env_path("CSR_BATCH_QUEUE_DIR", f"{CSR_HOME}/batch_queue")
+
+ # Claude projects directory
+ CLAUDE_PROJECTS_DIR = get_env_path("CLAUDE_PROJECTS_DIR", "~/.claude/projects")
+
+ # Qdrant configuration
+ QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "")
+
+ # Batch watcher configuration
+ BATCH_SIZE_TRIGGER = int(os.getenv("BATCH_SIZE_TRIGGER", "10"))
+ BATCH_TIME_TRIGGER_MINUTES = int(os.getenv("BATCH_TIME_TRIGGER_MINUTES", "30"))
+
+ # Watcher timing
+ HOT_WINDOW_MINUTES = int(os.getenv("HOT_WINDOW_MINUTES", "5"))
+ WARM_WINDOW_HOURS = int(os.getenv("WARM_WINDOW_HOURS", "24"))
+ MAX_COLD_FILES = int(os.getenv("MAX_COLD_FILES", "5"))
+
+ HOT_CHECK_INTERVAL_S = int(os.getenv("HOT_CHECK_INTERVAL_S", "2"))
+ NORMAL_CHECK_INTERVAL_S = int(os.getenv("NORMAL_CHECK_INTERVAL_S", "60"))
+
+ # Subprocess timeout (default 30 minutes, for batch operations)
+ SUBPROCESS_TIMEOUT_SECONDS = int(os.getenv("SUBPROCESS_TIMEOUT_SECONDS", "1800"))
+
+
+ def ensure_directories():
+     """Ensure all required directories exist."""
+     directories = [
+         CSR_HOME,
+         CSR_CONFIG_DIR,
+         CSR_BATCH_STATE_DIR,
+         CSR_BATCH_QUEUE_DIR,
+     ]
+
+     for directory in directories:
+         directory.mkdir(parents=True, exist_ok=True)
+
+
+ # Create directories on import
+ ensure_directories()
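
Note that this module reads every value once, at import time, and creates the CSR directories as a side effect. A small override sketch, assuming the module above is saved as config.py (filename assumed):

    import os

    # Overrides must be set before the first import of config
    os.environ["BATCH_SIZE_TRIGGER"] = "25"
    os.environ["CSR_HOME"] = "/tmp/csr-demo"

    import config  # values are read and directories created here

    print(config.BATCH_SIZE_TRIGGER)  # 25
    print(config.CSR_CONFIG_DIR)      # e.g. /tmp/csr-demo/config
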
@@ -0,0 +1,73 @@
+ """
+ Qdrant connection utilities with retry logic.
+ """
+
+ import time
+ import logging
+ from typing import Optional
+ from qdrant_client import QdrantClient
+
+ logger = logging.getLogger(__name__)
+
+
+ def connect_to_qdrant_with_retry(
+     url: str,
+     api_key: Optional[str] = None,
+     max_retries: int = 5,
+     initial_delay: float = 1.0
+ ) -> QdrantClient:
+     """
+     Connect to Qdrant with exponential backoff retry logic.
+
+     Args:
+         url: Qdrant URL
+         api_key: Optional API key for authentication
+         max_retries: Maximum number of retry attempts (default: 5)
+         initial_delay: Initial delay in seconds, doubles each retry (default: 1.0)
+
+     Returns:
+         Connected QdrantClient instance
+
+     Raises:
+         Exception: If all retries fail
+
+     Example:
+         >>> client = connect_to_qdrant_with_retry(
+         ...     url="http://localhost:6333",
+         ...     api_key="optional-api-key"
+         ... )
+         ✅ Connected to Qdrant at http://localhost:6333
+     """
+     delay = initial_delay
+
+     for attempt in range(max_retries):
+         try:
+             # Initialize client
+             if api_key:
+                 client = QdrantClient(url=url, api_key=api_key)
+             else:
+                 client = QdrantClient(url=url)
+
+             # Test connection by fetching collections
+             client.get_collections()
+
+         except Exception as e:
+             if attempt < max_retries - 1:
+                 logger.warning(
+                     f"⚠️ Qdrant connection attempt {attempt + 1}/{max_retries} failed: {e}"
+                 )
+                 logger.info(f"   Retrying in {delay}s...")
+                 time.sleep(delay)
+                 delay *= 2  # Exponential backoff
+             else:
+                 logger.exception(
+                     f"Failed to connect to Qdrant after {max_retries} attempts"
+                 )
+                 raise
+         else:
+             # Connection successful
+             logger.info(f"✅ Connected to Qdrant at {url}")
+             return client
+
+     # Should never reach here due to raise in except block
+     raise Exception(f"Failed to connect to Qdrant at {url} after {max_retries} attempts")
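
A usage sketch for the retry helper above, assuming it is saved as qdrant_utils.py next to config.py (both filenames assumed) and that a Qdrant instance is reachable:

    from config import QDRANT_URL, QDRANT_API_KEY
    from qdrant_utils import connect_to_qdrant_with_retry

    client = connect_to_qdrant_with_retry(
        url=QDRANT_URL,
        api_key=QDRANT_API_KEY or None,  # config.py uses "" to mean "no key"
        max_retries=3,
        initial_delay=0.5,               # waits 0.5s, then 1s, between attempts
    )
    print([c.name for c in client.get_collections().collections])
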