overcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- overcode/__init__.py +5 -0
- overcode/cli.py +812 -0
- overcode/config.py +72 -0
- overcode/daemon.py +1184 -0
- overcode/daemon_claude_skill.md +180 -0
- overcode/daemon_state.py +113 -0
- overcode/data_export.py +257 -0
- overcode/dependency_check.py +227 -0
- overcode/exceptions.py +219 -0
- overcode/history_reader.py +448 -0
- overcode/implementations.py +214 -0
- overcode/interfaces.py +49 -0
- overcode/launcher.py +434 -0
- overcode/logging_config.py +193 -0
- overcode/mocks.py +152 -0
- overcode/monitor_daemon.py +808 -0
- overcode/monitor_daemon_state.py +358 -0
- overcode/pid_utils.py +225 -0
- overcode/presence_logger.py +454 -0
- overcode/protocols.py +143 -0
- overcode/session_manager.py +606 -0
- overcode/settings.py +412 -0
- overcode/standing_instructions.py +276 -0
- overcode/status_constants.py +190 -0
- overcode/status_detector.py +339 -0
- overcode/status_history.py +164 -0
- overcode/status_patterns.py +264 -0
- overcode/summarizer_client.py +136 -0
- overcode/summarizer_component.py +312 -0
- overcode/supervisor_daemon.py +1000 -0
- overcode/supervisor_layout.sh +50 -0
- overcode/tmux_manager.py +228 -0
- overcode/tui.py +2549 -0
- overcode/tui_helpers.py +495 -0
- overcode/web_api.py +279 -0
- overcode/web_server.py +138 -0
- overcode/web_templates.py +563 -0
- overcode-0.1.0.dist-info/METADATA +87 -0
- overcode-0.1.0.dist-info/RECORD +43 -0
- overcode-0.1.0.dist-info/WHEEL +5 -0
- overcode-0.1.0.dist-info/entry_points.txt +2 -0
- overcode-0.1.0.dist-info/licenses/LICENSE +21 -0
- overcode-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,808 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Monitor Daemon - Single source of truth for all session metrics.
|
|
4
|
+
|
|
5
|
+
This daemon handles all monitoring responsibilities:
|
|
6
|
+
- Agent status detection (via StatusDetector)
|
|
7
|
+
- Time tracking (green_time_seconds, non_green_time_seconds)
|
|
8
|
+
- Claude Code stats sync (tokens, interactions)
|
|
9
|
+
- Presence tracking (macOS only, graceful degradation)
|
|
10
|
+
- Status history logging (CSV)
|
|
11
|
+
|
|
12
|
+
The Monitor Daemon publishes MonitorDaemonState to a JSON file that
|
|
13
|
+
consumers (TUI, Supervisor Daemon) read from.
|
|
14
|
+
|
|
15
|
+
This separation ensures:
|
|
16
|
+
- No duplicate time tracking between TUI and daemon
|
|
17
|
+
- Clean interface contract via MonitorDaemonState
|
|
18
|
+
- Platform-agnostic core (presence is optional)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import signal
|
|
23
|
+
import sys
|
|
24
|
+
import time
|
|
25
|
+
from datetime import datetime
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Dict, List, Optional
|
|
28
|
+
|
|
29
|
+
from rich.console import Console
|
|
30
|
+
from rich.text import Text
|
|
31
|
+
from rich.theme import Theme
|
|
32
|
+
|
|
33
|
+
from .history_reader import get_session_stats
|
|
34
|
+
from .monitor_daemon_state import (
|
|
35
|
+
MonitorDaemonState,
|
|
36
|
+
SessionDaemonState,
|
|
37
|
+
get_monitor_daemon_state,
|
|
38
|
+
)
|
|
39
|
+
from .pid_utils import (
|
|
40
|
+
acquire_daemon_lock,
|
|
41
|
+
get_process_pid,
|
|
42
|
+
is_process_running,
|
|
43
|
+
remove_pid_file,
|
|
44
|
+
write_pid_file,
|
|
45
|
+
)
|
|
46
|
+
from .session_manager import SessionManager
|
|
47
|
+
from .settings import (
|
|
48
|
+
DAEMON,
|
|
49
|
+
DAEMON_VERSION,
|
|
50
|
+
PATHS,
|
|
51
|
+
ensure_session_dir,
|
|
52
|
+
get_monitor_daemon_pid_path,
|
|
53
|
+
get_monitor_daemon_state_path,
|
|
54
|
+
get_agent_history_path,
|
|
55
|
+
get_activity_signal_path,
|
|
56
|
+
get_supervisor_stats_path,
|
|
57
|
+
)
|
|
58
|
+
from .config import get_relay_config
|
|
59
|
+
from .status_constants import STATUS_RUNNING, STATUS_TERMINATED
|
|
60
|
+
from .status_detector import StatusDetector
|
|
61
|
+
from .status_history import log_agent_status
|
|
62
|
+
from .summarizer_component import SummarizerComponent, SummarizerConfig
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Check for macOS presence APIs (optional)
|
|
66
|
+
try:
|
|
67
|
+
from .presence_logger import (
|
|
68
|
+
MACOS_APIS_AVAILABLE,
|
|
69
|
+
get_current_presence_state,
|
|
70
|
+
PresenceLogger,
|
|
71
|
+
PresenceLoggerConfig,
|
|
72
|
+
)
|
|
73
|
+
except ImportError:
|
|
74
|
+
MACOS_APIS_AVAILABLE = False
|
|
75
|
+
get_current_presence_state = None
|
|
76
|
+
PresenceLogger = None
|
|
77
|
+
PresenceLoggerConfig = None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Interval settings (in seconds).
# All three values are read once, at import time, from the DAEMON settings
# object; MonitorDaemon.calculate_interval() picks between them each loop.
INTERVAL_FAST = DAEMON.interval_fast  # When active or agents working
INTERVAL_SLOW = DAEMON.interval_slow  # When all agents need user input
INTERVAL_IDLE = DAEMON.interval_idle  # When no agents at all
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def is_monitor_daemon_running(session: Optional[str] = None) -> bool:
    """Report whether a monitor daemon process is running for a tmux session.

    Args:
        session: tmux session name. ``None`` selects
            ``DAEMON.default_tmux_session``.

    Returns:
        Whatever ``is_process_running`` reports for the session's monitor
        daemon PID file.
    """
    target = DAEMON.default_tmux_session if session is None else session
    return is_process_running(get_monitor_daemon_pid_path(target))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_monitor_daemon_pid(session: Optional[str] = None) -> Optional[int]:
    """Look up the PID of the monitor daemon for a tmux session.

    Args:
        session: tmux session name. ``None`` selects
            ``DAEMON.default_tmux_session``.

    Returns:
        The value returned by ``get_process_pid`` for the session's PID file:
        the daemon's PID if running, ``None`` otherwise.
    """
    target = DAEMON.default_tmux_session if session is None else session
    return get_process_pid(get_monitor_daemon_pid_path(target))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def stop_monitor_daemon(session: Optional[str] = None) -> bool:
    """Send SIGTERM to the session's monitor daemon and clear its PID file.

    The PID file is removed on every path: when no live PID is found, when
    the signal is delivered, and when delivering the signal fails.

    Args:
        session: tmux session name. ``None`` selects
            ``DAEMON.default_tmux_session``.

    Returns:
        True if SIGTERM was delivered to the daemon, False if no daemon PID
        was found or the signal could not be sent.
    """
    if session is None:
        session = DAEMON.default_tmux_session
    pid_path = get_monitor_daemon_pid_path(session)
    pid = get_process_pid(pid_path)

    stopped = False
    if pid is not None:
        try:
            os.kill(pid, signal.SIGTERM)
            stopped = True
        except OSError:
            # ProcessLookupError and PermissionError are OSError subclasses,
            # so this single clause covers "already gone" and "not ours".
            stopped = False

    remove_pid_file(pid_path)
    return stopped
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def check_activity_signal(session: Optional[str] = None) -> bool:
    """Consume the TUI's activity signal file, reporting whether one existed.

    Args:
        session: tmux session name. ``None`` selects
            ``DAEMON.default_tmux_session``.

    Returns:
        True when the signal file was present and has been removed; False
        when it was absent or could not be removed.
    """
    if session is None:
        session = DAEMON.default_tmux_session
    signal_path = get_activity_signal_path(session)

    # Unlink directly instead of checking exists() first: check-then-delete
    # would be a TOCTOU race against whoever writes the signal.
    try:
        signal_path.unlink()
    except OSError:
        # FileNotFoundError (no signal pending) is an OSError subclass. Any
        # other OSError (permissions, etc.) means the signal may exist but
        # cannot be consumed. Both cases report "no signal".
        return False
    return True
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Rich theme for daemon logs.
# Maps the style names used by MonitorDaemonLogger ("info", "warn", "error",
# "success", "dim") to Rich style strings.
MONITOR_THEME = Theme({
    "info": "cyan",
    "warn": "yellow",
    "error": "bold red",
    "success": "bold green",
    "dim": "dim white",
    "highlight": "bold white",
})
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class MonitorDaemonLogger:
    """Console-and-file logger for the monitor daemon.

    Styled messages are printed to a Rich console and mirrored, unstyled, to
    a plain-text log file. File writes are best-effort: an ``OSError`` while
    appending is swallowed so that logging never takes the daemon down.
    """

    def __init__(self, session: str = "agents", log_file: Optional[Path] = None):
        """Create the console and decide where the log file lives.

        Args:
            session: tmux session name; used to locate the session directory
                when ``log_file`` is not given.
            log_file: Explicit log file path. When falsy, the log goes to
                ``monitor_daemon.log`` inside the directory returned by
                ``ensure_session_dir(session)``.
        """
        self.console = Console(theme=MONITOR_THEME, force_terminal=True)
        # Use session-specific log file unless the caller supplied one
        if log_file:
            self.log_file = log_file
        else:
            self.log_file = ensure_session_dir(session) / "monitor_daemon.log"
        self._logged_messages: set = set()

    def _append_to_file(self, line: str) -> None:
        """Append one line to the log file, ignoring I/O failures."""
        try:
            # Explicit UTF-8: the level prefixes ("●", "⚠", "✗", "✓") cannot be
            # encoded in every locale's default encoding, and the resulting
            # UnicodeEncodeError would not be caught by ``except OSError``.
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(f"{line}\n")
        except OSError:
            # Deliberately best-effort: a failed file write must not stop
            # the daemon.
            pass

    def _log(self, style: str, prefix: str, message: str):
        """Print a styled message to the console and mirror it to the file.

        Args:
            style: Theme style name applied to the prefix.
            prefix: Short marker shown before the message.
            message: Text to log.
        """
        timestamp = datetime.now().strftime("%H:%M:%S")
        text = Text()
        text.append(f"[{timestamp}] ", style="dim")
        text.append(f"{prefix} ", style=style)
        text.append(message)
        self.console.print(text)

        # Also log to file
        self._append_to_file(f"[{timestamp}] {prefix} {message}")

    def info(self, message: str):
        """Log an informational message."""
        self._log("info", "●", message)

    def warn(self, message: str):
        """Log a warning."""
        self._log("warn", "⚠", message)

    def error(self, message: str):
        """Log an error."""
        self._log("error", "✗", message)

    def success(self, message: str):
        """Log a success message."""
        self._log("success", "✓", message)

    def debug(self, message: str):
        """Log a debug message (only to file, not console)."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        self._append_to_file(f"[{timestamp}] DEBUG {message}")

    def section(self, title: str):
        """Print a section header (console only)."""
        self.console.print()
        self.console.rule(f"[bold cyan]{title}[/]")
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class PresenceComponent:
    """Optional presence tracking that degrades to a no-op when unsupported.

    ``available`` mirrors ``MACOS_APIS_AVAILABLE``. A ``PresenceLogger`` is
    created and started only when that flag is set and the logger class was
    importable.
    """

    def __init__(self):
        self.available = MACOS_APIS_AVAILABLE
        self._logger: Optional[PresenceLogger] = None

        # Nothing to start when the platform APIs or the logger are missing
        if not self.available or PresenceLogger is None:
            return

        self._logger = PresenceLogger(PresenceLoggerConfig())
        self._logger.start()

    def get_current_state(self) -> tuple:
        """Return the current presence reading.

        Returns:
            Tuple of (state, idle_seconds, locked), or (None, None, None)
            when presence is unavailable or the lookup raises.
        """
        unavailable = (None, None, None)
        if not self.available or get_current_presence_state is None:
            return unavailable

        try:
            return get_current_presence_state()
        except Exception:
            return unavailable

    def stop(self):
        """Stop the presence logger, if one was started."""
        presence_logger = self._logger
        if presence_logger is None:
            return
        presence_logger.stop()
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class MonitorDaemon:
|
|
254
|
+
"""Monitor Daemon - single source of truth for all session metrics.
|
|
255
|
+
|
|
256
|
+
Responsibilities:
|
|
257
|
+
- Status detection for all sessions
|
|
258
|
+
- Time tracking (green/non-green)
|
|
259
|
+
- Claude Code stats sync
|
|
260
|
+
- Presence tracking (optional)
|
|
261
|
+
- Status history logging
|
|
262
|
+
- Publishing MonitorDaemonState
|
|
263
|
+
|
|
264
|
+
Each tmux session gets its own Monitor Daemon instance with
|
|
265
|
+
isolated state files and PID tracking.
|
|
266
|
+
"""
|
|
267
|
+
|
|
268
|
+
def __init__(
    self,
    tmux_session: str = "agents",
    session_manager: Optional[SessionManager] = None,
    status_detector: Optional[StatusDetector] = None,
):
    """Wire up paths, collaborators and in-memory tracking for one session.

    Args:
        tmux_session: Name of the tmux session this daemon monitors; also
            selects the session-specific PID/state/history paths.
        session_manager: Optional injected SessionManager; a default one is
            constructed when omitted (or falsy).
        status_detector: Optional injected StatusDetector; a default one
            bound to ``tmux_session`` is constructed when omitted (or falsy).
    """
    self.tmux_session = tmux_session

    # Ensure session directory exists
    ensure_session_dir(tmux_session)

    # Session-specific paths
    self.pid_path = get_monitor_daemon_pid_path(tmux_session)
    self.state_path = get_monitor_daemon_state_path(tmux_session)
    self.history_path = get_agent_history_path(tmux_session)

    # Dependencies (allow injection for testing)
    self.session_manager = session_manager or SessionManager()
    self.status_detector = status_detector or StatusDetector(tmux_session)

    # Presence tracking (graceful degradation)
    self.presence = PresenceComponent()

    # Summarizer component (graceful degradation if no API key)
    self.summarizer = SummarizerComponent(
        tmux_session=tmux_session,
        config=SummarizerConfig(enabled=False),  # Off by default, enable via CLI
    )

    # Logging - session-specific log file
    self.log = MonitorDaemonLogger(session=tmux_session)

    # State tracking: the object that gets published to the state file
    self.state = MonitorDaemonState(
        pid=os.getpid(),
        status="starting",
        started_at=datetime.now().isoformat(),
        daemon_version=DAEMON_VERSION,
    )

    # Per-session tracking, all keyed by session id
    self.previous_states: Dict[str, str] = {}  # last status seen per session
    self.last_state_times: Dict[str, datetime] = {}  # last time-accumulation tick
    self.operation_start_times: Dict[str, datetime] = {}  # when a session left "running"

    # Stats sync throttling: starting from "now" means the first Claude Code
    # stats sync happens only after one full interval has elapsed.
    self._last_stats_sync = datetime.now()
    self._stats_sync_interval = 60  # seconds

    # Relay configuration (for pushing state to cloud).
    # datetime.min guarantees the first relay push is never throttled.
    self._relay_config = get_relay_config()
    self._last_relay_push = datetime.min
    if self._relay_config:
        self.log.info(f"Relay enabled: {self._relay_config['url']}")

    # Shutdown flag, set by the signal handler installed in run()
    self._shutdown = False
|
|
325
|
+
|
|
326
|
+
def track_session_stats(self, session, status: str) -> SessionDaemonState:
    """Update per-session tracking and build the publishable session state.

    Accumulates green/non-green time via ``_update_state_time``, records
    transitions into and out of ``STATUS_RUNNING``, and returns a
    ``SessionDaemonState`` snapshot for inclusion in ``MonitorDaemonState``.

    Args:
        session: Session object as returned by the session manager.
        status: Status just detected for this session.

    Returns:
        The session's state for publishing, built from ``session`` and
        ``session.stats`` as passed in.
    """
    sid = session.id
    timestamp = datetime.now()

    # Read the prior status first; it is overwritten further down
    last_seen_status = self.previous_states.get(sid, status)

    # Time accounting
    self._update_state_time(session, status, timestamp)

    running_before = last_seen_status == STATUS_RUNNING
    running_now = status == STATUS_RUNNING

    # Session just left the running state: remember when
    if running_before and not running_now:
        self.operation_start_times[sid] = timestamp

    # Session is running again: the gap since it stopped is recorded as an
    # operation time.
    # NOTE(review): the recorded interval is the time spent *not* running,
    # although it feeds "median_work_time" — confirm this is intended.
    if running_now and not running_before and sid in self.operation_start_times:
        began = self.operation_start_times.pop(sid)
        duration = (timestamp - began).total_seconds()

        history = list(session.stats.operation_times)
        if duration > 0:
            history.append(duration)
            history = history[-100:]  # keep only the most recent 100 entries
        self.session_manager.update_stats(
            sid,
            operation_times=history,
            last_activity=timestamp.isoformat(),
        )
        self.log.info(f"[{session.name}] Operation completed ({duration:.1f}s)")

    # Remember the status for the next loop
    self.previous_states[sid] = status

    # Snapshot for publishing
    snapshot = session.stats
    return SessionDaemonState(
        session_id=sid,
        name=session.name,
        tmux_window=session.tmux_window,
        current_status=status,
        current_activity=snapshot.current_task or "",
        status_since=snapshot.state_since,
        green_time_seconds=snapshot.green_time_seconds,
        non_green_time_seconds=snapshot.non_green_time_seconds,
        interaction_count=snapshot.interaction_count,
        input_tokens=snapshot.input_tokens,
        output_tokens=snapshot.output_tokens,
        cache_creation_tokens=snapshot.cache_creation_tokens,
        cache_read_tokens=snapshot.cache_read_tokens,
        estimated_cost_usd=snapshot.estimated_cost_usd,
        median_work_time=self._calculate_median_work_time(snapshot.operation_times),
        repo_name=session.repo_name,
        branch=session.branch,
        standing_instructions=session.standing_instructions or "",
        standing_orders_complete=session.standing_orders_complete,
        steers_count=snapshot.steers_count,
        start_time=session.start_time,
        permissiveness_mode=session.permissiveness_mode,
        start_directory=session.start_directory,
    )
|
|
398
|
+
|
|
399
|
+
def _update_state_time(self, session, status: str, now: datetime) -> None:
    """Accumulate green / non-green seconds for a session and persist them.

    On the first observation of a session the method only seeds its
    in-memory clock (from the persisted ``last_time_accumulation``, else
    ``state_since``, else ``now``) and returns without accumulating.
    Afterwards the time elapsed since the previous tick is credited to the
    bucket matching ``status`` and saved through the session manager.

    Args:
        session: Session whose ``stats`` hold the persisted totals.
        status: Status detected for this loop.
        now: Timestamp of this loop iteration.
    """
    sid = session.id
    stats = session.stats

    previous_tick = self.last_state_times.get(sid)
    if previous_tick is None:
        # First sighting since the daemon (re)started. Resume from the
        # persisted accumulation timestamp so that time already counted
        # before a restart is not added again; older sessions without it
        # fall back to state_since.
        persisted = stats.last_time_accumulation or stats.state_since
        seed = now
        if persisted:
            try:
                seed = datetime.fromisoformat(persisted)
            except ValueError:
                seed = now
        self.last_state_times[sid] = seed
        return  # nothing is accumulated on the first observation

    delta = (now - previous_tick).total_seconds()
    if delta <= 0:
        return

    green_total = stats.green_time_seconds
    other_total = stats.non_green_time_seconds

    if status == STATUS_RUNNING:
        green_total += delta
    elif status != STATUS_TERMINATED:
        # Terminated sessions accumulate no time at all
        other_total += delta

    # INVARIANT CHECK: accumulated time should never exceed session uptime.
    # This catches bugs such as several daemons running at once.
    if session.start_time:
        try:
            started = datetime.fromisoformat(session.start_time)
            uptime = (now - started).total_seconds()
            combined = green_total + other_total

            if combined > uptime * 1.1:  # 10% tolerance for timing jitter
                # Scale both buckets down, preserving their ratio
                scale = uptime / combined if combined > 0 else 1.0
                green_total = green_total * scale
                other_total = other_total * scale
                self.log.warn(
                    f"[{session.name}] Time tracking reset: "
                    f"accumulated {combined/3600:.1f}h > uptime {uptime/3600:.1f}h"
                )
        except (ValueError, TypeError):
            pass

    # state_since restarts on a status change, or is initialised if unset
    prior_status = self.previous_states.get(sid, status)
    since = stats.state_since
    if prior_status != status or not since:
        since = now.isoformat()

    # Persist through the session manager
    self.session_manager.update_stats(
        sid,
        current_state=status,
        state_since=since,
        green_time_seconds=green_total,
        non_green_time_seconds=other_total,
        last_time_accumulation=now.isoformat(),
    )

    self.last_state_times[sid] = now
|
|
481
|
+
|
|
482
|
+
def sync_claude_code_stats(self, session) -> None:
    """Copy token and interaction counts from Claude Code history into stats.

    Reads usage through ``get_session_stats``; when it returns ``None`` the
    session is left untouched. Any exception is logged as a warning rather
    than propagated.

    Args:
        session: Session whose stats should be refreshed.
    """
    try:
        usage = get_session_stats(session)
        if usage is None:
            return

        synced_at = datetime.now()
        token_sum = (
            usage.input_tokens
            + usage.output_tokens
            + usage.cache_creation_tokens
            + usage.cache_read_tokens
        )

        # Cost estimate in USD, using fixed per-million-token rates
        estimated_cost = (
            (usage.input_tokens / 1_000_000) * 3.0 +
            (usage.output_tokens / 1_000_000) * 15.0 +
            (usage.cache_creation_tokens / 1_000_000) * 3.75 +
            (usage.cache_read_tokens / 1_000_000) * 0.30
        )

        self.session_manager.update_stats(
            session.id,
            interaction_count=usage.interaction_count,
            total_tokens=token_sum,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_creation_tokens=usage.cache_creation_tokens,
            cache_read_tokens=usage.cache_read_tokens,
            estimated_cost_usd=round(estimated_cost, 4),
            last_stats_update=synced_at.isoformat(),
        )
    except Exception as exc:
        self.log.warn(f"Failed to sync stats for {session.name}: {exc}")
|
|
518
|
+
|
|
519
|
+
def _calculate_median_work_time(self, operation_times: List[float]) -> float:
    """Return the median of ``operation_times``, or 0.0 when it is empty.

    For an even number of samples the two middle values are averaged.
    """
    if not operation_times:
        return 0.0

    ordered = sorted(operation_times)
    middle, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle]) / 2
|
|
528
|
+
|
|
529
|
+
def calculate_interval(self, sessions: list, all_waiting_user: bool) -> int:
    """Pick the loop interval for the current situation.

    Args:
        sessions: Sessions seen this loop.
        all_waiting_user: True when every session is waiting on the user.

    Returns:
        ``INTERVAL_IDLE`` with no sessions, ``INTERVAL_SLOW`` when all
        sessions wait on the user, otherwise ``INTERVAL_FAST``.
    """
    if sessions:
        return INTERVAL_SLOW if all_waiting_user else INTERVAL_FAST
    return INTERVAL_IDLE
|
|
538
|
+
|
|
539
|
+
def _interruptible_sleep(self, total_seconds: int) -> None:
    """Sleep up to ``total_seconds``, waking early on activity or shutdown.

    The sleep is taken in slices of at most 10 seconds. After each slice
    the activity signal is checked; if present, the published interval is
    switched to ``INTERVAL_FAST``, the state is saved, and the method
    returns. The shutdown flag is checked before each slice.
    """
    slice_limit = 10
    slept = 0

    while slept < total_seconds and not self._shutdown:
        nap = min(slice_limit, total_seconds - slept)
        time.sleep(nap)
        slept += nap

        if not check_activity_signal(self.tmux_session):
            continue

        self.log.info("User activity detected → waking up")
        self.state.current_interval = INTERVAL_FAST
        self.state.save(self.state_path)
        return
|
|
555
|
+
|
|
556
|
+
def _publish_state(self, session_states: List[SessionDaemonState]) -> None:
    """Refresh ``self.state`` and write it to the session's JSON state file.

    Collects the presence reading, supervisor statistics (when the
    supervisor stats file is readable) and summarizer counters, saves the
    state, then gives the relay a chance to push.

    Args:
        session_states: Per-session states built during this loop.
    """
    import json

    now = datetime.now()

    # Update presence state
    presence_state, presence_idle, _ = self.presence.get_current_state()

    self.state.last_loop_time = now.isoformat()
    self.state.sessions = session_states
    self.state.presence_available = self.presence.available
    self.state.presence_state = presence_state
    self.state.presence_idle_seconds = presence_idle

    # Read supervisor stats if available (populated by supervisor daemon).
    # Open directly instead of checking exists() first: the file can vanish
    # between the check and the open, and a missing file is simply an
    # OSError here.
    supervisor_stats_path = get_supervisor_stats_path(self.tmux_session)
    try:
        with open(supervisor_stats_path, encoding="utf-8") as f:
            stats = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError,
        # e.g. a file caught mid-write. Keep the previous values.
        stats = None

    # Anything other than a JSON object is ignored rather than crashing
    # the daemon on ``.get``.
    if isinstance(stats, dict):
        self.state.supervisor_launches = stats.get("supervisor_launches", 0)
        self.state.supervisor_tokens = stats.get("supervisor_tokens", 0)
        # Daemon Claude run tracking
        self.state.supervisor_claude_running = stats.get("supervisor_claude_running", False)
        self.state.supervisor_claude_started_at = stats.get("supervisor_claude_started_at")
        self.state.supervisor_claude_total_run_seconds = stats.get("supervisor_claude_total_run_seconds", 0.0)

    # Update summarizer stats
    self.state.summarizer_available = self.summarizer.available
    self.state.summarizer_enabled = self.summarizer.enabled
    self.state.summarizer_calls = self.summarizer.total_calls
    # Estimate cost: ~$0.0007 per call (4K input tokens + 150 output tokens)
    self.state.summarizer_cost_usd = round(self.summarizer.total_calls * 0.0007, 4)

    self.state.save(self.state_path)

    # Push to relay if configured and interval elapsed.
    # NOTE(review): the relay status fields set by the push are written to
    # disk only by the next save, since this call follows save() — confirm
    # that one-loop lag is acceptable.
    self._maybe_push_to_relay()
|
|
596
|
+
|
|
597
|
+
def _maybe_push_to_relay(self) -> None:
    """POST the current status payload to the cloud relay, if configured.

    Does nothing beyond recording ``relay_last_status = "disabled"`` when no
    relay is configured, and is throttled to one attempt per configured
    ``interval`` seconds (default 30). Failures are logged and recorded in
    ``self.state.relay_last_status``; they never propagate.
    """
    # Update relay enabled status
    self.state.relay_enabled = self._relay_config is not None

    if not self._relay_config:
        self.state.relay_last_status = "disabled"
        return

    now = datetime.now()
    interval = self._relay_config.get("interval", 30)
    if (now - self._last_relay_push).total_seconds() < interval:
        return

    # Stamp the attempt before trying, so failed pushes are throttled too
    self._last_relay_push = now

    # Standard-library imports stay outside the try block so that the
    # ``except urllib.error.URLError`` clause below can always be evaluated.
    import json
    import urllib.error
    import urllib.request

    try:
        # Build status payload using web_api format
        from .web_api import get_status_data

        # Timeline data is not included in the payload.
        payload = get_status_data(self.tmux_session)
        data = json.dumps(payload).encode("utf-8")

        req = urllib.request.Request(
            self._relay_config["url"],
            data=data,
            headers={
                "Content-Type": "application/json",
                "X-API-Key": self._relay_config["api_key"],
            },
            method="POST",
        )

        with urllib.request.urlopen(req, timeout=10) as resp:
            # Any 2xx reply (e.g. 201/202/204) is a successful push, not
            # only 200. urlopen raises HTTPError for error statuses, so
            # those are handled by the except clauses below.
            if 200 <= resp.status < 300:
                self.state.relay_last_push = now.isoformat()
                self.state.relay_last_status = "ok"
                self.log.debug("Relay push OK")
            else:
                self.state.relay_last_status = "error"
                self.log.warn(f"Relay push failed: HTTP {resp.status}")

    except urllib.error.URLError as e:
        self.state.relay_last_status = "error"
        self.log.warn(f"Relay push failed: {e.reason}")
    except Exception as e:
        # Best-effort: a broken relay must not stop the monitor loop
        self.state.relay_last_status = "error"
        self.log.warn(f"Relay push error: {e}")
|
|
653
|
+
|
|
654
|
+
def run(self, check_interval: int = INTERVAL_FAST):
    """Run the monitor loop until a shutdown signal arrives.

    Acquires the daemon lock (exiting with status 1 if another daemon holds
    it), installs SIGTERM/SIGINT handlers, then loops: detect each
    session's status, update its stats, publish the combined state, and
    sleep for the interval chosen by ``calculate_interval``.

    Args:
        check_interval: Interval published in the state before the first
            loop. Each loop then overwrites it with the value from
            ``calculate_interval``.

    Raises:
        SystemExit: If the daemon lock cannot be acquired.
        Exception: Any error from the loop is logged and re-raised after
            cleanup.
    """
    # Atomically check if already running and acquire lock.
    # This prevents TOCTOU race conditions that could cause multiple daemons.
    acquired, existing_pid = acquire_daemon_lock(self.pid_path)
    if not acquired:
        if existing_pid:
            self.log.error(f"Monitor daemon already running (PID {existing_pid})")
        else:
            self.log.error("Could not acquire daemon lock (another daemon may be starting)")
        sys.exit(1)

    self.log.section("Monitor Daemon")
    self.log.info(f"PID: {os.getpid()}")
    self.log.info(f"tmux session: {self.tmux_session}")
    self.log.info(f"Presence tracking: {'available' if self.presence.available else 'unavailable (non-macOS)'}")

    # Setup signal handlers: they only raise the flag; the loop and the
    # interruptible sleep notice it and wind down.
    def handle_shutdown(signum, frame):
        self.log.info("Shutdown signal received")
        self._shutdown = True

    signal.signal(signal.SIGTERM, handle_shutdown)
    signal.signal(signal.SIGINT, handle_shutdown)

    self.state.status = "active"
    self.state.current_interval = check_interval
    self.state.save(self.state_path)

    try:
        while not self._shutdown:
            self.state.loop_count += 1
            now = datetime.now()

            # Get all sessions
            sessions = self.session_manager.list_sessions()

            # Detect status and track stats for each session
            session_states = []
            all_waiting_user = True

            for session in sessions:
                # Detect status
                status, activity, _ = self.status_detector.detect_status(session)

                # Refresh git context (branch may have changed)
                self.session_manager.refresh_git_context(session.id)

                # Update current task in session
                self.session_manager.update_stats(
                    session.id,
                    current_task=activity[:100] if activity else ""
                )

                # Reload session to get fresh stats
                session = self.session_manager.get_session(session.id)
                if session is None:
                    continue

                # Track stats and build state
                session_state = self.track_session_stats(session, status)
                session_state.current_activity = activity
                session_states.append(session_state)

                # Log status history
                log_agent_status(session.name, status, activity)

                # Track if any session is not waiting for user
                if status != "waiting_user":
                    all_waiting_user = False

            # Clean up stale entries for deleted sessions. All three
            # per-session maps are pruned, including last_state_times,
            # which would otherwise grow for as long as the daemon runs.
            current_session_ids = {s.id for s in sessions}
            for tracked in (
                self.operation_start_times,
                self.previous_states,
                self.last_state_times,
            ):
                for stale_id in set(tracked) - current_session_ids:
                    del tracked[stale_id]

            # Sync Claude Code stats periodically (git context is refreshed every loop above)
            if (now - self._last_stats_sync).total_seconds() >= self._stats_sync_interval:
                for session in sessions:
                    self.sync_claude_code_stats(session)
                self._last_stats_sync = now

            # Update summaries (if enabled)
            summaries = self.summarizer.update(sessions)
            for session_state in session_states:
                summary = summaries.get(session_state.session_id)
                if summary:
                    session_state.activity_summary = summary.text
                    session_state.activity_summary_updated = summary.updated_at

            # Calculate interval
            interval = self.calculate_interval(sessions, all_waiting_user)
            self.state.current_interval = interval

            # Update status based on state
            if not sessions:
                self.state.status = "no_agents"
            elif all_waiting_user:
                self.state.status = "idle"
            else:
                self.state.status = "active"

            # Publish state
            self._publish_state(session_states)

            # Log summary
            green = sum(1 for s in session_states if s.current_status == STATUS_RUNNING)
            non_green = len(session_states) - green
            self.log.info(f"Loop #{self.state.loop_count}: {len(sessions)} sessions ({green} green, {non_green} non-green), interval={interval}s")

            # Sleep
            self._interruptible_sleep(interval)

    except Exception as e:
        self.log.error(f"Monitor daemon error: {e}")
        raise
    finally:
        self.log.info("Monitor daemon shutting down")
        try:
            self.presence.stop()
            self.summarizer.stop()
            self.state.status = "stopped"
            self.state.save(self.state_path)
        finally:
            # Always release the PID file, even if stopping a component or
            # saving the final state fails; a leftover PID file would make
            # the daemon look alive.
            remove_pid_file(self.pid_path)
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def main() -> int:
    """Parse command-line options and run a monitor daemon.

    Returns:
        0 once the daemon's run loop has finished.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Overcode Monitor Daemon")
    cli.add_argument(
        "--session", "-s",
        default="agents",
        help="tmux session name (default: agents)",
    )
    cli.add_argument(
        "--interval", "-i",
        type=int,
        default=INTERVAL_FAST,
        help=f"Check interval in seconds (default: {INTERVAL_FAST})",
    )
    options = cli.parse_args()

    MonitorDaemon(tmux_session=options.session).run(check_interval=options.interval)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|