overcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- overcode/__init__.py +5 -0
- overcode/cli.py +812 -0
- overcode/config.py +72 -0
- overcode/daemon.py +1184 -0
- overcode/daemon_claude_skill.md +180 -0
- overcode/daemon_state.py +113 -0
- overcode/data_export.py +257 -0
- overcode/dependency_check.py +227 -0
- overcode/exceptions.py +219 -0
- overcode/history_reader.py +448 -0
- overcode/implementations.py +214 -0
- overcode/interfaces.py +49 -0
- overcode/launcher.py +434 -0
- overcode/logging_config.py +193 -0
- overcode/mocks.py +152 -0
- overcode/monitor_daemon.py +808 -0
- overcode/monitor_daemon_state.py +358 -0
- overcode/pid_utils.py +225 -0
- overcode/presence_logger.py +454 -0
- overcode/protocols.py +143 -0
- overcode/session_manager.py +606 -0
- overcode/settings.py +412 -0
- overcode/standing_instructions.py +276 -0
- overcode/status_constants.py +190 -0
- overcode/status_detector.py +339 -0
- overcode/status_history.py +164 -0
- overcode/status_patterns.py +264 -0
- overcode/summarizer_client.py +136 -0
- overcode/summarizer_component.py +312 -0
- overcode/supervisor_daemon.py +1000 -0
- overcode/supervisor_layout.sh +50 -0
- overcode/tmux_manager.py +228 -0
- overcode/tui.py +2549 -0
- overcode/tui_helpers.py +495 -0
- overcode/web_api.py +279 -0
- overcode/web_server.py +138 -0
- overcode/web_templates.py +563 -0
- overcode-0.1.0.dist-info/METADATA +87 -0
- overcode-0.1.0.dist-info/RECORD +43 -0
- overcode-0.1.0.dist-info/WHEEL +5 -0
- overcode-0.1.0.dist-info/entry_points.txt +2 -0
- overcode-0.1.0.dist-info/licenses/LICENSE +21 -0
- overcode-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Monitor Daemon state management.
|
|
3
|
+
|
|
4
|
+
This module defines the official interface between the Monitor Daemon
|
|
5
|
+
and its consumers (TUI, Supervisor Daemon).
|
|
6
|
+
|
|
7
|
+
The Monitor Daemon is the single source of truth for:
|
|
8
|
+
- Agent status detection
|
|
9
|
+
- Time tracking (green_time_seconds, non_green_time_seconds)
|
|
10
|
+
- Claude Code stats (tokens, interactions)
|
|
11
|
+
- User presence state (macOS only)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Optional
|
|
19
|
+
|
|
20
|
+
from .settings import (
|
|
21
|
+
PATHS,
|
|
22
|
+
DAEMON,
|
|
23
|
+
get_monitor_daemon_state_path,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class SessionDaemonState:
    """Snapshot of a single agent session, as published by the Monitor Daemon.

    Consumers such as the TUI and the Supervisor Daemon are meant to take
    session metrics from this record instead of reading Claude Code files
    themselves.
    """

    # --- Session identity ---
    session_id: str = ""
    name: str = ""
    tmux_window: int = 0

    # --- Status (from StatusDetector) ---
    # running, waiting_user, waiting_supervisor, no_instructions, terminated
    current_status: str = "unknown"
    current_activity: str = ""
    status_since: Optional[str] = None  # ISO timestamp

    # --- Time tracking (authoritative - only Monitor Daemon updates these) ---
    green_time_seconds: float = 0.0
    non_green_time_seconds: float = 0.0

    # --- Claude Code stats (synced from ~/.claude/projects/) ---
    interaction_count: int = 0
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    estimated_cost_usd: float = 0.0
    median_work_time: float = 0.0

    # --- Session metadata ---
    repo_name: Optional[str] = None
    branch: Optional[str] = None
    standing_instructions: str = ""
    standing_orders_complete: bool = False
    steers_count: int = 0

    # --- Additional session info (for web dashboard parity with TUI) ---
    start_time: Optional[str] = None  # ISO timestamp when session started
    permissiveness_mode: str = "normal"  # normal, permissive, bypass
    start_directory: Optional[str] = None  # For git diff stats

    # --- Activity summary (from SummarizerComponent) ---
    activity_summary: str = ""
    activity_summary_updated: Optional[str] = None  # ISO timestamp

    def to_dict(self) -> dict:
        """Return all declared fields as a plain dict suitable for JSON.

        Keys are the field names, in declaration order.
        """
        # __dataclass_fields__ maps field name -> Field in declaration order,
        # so the field list above is the single source of truth for the keys.
        return {key: getattr(self, key) for key in self.__dataclass_fields__}

    @classmethod
    def from_dict(cls, data: dict) -> "SessionDaemonState":
        """Build an instance from a dict such as the one made by ``to_dict``.

        Keys missing from ``data`` fall back to the field defaults declared
        on the class; keys that are not fields are ignored.
        """
        absent = object()  # sentinel: tells "key missing" apart from a stored None
        supplied = {}
        for key in cls.__dataclass_fields__:
            value = data.get(key, absent)
            if value is not absent:
                supplied[key] = value
        return cls(**supplied)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass
class MonitorDaemonState:
    """State published by the Monitor Daemon for the TUI and Supervisor Daemon.

    This is the official interface for reading monitoring data. Consumers
    should call ``MonitorDaemonState.load()`` to get the current state; the
    daemon publishes it with ``save()``.
    """

    # Daemon metadata
    pid: int = 0
    status: str = "stopped"  # starting, active, idle, sleeping, stopped
    loop_count: int = 0
    # Looked up lazily so the default follows DAEMON.interval_fast at
    # construction time rather than at class-definition time.
    current_interval: int = field(default_factory=lambda: DAEMON.interval_fast)
    last_loop_time: Optional[str] = None  # ISO timestamp
    started_at: Optional[str] = None  # ISO timestamp
    daemon_version: int = 0  # Version of daemon code

    # Session states (one per agent)
    sessions: List[SessionDaemonState] = field(default_factory=list)

    # Presence state (optional, macOS only)
    presence_available: bool = False
    presence_state: Optional[int] = None  # 1=locked/sleep, 2=inactive, 3=active
    presence_idle_seconds: Optional[float] = None

    # Summary metrics (computed from sessions by update_summaries)
    total_green_time: float = 0.0
    total_non_green_time: float = 0.0
    green_sessions: int = 0
    non_green_sessions: int = 0

    # Supervisor aggregates (from SupervisorStats + sessions)
    total_supervisions: int = 0  # Sum of steers_count across sessions
    supervisor_launches: int = 0  # Times daemon claude was launched
    supervisor_tokens: int = 0  # Total tokens used by daemon claude

    # Daemon Claude run status (from SupervisorStats)
    supervisor_claude_running: bool = False
    supervisor_claude_started_at: Optional[str] = None  # ISO timestamp
    supervisor_claude_total_run_seconds: float = 0.0  # Cumulative run time

    # Relay status (for remote monitoring)
    relay_enabled: bool = False
    relay_last_push: Optional[str] = None  # ISO timestamp of last successful push
    relay_last_status: str = "disabled"  # "ok", "error", "disabled"

    # Summarizer status
    summarizer_enabled: bool = False
    summarizer_available: bool = False  # True if OPENAI_API_KEY is set
    summarizer_calls: int = 0
    summarizer_cost_usd: float = 0.0

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON serialization.

        Keys are the field names in declaration order; ``sessions`` is
        emitted as a list of per-session dicts.
        """
        # __dataclass_fields__ maps field name -> Field in declaration order,
        # so the field list above is the single source of truth for the keys.
        payload = {key: getattr(self, key) for key in self.__dataclass_fields__}
        # Re-assigning an existing key keeps its position in the dict.
        payload["sessions"] = [s.to_dict() for s in self.sessions]
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "MonitorDaemonState":
        """Create an instance from a dictionary such as ``to_dict`` produces.

        Keys missing from ``data`` fall back to the field defaults declared
        on the class; keys that are not fields are ignored.
        """
        absent = object()  # sentinel: tells "key missing" apart from a stored None
        supplied = {
            "sessions": [
                SessionDaemonState.from_dict(s)
                for s in data.get("sessions", [])
            ],
        }
        for key in cls.__dataclass_fields__:
            if key == "sessions":
                continue
            value = data.get(key, absent)
            if value is not absent:
                supplied[key] = value
        return cls(**supplied)

    def update_summaries(self) -> None:
        """Recompute the summary metrics from the current session list."""
        self.total_green_time = sum(s.green_time_seconds for s in self.sessions)
        self.total_non_green_time = sum(s.non_green_time_seconds for s in self.sessions)
        # A session counts as "green" only while its status is "running".
        self.green_sessions = sum(1 for s in self.sessions if s.current_status == "running")
        self.non_green_sessions = len(self.sessions) - self.green_sessions
        self.total_supervisions = sum(s.steers_count for s in self.sessions)

    def get_session(self, session_id: str) -> Optional[SessionDaemonState]:
        """Return the first session whose ``session_id`` matches, or None."""
        return next((s for s in self.sessions if s.session_id == session_id), None)

    def get_session_by_name(self, name: str) -> Optional[SessionDaemonState]:
        """Return the first session whose ``name`` matches, or None."""
        return next((s for s in self.sessions if s.name == name), None)

    def save(self, state_file: Optional[Path] = None) -> None:
        """Save state to file for consumers to read.

        Summary metrics are recomputed first. The JSON is written to a
        sibling ``<name>.tmp`` file and then renamed over the target, so a
        reader never observes a truncated or half-written state file.

        Args:
            state_file: Optional path override (for testing)

        Raises:
            OSError: If the directory or file cannot be written.
            TypeError: If the state contains values that are not
                JSON-serializable (the existing file is left untouched).
        """
        path = state_file or PATHS.monitor_daemon_state
        path.parent.mkdir(parents=True, exist_ok=True)

        # Update summaries before saving
        self.update_summaries()

        # Serialize before touching the disk so a serialization error
        # cannot clobber the previously published state.
        payload = json.dumps(self.to_dict(), indent=2)

        tmp_path = path.with_name(path.name + ".tmp")
        tmp_path.write_text(payload)
        # Same-directory rename: replaces the target in a single step.
        tmp_path.replace(path)

    @classmethod
    def load(cls, state_file: Optional[Path] = None) -> Optional["MonitorDaemonState"]:
        """Load state from file.

        Args:
            state_file: Optional path override (for testing)

        Returns:
            MonitorDaemonState if the file exists and holds a valid state
            object; None if it is missing, unreadable, not valid JSON, or
            not shaped like a state dictionary.
        """
        path = state_file or PATHS.monitor_daemon_state

        try:
            with open(path) as f:
                data = json.load(f)
        except (OSError, ValueError, TypeError):
            # OSError covers a missing/unreadable file (including one that
            # disappears mid-read); ValueError covers JSONDecodeError and
            # undecodable bytes.
            return None

        # Valid JSON that is not an object (e.g. [] or "text") is not a state.
        if not isinstance(data, dict):
            return None

        try:
            return cls.from_dict(data)
        except (AttributeError, KeyError, ValueError, TypeError):
            # Malformed nested data, e.g. a session entry that is not an
            # object or a "sessions" value that is not iterable.
            return None

    def is_stale(self, buffer_seconds: float = 30.0) -> bool:
        """Check if the state is stale (daemon may have crashed).

        Uses current_interval + buffer to determine staleness. This way, a
        daemon sleeping for 300s won't be considered stale after just 30s.

        Args:
            buffer_seconds: Extra time beyond current_interval before the
                state is considered stale

        Returns:
            True if the state is older than (current_interval +
            buffer_seconds), or if last_loop_time is missing or unparseable.
        """
        if not self.last_loop_time:
            return True

        try:
            last_time = datetime.fromisoformat(self.last_loop_time)
            # Compare like with like: naive timestamps against naive local
            # time (tzinfo is None), aware timestamps against aware "now".
            now = datetime.now(last_time.tzinfo)
            age = (now - last_time).total_seconds()
            # Allow current_interval + buffer before considering stale
            return age > self.current_interval + buffer_seconds
        except (ValueError, TypeError):
            return True
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def get_monitor_daemon_state(session: Optional[str] = None) -> Optional[MonitorDaemonState]:
    """Load the monitor daemon state published for a tmux session.

    Convenience wrapper for the TUI and other consumers: resolves the
    state file path for the session and delegates to
    ``MonitorDaemonState.load``.

    Args:
        session: tmux session name. When None, DAEMON.default_tmux_session
            is used.

    Returns:
        Whatever ``MonitorDaemonState.load`` returns for that path: the
        loaded state, or None when no usable state file is found.
    """
    tmux_session = DAEMON.default_tmux_session if session is None else session
    return MonitorDaemonState.load(get_monitor_daemon_state_path(tmux_session))
|
overcode/pid_utils.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PID file management utilities for Overcode.
|
|
3
|
+
|
|
4
|
+
Provides common functions for checking process status via PID files,
|
|
5
|
+
used by both the daemon and presence logger.
|
|
6
|
+
|
|
7
|
+
Uses file locking to prevent TOCTOU race conditions when multiple
|
|
8
|
+
daemons try to start simultaneously.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import fcntl
|
|
12
|
+
import os
|
|
13
|
+
import signal
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Optional, Tuple
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_process_running(pid_file: Path) -> bool:
    """Check if a process is running based on its PID file.

    Args:
        pid_file: Path to the PID file

    Returns:
        True if the PID file holds a positive PID and signalling that
        process with signal 0 succeeds; False if the file is missing,
        unreadable, does not contain a valid positive PID, or the signal
        attempt fails.
    """
    try:
        pid = int(pid_file.read_text().strip())
    except (OSError, ValueError):
        # Missing/unreadable file, or contents that are not an integer.
        return False

    # os.kill() treats 0 and negative values as process-group targets, so
    # such a value would "succeed" without identifying any single process.
    if pid <= 0:
        return False

    try:
        # Signal 0 performs the existence/permission check without
        # delivering a signal.
        os.kill(pid, 0)
    except (OSError, OverflowError):
        # OSError: no such process (or not signalable by us).
        # OverflowError: number too large to be a PID at all.
        return False
    return True
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_process_pid(pid_file: Path) -> Optional[int]:
    """Get the PID from a PID file if the process is running.

    Args:
        pid_file: Path to the PID file

    Returns:
        The PID if the file holds a positive PID and signalling that
        process with signal 0 succeeds; None if the file is missing,
        unreadable, does not contain a valid positive PID, or the signal
        attempt fails.
    """
    try:
        pid = int(pid_file.read_text().strip())
    except (OSError, ValueError):
        # Missing/unreadable file, or contents that are not an integer.
        return None

    # Never hand back 0 or a negative number: passed on to os.kill() it
    # would address a process group instead of a single process.
    if pid <= 0:
        return None

    try:
        os.kill(pid, 0)  # Signal 0: liveness check only, nothing delivered
    except (OSError, OverflowError):
        # OSError: no such process (or not signalable by us).
        # OverflowError: number too large to be a PID at all.
        return None
    return pid
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def write_pid_file(pid_file: Path, pid: Optional[int] = None) -> None:
    """Record a PID in ``pid_file``, creating parent directories as needed.

    Args:
        pid_file: Path to the PID file
        pid: PID to record; when None, the current process PID is used
    """
    value = os.getpid() if pid is None else pid
    pid_file.parent.mkdir(parents=True, exist_ok=True)
    pid_file.write_text(str(value))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def remove_pid_file(pid_file: Path) -> None:
    """Delete ``pid_file``; a file that is already gone is not an error.

    Args:
        pid_file: Path to the PID file
    """
    try:
        os.unlink(pid_file)
    except FileNotFoundError:
        # Nothing to remove.
        return
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def acquire_daemon_lock(pid_file: Path) -> Tuple[bool, Optional[int]]:
    """Atomically check if daemon is running and acquire the lock if not.

    Uses file locking to prevent TOCTOU race conditions when multiple
    processes try to start the daemon simultaneously. The lock is held
    only while the PID file is checked and rewritten; it is released
    before returning.

    Args:
        pid_file: Path to the PID file

    Returns:
        Tuple of (acquired, existing_pid):
        - (True, None) if the lock was acquired and our PID was written
        - (False, existing_pid) if a live process is recorded in the PID
          file, or if another process currently holds the lock and the
          PID file is readable
        - (False, None) if the lock file could not be opened, the lock is
          held and no PID could be read, or our PID could not be written

    Raises:
        OSError: If the PID file's parent directory cannot be created.
    """
    pid_file.parent.mkdir(parents=True, exist_ok=True)

    # Use a separate lock file to avoid truncation issues
    lock_file = pid_file.with_suffix('.lock')

    try:
        # Open lock file for writing (creates if doesn't exist)
        fd = os.open(str(lock_file), os.O_WRONLY | os.O_CREAT, 0o644)
    except OSError:
        # Could not open lock file
        return False, None

    try:
        try:
            # Try to acquire exclusive lock (non-blocking)
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # Another process holds the lock; report whatever PID is
            # recorded (no liveness check, the holder is mid-startup).
            return False, _read_recorded_pid(pid_file)

        # We have the lock - now check if another daemon is running.
        existing_pid = _read_recorded_pid(pid_file)
        # Only a positive value names a single process; 0 or a negative
        # number would make os.kill() probe a process group and wrongly
        # look "alive".
        if existing_pid is not None and existing_pid > 0:
            try:
                os.kill(existing_pid, 0)
            except (OSError, OverflowError):
                # Recorded process is gone (or the number is not a valid
                # PID): the file is stale and gets overwritten below.
                pass
            else:
                return False, existing_pid

        try:
            pid_file.write_text(str(os.getpid()))
        except OSError:
            # We held the lock but could not record our PID; report
            # failure without blaming a stale PID.
            return False, None

        return True, None
    finally:
        # Closing the descriptor also drops the flock, on every path.
        os.close(fd)


def _read_recorded_pid(pid_file: Path) -> Optional[int]:
    """Return the integer stored in ``pid_file``, or None if it is missing, unreadable or not an integer."""
    try:
        return int(pid_file.read_text().strip())
    except (ValueError, OSError):
        return None
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def count_daemon_processes(pattern: str = "monitor_daemon") -> int:
    """Count the processes that ``pgrep -f`` reports for ``pattern``.

    Args:
        pattern: Pattern handed to ``pgrep -f`` (matched against the full
            command line)

    Returns:
        Number of PIDs printed by pgrep; 0 when nothing matches, pgrep
        exits non-zero, pgrep is unavailable, or the call times out
        after 5 seconds.
    """
    import subprocess

    try:
        completed = subprocess.run(
            ["pgrep", "-f", pattern],
            capture_output=True,
            text=True,
            timeout=5.0,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return 0

    listing = completed.stdout.strip()
    if completed.returncode != 0 or not listing:
        return 0

    # pgrep prints one PID per line; blank lines are not counted.
    return sum(1 for entry in listing.split('\n') if entry)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def stop_process(pid_file: Path, timeout: float = 5.0) -> bool:
    """Stop a process by reading its PID file and sending SIGTERM.

    After SIGTERM the process is polled every 0.1s; if it is still
    signalable once ``timeout`` has elapsed, SIGKILL is sent. The PID file
    is removed on every path except when it did not exist to begin with.

    Args:
        pid_file: Path to the PID file
        timeout: Seconds to wait for process to terminate

    Returns:
        True if SIGTERM was delivered (the process exited or was then
        sent SIGKILL); False if the PID file was missing, did not hold a
        valid positive PID, or the process could not be signalled.
    """
    import time

    try:
        pid = int(pid_file.read_text().strip())
    except FileNotFoundError:
        return False
    except (ValueError, OSError):
        # PID file unreadable or not an integer
        remove_pid_file(pid_file)
        return False

    # Safety: os.kill() with 0 targets the caller's own process group and
    # with a negative value targets a whole group (or, for -1, every
    # process we may signal). A corrupt PID file must never trigger that.
    if pid <= 0:
        remove_pid_file(pid_file)
        return False

    try:
        os.kill(pid, signal.SIGTERM)
    except (OSError, OverflowError):
        # Process not running / not signalable, or not a valid PID number
        remove_pid_file(pid_file)
        return False

    # Wait for process to terminate. A monotonic clock keeps the timeout
    # immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            os.kill(pid, 0)
        except OSError:
            # Process terminated
            remove_pid_file(pid_file)
            return True
        time.sleep(0.1)

    # Process didn't terminate, try SIGKILL
    try:
        os.kill(pid, signal.SIGKILL)
    except OSError:
        # It went away between the last poll and the SIGKILL attempt
        # (or cannot be signalled); treated as stopped either way.
        pass
    remove_pid_file(pid_file)
    return True
|