procler 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- procler/__init__.py +3 -0
- procler/__main__.py +6 -0
- procler/api/__init__.py +5 -0
- procler/api/app.py +261 -0
- procler/api/deps.py +21 -0
- procler/api/routes/__init__.py +5 -0
- procler/api/routes/config.py +290 -0
- procler/api/routes/groups.py +62 -0
- procler/api/routes/logs.py +43 -0
- procler/api/routes/processes.py +185 -0
- procler/api/routes/recipes.py +69 -0
- procler/api/routes/snippets.py +134 -0
- procler/api/routes/ws.py +459 -0
- procler/cli.py +1478 -0
- procler/config/__init__.py +65 -0
- procler/config/changelog.py +148 -0
- procler/config/loader.py +256 -0
- procler/config/schema.py +315 -0
- procler/core/__init__.py +54 -0
- procler/core/context_base.py +117 -0
- procler/core/context_docker.py +384 -0
- procler/core/context_local.py +287 -0
- procler/core/daemon_detector.py +325 -0
- procler/core/events.py +74 -0
- procler/core/groups.py +419 -0
- procler/core/health.py +280 -0
- procler/core/log_tailer.py +262 -0
- procler/core/process_manager.py +1277 -0
- procler/core/recipes.py +330 -0
- procler/core/snippets.py +231 -0
- procler/core/variable_substitution.py +65 -0
- procler/db.py +96 -0
- procler/logging.py +41 -0
- procler/models.py +130 -0
- procler/py.typed +0 -0
- procler/settings.py +29 -0
- procler/static/assets/AboutView-BwZnsfpW.js +4 -0
- procler/static/assets/AboutView-UHbxWXcS.css +1 -0
- procler/static/assets/Code-HTS-H1S6.js +74 -0
- procler/static/assets/ConfigView-CGJcmp9G.css +1 -0
- procler/static/assets/ConfigView-aVtbRDf8.js +1 -0
- procler/static/assets/DashboardView-C5jw9Nsd.css +1 -0
- procler/static/assets/DashboardView-Dab7Cu9v.js +1 -0
- procler/static/assets/DataTable-z39TOAa4.js +746 -0
- procler/static/assets/DescriptionsItem-B2E8YbqJ.js +74 -0
- procler/static/assets/Divider-Dk-6aD2Y.js +42 -0
- procler/static/assets/Empty-MuygEHZM.js +24 -0
- procler/static/assets/Grid-CZ9QVKAT.js +1 -0
- procler/static/assets/GroupsView-BALG7i1X.js +1 -0
- procler/static/assets/GroupsView-gXAI1CVC.css +1 -0
- procler/static/assets/Input-e0xaxoWE.js +259 -0
- procler/static/assets/PhArrowsClockwise.vue-DqDg31az.js +1 -0
- procler/static/assets/PhCheckCircle.vue-Fwj9sh9m.js +1 -0
- procler/static/assets/PhEye.vue-JcPHciC2.js +1 -0
- procler/static/assets/PhPlay.vue-CZm7Gy3u.js +1 -0
- procler/static/assets/PhPlus.vue-yTWqKlSh.js +1 -0
- procler/static/assets/PhStop.vue-DxsqwIki.js +1 -0
- procler/static/assets/PhTrash.vue-DcqQbN1_.js +125 -0
- procler/static/assets/PhXCircle.vue-BXWmrabV.js +1 -0
- procler/static/assets/ProcessDetailView-DDbtIWq9.css +1 -0
- procler/static/assets/ProcessDetailView-DPtdNV-q.js +1 -0
- procler/static/assets/ProcessesView-B3a6Umur.js +1 -0
- procler/static/assets/ProcessesView-goLmghbJ.css +1 -0
- procler/static/assets/RecipesView-D2VxdneD.js +166 -0
- procler/static/assets/RecipesView-DXnFDCK4.css +1 -0
- procler/static/assets/Select-BBR17AHq.js +317 -0
- procler/static/assets/SnippetsView-B3a9q3AI.css +1 -0
- procler/static/assets/SnippetsView-DBCB2yGq.js +1 -0
- procler/static/assets/Spin-BXTjvFUk.js +90 -0
- procler/static/assets/Tag-Bh_qV63A.js +71 -0
- procler/static/assets/changelog-KkTT4H9-.js +1 -0
- procler/static/assets/groups-Zu-_v8ey.js +1 -0
- procler/static/assets/index-BsN-YMXq.css +1 -0
- procler/static/assets/index-BzW1XhyH.js +1282 -0
- procler/static/assets/procler-DOrSB1Vj.js +1 -0
- procler/static/assets/recipes-1w5SseGb.js +1 -0
- procler/static/index.html +17 -0
- procler/static/procler.png +0 -0
- procler-0.2.0.dist-info/METADATA +545 -0
- procler-0.2.0.dist-info/RECORD +83 -0
- procler-0.2.0.dist-info/WHEEL +4 -0
- procler-0.2.0.dist-info/entry_points.txt +2 -0
- procler-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1277 @@
|
|
|
1
|
+
"""Central process manager coordinating all process operations."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import shlex
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from sqler.query import SQLerField as F
|
|
13
|
+
|
|
14
|
+
from ..config import ChangelogAction, append_changelog
|
|
15
|
+
from ..db import init_database
|
|
16
|
+
from ..models import LogEntry, Process, ProcessStatus
|
|
17
|
+
from .context_base import ExecResult, ExecutionContext, ProcessHandle
|
|
18
|
+
from .context_docker import get_docker_context, is_docker_available
|
|
19
|
+
from .context_local import get_local_context
|
|
20
|
+
from .daemon_detector import get_daemon_detector
|
|
21
|
+
from .events import EVENT_LOG_ENTRY, EVENT_STATUS_CHANGE, get_event_bus
|
|
22
|
+
from .variable_substitution import substitute_vars_from_config
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# Default max log entries per process
DEFAULT_MAX_LOGS = 10000

# Default log directory (inside container for docker, on host for local)
DEFAULT_LOG_DIR = "/tmp/procler"

# Linux process state descriptions
# Keys are the single-letter state codes found in the third field of
# /proc/[pid]/stat (see proc(5)); each value carries a short machine-friendly
# name plus a human-readable description surfaced in status output.
LINUX_PROCESS_STATES = {
    "R": {"name": "running", "description": "Running or runnable (on run queue)"},
    "S": {"name": "sleeping", "description": "Interruptible sleep (waiting for event)"},
    "D": {
        "name": "disk_sleep",
        "description": "Uninterruptible sleep (usually I/O) - CANNOT BE KILLED",
    },
    "Z": {"name": "zombie", "description": "Zombie - terminated but not reaped by parent"},
    "T": {"name": "stopped", "description": "Stopped by job control signal"},
    "t": {"name": "tracing_stop", "description": "Stopped by debugger during tracing"},
    "X": {"name": "dead", "description": "Dead (should never be seen)"},
    "I": {"name": "idle", "description": "Idle kernel thread"},
    "W": {"name": "waking", "description": "Waking (Linux 2.6.33 to 3.13 only)"},
    "P": {"name": "parked", "description": "Parked (Linux 3.9 to 3.13 only)"},
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_linux_process_state(pid: int) -> dict[str, Any] | None:
|
|
51
|
+
"""
|
|
52
|
+
Read the process state from /proc/[pid]/stat.
|
|
53
|
+
|
|
54
|
+
Returns a dict with:
|
|
55
|
+
- state_code: Single letter (R, S, D, Z, T, etc.)
|
|
56
|
+
- state_name: Human-readable name
|
|
57
|
+
- state_description: Full description
|
|
58
|
+
- is_killable: False if in D state
|
|
59
|
+
|
|
60
|
+
Returns None if the process doesn't exist or can't be read.
|
|
61
|
+
"""
|
|
62
|
+
try:
|
|
63
|
+
stat_path = Path(f"/proc/{pid}/stat")
|
|
64
|
+
if not stat_path.exists():
|
|
65
|
+
return None
|
|
66
|
+
|
|
67
|
+
content = stat_path.read_text()
|
|
68
|
+
|
|
69
|
+
# Format: pid (comm) state ppid ...
|
|
70
|
+
# comm can contain spaces and parentheses, so find the last )
|
|
71
|
+
last_paren = content.rfind(")")
|
|
72
|
+
if last_paren == -1:
|
|
73
|
+
return None
|
|
74
|
+
|
|
75
|
+
# State is the first character after ") "
|
|
76
|
+
state_section = content[last_paren + 2 :]
|
|
77
|
+
state_code = state_section.split()[0]
|
|
78
|
+
|
|
79
|
+
state_info = LINUX_PROCESS_STATES.get(
|
|
80
|
+
state_code, {"name": "unknown", "description": f"Unknown state: {state_code}"}
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
return {
|
|
84
|
+
"state_code": state_code,
|
|
85
|
+
"state_name": state_info["name"],
|
|
86
|
+
"state_description": state_info["description"],
|
|
87
|
+
"is_killable": state_code != "D",
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
except (OSError, IndexError):
|
|
91
|
+
return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def get_process_children(pid: int) -> list[int]:
    """Get all child PIDs of a process."""
    try:
        # The kernel exposes direct children of the main thread here.
        children_file = Path(f"/proc/{pid}/task/{pid}/children")
        if not children_file.exists():
            return []
        text = children_file.read_text().strip()
        if not text:
            return []
        return [int(token) for token in text.split()]
    except (OSError, ValueError):
        # Missing /proc entry or unparseable content: report no children.
        return []
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def parse_duration(duration: str) -> int:
    """Parse a duration string like '5m', '1h', '30s' to seconds.

    Accepted formats, tried in order:
    1. ISO timestamp (returns seconds elapsed since that moment)
    2. Integer with a unit suffix: s, m, h, d
    3. Bare integer, taken as raw seconds

    Args:
        duration: The duration string; empty/falsy input yields 0.

    Returns:
        Duration in whole seconds.

    Raises:
        ValueError: If the string matches none of the accepted formats.
    """
    if not duration:
        return 0

    duration = duration.strip().lower()

    # Try ISO timestamp first
    try:
        dt = datetime.fromisoformat(duration)
        return int((datetime.now() - dt).total_seconds())
    except ValueError:
        pass

    # Unit-suffix shorthand.
    multipliers = {
        "s": 1,
        "m": 60,
        "h": 3600,
        "d": 86400,
    }

    if duration[-1] in multipliers:
        try:
            value = int(duration[:-1])
            return value * multipliers[duration[-1]]
        except ValueError:
            pass

    # Try as raw seconds
    try:
        return int(duration)
    except ValueError:
        # `from None` suppresses the implicit chaining of the internal
        # int() failure, keeping the traceback focused on the real error.
        raise ValueError(f"Invalid duration format: {duration}") from None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def get_log_file_path(process_name: str) -> str:
    """Get the default log file path for a process."""
    # Keep alphanumerics, dashes and underscores; everything else becomes "_"
    # so the name is safe to use as a filename.
    chars = []
    for ch in process_name:
        chars.append(ch if ch.isalnum() or ch in "-_" else "_")
    return f"{DEFAULT_LOG_DIR}/{''.join(chars)}.log"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def extract_docker_exec_user(command: str) -> str | None:
|
|
151
|
+
"""Extract the -u/--user value from a docker exec command."""
|
|
152
|
+
# Match -u <user> or --user <user> or --user=<user>
|
|
153
|
+
match = re.search(r"(?:-u|--user)[=\s]+([^\s]+)", command)
|
|
154
|
+
return match.group(1) if match else None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def ensure_log_dir_in_container(container: str, user: str | None = None) -> None:
    """Ensure log directory exists inside a Docker container.

    If user is provided, creates directory as that user so they can write to it.

    Best effort: any failure (container missing, directory already present,
    or the docker binary itself unavailable) is silently ignored.
    """
    import subprocess

    try:
        cmd = ["docker", "exec"]
        if user:
            cmd.extend(["-u", user])
        cmd.extend([container, "mkdir", "-p", DEFAULT_LOG_DIR])
        subprocess.run(cmd, check=True, capture_output=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError (incl. FileNotFoundError) covers a missing docker binary;
        # previously only CalledProcessError was caught, so a host without
        # docker would crash despite the "best effort" contract.
        pass  # Best effort - directory might already exist or container might be unavailable
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def ensure_local_log_dir() -> None:
    """Ensure log directory exists on local filesystem."""
    # Idempotent: creates parents as needed, no error if already present.
    log_dir = Path(DEFAULT_LOG_DIR)
    log_dir.mkdir(parents=True, exist_ok=True)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
async def read_log_file_from_container(container: str, log_path: str, tail: int = 100) -> list[str]:
    """Read log file from inside a Docker container.

    Args:
        container: Container name or id to exec into.
        log_path: Path of the log file inside the container.
        tail: Number of trailing lines to fetch.

    Returns:
        The last ``tail`` lines of the file, or [] if the file, container,
        or docker binary is unavailable.
    """
    import subprocess

    try:
        # NOTE(review): subprocess.run blocks the event loop for up to 10s;
        # tolerable for a bounded, infrequent tail, but worth revisiting.
        result = subprocess.run(
            ["docker", "exec", container, "tail", "-n", str(tail), log_path],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode == 0:
            return result.stdout.splitlines()
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # OSError (incl. FileNotFoundError) covers a missing docker binary;
        # previously it would propagate and break callers on docker-less hosts.
        pass
    return []
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def wrap_command_with_log_redirect(command: str, log_path: str) -> str:
    """Wrap a command to redirect stdout/stderr to a log file.

    For shell -c commands (bash, fish, sh, zsh) the redirect is injected
    inside the quoted command string; for anything else it is appended.
    """
    # Quote the log path to prevent shell injection.
    quoted_path = shlex.quote(log_path)

    # Detect a trailing `<shell> -c "<cmd>"` (or single-quoted) pattern.
    shell_c = re.search(r'((bash|fish|sh|zsh) -c ["\'])(.+?)(["\'])(\s*)$', command)
    if shell_c is None:
        # Simple command: append redirect (> truncates the file on start).
        return f"{command} > {quoted_path} 2>&1"

    head = command[: shell_c.start()] + shell_c.group(1)
    inner_cmd = shell_c.group(3)
    closing_quote = shell_c.group(4)
    trailing = shell_c.group(5)

    # We are already inside the -c quotes, so strip shlex's outer single
    # quotes from the path if it added any.
    if quoted_path.startswith("'"):
        inner_path = quoted_path[1:-1]
    else:
        inner_path = quoted_path

    return f"{head}{inner_cmd} > {inner_path} 2>&1{closing_quote}{trailing}"
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class ProcessManager:
    """Central coordinator for process operations.

    Owns one ExecutionContext per context type ("local" and, when
    available, "docker"), tracks live ProcessHandles keyed by process id,
    and persists process state through the Process model.
    """
|
|
226
|
+
|
|
227
|
+
def __init__(self):
|
|
228
|
+
self._contexts: dict[str, ExecutionContext] = {}
|
|
229
|
+
self._handles: dict[int, ProcessHandle] = {} # process_id -> handle
|
|
230
|
+
self._lock = asyncio.Lock() # Protects _handles from concurrent access
|
|
231
|
+
self._init_contexts()
|
|
232
|
+
|
|
233
|
+
def _init_contexts(self) -> None:
|
|
234
|
+
"""Initialize execution contexts."""
|
|
235
|
+
self._contexts["local"] = get_local_context()
|
|
236
|
+
# Add Docker context if available
|
|
237
|
+
if is_docker_available():
|
|
238
|
+
try:
|
|
239
|
+
self._contexts["docker"] = get_docker_context()
|
|
240
|
+
except Exception:
|
|
241
|
+
pass # Docker not available or not running
|
|
242
|
+
|
|
243
|
+
def _get_context(self, context_type: str) -> ExecutionContext:
|
|
244
|
+
"""Get the execution context for a given type."""
|
|
245
|
+
context = self._contexts.get(context_type)
|
|
246
|
+
if not context:
|
|
247
|
+
raise ValueError(f"Unknown context type: {context_type}")
|
|
248
|
+
return context
|
|
249
|
+
|
|
250
|
+
def _get_process_by_name(self, name: str) -> Process | None:
|
|
251
|
+
"""Get a process by name."""
|
|
252
|
+
results = Process.query().filter(F("name") == name).all()
|
|
253
|
+
return results[0] if results else None
|
|
254
|
+
|
|
255
|
+
    async def _get_handle(self, process_id: int) -> ProcessHandle | None:
        """Thread-safe get handle.

        Returns the live ProcessHandle for *process_id*, or None when this
        manager instance holds no handle for it.
        """
        async with self._lock:
            return self._handles.get(process_id)

    async def _set_handle(self, process_id: int, handle: ProcessHandle) -> None:
        """Thread-safe set handle."""
        # Lock serializes against concurrent get/remove on the same dict.
        async with self._lock:
            self._handles[process_id] = handle

    async def _remove_handle(self, process_id: int) -> None:
        """Thread-safe remove handle."""
        # pop() with a default makes removal of an absent handle a no-op.
        async with self._lock:
            self._handles.pop(process_id, None)
|
|
269
|
+
|
|
270
|
+
    def _log_callback(self, process_id: int, stream: str):
        """Create a callback for logging output.

        Args:
            process_id: Database id of the owning process.
            stream: Label stored with each line ("stdout" or "stderr").

        Returns:
            A callable taking one output line; it persists the line as a
            LogEntry and broadcasts it on the event bus. Both steps are
            independently best-effort: a failure in either is logged and
            does not prevent the other.
        """

        def callback(line: str) -> None:
            # Capture the timestamp once so the DB row and the broadcast
            # event agree exactly.
            timestamp = datetime.now().isoformat()
            entry = LogEntry(
                process_id=process_id,
                stream=stream,
                line=line,
                timestamp=timestamp,
            )
            try:
                entry.save()
            except Exception as e:
                logger.error(f"Failed to save log entry for process {process_id}: {e}")

            # Emit event for WebSocket broadcast
            try:
                get_event_bus().emit_sync(
                    EVENT_LOG_ENTRY,
                    {
                        "process_id": process_id,
                        "stream": stream,
                        "line": line,
                        "timestamp": timestamp,
                    },
                )
            except Exception as e:
                logger.error(f"Failed to emit log event for process {process_id}: {e}")

        return callback
|
|
301
|
+
|
|
302
|
+
    def _exit_callback(self, process: Process):
        """Create a callback for process exit.

        The returned callable records the exit code, marks the process
        STOPPED, drops its handle, and broadcasts a status-change event.
        NOTE(review): it mutates self._handles without taking self._lock -
        presumably it runs on the same event loop as the lock's users;
        confirm against the contexts that invoke on_exit.
        """

        def callback(exit_code: int) -> None:
            # Reload process to get latest state
            try:
                updated = Process.from_id(process._id)
            except Exception as e:
                logger.error(f"Failed to reload process {process._id} on exit: {e}")
                self._handles.pop(process._id, None)
                return

            if updated:
                updated.status = ProcessStatus.STOPPED.value
                updated.exit_code = exit_code
                updated.pid = None
                try:
                    updated.save()
                except Exception as e:
                    logger.error(f"Failed to save process {process._id} on exit: {e}")

            # Remove handle (use pop() to avoid race with concurrent access)
            self._handles.pop(process._id, None)

            # Emit status change event
            # NOTE(review): if from_id returned None, `updated.name` below
            # raises AttributeError, which the except clause swallows - the
            # event is then silently dropped; verify this is intended.
            try:
                get_event_bus().emit_sync(
                    EVENT_STATUS_CHANGE,
                    {
                        "process_id": process._id,
                        "name": updated.name,
                        "status": updated.status,
                        "exit_code": exit_code,
                        "pid": None,
                    },
                )
            except Exception as e:
                logger.error(f"Failed to emit exit event for process {process._id}: {e}")

        return callback
|
|
342
|
+
|
|
343
|
+
    async def start(self, name: str) -> dict[str, Any]:
        """
        Start a process by name.

        Flow: look up the record, short-circuit if already running, adopt an
        existing daemon when configured to, validate the execution context,
        then launch and persist the new state.

        Returns a dict with status and process info (for JSON output).
        """
        init_database()
        process = self._get_process_by_name(name)

        if not process:
            return {
                "success": False,
                "error": f"Process '{name}' not found",
                "error_code": "process_not_found",
                "suggestion": "Use 'procler list' to see available processes, or 'procler define' to create one",
            }

        # Check if already running
        if process.status == ProcessStatus.RUNNING.value:
            is_running = False
            handle = await self._get_handle(process._id)
            if handle:
                context = self._get_context(process.context_type)
                is_running = await context.is_running(handle)
            elif process.pid:
                # No handle but we have a PID - check in the correct context
                is_running = await self._is_process_pid_running(process)

            if is_running:
                return {
                    "success": True,
                    "data": {
                        "status": "already_running",
                        "process": self._process_to_dict(process),
                    },
                }

        # Daemon mode: Check if we should adopt an existing daemon
        if getattr(process, "daemon_mode", False) and getattr(process, "adopt_existing", False):
            detector = get_daemon_detector()
            # Determine container for daemon detection
            # Use daemon_container if set, otherwise fall back to container_name for docker context
            raw_container = getattr(process, "daemon_container", None) or (
                process.container_name if process.context_type == "docker" else None
            )
            # Substitute vars in container name (e.g., ${SIM_CONTAINER})
            container = substitute_vars_from_config(raw_container) if raw_container else None
            # Try to find existing daemon
            existing_pid = await detector.find_daemon_pid(
                pattern=getattr(process, "daemon_match_pattern", None),
                pidfile=getattr(process, "daemon_pidfile", None),
                container=container,
            )
            if existing_pid:
                # Adopt the existing daemon
                process.status = ProcessStatus.RUNNING.value
                process.pid = existing_pid
                process.started_at = datetime.now().isoformat()
                process.adopted = True

                # Set up log_file path for adopted processes
                # Note: We can't redirect output of already-running processes,
                # but setting the path allows logs() to look for it and shows
                # users where logs would go if they restart via procler
                log_file_path = getattr(process, "log_file", None) or get_log_file_path(process.name)
                process.log_file = log_file_path
                process.save()

                append_changelog(
                    action=ChangelogAction.START,
                    entity_type="process",
                    entity_name=name,
                    details={
                        "pid": existing_pid,
                        "adopted": True,
                        "context": process.context_type,
                    },
                )

                return {
                    "success": True,
                    "data": {
                        "status": "adopted",
                        "process": self._process_to_dict(process),
                    },
                }

        # Get the appropriate context
        try:
            context = self._get_context(process.context_type)
        except ValueError:
            if process.context_type == "docker" and not is_docker_available():
                return {
                    "success": False,
                    "error": "Docker is not available",
                    "error_code": "docker_unavailable",
                    "suggestion": "Ensure Docker is installed and running",
                }
            return {
                "success": False,
                "error": f"Unknown context type: {process.context_type}",
                "error_code": "invalid_context",
                "suggestion": "Valid context types are 'local' or 'docker'. Check process definition.",
            }

        # Validate Docker context requirements
        if process.context_type == "docker" and not process.container_name:
            return {
                "success": False,
                "error": "Container name required for docker context",
                "error_code": "missing_container",
                "suggestion": "Define process with --container <name>",
            }

        # Pre-flight container check for docker context
        if process.context_type == "docker" and process.container_name:
            container_name = substitute_vars_from_config(process.container_name)
            docker_ctx = self._get_context("docker")
            is_available, error_msg = docker_ctx.check_container_available(container_name)
            if not is_available:
                return {
                    "success": False,
                    "error": error_msg,
                    "error_code": "container_unavailable",
                    "suggestion": "Ensure the Docker container is running with 'docker ps'",
                }

        # Update status to starting
        process.status = ProcessStatus.STARTING.value
        process.save()

        try:
            # Substitute config vars in command (e.g., ${SIM_CONTAINER})
            resolved_command = substitute_vars_from_config(process.command)

            # Set up log file for daemon processes
            log_file_path = None
            if getattr(process, "daemon_mode", False):
                log_file_path = getattr(process, "log_file", None) or get_log_file_path(process.name)
                # Get container for log directory creation
                raw_daemon_container = getattr(process, "daemon_container", None)
                daemon_container = substitute_vars_from_config(raw_daemon_container) if raw_daemon_container else None

                # Ensure log directory exists (as the user who will run the process)
                if daemon_container:
                    # Extract user from docker exec command (e.g., -u 1000)
                    exec_user = extract_docker_exec_user(resolved_command)
                    ensure_log_dir_in_container(daemon_container, user=exec_user)
                else:
                    ensure_local_log_dir()

                # Wrap command to redirect output to log file
                resolved_command = wrap_command_with_log_redirect(resolved_command, log_file_path)

                # Store log file path
                process.log_file = log_file_path

            # Start the process (Docker context needs container_name)
            if process.context_type == "docker":
                handle = await context.start_process(
                    command=resolved_command,
                    cwd=process.cwd,
                    env=process.env,
                    on_stdout=self._log_callback(process._id, "stdout"),
                    on_stderr=self._log_callback(process._id, "stderr"),
                    on_exit=self._exit_callback(process),
                    container_name=process.container_name,
                )
            else:
                handle = await context.start_process(
                    command=resolved_command,
                    cwd=process.cwd,
                    env=process.env,
                    on_stdout=self._log_callback(process._id, "stdout"),
                    on_stderr=self._log_callback(process._id, "stderr"),
                    on_exit=self._exit_callback(process),
                )

            # Daemon mode: Wait for fork and find real daemon PID
            daemon_pid = handle.pid
            if getattr(process, "daemon_mode", False):
                pattern = getattr(process, "daemon_match_pattern", None)
                pidfile = getattr(process, "daemon_pidfile", None)
                if pattern or pidfile:
                    detector = get_daemon_detector()
                    # Use daemon_container if set, otherwise fall back to container_name
                    raw_container = getattr(process, "daemon_container", None) or (
                        process.container_name if process.context_type == "docker" else None
                    )
                    # Substitute vars in container name (e.g., ${SIM_CONTAINER})
                    container = substitute_vars_from_config(raw_container) if raw_container else None
                    # Wait for daemon to fork and find its real PID
                    found_pid = await detector.wait_for_fork(
                        pattern=pattern or "",
                        container=container,
                        timeout=5.0,
                    )
                    if found_pid:
                        daemon_pid = found_pid

            # Update process state
            process.status = ProcessStatus.RUNNING.value
            process.pid = daemon_pid
            process.started_at = datetime.now().isoformat()
            process.exit_code = None
            process.error_message = None
            process.save()

            # Store handle (thread-safe)
            await self._set_handle(process._id, handle)

            # Emit status change event
            get_event_bus().emit_sync(
                EVENT_STATUS_CHANGE,
                {
                    "process_id": process._id,
                    "name": process.name,
                    "status": process.status,
                    "pid": process.pid,
                },
            )

            # Log to changelog
            append_changelog(
                action=ChangelogAction.START,
                entity_type="process",
                entity_name=name,
                details={"pid": process.pid, "context": process.context_type},
            )

            return {
                "success": True,
                "data": {
                    "status": "started",
                    "process": self._process_to_dict(process),
                },
            }

        except Exception as e:
            # Any launch failure leaves the record in ERROR with the message.
            process.status = ProcessStatus.ERROR.value
            process.error_message = str(e)
            process.save()

            return {
                "success": False,
                "error": f"Failed to start process: {e}",
                "error_code": "start_failed",
            }
|
|
591
|
+
|
|
592
|
+
    async def stop(self, name: str, timeout: float = 10.0) -> dict[str, Any]:
        """
        Stop a process by name.

        Kill strategy, in priority order: in-container daemon kill, direct
        PID kill (survives across CLI invocations), then handle-based stop
        (only valid within the event loop that created the handle).

        Returns a dict with status and process info (for JSON output).
        """
        init_database()
        process = self._get_process_by_name(name)

        if not process:
            return {
                "success": False,
                "error": f"Process '{name}' not found",
                "error_code": "process_not_found",
                "suggestion": "Use 'procler list' to see available processes, or 'procler define' to create one",
            }

        # Check if already stopped
        if process.status == ProcessStatus.STOPPED.value:
            return {
                "success": True,
                "data": {
                    "status": "already_stopped",
                    "process": self._process_to_dict(process),
                },
            }

        handle = await self._get_handle(process._id)

        # Update status to stopping
        process.status = ProcessStatus.STOPPING.value
        process.save()

        try:
            exit_code = 0

            # Daemon mode with container: Kill daemon inside container
            raw_daemon_container = getattr(process, "daemon_container", None)
            # Substitute vars in container name (e.g., ${SIM_CONTAINER})
            daemon_container = substitute_vars_from_config(raw_daemon_container) if raw_daemon_container else None
            if getattr(process, "daemon_mode", False) and daemon_container and process.pid:
                exit_code = await self._kill_daemon_in_container(
                    container=daemon_container,
                    pid=process.pid,
                    timeout=timeout,
                )
            elif process.pid and self._is_pid_running(process.pid):
                # PID is running - kill directly (most reliable across CLI invocations)
                exit_code = await self._kill_pid(process.pid, timeout=timeout)
            elif handle:
                # Try handle-based stop (works within same event loop only)
                try:
                    context = self._get_context(process.context_type)
                    exit_code = await context.stop_process(handle, timeout=timeout)
                except RuntimeError:
                    # Handle attached to closed event loop - process likely already dead
                    pass

            # Update process state
            process.status = ProcessStatus.STOPPED.value
            process.pid = None
            process.exit_code = exit_code
            process.save()

            # Remove handle (thread-safe)
            await self._remove_handle(process._id)

            # Emit status change event
            get_event_bus().emit_sync(
                EVENT_STATUS_CHANGE,
                {
                    "process_id": process._id,
                    "name": process.name,
                    "status": process.status,
                    "exit_code": exit_code,
                    "pid": None,
                },
            )

            # Log to changelog
            append_changelog(
                action=ChangelogAction.STOP,
                entity_type="process",
                entity_name=name,
                details={"exit_code": exit_code},
            )

            return {
                "success": True,
                "data": {
                    "status": "stopped",
                    "exit_code": exit_code,
                    "process": self._process_to_dict(process),
                },
            }

        except Exception as e:
            process.status = ProcessStatus.ERROR.value
            process.error_message = str(e)
            process.save()

            # Emit error status event
            get_event_bus().emit_sync(
                EVENT_STATUS_CHANGE,
                {
                    "process_id": process._id,
                    "name": process.name,
                    "status": process.status,
                    "error_message": str(e),
                },
            )

            return {
                "success": False,
                "error": f"Failed to stop process: {e}",
                "error_code": "stop_failed",
            }
|
|
709
|
+
|
|
710
|
+
async def restart(
    self,
    name: str,
    timeout: float = 10.0,
    clear_logs: bool = False,
) -> dict[str, Any]:
    """Restart a process by name: stop it if active, then start it again.

    Args:
        name: Process name
        timeout: Seconds to wait for the stop phase to complete
        clear_logs: If True, delete old log entries before starting

    Returns:
        A JSON-serializable dict with status and process info.
    """
    init_database()
    target = self._get_process_by_name(name)

    if not target:
        return {
            "success": False,
            "error": f"Process '{name}' not found",
            "error_code": "process_not_found",
            "suggestion": "Use 'procler list' to see available processes, or 'procler define' to create one",
        }

    # Only attempt a stop when the process is actually active.
    active_states = (ProcessStatus.RUNNING.value, ProcessStatus.STARTING.value)
    if target.status in active_states:
        stopped = await self.stop(name, timeout=timeout)
        if not stopped["success"]:
            # Propagate the stop failure unchanged to the caller.
            return stopped

    # Optionally wipe historical log entries before relaunching.
    if clear_logs and target._id:
        LogEntry.delete().where(F("process_id") == target._id).execute()

    return await self.start(name)
|
|
749
|
+
|
|
750
|
+
async def status(self, name: str | None = None) -> dict[str, Any]:
    """Get status of one or all processes.

    When *name* is given, report that single process; otherwise report
    every known process. Statuses are re-verified before being returned.

    Returns:
        A dict with process info (for JSON output).
    """
    init_database()

    if not name:
        # No specific process requested: refresh and report all of them.
        known = Process.query().all()
        for entry in known:
            await self._verify_running_status(entry)
        return {
            "success": True,
            "data": {
                "processes": [self._process_to_dict(entry) for entry in known],
            },
        }

    target = self._get_process_by_name(name)
    if not target:
        return {
            "success": False,
            "error": f"Process '{name}' not found",
            "error_code": "process_not_found",
        }

    # Refresh the persisted status before serializing it.
    await self._verify_running_status(target)
    return {
        "success": True,
        "data": {
            "process": self._process_to_dict(target),
        },
    }
|
|
787
|
+
|
|
788
|
+
async def _verify_running_status(self, process: Process) -> None:
    """Verify and update the running status of a process.

    For daemon processes, this also auto-adopts running daemons even if
    the process is marked as stopped.

    Args:
        process: The Process record to verify; may be mutated and saved.

    Side effects:
        Updates ``process.status``/``process.pid`` (and, on daemon
        adoption, ``started_at``, ``log_file``, ``adopted``) and persists
        via ``process.save()``. May also remove the in-memory handle.
    """
    # Daemon mode: Use daemon detector to find/verify PID
    # Check daemon status regardless of current status (to auto-adopt)
    if getattr(process, "daemon_mode", False):
        pattern = getattr(process, "daemon_match_pattern", None)
        pidfile = getattr(process, "daemon_pidfile", None)
        if pattern or pidfile:
            detector = get_daemon_detector()
            # Use daemon_container if set, otherwise fall back to container_name
            raw_container = getattr(process, "daemon_container", None) or (
                process.container_name if process.context_type == "docker" else None
            )
            # Substitute vars in container name (e.g., ${SIM_CONTAINER})
            container = substitute_vars_from_config(raw_container) if raw_container else None
            found_pid = await detector.find_daemon_pid(
                pattern=pattern,
                pidfile=pidfile,
                container=container,
            )
            if found_pid:
                # Daemon is running - update status and PID
                # Only write when something actually changed (avoids churn).
                if process.status != ProcessStatus.RUNNING.value or process.pid != found_pid:
                    process.status = ProcessStatus.RUNNING.value
                    process.pid = found_pid
                    if not process.started_at:
                        process.started_at = datetime.now().isoformat()
                    # Set log_file path for auto-adopted processes
                    if not getattr(process, "log_file", None):
                        process.log_file = get_log_file_path(process.name)
                    process.adopted = True
                    process.save()
                return
            else:
                # Daemon not found - mark as stopped
                if process.status == ProcessStatus.RUNNING.value:
                    process.status = ProcessStatus.STOPPED.value
                    process.pid = None
                    await self._remove_handle(process._id)
                    process.save()
                return

    # Non-daemon: Only verify if currently marked as running
    if process.status != ProcessStatus.RUNNING.value:
        return

    # Non-daemon mode: Use handle or PID check
    handle = await self._get_handle(process._id)
    if handle:
        # We have a handle, check via context
        context = self._get_context(process.context_type)
        if not await context.is_running(handle):
            process.status = ProcessStatus.STOPPED.value
            process.pid = None
            process.save()
            await self._remove_handle(process._id)
    elif process.pid:
        # No handle but we have a PID - check in the correct context
        is_running = await self._is_process_pid_running(process)
        if not is_running:
            process.status = ProcessStatus.STOPPED.value
            process.pid = None
            process.save()
    else:
        # No handle and no PID - mark as stopped
        process.status = ProcessStatus.STOPPED.value
        process.save()
|
|
859
|
+
|
|
860
|
+
def _is_pid_running(self, pid: int) -> bool:
|
|
861
|
+
"""Check if a PID is still running in the OS."""
|
|
862
|
+
|
|
863
|
+
try:
|
|
864
|
+
os.kill(pid, 0) # Signal 0 just checks if process exists
|
|
865
|
+
return True
|
|
866
|
+
except ProcessLookupError:
|
|
867
|
+
return False
|
|
868
|
+
except PermissionError:
|
|
869
|
+
# Process exists but we don't have permission to signal it
|
|
870
|
+
return True
|
|
871
|
+
|
|
872
|
+
async def _is_process_pid_running(self, process: Process) -> bool:
|
|
873
|
+
"""Check if a process PID is running in its execution context."""
|
|
874
|
+
if not process.pid:
|
|
875
|
+
return False
|
|
876
|
+
|
|
877
|
+
if process.context_type == "docker":
|
|
878
|
+
raw_container = getattr(process, "daemon_container", None) or process.container_name
|
|
879
|
+
container = substitute_vars_from_config(raw_container) if raw_container else None
|
|
880
|
+
if container:
|
|
881
|
+
detector = get_daemon_detector()
|
|
882
|
+
return await detector.is_pid_running(process.pid, container=container)
|
|
883
|
+
|
|
884
|
+
return self._is_pid_running(process.pid)
|
|
885
|
+
|
|
886
|
+
async def _kill_pid(self, pid: int, timeout: float = 10.0) -> int:
|
|
887
|
+
"""Kill a process by PID directly (kills entire process group)."""
|
|
888
|
+
import signal
|
|
889
|
+
|
|
890
|
+
try:
|
|
891
|
+
# Kill the process group (negative PID) for graceful termination
|
|
892
|
+
# This ensures child processes are also terminated
|
|
893
|
+
try:
|
|
894
|
+
os.killpg(pid, signal.SIGTERM)
|
|
895
|
+
except ProcessLookupError:
|
|
896
|
+
# Process group might not exist, try regular kill
|
|
897
|
+
os.kill(pid, signal.SIGTERM)
|
|
898
|
+
|
|
899
|
+
# Wait for process to exit
|
|
900
|
+
for _ in range(int(timeout * 10)):
|
|
901
|
+
await asyncio.sleep(0.1)
|
|
902
|
+
if not self._is_pid_running(pid):
|
|
903
|
+
return 0
|
|
904
|
+
|
|
905
|
+
# Force kill if still running
|
|
906
|
+
try:
|
|
907
|
+
os.killpg(pid, signal.SIGKILL)
|
|
908
|
+
except ProcessLookupError:
|
|
909
|
+
os.kill(pid, signal.SIGKILL)
|
|
910
|
+
await asyncio.sleep(0.1)
|
|
911
|
+
return -9
|
|
912
|
+
|
|
913
|
+
except ProcessLookupError:
|
|
914
|
+
return 0
|
|
915
|
+
except PermissionError:
|
|
916
|
+
return -1
|
|
917
|
+
|
|
918
|
+
async def _kill_daemon_in_container(self, container: str, pid: int, timeout: float = 10.0) -> int:
    """Kill a daemon process inside a Docker container.

    Sends SIGTERM via ``docker exec``, polls until the PID disappears,
    then force-kills with SIGKILL if it is still alive.

    Returns:
        0 on graceful stop, -9 after a force kill, -1 on any error.
    """
    import logging

    logger = logging.getLogger(__name__)

    async def send(cmd: str) -> None:
        # Run a docker CLI command, swallowing its output.
        proc = await asyncio.create_subprocess_shell(
            cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        await proc.communicate()

    try:
        await send(f"docker exec {container} kill -TERM {pid}")

        # Poll in 100ms steps until the daemon exits or the timeout elapses.
        detector = get_daemon_detector()
        for _ in range(int(timeout * 10)):
            await asyncio.sleep(0.1)
            if not await detector.is_pid_running(pid, container=container):
                logger.debug(f"Daemon PID {pid} in {container} stopped gracefully")
                return 0

        # Grace period exhausted: escalate to SIGKILL.
        await send(f"docker exec {container} kill -KILL {pid}")
        await asyncio.sleep(0.1)
        logger.debug(f"Daemon PID {pid} in {container} force killed")
        return -9
    except Exception as e:
        logger.error(f"Error killing daemon {pid} in {container}: {e}")
        return -1
|
|
957
|
+
|
|
958
|
+
def _process_to_dict(self, process: Process) -> dict[str, Any]:
|
|
959
|
+
"""Convert a Process to a dict for JSON output."""
|
|
960
|
+
result = {
|
|
961
|
+
"id": process._id,
|
|
962
|
+
"name": process.name,
|
|
963
|
+
"display_name": process.display_name,
|
|
964
|
+
"command": process.command,
|
|
965
|
+
"context_type": process.context_type,
|
|
966
|
+
"context": process.context_type,
|
|
967
|
+
"container": process.container_name,
|
|
968
|
+
"cwd": process.cwd,
|
|
969
|
+
"tags": process.tags,
|
|
970
|
+
"created_at": process.created_at,
|
|
971
|
+
"updated_at": process.updated_at,
|
|
972
|
+
"status": process.status,
|
|
973
|
+
"pid": process.pid,
|
|
974
|
+
"uptime_seconds": process.uptime_seconds,
|
|
975
|
+
"exit_code": process.exit_code,
|
|
976
|
+
"error_message": process.error_message,
|
|
977
|
+
# Daemon mode fields
|
|
978
|
+
"daemon_mode": getattr(process, "daemon_mode", False) or None,
|
|
979
|
+
"daemon_match_pattern": getattr(process, "daemon_match_pattern", None),
|
|
980
|
+
"daemon_container": getattr(process, "daemon_container", None),
|
|
981
|
+
"log_file": getattr(process, "log_file", None),
|
|
982
|
+
"adopted": getattr(process, "adopted", False) or None,
|
|
983
|
+
}
|
|
984
|
+
|
|
985
|
+
# Add Linux process state if running and we have a PID
|
|
986
|
+
if process.pid and process.status == ProcessStatus.RUNNING.value:
|
|
987
|
+
linux_state = get_linux_process_state(process.pid)
|
|
988
|
+
if linux_state:
|
|
989
|
+
result["linux_state"] = linux_state
|
|
990
|
+
# Add warning for problematic states
|
|
991
|
+
if linux_state["state_code"] == "D":
|
|
992
|
+
result["warning"] = "Process in uninterruptible sleep (D state) - may be stuck on I/O"
|
|
993
|
+
elif linux_state["state_code"] == "Z":
|
|
994
|
+
result["warning"] = "Process is a zombie - parent has not reaped it"
|
|
995
|
+
elif linux_state["state_code"] == "T":
|
|
996
|
+
result["warning"] = "Process is stopped (possibly by debugger or signal)"
|
|
997
|
+
|
|
998
|
+
return result
|
|
999
|
+
|
|
1000
|
+
async def logs(
    self,
    name: str,
    tail: int = 100,
    since: str | None = None,
) -> dict[str, Any]:
    """
    Get logs for a process.

    Args:
        name: Process name
        tail: Number of lines to return (most recent)
        since: Time filter (e.g., '5m', '1h', ISO timestamp)

    Returns a dict with logs (for JSON output). The ``source`` field in
    the result says whether entries came from the database or a log file;
    for daemon processes with a log file, the file takes precedence.
    """
    init_database()
    process = self._get_process_by_name(name)

    if not process:
        return {
            "success": False,
            "error": f"Process '{name}' not found",
            "error_code": "process_not_found",
            "suggestion": "Use 'procler list' to see available processes, or 'procler define' to create one",
        }

    # Build query for logs
    query = LogEntry.query().filter(F("process_id") == process._id)

    # Apply time filter if specified
    if since:
        try:
            seconds_ago = parse_duration(since)
            cutoff = datetime.now() - timedelta(seconds=seconds_ago)
            cutoff_str = cutoff.isoformat()
            # Timestamps are compared as ISO strings (lexicographic order).
            query = query.filter(F("timestamp") >= cutoff_str)
        except ValueError as e:
            return {
                "success": False,
                "error": str(e),
                "error_code": "invalid_duration",
            }

    # Get logs ordered by timestamp descending, limited by tail
    # sqler doesn't have ORDER BY in query builder, so we fetch all and sort in Python
    all_logs = query.all()

    # Sort by timestamp descending
    all_logs.sort(key=lambda x: x.timestamp or "", reverse=True)

    # Take the last `tail` entries and reverse for chronological order
    logs_subset = all_logs[:tail]
    logs_subset.reverse()

    log_entries = [
        {
            "timestamp": entry.timestamp,
            "stream": entry.stream,
            "line": entry.line,
        }
        for entry in logs_subset
    ]

    # For daemon processes with log_file, prefer file over database
    # (database may have stale entries from previous runs)
    log_source = "database"
    log_file = getattr(process, "log_file", None)
    is_daemon = getattr(process, "daemon_mode", False)

    # Try log file for daemon processes or when database is empty
    if log_file and (is_daemon or not log_entries):
        # Determine if we need to read from container
        raw_daemon_container = getattr(process, "daemon_container", None)
        daemon_container = substitute_vars_from_config(raw_daemon_container) if raw_daemon_container else None

        if daemon_container:
            # Read from log file inside container
            lines = await read_log_file_from_container(daemon_container, log_file, tail)
        else:
            # Read from local log file
            try:
                log_path = Path(log_file)
                if log_path.exists():
                    with open(log_path) as f:
                        all_lines = f.readlines()
                    lines = [line.rstrip("\n") for line in all_lines[-tail:]]
                else:
                    lines = []
            except OSError:
                lines = []

        # Only override the DB entries when the file actually had content.
        if lines:
            log_source = "file"
            # Log files don't have embedded timestamps, use current time as retrieval time
            now = datetime.now().isoformat()
            log_entries = [
                {
                    "timestamp": now,
                    "stream": "stdout",
                    "line": line,
                }
                for line in lines
            ]

    result_data = {
        "process": name,
        "logs": log_entries,
        "count": len(log_entries),
        "source": log_source,
        "log_file": log_file,
    }

    # Add helpful message for adopted processes with no logs
    is_adopted = getattr(process, "adopted", False)
    if is_adopted and not log_entries:
        result_data["adopted"] = True
        result_data["note"] = (
            "This process was adopted (found already running). "
            "Historical logs are not available. Restart via 'procler restart' to capture logs."
        )
    elif is_adopted:
        result_data["adopted"] = True

    return {
        "success": True,
        "data": result_data,
    }
|
|
1128
|
+
|
|
1129
|
+
async def exec_command(
    self,
    command: str,
    context_type: str = "local",
    container_name: str | None = None,
    cwd: str | None = None,
    timeout: float = 60.0,
) -> dict[str, Any]:
    """
    Execute an arbitrary command.

    Args:
        command: The command to execute
        context_type: Execution context ('local' or 'docker')
        container_name: Docker container name (required if context=docker)
        cwd: Working directory
        timeout: Maximum execution time in seconds

    Returns a dict with execution result (for JSON output).
    """
    init_database()

    # Validate docker-specific preconditions up front.
    if context_type == "docker":
        if not container_name:
            return {
                "success": False,
                "error": "Container name required for docker context",
                "error_code": "missing_container",
                "suggestion": "Use --container <name> to specify the Docker container",
            }

        if not is_docker_available():
            return {
                "success": False,
                "error": "Docker is not available",
                "error_code": "docker_unavailable",
                "suggestion": "Ensure Docker is installed and running",
            }

    try:
        context = self._get_context(context_type)
    except ValueError as e:
        return {
            "success": False,
            "error": str(e),
            "error_code": "invalid_context",
            "suggestion": "Valid context types are 'local' or 'docker'",
        }

    # Build the call arguments once instead of duplicating the call per
    # context; only the docker context accepts container_name.
    exec_kwargs: dict[str, Any] = {
        "command": command,
        "cwd": cwd,
        "timeout": timeout,
    }
    if context_type == "docker":
        exec_kwargs["container_name"] = container_name

    try:
        result: ExecResult = await context.exec_command(**exec_kwargs)
        return {
            "success": True,
            "data": {
                "stdout": result.stdout,
                "stderr": result.stderr,
                "exit_code": result.exit_code,
            },
        }
    except ValueError as e:
        # Container not found or similar
        return {
            "success": False,
            "error": str(e),
            "error_code": "container_not_found",
            "suggestion": "Run 'docker ps' to list available containers",
        }
    except Exception as e:
        return {
            "success": False,
            "error": f"Failed to execute command: {e}",
            "error_code": "exec_failed",
        }
|
|
1217
|
+
|
|
1218
|
+
def rotate_logs(self, process_id: int, max_entries: int = DEFAULT_MAX_LOGS) -> int:
    """Trim a process's stored logs down to the most recent entries.

    Args:
        process_id: The process ID
        max_entries: Maximum number of log entries to keep

    Returns:
        The number of entries deleted.
    """
    init_database()

    entries = LogEntry.query().filter(F("process_id") == process_id).all()
    if len(entries) <= max_entries:
        # Already within budget; nothing to remove.
        return 0

    # Newest first, so everything beyond max_entries is the oldest overflow.
    entries.sort(key=lambda e: e.timestamp or "", reverse=True)

    stale = entries[max_entries:]
    for record in stale:
        record.delete()
    return len(stale)
|
|
1248
|
+
|
|
1249
|
+
def cleanup_all_logs(self, max_entries_per_process: int = DEFAULT_MAX_LOGS) -> dict[str, int]:
    """Rotate logs for every known process.

    Returns:
        A dict mapping process names to the number of deleted entries;
        processes with nothing deleted are omitted.
    """
    init_database()

    report: dict[str, int] = {}
    for proc in Process.query().all():
        removed = self.rotate_logs(proc._id, max_entries_per_process)
        if removed > 0:
            report[proc.name] = removed
    return report
|
|
1266
|
+
|
|
1267
|
+
|
|
1268
|
+
# Global singleton (module-level cache for get_process_manager).
_manager: ProcessManager | None = None


def get_process_manager() -> ProcessManager:
    """Return the shared ProcessManager instance, creating it on first use."""
    global _manager
    if _manager is None:
        # Lazy instantiation keeps importing this module side-effect free.
        _manager = ProcessManager()
    return _manager