abstractgateway 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractgateway/__init__.py +1 -2
- abstractgateway/__main__.py +7 -0
- abstractgateway/app.py +4 -4
- abstractgateway/cli.py +568 -8
- abstractgateway/config.py +15 -5
- abstractgateway/embeddings_config.py +45 -0
- abstractgateway/host_metrics.py +274 -0
- abstractgateway/hosts/bundle_host.py +528 -55
- abstractgateway/hosts/visualflow_host.py +30 -3
- abstractgateway/integrations/__init__.py +2 -0
- abstractgateway/integrations/email_bridge.py +782 -0
- abstractgateway/integrations/telegram_bridge.py +534 -0
- abstractgateway/maintenance/__init__.py +5 -0
- abstractgateway/maintenance/action_tokens.py +100 -0
- abstractgateway/maintenance/backlog_exec_runner.py +1592 -0
- abstractgateway/maintenance/backlog_parser.py +184 -0
- abstractgateway/maintenance/draft_generator.py +451 -0
- abstractgateway/maintenance/llm_assist.py +212 -0
- abstractgateway/maintenance/notifier.py +109 -0
- abstractgateway/maintenance/process_manager.py +1064 -0
- abstractgateway/maintenance/report_models.py +81 -0
- abstractgateway/maintenance/report_parser.py +219 -0
- abstractgateway/maintenance/text_similarity.py +123 -0
- abstractgateway/maintenance/triage.py +507 -0
- abstractgateway/maintenance/triage_queue.py +142 -0
- abstractgateway/migrate.py +155 -0
- abstractgateway/routes/__init__.py +2 -2
- abstractgateway/routes/gateway.py +10817 -179
- abstractgateway/routes/triage.py +118 -0
- abstractgateway/runner.py +689 -14
- abstractgateway/security/gateway_security.py +425 -110
- abstractgateway/service.py +213 -6
- abstractgateway/stores.py +64 -4
- abstractgateway/workflow_deprecations.py +225 -0
- abstractgateway-0.1.1.dist-info/METADATA +135 -0
- abstractgateway-0.1.1.dist-info/RECORD +40 -0
- abstractgateway-0.1.0.dist-info/METADATA +0 -101
- abstractgateway-0.1.0.dist-info/RECORD +0 -18
- {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/WHEEL +0 -0
- {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/entry_points.txt +0 -0
abstractgateway/maintenance/process_manager.py
@@ -0,0 +1,1064 @@
+from __future__ import annotations
+
+import datetime
+import json
+import os
+import re
+import signal
+import subprocess
+import sys
+import threading
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
+
+
+_SAFE_ID_RE = re.compile(r"^[a-zA-Z0-9_-]+$")
+_SAFE_ENV_KEY_RE = re.compile(r"^[A-Z][A-Z0-9_]*$")
+
+
+@dataclass(frozen=True)
+class ManagedEnvVarSpec:
+    key: str
+    label: str
+    description: str
+    category: str = "general"
+    secret: bool = False
+
+
+def managed_env_var_allowlist() -> Dict[str, ManagedEnvVarSpec]:
+    """Allowlisted environment variables that can be set via the process manager UI.
+
+    Security rationale:
+    - Disallow arbitrary env var editing (PATH, LD_PRELOAD, PYTHONPATH, NODE_OPTIONS, etc).
+    - Treat stored values as secrets: never return them to HTTP clients.
+    """
+    specs = [
+        # Email (framework tools + bridges).
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_ACCOUNTS_CONFIG",
+            label="Email accounts config path",
+            description="Path to a YAML/JSON multi-account config file (e.g. /path/to/emails.yaml).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_SMTP_HOST",
+            label="SMTP host",
+            description="SMTP server hostname (e.g. smtp.gmail.com).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_SMTP_PORT",
+            label="SMTP port",
+            description="SMTP port (e.g. 587 for STARTTLS, 465 for implicit TLS).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_SMTP_USERNAME",
+            label="SMTP username",
+            description="SMTP username (often the email address).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_SMTP_PASSWORD_ENV_VAR",
+            label="SMTP password env var",
+            description="Name of the env var that contains the SMTP password (default: EMAIL_PASSWORD).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_SMTP_STARTTLS",
+            label="SMTP STARTTLS",
+            description="Whether to use STARTTLS for SMTP (true/false).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_FROM",
+            label="From email",
+            description="Default From address (used when the tool doesn't specify one).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_REPLY_TO",
+            label="Reply-To",
+            description="Optional default Reply-To address.",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_DEFAULT_ACCOUNT",
+            label="Default account",
+            description="Default email account name (when multiple accounts exist).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_ACCOUNT_NAME",
+            label="Account name",
+            description="Optional account name label for env-based config (default: 'default').",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_IMAP_HOST",
+            label="IMAP host",
+            description="IMAP server hostname.",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_IMAP_PORT",
+            label="IMAP port",
+            description="IMAP port (default: 993).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_IMAP_USERNAME",
+            label="IMAP username",
+            description="IMAP username (often the email address).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_IMAP_PASSWORD_ENV_VAR",
+            label="IMAP password env var",
+            description="Name of the env var that contains the IMAP password (default: EMAIL_PASSWORD).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="ABSTRACT_EMAIL_IMAP_FOLDER",
+            label="IMAP folder",
+            description="Mailbox folder to poll (default: INBOX).",
+            category="email",
+        ),
+        ManagedEnvVarSpec(
+            key="EMAIL_PASSWORD",
+            label="EMAIL_PASSWORD",
+            description="Email account password (referenced by *_PASSWORD_ENV_VAR by default).",
+            category="email",
+            secret=True,
+        ),
+    ]
+
+    out: Dict[str, ManagedEnvVarSpec] = {}
+    for s in specs:
+        k = str(s.key or "").strip()
+        if not k or not _SAFE_ENV_KEY_RE.match(k):
+            raise ValueError(f"Invalid allowlisted env var key: {k!r}")
+        out[k] = s
+    return out
+
+
+def _now_utc_iso() -> str:
+    return datetime.datetime.now(datetime.timezone.utc).isoformat()
+
+
+def _ts_compact_utc() -> str:
+    return datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+
+
+def _is_pid_running(pid: int) -> bool:
+    if not isinstance(pid, int) or pid <= 0:
+        return False
+    try:
+        os.kill(pid, 0)
+        return True
+    except Exception:
+        return False
+
+
+def _pid_commandline(pid: int) -> str:
+    if not isinstance(pid, int) or pid <= 0:
+        return ""
+    try:
+        proc = subprocess.run(
+            # Use wide output so long commandlines (node/uvicorn) aren't truncated.
+            # This is critical for UAT stop safety checks that match on ports/markers.
+            ["ps", "-ww", "-p", str(pid), "-o", "command="],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            text=True,
+            timeout=1.0,
+            check=False,
+        )
+    except Exception:
+        return ""
+    return str(proc.stdout or "").strip()
+
+
+def _expected_port_from_url(url: Optional[str]) -> Optional[int]:
+    s = str(url or "").strip()
+    if not s:
+        return None
+    try:
+        u = urlparse(s)
+    except Exception:
+        return None
+    if u.port is None:
+        return None
+    try:
+        return int(u.port)
+    except Exception:
+        return None
+
+
+def _default_shell() -> str:
+    return str(os.environ.get("SHELL") or "/bin/bash")
+
+
+@dataclass(frozen=True)
+class ProcessSpec:
+    id: str
+    label: str
+    kind: str = "service"  # service|task|self
+    description: Optional[str] = None
+    cwd: str = "."
+    command: List[str] = field(default_factory=list)
+    env: Dict[str, str] = field(default_factory=dict)
+    url: Optional[str] = None
+
+    def validate(self) -> None:
+        pid = str(self.id or "").strip()
+        if not pid or not _SAFE_ID_RE.match(pid):
+            raise ValueError(f"Invalid process id: {self.id!r}")
+        if self.kind not in {"service", "task", "self"}:
+            raise ValueError(f"Invalid process kind: {self.kind!r}")
+        if self.kind != "self":
+            if not isinstance(self.command, list) or not self.command or not all(isinstance(x, str) and x.strip() for x in self.command):
+                raise ValueError(f"Invalid command for process {pid!r}")
+
+
+def default_process_specs(*, repo_root: Path) -> Dict[str, ProcessSpec]:
+    """Default managed processes for the monorepo dev topology."""
+    return {
+        "gateway": ProcessSpec(
+            id="gateway",
+            label="AbstractGateway (this process)",
+            kind="self",
+            description="Gateway API + (optional) runner. Supports restart/redeploy.",
+            cwd=".",
+            command=[],
+            url=None,
+        ),
+        "gateway_uat": ProcessSpec(
+            id="gateway_uat",
+            label="AbstractGateway (UAT)",
+            kind="service",
+            description="Gateway running candidate code from untracked/backlog_exec_uat/current.",
+            cwd=".",
+            env={
+                # Pin defaults to prevent accidental env leakage from the operator shell.
+                "ABSTRACTGATEWAY_UAT_PORT": "6081",
+                "ABSTRACTGATEWAY_UAT_DATA_DIR": "runtime/gateway_uat",
+                "ABSTRACTGATEWAY_UAT_REPO_ROOT": "untracked/backlog_exec_uat/current",
+                # UAT should not execute backlog jobs (only the prod gateway should).
+                "ABSTRACTGATEWAY_BACKLOG_EXEC_RUNNER": "0",
+            },
+            command=[_default_shell(), "-lc", "./agw-uat.sh"],
+            url="http://localhost:6081",
+        ),
+        "build": ProcessSpec(
+            id="build",
+            label="build.sh (deps install)",
+            kind="task",
+            description="Runs the repo build script (pip/npm installs).",
+            cwd=".",
+            command=[_default_shell(), "-lc", "./build.sh"],
+        ),
+        "abstractobserver": ProcessSpec(
+            id="abstractobserver",
+            label="AbstractObserver (web)",
+            kind="service",
+            description="Vite dev server.",
+            cwd="abstractobserver",
+            command=["npm", "run", "dev"],
+            url="http://localhost:3001",
+        ),
+        "abstractobserver_uat": ProcessSpec(
+            id="abstractobserver_uat",
+            label="AbstractObserver (web, UAT)",
+            kind="service",
+            description="Vite dev server from untracked/backlog_exec_uat/current.",
+            cwd=".",
+            env={
+                "ABSTRACTOBSERVER_UAT_PORT": "6082",
+                "ABSTRACTGATEWAY_UAT_REPO_ROOT": "untracked/backlog_exec_uat/current",
+            },
+            command=[_default_shell(), "-lc", "./aobs-uat.sh"],
+            url="http://localhost:6082",
+        ),
+        "abstractcode_web": ProcessSpec(
+            id="abstractcode_web",
+            label="AbstractCode Web",
+            kind="service",
+            description="Vite dev server.",
+            cwd="abstractcode/web",
+            command=["npm", "run", "dev"],
+            url="http://localhost:3002",
+        ),
+        "abstractcode_web_uat": ProcessSpec(
+            id="abstractcode_web_uat",
+            label="AbstractCode Web (UAT)",
+            kind="service",
+            description="Vite dev server from untracked/backlog_exec_uat/current.",
+            cwd=".",
+            env={
+                "ABSTRACTCODE_WEB_UAT_PORT": "6083",
+                "ABSTRACTGATEWAY_UAT_REPO_ROOT": "untracked/backlog_exec_uat/current",
+            },
+            command=[_default_shell(), "-lc", "./acode-web-uat.sh"],
+            url="http://localhost:6083",
+        ),
+        "abstractflow_frontend": ProcessSpec(
+            id="abstractflow_frontend",
+            label="AbstractFlow Web (frontend)",
+            kind="service",
+            description="Vite dev server.",
+            cwd="abstractflow/web/frontend",
+            command=["npm", "run", "dev"],
+            url="http://localhost:3003",
+        ),
+        "abstractflow_frontend_uat": ProcessSpec(
+            id="abstractflow_frontend_uat",
+            label="AbstractFlow Web (frontend, UAT)",
+            kind="service",
+            description="Vite dev server from untracked/backlog_exec_uat/current.",
+            cwd=".",
+            env={
+                "ABSTRACTFLOW_FRONTEND_UAT_PORT": "6084",
+                "ABSTRACTFLOW_BACKEND_UAT_PORT": "6080",
+                "ABSTRACTGATEWAY_UAT_REPO_ROOT": "untracked/backlog_exec_uat/current",
+            },
+            command=[_default_shell(), "-lc", "./aflow-frontend-uat.sh"],
+            url="http://localhost:6084",
+        ),
+        "abstractflow_backend": ProcessSpec(
+            id="abstractflow_backend",
+            label="AbstractFlow Web (backend)",
+            kind="service",
+            description="FastAPI backend (uvicorn).",
+            cwd="abstractflow/web",
+            command=[sys.executable, "-m", "backend", "--host", "0.0.0.0", "--port", "8080", "--reload"],
+            url="http://localhost:8080",
+        ),
+        "abstractflow_backend_uat": ProcessSpec(
+            id="abstractflow_backend_uat",
+            label="AbstractFlow Web (backend, UAT)",
+            kind="service",
+            description="FastAPI backend from untracked/backlog_exec_uat/current.",
+            cwd=".",
+            env={
+                "ABSTRACTFLOW_BACKEND_UAT_PORT": "6080",
+                "ABSTRACTFLOW_RUNTIME_DIR": "runtime/abstractflow_uat",
+                "ABSTRACTGATEWAY_UAT_REPO_ROOT": "untracked/backlog_exec_uat/current",
+            },
+            command=[_default_shell(), "-lc", "./aflow-backend-uat.sh"],
+            url="http://localhost:6080",
+        ),
+    }
+
+
+def _load_specs_from_path(*, repo_root: Path, config_path: Path) -> Dict[str, ProcessSpec]:
+    raw = config_path.read_text(encoding="utf-8", errors="replace")
+    obj = json.loads(raw)
+    if not isinstance(obj, dict):
+        raise ValueError("process manager config must be a JSON object")
+    processes = obj.get("processes")
+    if not isinstance(processes, list):
+        raise ValueError("process manager config must contain 'processes' (list)")
+
+    out: Dict[str, ProcessSpec] = {}
+    for p in processes:
+        if not isinstance(p, dict):
+            continue
+        pid = str(p.get("id") or "").strip()
+        if not pid or not _SAFE_ID_RE.match(pid):
+            raise ValueError(f"Invalid process id in config: {pid!r}")
+        label = str(p.get("label") or pid).strip() or pid
+        kind = str(p.get("kind") or "service").strip().lower() or "service"
+        cwd_raw = p.get("cwd")
+        cwd = str(cwd_raw if cwd_raw is not None else ".").strip() or "."
+        # Security guardrail: treat cwd as repo-relative to avoid arbitrary host path execution.
+        if os.path.isabs(cwd):
+            raise ValueError(f"Process {pid!r} cwd must be relative to repo_root")
+
+        cmd = p.get("command")
+        command: List[str] = []
+        if isinstance(cmd, list):
+            command = [str(x) for x in cmd if isinstance(x, (str, int, float)) and str(x).strip()]
+        elif isinstance(cmd, str) and cmd.strip():
+            command = [cmd.strip()]
+
+        env_raw = p.get("env")
+        env: Dict[str, str] = {}
+        if isinstance(env_raw, dict):
+            for k, v in env_raw.items():
+                ks = str(k or "").strip()
+                if not ks:
+                    continue
+                env[ks] = str(v if v is not None else "")
+
+        spec = ProcessSpec(
+            id=pid,
+            label=label,
+            kind=kind,
+            description=str(p.get("description") or "").strip() or None,
+            cwd=cwd,
+            command=command,
+            env=env,
+            url=str(p.get("url") or "").strip() or None,
+        )
+        spec.validate()
+        out[pid] = spec
+
+    # Always include the gateway self entry so UIs can restart/redeploy.
+    out.setdefault(
+        "gateway",
+        ProcessSpec(
+            id="gateway",
+            label="AbstractGateway (this process)",
+            kind="self",
+            description="Gateway API + (optional) runner. Supports restart/redeploy.",
+            cwd=".",
+            command=[],
+        ),
+    )
+    return out
+
+
+def load_process_specs(*, repo_root: Path) -> Dict[str, ProcessSpec]:
+    cfg_path = str(os.getenv("ABSTRACTGATEWAY_PROCESS_MANAGER_CONFIG") or "").strip()
+    if not cfg_path:
+        specs = default_process_specs(repo_root=repo_root)
+        for s in specs.values():
+            s.validate()
+        return specs
+    path = Path(cfg_path).expanduser().resolve()
+    return _load_specs_from_path(repo_root=repo_root, config_path=path)
+
+
+class ProcessManager:
+    def __init__(self, *, base_dir: Path, repo_root: Path, specs: Dict[str, ProcessSpec]):
+        self._base_dir = Path(base_dir).expanduser().resolve()
+        self._repo_root = Path(repo_root).expanduser().resolve()
+        self._specs = dict(specs)
+        self._managed_env_specs = managed_env_var_allowlist()
+        self._lock = threading.Lock()
+        self._procs: Dict[str, subprocess.Popen[bytes]] = {}
+
+        self._state_dir = (self._base_dir / "process_manager").resolve()
+        self._logs_dir = (self._base_dir / "process_logs").resolve()
+        self._state_path = (self._state_dir / "state.json").resolve()
+        self._env_overrides_path = (self._state_dir / "env_overrides.json").resolve()
+
+        self._state_dir.mkdir(parents=True, exist_ok=True)
+        self._logs_dir.mkdir(parents=True, exist_ok=True)
+
+        self._state: Dict[str, Dict[str, Any]] = self._load_state()
+        self._env_overrides_error: Optional[str] = None
+        self._env_overrides: Dict[str, Dict[str, Any]] = self._load_env_overrides()
+
+        # Apply persisted overrides to this gateway process early so runtime
+        # integrations that read os.getenv() observe the configured values.
+        try:
+            with self._lock:
+                self._apply_env_overrides_to_environ_locked()
+        except Exception:
+            pass
+
+    @property
+    def base_dir(self) -> Path:
+        return self._base_dir
+
+    @property
+    def repo_root(self) -> Path:
+        return self._repo_root
+
+    # ----------------------------
+    # State I/O
+    # ----------------------------
+
+    def _load_state(self) -> Dict[str, Dict[str, Any]]:
+        if not self._state_path.exists():
+            return {}
+        try:
+            raw = self._state_path.read_text(encoding="utf-8", errors="replace")
+            obj = json.loads(raw)
+        except Exception:
+            return {}
+        if not isinstance(obj, dict):
+            return {}
+        procs = obj.get("processes")
+        if not isinstance(procs, dict):
+            return {}
+        out: Dict[str, Dict[str, Any]] = {}
+        for k, v in procs.items():
+            pid = str(k or "").strip()
+            if not pid or not _SAFE_ID_RE.match(pid) or not isinstance(v, dict):
+                continue
+            out[pid] = dict(v)
+        return out
+
+    def _save_state(self) -> None:
+        tmp = self._state_path.with_suffix(".tmp")
+        obj = {"version": 1, "updated_at": _now_utc_iso(), "processes": self._state}
+        data = json.dumps(obj, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
+        tmp.write_text(data, encoding="utf-8")
+        tmp.replace(self._state_path)
+
+    # ----------------------------
+    # Managed env overrides (write-only)
+    # ----------------------------
+
+    def _load_env_overrides(self) -> Dict[str, Dict[str, Any]]:
+        self._env_overrides_error = None
+        if not self._env_overrides_path.exists():
+            return {}
+        try:
+            raw = self._env_overrides_path.read_text(encoding="utf-8", errors="replace")
+            obj = json.loads(raw)
+        except Exception as e:
+            self._env_overrides_error = f"Failed to read env_overrides.json: {e}"
+            return {}
+        if not isinstance(obj, dict):
+            self._env_overrides_error = "env_overrides.json must be a JSON object"
+            return {}
+        raw_vars = obj.get("vars")
+        if not isinstance(raw_vars, dict):
+            self._env_overrides_error = "env_overrides.json must contain 'vars' (object)"
+            return {}
+
+        out: Dict[str, Dict[str, Any]] = {}
+        for k, v in raw_vars.items():
+            key = str(k or "").strip()
+            if not key or not _SAFE_ENV_KEY_RE.match(key):
+                continue
+            if not isinstance(v, dict):
+                continue
+            enabled = v.get("enabled")
+            out[key] = {
+                "enabled": bool(enabled) if isinstance(enabled, bool) else True,
+                "value": str(v.get("value") if v.get("value") is not None else ""),
+                "updated_at": str(v.get("updated_at") or "").strip() or None,
+            }
+        return out
+
+    def _save_env_overrides(self) -> None:
+        tmp = self._env_overrides_path.with_suffix(".tmp")
+        obj = {"version": 1, "updated_at": _now_utc_iso(), "vars": self._env_overrides}
+        data = json.dumps(obj, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
+        tmp.write_text(data, encoding="utf-8")
+
+        # Best-effort: keep secrets readable only by the current user.
+        try:
+            os.chmod(tmp, 0o600)
+        except Exception:
+            pass
+
+        tmp.replace(self._env_overrides_path)
+        try:
+            os.chmod(self._env_overrides_path, 0o600)
+        except Exception:
+            pass
+
+    # ----------------------------
+    # Public API
+    # ----------------------------
+
+    def list_processes(self) -> List[Dict[str, Any]]:
+        with self._lock:
+            self._refresh_states_locked()
+            out: List[Dict[str, Any]] = []
+            for pid, spec in sorted(self._specs.items(), key=lambda kv: kv[0]):
+                st = dict(self._state.get(pid) or {})
+                info = {
+                    "id": pid,
+                    "label": spec.label,
+                    "kind": spec.kind,
+                    "description": spec.description,
+                    "cwd": spec.cwd,
+                    "command": list(spec.command) if spec.kind != "self" else [],
+                    "url": spec.url,
+                    "status": str(st.get("status") or ("running" if spec.kind == "self" else "stopped")),
+                    "pid": st.get("pid"),
+                    "started_at": st.get("started_at"),
+                    "stopped_at": st.get("stopped_at"),
+                    "exit_code": st.get("exit_code"),
+                    "log_relpath": st.get("log_relpath"),
+                    "last_error": st.get("last_error"),
+                    "actions": self._actions_for_spec(spec),
+                }
+                if spec.kind == "self":
+                    info["pid"] = os.getpid()
+                    info["status"] = "running"
+                out.append(info)
+            return out
+
+    def list_managed_env_vars(self) -> Dict[str, Any]:
+        """Return allowlisted env vars metadata without exposing values."""
+        with self._lock:
+            error = self._env_overrides_error
+            out: List[Dict[str, Any]] = []
+            for key, spec in sorted(self._managed_env_specs.items(), key=lambda kv: kv[0]):
+                rec = self._env_overrides.get(key) if isinstance(self._env_overrides, dict) else None
+                source = "missing"
+                updated_at: Optional[str] = None
+                if isinstance(rec, dict):
+                    enabled0 = rec.get("enabled")
+                    enabled = bool(enabled0) if isinstance(enabled0, bool) else True
+                    updated_at = str(rec.get("updated_at") or "").strip() or None
+                    source = "override" if enabled else "unset"
+                else:
+                    v = os.getenv(key)
+                    if v is not None and str(v).strip() != "":
+                        source = "inherited"
+                    elif v is not None:
+                        source = "inherited_empty"
+
+                is_set = source in {"override", "inherited", "inherited_empty"}
+                out.append(
+                    {
+                        "key": key,
+                        "label": spec.label,
+                        "description": spec.description,
+                        "category": spec.category,
+                        "secret": bool(spec.secret),
+                        "is_set": bool(is_set),
+                        "source": source,
+                        "updated_at": updated_at,
+                    }
+                )
+            return {"ok": True, "error": error, "vars": out}
+
+    def update_managed_env_vars(self, *, set_vars: Dict[str, str], unset: List[str]) -> Dict[str, Any]:
+        set_vars = dict(set_vars or {})
+        unset = list(unset or [])
+
+        # Validate keys early to avoid persisting partial updates.
+        normalized_set: Dict[str, str] = {}
+        for k, v in set_vars.items():
+            key = str(k or "").strip()
+            if not key or not _SAFE_ENV_KEY_RE.match(key):
+                raise ValueError(f"Invalid env var key: {key!r}")
+            if key not in self._managed_env_specs:
+                raise ValueError(f"Env var key not allowlisted: {key}")
+            value = "" if v is None else str(v)
+            if "\x00" in value:
+                raise ValueError(f"Invalid env var value for {key}: contains NUL byte")
+            if len(value.encode("utf-8", errors="replace")) > 16_384:
+                raise ValueError(f"Env var value too large for {key} (max 16KB)")
+            normalized_set[key] = value
+
+        normalized_unset: List[str] = []
+        for k in unset:
+            key = str(k or "").strip()
+            if not key or not _SAFE_ENV_KEY_RE.match(key):
+                raise ValueError(f"Invalid env var key: {key!r}")
+            if key not in self._managed_env_specs:
+                raise ValueError(f"Env var key not allowlisted: {key}")
+            normalized_unset.append(key)
+
+        overlap = set(normalized_set.keys()) & set(normalized_unset)
+        if overlap:
+            keys = ", ".join(sorted(overlap))
+            raise ValueError(f"Env vars cannot be both set and unset in the same request: {keys}")
+
+        if not normalized_set and not normalized_unset:
+            raise ValueError("No env var updates provided (set/unset)")
+
+        if len(normalized_set) + len(normalized_unset) > 64:
+            raise ValueError("Too many env vars in one request (max 64)")
+
+        now = _now_utc_iso()
+        with self._lock:
+            for key, value in normalized_set.items():
+                self._env_overrides[key] = {"enabled": True, "value": value, "updated_at": now}
+            for key in normalized_unset:
+                # Security: clear the stored value when unsetting (avoid lingering secrets on disk).
+                self._env_overrides[key] = {"enabled": False, "value": "", "updated_at": now}
+
+            self._save_env_overrides()
+            self._env_overrides_error = None
+
+            # Apply immediately to this gateway process environment. This is safe
+            # because keys are strictly allowlisted.
+            self._apply_env_overrides_to_environ_locked()
+
+        # Return a fresh view (still metadata-only).
+        return self.list_managed_env_vars()
+
+    def _apply_env_overrides_to_environ_locked(self) -> None:
+        for key in self._managed_env_specs.keys():
+            rec = self._env_overrides.get(key) if isinstance(self._env_overrides, dict) else None
+            if not isinstance(rec, dict):
+                continue
+            enabled0 = rec.get("enabled")
+            enabled = bool(enabled0) if isinstance(enabled0, bool) else True
+            if enabled:
+                os.environ[key] = str(rec.get("value") if rec.get("value") is not None else "")
+            else:
+                os.environ.pop(key, None)
+
+    def _apply_env_overrides_to_dict_locked(self, env: Dict[str, str]) -> None:
+        for key in self._managed_env_specs.keys():
+            rec = self._env_overrides.get(key) if isinstance(self._env_overrides, dict) else None
+            if not isinstance(rec, dict):
+                continue
+            enabled0 = rec.get("enabled")
+            enabled = bool(enabled0) if isinstance(enabled0, bool) else True
+            if enabled:
+                env[key] = str(rec.get("value") if rec.get("value") is not None else "")
+            else:
+                env.pop(key, None)
+
+    def start(self, process_id: str) -> Dict[str, Any]:
+        pid = str(process_id or "").strip()
+        if not pid or pid not in self._specs:
+            raise KeyError(f"Unknown process id: {pid}")
+        spec = self._specs[pid]
+        if spec.kind == "self":
+            raise ValueError("Cannot start a self-managed process")
+
+        with self._lock:
+            self._refresh_one_locked(pid)
+            st = dict(self._state.get(pid) or {})
+            if str(st.get("status") or "").strip().lower() == "running" and isinstance(st.get("pid"), int):
+                return dict(st)
+
+            ts = _ts_compact_utc()
+            log_name = f"{pid}.{ts}.log"
+            log_path = (self._logs_dir / log_name).resolve()
+            log_relpath = str(log_path.relative_to(self._base_dir))
+
+            cwd_path = (self._repo_root / spec.cwd).resolve()
+            try:
+                cwd_path.relative_to(self._repo_root)
+            except Exception as e:
+                raise ValueError(f"Process cwd must be under repo_root: {e}")
+            if not cwd_path.exists():
+                raise FileNotFoundError(f"cwd does not exist: {cwd_path}")
+
+            env = dict(os.environ)
+            self._apply_env_overrides_to_dict_locked(env)
+            for k, v in (spec.env or {}).items():
+                env[str(k)] = str(v)
+
+            # Ensure the subprocess is in its own process group so we can stop it cleanly.
+            f = open(log_path, "ab", buffering=0)
+            try:
+                proc = subprocess.Popen(
+                    list(spec.command),
+                    cwd=str(cwd_path),
+                    env=env,
+                    stdin=subprocess.DEVNULL,
+                    stdout=f,
+                    stderr=subprocess.STDOUT,
+                    start_new_session=True,
+                )
+            except Exception:
+                f.close()
+                raise
+            finally:
+                # The child keeps its own fd; close our handle to avoid leaking descriptors.
+                try:
+                    f.close()
+                except Exception:
+                    pass
+
+            self._procs[pid] = proc
+            st2 = {
+                "status": "running",
+                "pid": int(proc.pid),
+                "started_at": _now_utc_iso(),
+                "stopped_at": None,
+                "exit_code": None,
+                "log_relpath": log_relpath,
+                "last_error": None,
+            }
+            self._state[pid] = st2
+            self._save_state()
+
+        t = threading.Thread(target=self._watch_process, args=(pid, proc), daemon=True)
+        t.start()
+
+        return dict(st2)
+
+    def stop(self, process_id: str, *, timeout_s: float = 6.0) -> Dict[str, Any]:
+        pid = str(process_id or "").strip()
+        if not pid or pid not in self._specs:
+            raise KeyError(f"Unknown process id: {pid}")
+        spec = self._specs[pid]
+        if spec.kind == "self":
+            raise ValueError("Cannot stop a self-managed process (use restart or an external supervisor)")
+
+        with self._lock:
+            self._refresh_one_locked(pid)
+            st = dict(self._state.get(pid) or {})
+            proc_pid = st.get("pid")
+            if not isinstance(proc_pid, int) or proc_pid <= 0 or str(st.get("status") or "").lower() != "running":
+                st["status"] = "stopped"
+                st["pid"] = None
+                self._state[pid] = st
+                self._save_state()
+                return dict(st)
+
+            # Safety: UAT processes are frequently restarted and operator-triggered; if state is stale or
+            # the PID was re-used by an unrelated process, stopping could terminate the wrong service.
+            #
+            # For UAT processes, require a best-effort commandline sanity check. We accept either:
+            # - the expected port (from spec.url), OR
+            # - the UAT launch script name (from spec.command) when present.
+            if pid.endswith("_uat"):
+                expected_port = _expected_port_from_url(spec.url)
+                expected_marker = ""
+                try:
+                    last = str((spec.command or [])[-1] or "").strip()
+                    if last:
+                        name = Path(last).name
+                        if name and ("uat" in name.lower() or name.lower().endswith(".sh")):
+                            expected_marker = name
+                except Exception:
+                    expected_marker = ""
+
+                cmdline = _pid_commandline(proc_pid)
+                if not cmdline:
+                    st["status"] = "error"
+                    st["last_error"] = (
+                        f"Refusing to stop pid={proc_pid}: cannot read commandline via ps "
+                        f"(expected port {expected_port}, marker {expected_marker!r})"
+                    )
+                    self._state[pid] = st
+                    self._save_state()
+                    return dict(st)
+
+                ok = False
+                if isinstance(expected_port, int) and expected_port > 0 and str(expected_port) in cmdline:
+                    ok = True
+                if expected_marker and expected_marker in cmdline:
+                    ok = True
+
+                if not ok:
+                    st["status"] = "error"
+                    st["last_error"] = (
+                        f"Refusing to stop pid={proc_pid}: commandline does not match expected UAT markers. "
+                        f"expected_port={expected_port}, marker={expected_marker!r}, cmd={cmdline[:240]!r}"
+                    )
+                    self._state[pid] = st
+                    self._save_state()
+                    return dict(st)
+
+            # Best-effort: terminate the process group.
+            try:
+                os.killpg(proc_pid, signal.SIGTERM)
+            except Exception:
+                try:
+                    os.kill(proc_pid, signal.SIGTERM)
+                except Exception:
+                    pass
+
+        # Wait outside the lock.
+        end = time.time() + max(0.25, float(timeout_s))
+        while time.time() < end:
+            if not _is_pid_running(proc_pid):
+                break
+            time.sleep(0.05)
+
+        # Escalate if needed.
+        if _is_pid_running(proc_pid):
+            try:
+                os.killpg(proc_pid, signal.SIGKILL)
+            except Exception:
+                try:
+                    os.kill(proc_pid, signal.SIGKILL)
+                except Exception:
+                    pass
+
+        with self._lock:
+            self._refresh_one_locked(pid)
+            st2 = dict(self._state.get(pid) or {})
+            st2.setdefault("stopped_at", _now_utc_iso())
+            st2["status"] = "stopped"
+            st2["pid"] = None
+            self._state[pid] = st2
+            self._save_state()
+            return dict(st2)
+
+    def restart(self, process_id: str) -> Dict[str, Any]:
+        pid = str(process_id or "").strip()
+        spec = self._specs.get(pid)
+        if spec is None:
+            raise KeyError(f"Unknown process id: {pid}")
+        if spec.kind == "self":
+            return self.restart_self()
+        try:
+            st = self.stop(pid)
+            if isinstance(st, dict) and str(st.get("status") or "").strip().lower() == "error":
+                return dict(st)
+        except Exception:
+            # Continue with best-effort restart for non-UAT processes only.
+            if pid.endswith("_uat"):
+                raise
+        return self.start(pid)
+
+    def restart_self(self) -> Dict[str, Any]:
+        # Reply immediately; the actual exec happens async.
+        self._schedule_gateway_execv(delay_s=0.75)
+        return {"status": "restarting", "scheduled_at": _now_utc_iso()}
+
+    def redeploy_gateway(self) -> Dict[str, Any]:
+        """Run build, then restart the gateway on success (best-effort)."""
+        self._schedule_gateway_redeploy()
+        return {"status": "redeploy_scheduled", "scheduled_at": _now_utc_iso()}
+
+    def log_tail(self, process_id: str, *, max_bytes: int = 80_000) -> Dict[str, Any]:
+        pid = str(process_id or "").strip()
+        if not pid or pid not in self._specs:
+            raise KeyError(f"Unknown process id: {pid}")
+
+        with self._lock:
+            self._refresh_one_locked(pid)
+            st = dict(self._state.get(pid) or {})
+        rel = st.get("log_relpath")
+        # Special case: gateway "self" logs map to the audit log by default.
+        if pid == "gateway" and (not isinstance(rel, str) or not rel.strip()):
+            rel = "audit_log.jsonl"
+        if not isinstance(rel, str) or not rel.strip():
+            return {"bytes": 0, "truncated": False, "content": "", "log_relpath": None}
+        path = (self._base_dir / rel).resolve()
+        try:
+            path.relative_to(self._base_dir)
+        except Exception:
+            return {"bytes": 0, "truncated": False, "content": "", "log_relpath": None}
+
+        if not path.exists():
+            return {"bytes": 0, "truncated": False, "content": "", "log_relpath": str(rel)}
+
+        data = b""
+        truncated = False
+        try:
+            with open(path, "rb") as f:
+                f.seek(0, os.SEEK_END)
+                size = int(f.tell() or 0)
+                start = max(0, size - int(max_bytes))
+                truncated = start > 0
+                f.seek(start, os.SEEK_SET)
+                data = f.read(int(max_bytes))
+        except Exception:
+            return {"bytes": 0, "truncated": False, "content": "", "log_relpath": str(rel)}
+
+        text = ""
+        try:
+            text = data.decode("utf-8", errors="replace")
+        except Exception:
+            text = ""
+        return {"bytes": len(data), "truncated": bool(truncated), "content": text, "log_relpath": str(rel)}
+
+    # ----------------------------
+    # Internals
+    # ----------------------------
+
+    def _actions_for_spec(self, spec: ProcessSpec) -> List[str]:
+        if spec.kind == "self":
+            return ["restart", "redeploy", "logs"]
+        actions = ["logs"]
+        if spec.kind in {"service", "task"}:
+            actions = ["start", "stop", "restart", "logs"]
+        return actions
+
+    def _refresh_states_locked(self) -> None:
+        for pid in list(self._state.keys()):
+            self._refresh_one_locked(pid)
+
+    def _refresh_one_locked(self, process_id: str) -> None:
+        st = dict(self._state.get(process_id) or {})
+        pid = st.get("pid")
+        if isinstance(pid, int) and pid > 0:
+            if _is_pid_running(pid):
+                st["status"] = "running"
+            else:
+                st["status"] = "stopped"
+                st["pid"] = None
+                st.setdefault("stopped_at", _now_utc_iso())
+        self._state[process_id] = st
+
+    def _watch_process(self, process_id: str, proc: subprocess.Popen[bytes]) -> None:
+        rc: Optional[int] = None
+        try:
+            rc = proc.wait()
+        except Exception:
+            rc = None
+        finally:
+            with self._lock:
+                st = dict(self._state.get(process_id) or {})
+                st["status"] = "stopped"
+                st["pid"] = None
+                st["exit_code"] = int(rc) if isinstance(rc, int) else st.get("exit_code")
+                st["stopped_at"] = _now_utc_iso()
+                self._state[process_id] = st
+                self._procs.pop(process_id, None)
+                try:
+                    self._save_state()
+                except Exception:
+                    pass
+
+    def _schedule_gateway_execv(self, *, delay_s: float) -> None:
+        def _do() -> None:
+            time.sleep(max(0.0, float(delay_s)))
+            try:
+                with self._lock:
+                    self._apply_env_overrides_to_environ_locked()
+            except Exception:
+                pass
+            argv = list(sys.argv)
+            exe = argv[0] if argv else ""
+            # Prefer re-exec of the original entrypoint when possible.
+            try:
+                if exe and os.path.exists(exe) and os.access(exe, os.X_OK):
+                    os.execv(exe, argv)
+                    return
+            except Exception:
+                pass
+            try:
+                # Fallback: execute as a module (keeps compatibility with `python -m`).
+                os.execv(sys.executable, [sys.executable, "-m", "abstractgateway.cli", *argv[1:]])
+            except Exception:
+                # Last resort: exit (requires external supervisor).
+                os._exit(0)
+
+        t = threading.Thread(target=_do, daemon=True)
+        t.start()
+
+    def _schedule_gateway_redeploy(self) -> None:
+        def _do() -> None:
+            try:
+                st = self.start("build")
+            except Exception:
+                st = {}
+
+            # Wait for build to finish by polling state (works across the monitor thread).
+            for _ in range(60 * 60):  # up to 1h
+                time.sleep(1.0)
+                with self._lock:
+                    cur = dict(self._state.get("build") or {})
+                if str(cur.get("status") or "").lower() != "running":
+                    exit_code = cur.get("exit_code")
+                    if isinstance(exit_code, int) and exit_code == 0:
+                        self._schedule_gateway_execv(delay_s=0.75)
+                    return
+
+        t = threading.Thread(target=_do, daemon=True)
+        t.start()
+
+
+_PROCESS_MANAGER: Optional[ProcessManager] = None
+_PROCESS_MANAGER_LOCK = threading.Lock()
+
+
+def get_process_manager(*, base_dir: Path, repo_root: Path) -> ProcessManager:
+    global _PROCESS_MANAGER
+    with _PROCESS_MANAGER_LOCK:
+        resolved_base = Path(base_dir).expanduser().resolve()
+        resolved_repo = Path(repo_root).expanduser().resolve()
+        if _PROCESS_MANAGER is not None:
+            if _PROCESS_MANAGER.base_dir == resolved_base and _PROCESS_MANAGER.repo_root == resolved_repo:
+                return _PROCESS_MANAGER
+
+        specs = load_process_specs(repo_root=resolved_repo)
+        _PROCESS_MANAGER = ProcessManager(base_dir=resolved_base, repo_root=resolved_repo, specs=specs)
+        return _PROCESS_MANAGER