abstractgateway 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. abstractgateway/__init__.py +1 -2
  2. abstractgateway/__main__.py +7 -0
  3. abstractgateway/app.py +4 -4
  4. abstractgateway/cli.py +568 -8
  5. abstractgateway/config.py +15 -5
  6. abstractgateway/embeddings_config.py +45 -0
  7. abstractgateway/host_metrics.py +274 -0
  8. abstractgateway/hosts/bundle_host.py +528 -55
  9. abstractgateway/hosts/visualflow_host.py +30 -3
  10. abstractgateway/integrations/__init__.py +2 -0
  11. abstractgateway/integrations/email_bridge.py +782 -0
  12. abstractgateway/integrations/telegram_bridge.py +534 -0
  13. abstractgateway/maintenance/__init__.py +5 -0
  14. abstractgateway/maintenance/action_tokens.py +100 -0
  15. abstractgateway/maintenance/backlog_exec_runner.py +1592 -0
  16. abstractgateway/maintenance/backlog_parser.py +184 -0
  17. abstractgateway/maintenance/draft_generator.py +451 -0
  18. abstractgateway/maintenance/llm_assist.py +212 -0
  19. abstractgateway/maintenance/notifier.py +109 -0
  20. abstractgateway/maintenance/process_manager.py +1064 -0
  21. abstractgateway/maintenance/report_models.py +81 -0
  22. abstractgateway/maintenance/report_parser.py +219 -0
  23. abstractgateway/maintenance/text_similarity.py +123 -0
  24. abstractgateway/maintenance/triage.py +507 -0
  25. abstractgateway/maintenance/triage_queue.py +142 -0
  26. abstractgateway/migrate.py +155 -0
  27. abstractgateway/routes/__init__.py +2 -2
  28. abstractgateway/routes/gateway.py +10817 -179
  29. abstractgateway/routes/triage.py +118 -0
  30. abstractgateway/runner.py +689 -14
  31. abstractgateway/security/gateway_security.py +425 -110
  32. abstractgateway/service.py +213 -6
  33. abstractgateway/stores.py +64 -4
  34. abstractgateway/workflow_deprecations.py +225 -0
  35. abstractgateway-0.1.1.dist-info/METADATA +135 -0
  36. abstractgateway-0.1.1.dist-info/RECORD +40 -0
  37. abstractgateway-0.1.0.dist-info/METADATA +0 -101
  38. abstractgateway-0.1.0.dist-info/RECORD +0 -18
  39. {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/WHEEL +0 -0
  40. {abstractgateway-0.1.0.dist-info → abstractgateway-0.1.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1064 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import json
5
+ import os
6
+ import re
7
+ import signal
8
+ import subprocess
9
+ import sys
10
+ import threading
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Optional
15
+ from urllib.parse import urlparse
16
+
17
+
18
+ _SAFE_ID_RE = re.compile(r"^[a-zA-Z0-9_-]+$")
19
+ _SAFE_ENV_KEY_RE = re.compile(r"^[A-Z][A-Z0-9_]*$")
20
+
21
+
22
@dataclass(frozen=True)
class ManagedEnvVarSpec:
    """Immutable metadata describing one allowlisted environment variable.

    Instances carry only descriptive metadata (name, UI label, help text,
    grouping and a secrecy flag); they never hold the variable's value.
    """

    # Environment variable name.
    key: str
    # Short human-readable label.
    label: str
    # Longer help text explaining what the variable configures.
    description: str
    # Grouping bucket for the variable.
    category: str = "general"
    # True when the value itself is sensitive (e.g. a password).
    secret: bool = False
29
+
30
+
31
def managed_env_var_allowlist() -> Dict[str, ManagedEnvVarSpec]:
    """Build the allowlist of environment variables editable via the process manager UI.

    Security rationale:
    - Disallow arbitrary env var editing (PATH, LD_PRELOAD, PYTHONPATH, NODE_OPTIONS, etc).
    - Treat stored values as secrets: never return them to HTTP clients.

    Returns:
        Mapping of env var name -> ``ManagedEnvVarSpec``, in declaration order.

    Raises:
        ValueError: if a declared key is empty or does not match ``_SAFE_ENV_KEY_RE``.
    """
    # Email (framework tools + bridges): (key, label, description) rows, none secret.
    email_rows = [
        (
            "ABSTRACT_EMAIL_ACCOUNTS_CONFIG",
            "Email accounts config path",
            "Path to a YAML/JSON multi-account config file (e.g. /path/to/emails.yaml).",
        ),
        (
            "ABSTRACT_EMAIL_SMTP_HOST",
            "SMTP host",
            "SMTP server hostname (e.g. smtp.gmail.com).",
        ),
        (
            "ABSTRACT_EMAIL_SMTP_PORT",
            "SMTP port",
            "SMTP port (e.g. 587 for STARTTLS, 465 for implicit TLS).",
        ),
        (
            "ABSTRACT_EMAIL_SMTP_USERNAME",
            "SMTP username",
            "SMTP username (often the email address).",
        ),
        (
            "ABSTRACT_EMAIL_SMTP_PASSWORD_ENV_VAR",
            "SMTP password env var",
            "Name of the env var that contains the SMTP password (default: EMAIL_PASSWORD).",
        ),
        (
            "ABSTRACT_EMAIL_SMTP_STARTTLS",
            "SMTP STARTTLS",
            "Whether to use STARTTLS for SMTP (true/false).",
        ),
        (
            "ABSTRACT_EMAIL_FROM",
            "From email",
            "Default From address (used when the tool doesn't specify one).",
        ),
        (
            "ABSTRACT_EMAIL_REPLY_TO",
            "Reply-To",
            "Optional default Reply-To address.",
        ),
        (
            "ABSTRACT_EMAIL_DEFAULT_ACCOUNT",
            "Default account",
            "Default email account name (when multiple accounts exist).",
        ),
        (
            "ABSTRACT_EMAIL_ACCOUNT_NAME",
            "Account name",
            "Optional account name label for env-based config (default: 'default').",
        ),
        (
            "ABSTRACT_EMAIL_IMAP_HOST",
            "IMAP host",
            "IMAP server hostname.",
        ),
        (
            "ABSTRACT_EMAIL_IMAP_PORT",
            "IMAP port",
            "IMAP port (default: 993).",
        ),
        (
            "ABSTRACT_EMAIL_IMAP_USERNAME",
            "IMAP username",
            "IMAP username (often the email address).",
        ),
        (
            "ABSTRACT_EMAIL_IMAP_PASSWORD_ENV_VAR",
            "IMAP password env var",
            "Name of the env var that contains the IMAP password (default: EMAIL_PASSWORD).",
        ),
        (
            "ABSTRACT_EMAIL_IMAP_FOLDER",
            "IMAP folder",
            "Mailbox folder to poll (default: INBOX).",
        ),
    ]
    entries = [
        ManagedEnvVarSpec(key=name, label=title, description=text, category="email")
        for name, title, text in email_rows
    ]
    # The only entry flagged as secret: the password itself.
    entries.append(
        ManagedEnvVarSpec(
            key="EMAIL_PASSWORD",
            label="EMAIL_PASSWORD",
            description="Email account password (referenced by *_PASSWORD_ENV_VAR by default).",
            category="email",
            secret=True,
        )
    )

    # Re-validate every declared key so a typo in the table fails loudly.
    allowlist: Dict[str, ManagedEnvVarSpec] = {}
    for entry in entries:
        name = str(entry.key or "").strip()
        if not (name and _SAFE_ENV_KEY_RE.match(name)):
            raise ValueError(f"Invalid allowlisted env var key: {name!r}")
        allowlist[name] = entry
    return allowlist
146
+
147
+
148
+ def _now_utc_iso() -> str:
149
+ return datetime.datetime.now(datetime.timezone.utc).isoformat()
150
+
151
+
152
+ def _ts_compact_utc() -> str:
153
+ return datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
154
+
155
+
156
+ def _is_pid_running(pid: int) -> bool:
157
+ if not isinstance(pid, int) or pid <= 0:
158
+ return False
159
+ try:
160
+ os.kill(pid, 0)
161
+ return True
162
+ except Exception:
163
+ return False
164
+
165
+
166
+ def _pid_commandline(pid: int) -> str:
167
+ if not isinstance(pid, int) or pid <= 0:
168
+ return ""
169
+ try:
170
+ proc = subprocess.run(
171
+ # Use wide output so long commandlines (node/uvicorn) aren't truncated.
172
+ # This is critical for UAT stop safety checks that match on ports/markers.
173
+ ["ps", "-ww", "-p", str(pid), "-o", "command="],
174
+ stdout=subprocess.PIPE,
175
+ stderr=subprocess.DEVNULL,
176
+ text=True,
177
+ timeout=1.0,
178
+ check=False,
179
+ )
180
+ except Exception:
181
+ return ""
182
+ return str(proc.stdout or "").strip()
183
+
184
+
185
+ def _expected_port_from_url(url: Optional[str]) -> Optional[int]:
186
+ s = str(url or "").strip()
187
+ if not s:
188
+ return None
189
+ try:
190
+ u = urlparse(s)
191
+ except Exception:
192
+ return None
193
+ if u.port is None:
194
+ return None
195
+ try:
196
+ return int(u.port)
197
+ except Exception:
198
+ return None
199
+
200
+
201
+ def _default_shell() -> str:
202
+ return str(os.environ.get("SHELL") or "/bin/bash")
203
+
204
+
205
@dataclass(frozen=True)
class ProcessSpec:
    """Immutable description of one process the manager knows about."""

    # Identifier; ``validate`` requires it to match ``_SAFE_ID_RE``.
    id: str
    # Human-readable name.
    label: str
    kind: str = "service"  # service|task|self
    description: Optional[str] = None
    # Working directory.
    cwd: str = "."
    # argv list; ``validate`` requires it to be non-empty unless kind == "self".
    command: List[str] = field(default_factory=list)
    # Extra environment variables for the process.
    env: Dict[str, str] = field(default_factory=dict)
    # Optional URL associated with the process.
    url: Optional[str] = None

    def validate(self) -> None:
        """Check id, kind and command; raise ``ValueError`` on the first problem.

        ``kind == "self"`` entries are exempt from the command check.
        """
        ident = str(self.id or "").strip()
        if not (ident and _SAFE_ID_RE.match(ident)):
            raise ValueError(f"Invalid process id: {self.id!r}")
        if self.kind not in {"service", "task", "self"}:
            raise ValueError(f"Invalid process kind: {self.kind!r}")
        if self.kind == "self":
            return
        argv = self.command
        argv_ok = (
            isinstance(argv, list)
            and bool(argv)
            and all(isinstance(part, str) and part.strip() for part in argv)
        )
        if not argv_ok:
            raise ValueError(f"Invalid command for process {ident!r}")
225
+
226
+
227
def default_process_specs(*, repo_root: Path) -> Dict[str, ProcessSpec]:
    """Return the default managed processes for the monorepo dev topology.

    Args:
        repo_root: Accepted for signature parity with the other loaders; the
            body does not read it.

    Returns:
        Mapping of process id -> ``ProcessSpec``, in declaration order.
    """
    shell = _default_shell()
    uat_checkout = "untracked/backlog_exec_uat/current"

    def via_login_shell(script: str) -> List[str]:
        # Launch repo scripts through a login shell (`-lc`).
        return [shell, "-lc", script]

    catalog = [
        ProcessSpec(
            id="gateway",
            label="AbstractGateway (this process)",
            kind="self",
            description="Gateway API + (optional) runner. Supports restart/redeploy.",
            cwd=".",
            command=[],
            url=None,
        ),
        ProcessSpec(
            id="gateway_uat",
            label="AbstractGateway (UAT)",
            kind="service",
            description="Gateway running candidate code from untracked/backlog_exec_uat/current.",
            cwd=".",
            env={
                # Pin defaults to prevent accidental env leakage from the operator shell.
                "ABSTRACTGATEWAY_UAT_PORT": "6081",
                "ABSTRACTGATEWAY_UAT_DATA_DIR": "runtime/gateway_uat",
                "ABSTRACTGATEWAY_UAT_REPO_ROOT": uat_checkout,
                # UAT should not execute backlog jobs (only the prod gateway should).
                "ABSTRACTGATEWAY_BACKLOG_EXEC_RUNNER": "0",
            },
            command=via_login_shell("./agw-uat.sh"),
            url="http://localhost:6081",
        ),
        ProcessSpec(
            id="build",
            label="build.sh (deps install)",
            kind="task",
            description="Runs the repo build script (pip/npm installs).",
            cwd=".",
            command=via_login_shell("./build.sh"),
        ),
        ProcessSpec(
            id="abstractobserver",
            label="AbstractObserver (web)",
            kind="service",
            description="Vite dev server.",
            cwd="abstractobserver",
            command=["npm", "run", "dev"],
            url="http://localhost:3001",
        ),
        ProcessSpec(
            id="abstractobserver_uat",
            label="AbstractObserver (web, UAT)",
            kind="service",
            description="Vite dev server from untracked/backlog_exec_uat/current.",
            cwd=".",
            env={
                "ABSTRACTOBSERVER_UAT_PORT": "6082",
                "ABSTRACTGATEWAY_UAT_REPO_ROOT": uat_checkout,
            },
            command=via_login_shell("./aobs-uat.sh"),
            url="http://localhost:6082",
        ),
        ProcessSpec(
            id="abstractcode_web",
            label="AbstractCode Web",
            kind="service",
            description="Vite dev server.",
            cwd="abstractcode/web",
            command=["npm", "run", "dev"],
            url="http://localhost:3002",
        ),
        ProcessSpec(
            id="abstractcode_web_uat",
            label="AbstractCode Web (UAT)",
            kind="service",
            description="Vite dev server from untracked/backlog_exec_uat/current.",
            cwd=".",
            env={
                "ABSTRACTCODE_WEB_UAT_PORT": "6083",
                "ABSTRACTGATEWAY_UAT_REPO_ROOT": uat_checkout,
            },
            command=via_login_shell("./acode-web-uat.sh"),
            url="http://localhost:6083",
        ),
        ProcessSpec(
            id="abstractflow_frontend",
            label="AbstractFlow Web (frontend)",
            kind="service",
            description="Vite dev server.",
            cwd="abstractflow/web/frontend",
            command=["npm", "run", "dev"],
            url="http://localhost:3003",
        ),
        ProcessSpec(
            id="abstractflow_frontend_uat",
            label="AbstractFlow Web (frontend, UAT)",
            kind="service",
            description="Vite dev server from untracked/backlog_exec_uat/current.",
            cwd=".",
            env={
                "ABSTRACTFLOW_FRONTEND_UAT_PORT": "6084",
                "ABSTRACTFLOW_BACKEND_UAT_PORT": "6080",
                "ABSTRACTGATEWAY_UAT_REPO_ROOT": uat_checkout,
            },
            command=via_login_shell("./aflow-frontend-uat.sh"),
            url="http://localhost:6084",
        ),
        ProcessSpec(
            id="abstractflow_backend",
            label="AbstractFlow Web (backend)",
            kind="service",
            description="FastAPI backend (uvicorn).",
            cwd="abstractflow/web",
            command=[sys.executable, "-m", "backend", "--host", "0.0.0.0", "--port", "8080", "--reload"],
            url="http://localhost:8080",
        ),
        ProcessSpec(
            id="abstractflow_backend_uat",
            label="AbstractFlow Web (backend, UAT)",
            kind="service",
            description="FastAPI backend from untracked/backlog_exec_uat/current.",
            cwd=".",
            env={
                "ABSTRACTFLOW_BACKEND_UAT_PORT": "6080",
                "ABSTRACTFLOW_RUNTIME_DIR": "runtime/abstractflow_uat",
                "ABSTRACTGATEWAY_UAT_REPO_ROOT": uat_checkout,
            },
            command=via_login_shell("./aflow-backend-uat.sh"),
            url="http://localhost:6080",
        ),
    ]
    # Every entry is registered under its own id.
    return {entry.id: entry for entry in catalog}
355
+
356
+
357
def _load_specs_from_path(*, repo_root: Path, config_path: Path) -> Dict[str, ProcessSpec]:
    """Parse a JSON process-manager config file into validated specs.

    The file must be a JSON object with a ``processes`` list. Entries that are
    not objects are skipped; every other entry is normalised, validated and
    stored under its id (a later duplicate id replaces an earlier one).

    Args:
        repo_root: Not read by this function; kept for signature parity.
        config_path: Path of the JSON config file.

    Returns:
        Mapping of process id -> ``ProcessSpec``; a ``gateway`` self entry is
        added when the config does not define one.

    Raises:
        ValueError: on a malformed document, an invalid id, an absolute ``cwd``
            or a spec that fails ``ProcessSpec.validate``.
    """
    document = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
    if not isinstance(document, dict):
        raise ValueError("process manager config must be a JSON object")
    entries = document.get("processes")
    if not isinstance(entries, list):
        raise ValueError("process manager config must contain 'processes' (list)")

    specs: Dict[str, ProcessSpec] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue

        ident = str(entry.get("id") or "").strip()
        if not (ident and _SAFE_ID_RE.match(ident)):
            raise ValueError(f"Invalid process id in config: {ident!r}")
        label = str(entry.get("label") or ident).strip() or ident
        kind = str(entry.get("kind") or "service").strip().lower() or "service"

        raw_cwd = entry.get("cwd")
        cwd = str("." if raw_cwd is None else raw_cwd).strip() or "."
        # Security guardrail: treat cwd as repo-relative to avoid arbitrary host path execution.
        if os.path.isabs(cwd):
            raise ValueError(f"Process {ident!r} cwd must be relative to repo_root")

        # A list is filtered to non-blank scalar items; a bare string becomes a
        # single-element argv; anything else yields an empty command.
        raw_cmd = entry.get("command")
        if isinstance(raw_cmd, list):
            argv = [
                str(part)
                for part in raw_cmd
                if isinstance(part, (str, int, float)) and str(part).strip()
            ]
        elif isinstance(raw_cmd, str) and raw_cmd.strip():
            argv = [raw_cmd.strip()]
        else:
            argv = []

        env_map: Dict[str, str] = {}
        raw_env = entry.get("env")
        if isinstance(raw_env, dict):
            for name, val in raw_env.items():
                name_s = str(name or "").strip()
                if name_s:
                    env_map[name_s] = str("" if val is None else val)

        candidate = ProcessSpec(
            id=ident,
            label=label,
            kind=kind,
            description=str(entry.get("description") or "").strip() or None,
            cwd=cwd,
            command=argv,
            env=env_map,
            url=str(entry.get("url") or "").strip() or None,
        )
        candidate.validate()
        specs[ident] = candidate

    # Always include the gateway self entry so UIs can restart/redeploy.
    if "gateway" not in specs:
        specs["gateway"] = ProcessSpec(
            id="gateway",
            label="AbstractGateway (this process)",
            kind="self",
            description="Gateway API + (optional) runner. Supports restart/redeploy.",
            cwd=".",
            command=[],
        )
    return specs
423
+
424
+
425
def load_process_specs(*, repo_root: Path) -> Dict[str, ProcessSpec]:
    """Load process specs from the configured JSON file, else the built-in defaults.

    When ``ABSTRACTGATEWAY_PROCESS_MANAGER_CONFIG`` is set to a non-blank
    value it names the config file (``~`` expanded, path resolved). Otherwise
    the defaults are validated and returned.
    """
    configured = str(os.getenv("ABSTRACTGATEWAY_PROCESS_MANAGER_CONFIG") or "").strip()
    if configured:
        resolved = Path(configured).expanduser().resolve()
        return _load_specs_from_path(repo_root=repo_root, config_path=resolved)

    builtin = default_process_specs(repo_root=repo_root)
    for candidate in builtin.values():
        candidate.validate()
    return builtin
434
+
435
+
436
+ class ProcessManager:
437
def __init__(self, *, base_dir: Path, repo_root: Path, specs: Dict[str, ProcessSpec]):
    """Set up on-disk state and load persisted process/env-override records.

    Args:
        base_dir: Directory under which ``process_manager/`` (state files) and
            ``process_logs/`` are created.
        repo_root: Root directory stored for later use by the manager.
        specs: Mapping of process id -> spec; copied shallowly.
    """
    self._base_dir = Path(base_dir).expanduser().resolve()
    self._repo_root = Path(repo_root).expanduser().resolve()
    self._specs = dict(specs)
    self._managed_env_specs = managed_env_var_allowlist()
    self._lock = threading.Lock()
    self._procs: Dict[str, subprocess.Popen[bytes]] = {}

    state_dir = (self._base_dir / "process_manager").resolve()
    logs_dir = (self._base_dir / "process_logs").resolve()
    self._state_dir = state_dir
    self._logs_dir = logs_dir
    self._state_path = (state_dir / "state.json").resolve()
    self._env_overrides_path = (state_dir / "env_overrides.json").resolve()

    for directory in (state_dir, logs_dir):
        directory.mkdir(parents=True, exist_ok=True)

    self._state: Dict[str, Dict[str, Any]] = self._load_state()
    self._env_overrides_error: Optional[str] = None
    self._env_overrides: Dict[str, Dict[str, Any]] = self._load_env_overrides()

    # Apply persisted overrides to this gateway process early so runtime
    # integrations that read os.getenv() observe the configured values.
    # Deliberately best-effort: a failure here must not prevent construction.
    try:
        with self._lock:
            self._apply_env_overrides_to_environ_locked()
    except Exception:
        pass
464
+
465
+ @property
466
+ def base_dir(self) -> Path:
467
+ return self._base_dir
468
+
469
+ @property
470
+ def repo_root(self) -> Path:
471
+ return self._repo_root
472
+
473
+ # ----------------------------
474
+ # State I/O
475
+ # ----------------------------
476
+
477
+ def _load_state(self) -> Dict[str, Dict[str, Any]]:
478
+ if not self._state_path.exists():
479
+ return {}
480
+ try:
481
+ raw = self._state_path.read_text(encoding="utf-8", errors="replace")
482
+ obj = json.loads(raw)
483
+ except Exception:
484
+ return {}
485
+ if not isinstance(obj, dict):
486
+ return {}
487
+ procs = obj.get("processes")
488
+ if not isinstance(procs, dict):
489
+ return {}
490
+ out: Dict[str, Dict[str, Any]] = {}
491
+ for k, v in procs.items():
492
+ pid = str(k or "").strip()
493
+ if not pid or not _SAFE_ID_RE.match(pid) or not isinstance(v, dict):
494
+ continue
495
+ out[pid] = dict(v)
496
+ return out
497
+
498
def _save_state(self) -> None:
    """Persist ``self._state`` to ``state.json`` via a temp file + rename."""
    payload = {"version": 1, "updated_at": _now_utc_iso(), "processes": self._state}
    text = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
    # Write next to the target, then swap it in with a single rename.
    scratch = self._state_path.with_suffix(".tmp")
    scratch.write_text(text, encoding="utf-8")
    scratch.replace(self._state_path)
504
+
505
+ # ----------------------------
506
+ # Managed env overrides (write-only)
507
+ # ----------------------------
508
+
509
+ def _load_env_overrides(self) -> Dict[str, Dict[str, Any]]:
510
+ self._env_overrides_error = None
511
+ if not self._env_overrides_path.exists():
512
+ return {}
513
+ try:
514
+ raw = self._env_overrides_path.read_text(encoding="utf-8", errors="replace")
515
+ obj = json.loads(raw)
516
+ except Exception as e:
517
+ self._env_overrides_error = f"Failed to read env_overrides.json: {e}"
518
+ return {}
519
+ if not isinstance(obj, dict):
520
+ self._env_overrides_error = "env_overrides.json must be a JSON object"
521
+ return {}
522
+ raw_vars = obj.get("vars")
523
+ if not isinstance(raw_vars, dict):
524
+ self._env_overrides_error = "env_overrides.json must contain 'vars' (object)"
525
+ return {}
526
+
527
+ out: Dict[str, Dict[str, Any]] = {}
528
+ for k, v in raw_vars.items():
529
+ key = str(k or "").strip()
530
+ if not key or not _SAFE_ENV_KEY_RE.match(key):
531
+ continue
532
+ if not isinstance(v, dict):
533
+ continue
534
+ enabled = v.get("enabled")
535
+ out[key] = {
536
+ "enabled": bool(enabled) if isinstance(enabled, bool) else True,
537
+ "value": str(v.get("value") if v.get("value") is not None else ""),
538
+ "updated_at": str(v.get("updated_at") or "").strip() or None,
539
+ }
540
+ return out
541
+
542
def _save_env_overrides(self) -> None:
    """Persist ``self._env_overrides`` to ``env_overrides.json`` (owner-only).

    The payload may contain secrets, so the temp file is created with mode
    0600 from the start instead of being written with default permissions and
    restricted afterwards. It is then renamed over the target.

    Raises:
        OSError: if the temp file cannot be created/written or the rename fails.
    """
    tmp = self._env_overrides_path.with_suffix(".tmp")
    obj = {"version": 1, "updated_at": _now_utc_iso(), "vars": self._env_overrides}
    data = json.dumps(obj, ensure_ascii=False, indent=2, sort_keys=True) + "\n"

    # Create the file already restricted to the current user so the secrets
    # are never readable through a default-umask window.
    fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        # The creation mode is ignored when a stale temp file already exists;
        # tighten it (best-effort) before any secret is written.
        try:
            os.chmod(tmp, 0o600)
        except Exception:
            pass
        fh.write(data)

    tmp.replace(self._env_overrides_path)
    # Best-effort: keep secrets readable only by the current user.
    try:
        os.chmod(self._env_overrides_path, 0o600)
    except Exception:
        pass
559
+
560
+ # ----------------------------
561
+ # Public API
562
+ # ----------------------------
563
+
564
def list_processes(self) -> List[Dict[str, Any]]:
    """Return one info dict per known process, sorted by process id.

    State is refreshed first. ``kind == "self"`` entries always report this
    process's own pid, status ``"running"`` and an empty command.
    """
    with self._lock:
        self._refresh_states_locked()
        listing: List[Dict[str, Any]] = []
        for ident in sorted(self._specs):
            spec = self._specs[ident]
            record = dict(self._state.get(ident) or {})
            is_self = spec.kind == "self"
            if is_self:
                status = "running"
                live_pid = os.getpid()
                argv: List[str] = []
            else:
                status = str(record.get("status") or "stopped")
                live_pid = record.get("pid")
                argv = list(spec.command)
            listing.append(
                {
                    "id": ident,
                    "label": spec.label,
                    "kind": spec.kind,
                    "description": spec.description,
                    "cwd": spec.cwd,
                    "command": argv,
                    "url": spec.url,
                    "status": status,
                    "pid": live_pid,
                    "started_at": record.get("started_at"),
                    "stopped_at": record.get("stopped_at"),
                    "exit_code": record.get("exit_code"),
                    "log_relpath": record.get("log_relpath"),
                    "last_error": record.get("last_error"),
                    "actions": self._actions_for_spec(spec),
                }
            )
        return listing
592
+
593
+ def list_managed_env_vars(self) -> Dict[str, Any]:
594
+ """Return allowlisted env vars metadata without exposing values."""
595
+ with self._lock:
596
+ error = self._env_overrides_error
597
+ out: List[Dict[str, Any]] = []
598
+ for key, spec in sorted(self._managed_env_specs.items(), key=lambda kv: kv[0]):
599
+ rec = self._env_overrides.get(key) if isinstance(self._env_overrides, dict) else None
600
+ source = "missing"
601
+ updated_at: Optional[str] = None
602
+ if isinstance(rec, dict):
603
+ enabled0 = rec.get("enabled")
604
+ enabled = bool(enabled0) if isinstance(enabled0, bool) else True
605
+ updated_at = str(rec.get("updated_at") or "").strip() or None
606
+ source = "override" if enabled else "unset"
607
+ else:
608
+ v = os.getenv(key)
609
+ if v is not None and str(v).strip() != "":
610
+ source = "inherited"
611
+ elif v is not None:
612
+ source = "inherited_empty"
613
+
614
+ is_set = source in {"override", "inherited", "inherited_empty"}
615
+ out.append(
616
+ {
617
+ "key": key,
618
+ "label": spec.label,
619
+ "description": spec.description,
620
+ "category": spec.category,
621
+ "secret": bool(spec.secret),
622
+ "is_set": bool(is_set),
623
+ "source": source,
624
+ "updated_at": updated_at,
625
+ }
626
+ )
627
+ return {"ok": True, "error": error, "vars": out}
628
+
629
def update_managed_env_vars(self, *, set_vars: Dict[str, str], unset: List[str]) -> Dict[str, Any]:
    """Set and/or unset allowlisted env vars, persist them, and apply them.

    Args:
        set_vars: Mapping of env var name -> new value (``None`` becomes ``""``).
        unset: Names to mark as unset; their stored value is cleared.

    Returns:
        The metadata-only view produced by ``list_managed_env_vars``.

    Raises:
        ValueError: for an invalid or non-allowlisted key, a value containing
            NUL or larger than 16KB, a key present in both inputs, an empty
            request, or more than 64 keys in total.
    """
    set_vars = dict(set_vars or {})
    unset = list(unset or [])

    def checked_key(raw: Any) -> str:
        # Shared key validation for both the "set" and the "unset" inputs.
        name = str(raw or "").strip()
        if not name or not _SAFE_ENV_KEY_RE.match(name):
            raise ValueError(f"Invalid env var key: {name!r}")
        if name not in self._managed_env_specs:
            raise ValueError(f"Env var key not allowlisted: {name}")
        return name

    # Validate keys early to avoid persisting partial updates.
    to_set: Dict[str, str] = {}
    for raw_key, raw_value in set_vars.items():
        name = checked_key(raw_key)
        text = "" if raw_value is None else str(raw_value)
        if "\x00" in text:
            raise ValueError(f"Invalid env var value for {name}: contains NUL byte")
        if len(text.encode("utf-8", errors="replace")) > 16_384:
            raise ValueError(f"Env var value too large for {name} (max 16KB)")
        to_set[name] = text

    to_unset: List[str] = [checked_key(raw_key) for raw_key in unset]

    clash = set(to_set) & set(to_unset)
    if clash:
        keys = ", ".join(sorted(clash))
        raise ValueError(f"Env vars cannot be both set and unset in the same request: {keys}")

    if not to_set and not to_unset:
        raise ValueError("No env var updates provided (set/unset)")

    if len(to_set) + len(to_unset) > 64:
        raise ValueError("Too many env vars in one request (max 64)")

    stamp = _now_utc_iso()
    with self._lock:
        for name, text in to_set.items():
            self._env_overrides[name] = {"enabled": True, "value": text, "updated_at": stamp}
        for name in to_unset:
            # Security: clear the stored value when unsetting (avoid lingering secrets on disk).
            self._env_overrides[name] = {"enabled": False, "value": "", "updated_at": stamp}

        self._save_env_overrides()
        self._env_overrides_error = None

        # Apply immediately to this gateway process environment. This is safe
        # because keys are strictly allowlisted.
        self._apply_env_overrides_to_environ_locked()

    # Return a fresh view (still metadata-only).
    return self.list_managed_env_vars()
685
+
686
+ def _apply_env_overrides_to_environ_locked(self) -> None:
687
+ for key in self._managed_env_specs.keys():
688
+ rec = self._env_overrides.get(key) if isinstance(self._env_overrides, dict) else None
689
+ if not isinstance(rec, dict):
690
+ continue
691
+ enabled0 = rec.get("enabled")
692
+ enabled = bool(enabled0) if isinstance(enabled0, bool) else True
693
+ if enabled:
694
+ os.environ[key] = str(rec.get("value") if rec.get("value") is not None else "")
695
+ else:
696
+ os.environ.pop(key, None)
697
+
698
+ def _apply_env_overrides_to_dict_locked(self, env: Dict[str, str]) -> None:
699
+ for key in self._managed_env_specs.keys():
700
+ rec = self._env_overrides.get(key) if isinstance(self._env_overrides, dict) else None
701
+ if not isinstance(rec, dict):
702
+ continue
703
+ enabled0 = rec.get("enabled")
704
+ enabled = bool(enabled0) if isinstance(enabled0, bool) else True
705
+ if enabled:
706
+ env[key] = str(rec.get("value") if rec.get("value") is not None else "")
707
+ else:
708
+ env.pop(key, None)
709
+
710
def start(self, process_id: str) -> Dict[str, Any]:
    """Launch a managed process and return its state record.

    If the refreshed state already says the process is running with an
    integer pid, that record is returned and nothing is spawned. Otherwise
    the command runs in its own session, with output appended to a new
    timestamped log file under the logs directory.

    Raises:
        KeyError: unknown process id.
        ValueError: ``kind == "self"`` process, or cwd resolving outside repo_root.
        FileNotFoundError: the resolved cwd does not exist.
    """
    ident = str(process_id or "").strip()
    if not ident or ident not in self._specs:
        raise KeyError(f"Unknown process id: {ident}")
    spec = self._specs[ident]
    if spec.kind == "self":
        raise ValueError("Cannot start a self-managed process")

    with self._lock:
        self._refresh_one_locked(ident)
        current = dict(self._state.get(ident) or {})
        reported_running = str(current.get("status") or "").strip().lower() == "running"
        if reported_running and isinstance(current.get("pid"), int):
            return dict(current)

        log_path = (self._logs_dir / f"{ident}.{_ts_compact_utc()}.log").resolve()
        log_relpath = str(log_path.relative_to(self._base_dir))

        # Resolve first so ".." segments cannot escape repo_root unnoticed.
        workdir = (self._repo_root / spec.cwd).resolve()
        try:
            workdir.relative_to(self._repo_root)
        except Exception as e:
            raise ValueError(f"Process cwd must be under repo_root: {e}")
        if not workdir.exists():
            raise FileNotFoundError(f"cwd does not exist: {workdir}")

        # Layering: inherited environment < managed overrides < spec-pinned env.
        child_env = dict(os.environ)
        self._apply_env_overrides_to_dict_locked(child_env)
        for name, val in (spec.env or {}).items():
            child_env[str(name)] = str(val)

        # Ensure the subprocess is in its own process group so we can stop it cleanly.
        log_file = open(log_path, "ab", buffering=0)
        try:
            child = subprocess.Popen(
                list(spec.command),
                cwd=str(workdir),
                env=child_env,
                stdin=subprocess.DEVNULL,
                stdout=log_file,
                stderr=subprocess.STDOUT,
                start_new_session=True,
            )
        finally:
            # The child keeps its own fd; close our handle to avoid leaking descriptors.
            try:
                log_file.close()
            except Exception:
                pass

        self._procs[ident] = child
        fresh = {
            "status": "running",
            "pid": int(child.pid),
            "started_at": _now_utc_iso(),
            "stopped_at": None,
            "exit_code": None,
            "log_relpath": log_relpath,
            "last_error": None,
        }
        self._state[ident] = fresh
        self._save_state()

        watcher = threading.Thread(target=self._watch_process, args=(ident, child), daemon=True)
        watcher.start()

        return dict(fresh)
781
+
782
def stop(self, process_id: str, *, timeout_s: float = 6.0) -> Dict[str, Any]:
    """Stop a managed process and return its updated state record.

    Sends SIGTERM to the process group (falling back to the single pid),
    waits up to ``timeout_s`` seconds (minimum 0.25), then escalates to
    SIGKILL if the pid still looks alive.

    For ids ending in ``_uat`` the target pid's command line must contain
    either the port from ``spec.url`` (as a standalone number) or the launch
    script name; otherwise nothing is signalled and an ``"error"`` state is
    returned.

    Args:
        process_id: Id of the process to stop.
        timeout_s: Grace period between SIGTERM and SIGKILL.

    Returns:
        A copy of the process state; ``status`` is ``"stopped"`` or ``"error"``.

    Raises:
        KeyError: unknown process id.
        ValueError: the process is ``kind == "self"``.
    """
    pid = str(process_id or "").strip()
    if not pid or pid not in self._specs:
        raise KeyError(f"Unknown process id: {pid}")
    spec = self._specs[pid]
    if spec.kind == "self":
        raise ValueError("Cannot stop a self-managed process (use restart or an external supervisor)")

    with self._lock:
        self._refresh_one_locked(pid)
        st = dict(self._state.get(pid) or {})
        proc_pid = st.get("pid")
        if not isinstance(proc_pid, int) or proc_pid <= 0 or str(st.get("status") or "").lower() != "running":
            # Nothing to signal: normalise the record and report it as stopped.
            st["status"] = "stopped"
            st["pid"] = None
            self._state[pid] = st
            self._save_state()
            return dict(st)

        # Safety: UAT processes are frequently restarted and operator-triggered; if state is stale or
        # the PID was re-used by an unrelated process, stopping could terminate the wrong service.
        #
        # For UAT processes, require a best-effort commandline sanity check. We accept either:
        # - the expected port (from spec.url), OR
        # - the UAT launch script name (from spec.command) when present.
        if pid.endswith("_uat"):
            expected_port = _expected_port_from_url(spec.url)
            expected_marker = ""
            try:
                last = str((spec.command or [])[-1] or "").strip()
                if last:
                    name = Path(last).name
                    if name and ("uat" in name.lower() or name.lower().endswith(".sh")):
                        expected_marker = name
            except Exception:
                expected_marker = ""

            cmdline = _pid_commandline(proc_pid)
            if not cmdline:
                st["status"] = "error"
                st["last_error"] = (
                    f"Refusing to stop pid={proc_pid}: cannot read commandline via ps "
                    f"(expected port {expected_port}, marker {expected_marker!r})"
                )
                self._state[pid] = st
                self._save_state()
                return dict(st)

            # Match the port as a whole number: a raw substring test would
            # accept e.g. "16081" or "60810" for port 6081 and defeat the check.
            port_ok = (
                isinstance(expected_port, int)
                and expected_port > 0
                and re.search(rf"(?<!\d){expected_port}(?!\d)", cmdline) is not None
            )
            marker_ok = bool(expected_marker) and expected_marker in cmdline

            if not (port_ok or marker_ok):
                st["status"] = "error"
                st["last_error"] = (
                    f"Refusing to stop pid={proc_pid}: commandline does not match expected UAT markers. "
                    f"expected_port={expected_port}, marker={expected_marker!r}, cmd={cmdline[:240]!r}"
                )
                self._state[pid] = st
                self._save_state()
                return dict(st)

        # Best-effort: terminate the process group.
        try:
            os.killpg(proc_pid, signal.SIGTERM)
        except Exception:
            try:
                os.kill(proc_pid, signal.SIGTERM)
            except Exception:
                pass

    # Wait outside the lock.
    end = time.time() + max(0.25, float(timeout_s))
    while time.time() < end:
        if not _is_pid_running(proc_pid):
            break
        time.sleep(0.05)

    # Escalate if needed.
    if _is_pid_running(proc_pid):
        try:
            os.killpg(proc_pid, signal.SIGKILL)
        except Exception:
            try:
                os.kill(proc_pid, signal.SIGKILL)
            except Exception:
                pass

    with self._lock:
        self._refresh_one_locked(pid)
        st2 = dict(self._state.get(pid) or {})
        # start() stores "stopped_at": None, so the key normally exists and
        # dict.setdefault() would never record a timestamp. Fill it whenever
        # it is still empty, but keep a value recorded elsewhere.
        if not st2.get("stopped_at"):
            st2["stopped_at"] = _now_utc_iso()
        st2["status"] = "stopped"
        st2["pid"] = None
        self._state[pid] = st2
        self._save_state()
        return dict(st2)
881
+
882
def restart(self, process_id: str) -> Dict[str, Any]:
    """Stop a managed process and start it again.

    The id is stringified and stripped before it is looked up in the spec
    table. A spec whose ``kind`` is ``"self"`` is handed to
    ``restart_self()`` instead of going through stop/start.

    Returns:
        The ``restart_self()`` result for the gateway itself; a copy of the
        ``stop()`` result when that result reports status ``"error"`` (no
        start is attempted in that case); otherwise the ``start()`` result.

    Raises:
        KeyError: no spec is registered under the given id.
        Exception: whatever ``stop()`` raised, but only for ids ending in
            ``_uat``; for every other id the failure is swallowed and the
            start is still attempted.
    """
    name = str(process_id or "").strip()
    spec = self._specs.get(name)
    if spec is None:
        raise KeyError(f"Unknown process id: {name}")
    if spec.kind == "self":
        return self.restart_self()

    try:
        stop_result = self.stop(name)
        if isinstance(stop_result, dict):
            outcome = str(stop_result.get("status") or "").strip().lower()
            if outcome == "error":
                # stop() refused or failed in a reported way: surface that instead of starting.
                return dict(stop_result)
    except Exception:
        # Continue with best-effort restart for non-UAT processes only.
        if name.endswith("_uat"):
            raise
    return self.start(name)
898
+
899
def restart_self(self) -> Dict[str, Any]:
    """Schedule a re-exec of the gateway process and acknowledge right away.

    The exec is handed to ``_schedule_gateway_execv`` with a 0.75 s delay, so
    this call returns before the process image is replaced.

    Returns:
        ``{"status": "restarting", "scheduled_at": <timestamp from _now_utc_iso()>}``.
    """
    self._schedule_gateway_execv(delay_s=0.75)
    acknowledgement: Dict[str, Any] = {
        "status": "restarting",
        "scheduled_at": _now_utc_iso(),
    }
    return acknowledgement
903
+
904
def redeploy_gateway(self) -> Dict[str, Any]:
    """Schedule a build followed by a gateway restart (best-effort).

    The work is delegated to ``_schedule_gateway_redeploy``; this method only
    acknowledges that it was scheduled.

    Returns:
        ``{"status": "redeploy_scheduled", "scheduled_at": <timestamp from _now_utc_iso()>}``.
    """
    self._schedule_gateway_redeploy()
    acknowledgement: Dict[str, Any] = {
        "status": "redeploy_scheduled",
        "scheduled_at": _now_utc_iso(),
    }
    return acknowledgement
908
+
909
def log_tail(self, process_id: str, *, max_bytes: int = 80_000) -> Dict[str, Any]:
    """Return the trailing bytes of a process log, decoded as UTF-8.

    The log location is the ``log_relpath`` entry of the process state,
    interpreted relative to ``self._base_dir``. For the id ``"gateway"`` a
    missing or blank path falls back to ``audit_log.jsonl``.

    Args:
        process_id: Id of a registered process spec (stringified and stripped).
        max_bytes: Upper bound on the number of bytes read from the end of the file.

    Returns:
        A dict with keys ``bytes`` (number of bytes read), ``truncated``
        (whether the read started past the beginning of the file),
        ``content`` (decoded text, invalid sequences replaced) and
        ``log_relpath``. An empty result (``bytes`` 0, empty ``content``) is
        returned when no log path is known, when the resolved path is not
        under the base directory (``log_relpath`` is ``None`` in both cases),
        or when the file is missing or cannot be read (``log_relpath`` kept).

    Raises:
        KeyError: the id is empty or not a registered spec.
    """
    name = str(process_id or "").strip()
    if not name or name not in self._specs:
        raise KeyError(f"Unknown process id: {name}")

    def _nothing(relpath):
        # Shared shape for every "no content available" outcome.
        return {"bytes": 0, "truncated": False, "content": "", "log_relpath": relpath}

    def _usable(value) -> bool:
        return isinstance(value, str) and bool(value.strip())

    with self._lock:
        self._refresh_one_locked(name)
        snapshot = dict(self._state.get(name) or {})

    rel = snapshot.get("log_relpath")
    # Special case: gateway "self" logs map to the audit log by default.
    if name == "gateway" and not _usable(rel):
        rel = "audit_log.jsonl"
    if not _usable(rel):
        return _nothing(None)

    # Reject anything that resolves outside the base directory.
    target = (self._base_dir / rel).resolve()
    try:
        target.relative_to(self._base_dir)
    except Exception:
        return _nothing(None)

    if not target.exists():
        return _nothing(str(rel))

    try:
        with target.open("rb") as handle:
            total = int(handle.seek(0, os.SEEK_END) or 0)
            limit = int(max_bytes)
            offset = max(0, total - limit)
            handle.seek(offset, os.SEEK_SET)
            payload = handle.read(limit)
    except Exception:
        return _nothing(str(rel))

    try:
        decoded = payload.decode("utf-8", errors="replace")
    except Exception:
        decoded = ""
    return {
        "bytes": len(payload),
        "truncated": bool(offset > 0),
        "content": decoded,
        "log_relpath": str(rel),
    }
951
+
952
+ # ----------------------------
953
+ # Internals
954
+ # ----------------------------
955
+
956
def _actions_for_spec(self, spec: ProcessSpec) -> List[str]:
    """List the action names offered for a process spec, based on its ``kind``.

    ``"self"`` gets restart/redeploy/logs, ``"service"`` and ``"task"`` get
    start/stop/restart/logs, and any other kind gets logs only.
    """
    kind = spec.kind
    if kind == "self":
        return ["restart", "redeploy", "logs"]
    if kind in {"service", "task"}:
        return ["start", "stop", "restart", "logs"]
    return ["logs"]
963
+
964
def _refresh_states_locked(self) -> None:
    """Refresh the state entry of every tracked process id.

    NOTE(review): the ``_locked`` suffix suggests the caller is expected to
    hold ``self._lock``; nothing in this method acquires or checks it.
    """
    # Snapshot the ids first: the per-id refresh writes back into self._state.
    tracked_ids = list(self._state.keys())
    for tracked_id in tracked_ids:
        self._refresh_one_locked(tracked_id)
967
+
968
def _refresh_one_locked(self, process_id: str) -> None:
    """Re-derive the status of one process from whether its recorded PID is alive.

    Works on a copy of the stored entry and always writes that copy back,
    so an id with no entry ends up with an empty dict. When the entry holds
    a positive integer ``pid``:

    * the PID is running  -> ``status`` becomes ``"running"``;
    * the PID is gone     -> ``status`` becomes ``"stopped"``, ``pid`` is
      cleared and ``stopped_at`` is filled in if it was not already set.

    Entries without a usable PID keep their fields unchanged.
    """
    entry = dict(self._state.get(process_id) or {})
    recorded_pid = entry.get("pid")
    has_pid = isinstance(recorded_pid, int) and recorded_pid > 0
    if has_pid and _is_pid_running(recorded_pid):
        entry["status"] = "running"
    elif has_pid:
        entry["status"] = "stopped"
        entry["pid"] = None
        entry.setdefault("stopped_at", _now_utc_iso())
    self._state[process_id] = entry
979
+
980
def _watch_process(self, process_id: str, proc: subprocess.Popen[bytes]) -> None:
    """Block until ``proc`` exits, then record the exit in the process state.

    Under ``self._lock`` the entry for ``process_id`` is marked
    ``"stopped"``, its ``pid`` is cleared, ``stopped_at`` is set to the
    current timestamp and ``exit_code`` is set to the integer return code
    (the previously stored ``exit_code`` is kept when waiting failed or did
    not yield an int). The id is removed from ``self._procs`` and the state
    is saved; a failure to save is ignored.
    """
    exit_status: Optional[int] = None
    try:
        exit_status = proc.wait()
    except Exception:
        exit_status = None
    finally:
        with self._lock:
            entry = dict(self._state.get(process_id) or {})
            if isinstance(exit_status, int):
                final_code = int(exit_status)
            else:
                # No usable return code: keep whatever was recorded before.
                final_code = entry.get("exit_code")
            entry.update(
                status="stopped",
                pid=None,
                exit_code=final_code,
                stopped_at=_now_utc_iso(),
            )
            self._state[process_id] = entry
            self._procs.pop(process_id, None)
            try:
                self._save_state()
            except Exception:
                pass
999
+
1000
def _schedule_gateway_execv(self, *, delay_s: float) -> None:
    """Replace the running gateway process via ``os.execv`` after a delay.

    The work runs on a daemon thread so the caller returns immediately.
    After sleeping ``delay_s`` seconds (negative values are treated as 0) the
    thread:

    1. applies env overrides under ``self._lock`` (errors are ignored);
    2. re-executes ``sys.argv[0]`` with the original argv when that path
       exists and is executable;
    3. otherwise re-executes ``python -m abstractgateway.cli`` with the
       original arguments;
    4. as a last resort calls ``os._exit(0)`` and leaves the restart to an
       external supervisor.
    """

    def _reexec() -> None:
        time.sleep(max(0.0, float(delay_s)))
        try:
            with self._lock:
                self._apply_env_overrides_to_environ_locked()
        except Exception:
            pass

        args = list(sys.argv)
        entrypoint = args[0] if args else ""

        # Prefer re-exec of the original entrypoint when possible.
        try:
            runnable = bool(entrypoint) and os.path.exists(entrypoint) and os.access(entrypoint, os.X_OK)
            if runnable:
                os.execv(entrypoint, args)
                return
        except Exception:
            pass

        # Fallback: execute as a module (keeps compatibility with `python -m`).
        module_argv = [sys.executable, "-m", "abstractgateway.cli"] + args[1:]
        try:
            os.execv(sys.executable, module_argv)
        except Exception:
            # Last resort: exit (requires external supervisor).
            os._exit(0)

    worker = threading.Thread(target=_reexec, daemon=True)
    worker.start()
1026
+
1027
def _schedule_gateway_redeploy(self) -> None:
    """Run the ``"build"`` process, then re-exec the gateway if it succeeds.

    Everything happens on a daemon thread so the caller returns immediately.
    The thread starts the build, polls its state once per second for up to
    one hour, and as soon as the build is no longer ``"running"`` schedules
    a gateway re-exec only when the recorded ``exit_code`` is the integer 0.

    If ``start("build")`` raises and no build is currently running, the
    redeploy is abandoned: polling would otherwise read the ``exit_code``
    left behind by an earlier build and could restart the gateway although
    nothing new was built.
    """

    def _build_is_running() -> bool:
        with self._lock:
            current = dict(self._state.get("build") or {})
        return str(current.get("status") or "").lower() == "running"

    def _build_then_restart() -> None:
        try:
            self.start("build")
        except Exception:
            # Keep going only if a build is running anyway (for example one
            # that was already in progress); otherwise there is nothing to wait for.
            if not _build_is_running():
                return

        # Wait for build to finish by polling state (works across the monitor thread).
        for _ in range(60 * 60):  # up to 1h
            time.sleep(1.0)
            with self._lock:
                cur = dict(self._state.get("build") or {})
            if str(cur.get("status") or "").lower() == "running":
                continue
            exit_code = cur.get("exit_code")
            if isinstance(exit_code, int) and exit_code == 0:
                self._schedule_gateway_execv(delay_s=0.75)
            return

    worker = threading.Thread(target=_build_then_restart, daemon=True)
    worker.start()
1047
+
1048
+
1049
# Process-wide singleton and the lock that guards its creation/replacement.
_PROCESS_MANAGER: Optional[ProcessManager] = None
_PROCESS_MANAGER_LOCK = threading.Lock()


def get_process_manager(*, base_dir: Path, repo_root: Path) -> ProcessManager:
    """Return the shared ``ProcessManager`` for the given directories.

    Both paths are user-expanded and resolved. The cached manager is reused
    when its ``base_dir`` and ``repo_root`` equal the resolved values;
    otherwise process specs are loaded for the repo root and a new manager
    is built, replacing the cached one.
    """
    global _PROCESS_MANAGER
    resolved_base = Path(base_dir).expanduser().resolve()
    resolved_repo = Path(repo_root).expanduser().resolve()
    with _PROCESS_MANAGER_LOCK:
        cached = _PROCESS_MANAGER
        if (
            cached is not None
            and cached.base_dir == resolved_base
            and cached.repo_root == resolved_repo
        ):
            return cached

        manager = ProcessManager(
            base_dir=resolved_base,
            repo_root=resolved_repo,
            specs=load_process_specs(repo_root=resolved_repo),
        )
        _PROCESS_MANAGER = manager
        return manager