loki-mode 6.60.0 → 6.61.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Multi-agent autonomous startup system. Triggers on "Loki Mode". Takes PRD to deployed product with minimal human intervention. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v6.60.0
6
+ # Loki Mode v6.61.0
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -267,4 +267,4 @@ The following features are documented in skill modules but not yet fully automat
267
267
  | Quality gates 3-reviewer system | Implemented (v5.35.0) | 5 specialist reviewers in `skills/quality-gates.md`; execution in run.sh |
268
268
  | Benchmarks (HumanEval, SWE-bench) | Infrastructure only | Runner scripts and datasets exist in `benchmarks/`; no published results |
269
269
 
270
- **v6.60.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
270
+ **v6.61.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 6.60.0
1
+ 6.61.0
package/autonomy/loki CHANGED
@@ -5741,7 +5741,10 @@ cmd_doctor() {
5741
5741
  echo -e " ${GREEN}PASS${NC} $sname ${DIM}$short_path${NC}"
5742
5742
  pass_count=$((pass_count + 1))
5743
5743
  elif [ -L "$sdir" ]; then
5744
- echo -e " ${RED}FAIL${NC} $sname ${DIM}(broken symlink at $short_path)${NC}"
5744
+ local _target
5745
+ _target=$(readlink "$sdir" 2>/dev/null || echo "unknown")
5746
+ echo -e " ${RED}FAIL${NC} $sname ${DIM}(broken symlink -> $_target)${NC}"
5747
+ echo -e " ${YELLOW}Fix: loki setup-skill${NC}"
5745
5748
  fail_count=$((fail_count + 1))
5746
5749
  else
5747
5750
  echo -e " ${YELLOW}WARN${NC} $sname ${DIM}(not found - run 'loki setup-skill')${NC}"
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "6.60.0"
10
+ __version__ = "6.61.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try:
@@ -4,10 +4,13 @@ Database setup for Loki Mode Dashboard.
4
4
  Uses SQLAlchemy 2.0 with async support and SQLite.
5
5
  """
6
6
 
7
+ import logging
7
8
  import os
8
9
  from contextlib import asynccontextmanager
9
10
  from typing import AsyncGenerator
10
11
 
12
+ logger = logging.getLogger(__name__)
13
+
11
14
  from sqlalchemy.ext.asyncio import (
12
15
  AsyncSession,
13
16
  async_sessionmaker,
@@ -40,11 +43,14 @@ async_session_factory = async_sessionmaker(
40
43
 
41
44
  async def init_db() -> None:
42
45
  """Initialize the database, creating all tables."""
43
- # Ensure database directory exists
44
46
  os.makedirs(DATABASE_DIR, exist_ok=True)
45
-
46
- async with engine.begin() as conn:
47
- await conn.run_sync(Base.metadata.create_all)
47
+ try:
48
+ async with engine.begin() as conn:
49
+ await conn.run_sync(Base.metadata.create_all)
50
+ logger.info("Database initialized at %s", DATABASE_PATH)
51
+ except Exception as exc:
52
+ logger.error("Database initialization failed: %s", exc, exc_info=True)
53
+ raise
48
54
 
49
55
 
50
56
  async def close_db() -> None:
@@ -52,6 +58,17 @@ async def close_db() -> None:
52
58
  await engine.dispose()
53
59
 
54
60
 
61
+ async def check_db_health() -> bool:
62
+ """Check if the database is accessible."""
63
+ try:
64
+ async with async_session_factory() as session:
65
+ from sqlalchemy import text
66
+ await session.execute(text("SELECT 1"))
67
+ return True
68
+ except Exception:
69
+ return False
70
+
71
+
55
72
  @asynccontextmanager
56
73
  async def get_session() -> AsyncGenerator[AsyncSession, None]:
57
74
  """Get an async database session."""
@@ -30,7 +30,7 @@ from fastapi import (
30
30
  )
31
31
  from fastapi.middleware.cors import CORSMiddleware
32
32
  from fastapi.responses import JSONResponse, PlainTextResponse
33
- from pydantic import BaseModel, Field, field_validator
33
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
34
34
  from sqlalchemy import select, update, delete
35
35
  from sqlalchemy.ext.asyncio import AsyncSession
36
36
  from sqlalchemy.orm import selectinload
@@ -180,6 +180,8 @@ class ProjectUpdate(BaseModel):
180
180
 
181
181
  class ProjectResponse(BaseModel):
182
182
  """Schema for project response."""
183
+ model_config = ConfigDict(from_attributes=True)
184
+
183
185
  id: int
184
186
  name: str
185
187
  description: Optional[str]
@@ -190,9 +192,6 @@ class ProjectResponse(BaseModel):
190
192
  task_count: int = 0
191
193
  completed_task_count: int = 0
192
194
 
193
- class Config:
194
- from_attributes = True
195
-
196
195
 
197
196
  class TaskCreate(BaseModel):
198
197
  """Schema for creating a task."""
@@ -231,6 +230,8 @@ class TaskMove(BaseModel):
231
230
 
232
231
  class TaskResponse(BaseModel):
233
232
  """Schema for task response."""
233
+ model_config = ConfigDict(from_attributes=True)
234
+
234
235
  id: int
235
236
  project_id: int
236
237
  title: str
@@ -246,9 +247,6 @@ class TaskResponse(BaseModel):
246
247
  updated_at: datetime
247
248
  completed_at: Optional[datetime]
248
249
 
249
- class Config:
250
- from_attributes = True
251
-
252
250
 
253
251
  class SessionInfo(BaseModel):
254
252
  """Info about a single running session."""
@@ -416,7 +414,12 @@ async def _push_loki_state_loop() -> None:
416
414
  async def lifespan(app: FastAPI):
417
415
  """Application lifespan handler."""
418
416
  # Startup
419
- await init_db()
417
+ try:
418
+ await init_db()
419
+ app.state.db_available = True
420
+ except Exception as exc:
421
+ logger.error("Database init failed: %s -- DB routes will return 503", exc)
422
+ app.state.db_available = False
420
423
  _telemetry.send_telemetry("dashboard_start")
421
424
  push_task = asyncio.create_task(_push_loki_state_loop())
422
425
  yield
@@ -723,50 +726,54 @@ async def list_projects(
723
726
  db: AsyncSession = Depends(get_db),
724
727
  ) -> list[ProjectResponse]:
725
728
  """List projects with pagination. Does not eager-load tasks for efficiency."""
726
- from sqlalchemy import func as sa_func
727
-
728
- query = select(Project)
729
- if status:
730
- query = query.where(Project.status == status)
731
- query = query.order_by(Project.created_at.desc()).offset(offset).limit(limit)
732
-
733
- result = await db.execute(query)
734
- projects = result.scalars().all()
735
-
736
- # Batch-fetch task counts instead of N+1 eager loading
737
- project_ids = [p.id for p in projects]
738
- response = []
739
- if project_ids:
740
- count_query = (
741
- select(
742
- Task.project_id,
743
- sa_func.count().label("total"),
744
- sa_func.count().filter(Task.status == TaskStatus.DONE).label("done"),
729
+ try:
730
+ from sqlalchemy import func as sa_func
731
+
732
+ query = select(Project)
733
+ if status:
734
+ query = query.where(Project.status == status)
735
+ query = query.order_by(Project.created_at.desc()).offset(offset).limit(limit)
736
+
737
+ result = await db.execute(query)
738
+ projects = result.scalars().all()
739
+
740
+ # Batch-fetch task counts instead of N+1 eager loading
741
+ project_ids = [p.id for p in projects]
742
+ response = []
743
+ if project_ids:
744
+ count_query = (
745
+ select(
746
+ Task.project_id,
747
+ sa_func.count().label("total"),
748
+ sa_func.count().filter(Task.status == TaskStatus.DONE).label("done"),
749
+ )
750
+ .where(Task.project_id.in_(project_ids))
751
+ .group_by(Task.project_id)
745
752
  )
746
- .where(Task.project_id.in_(project_ids))
747
- .group_by(Task.project_id)
748
- )
749
- count_result = await db.execute(count_query)
750
- counts = {row.project_id: (row.total, row.done) for row in count_result}
751
- else:
752
- counts = {}
753
-
754
- for project in projects:
755
- total, done = counts.get(project.id, (0, 0))
756
- response.append(
757
- ProjectResponse(
758
- id=project.id,
759
- name=project.name,
760
- description=project.description,
761
- prd_path=project.prd_path,
762
- status=project.status,
763
- created_at=project.created_at,
764
- updated_at=project.updated_at,
765
- task_count=total,
766
- completed_task_count=done,
753
+ count_result = await db.execute(count_query)
754
+ counts = {row.project_id: (row.total, row.done) for row in count_result}
755
+ else:
756
+ counts = {}
757
+
758
+ for project in projects:
759
+ total, done = counts.get(project.id, (0, 0))
760
+ response.append(
761
+ ProjectResponse(
762
+ id=project.id,
763
+ name=project.name,
764
+ description=project.description,
765
+ prd_path=project.prd_path,
766
+ status=project.status,
767
+ created_at=project.created_at,
768
+ updated_at=project.updated_at,
769
+ task_count=total,
770
+ completed_task_count=done,
771
+ )
767
772
  )
768
- )
769
- return response
773
+ return response
774
+ except Exception as exc:
775
+ logger.error("Failed to list projects: %s", exc, exc_info=True)
776
+ raise HTTPException(status_code=500, detail="Database query failed") from exc
770
777
 
771
778
 
772
779
  @app.post("/api/projects", response_model=ProjectResponse, status_code=201, dependencies=[Depends(auth.require_scope("control"))])
@@ -2133,7 +2140,7 @@ def _get_memory_storage():
2133
2140
  @app.get("/api/memory/search")
2134
2141
  async def search_memory(
2135
2142
  q: str = Query(..., min_length=1, max_length=500, description="Search query"),
2136
- collection: str = Query(default="all", regex="^(episodes|patterns|skills|all)$"),
2143
+ collection: str = Query(default="all", pattern="^(episodes|patterns|skills|all)$"),
2137
2144
  limit: int = Query(default=20, ge=1, le=100),
2138
2145
  ):
2139
2146
  """Full-text search across memory using FTS5."""
@@ -2,7 +2,7 @@
2
2
 
3
3
  The flagship product of [Autonomi](https://www.autonomi.dev/). Complete installation instructions for all platforms and use cases.
4
4
 
5
- **Version:** v6.60.0
5
+ **Version:** v6.61.0
6
6
 
7
7
  ---
8
8
 
package/mcp/__init__.py CHANGED
@@ -57,4 +57,4 @@ try:
57
57
  except ImportError:
58
58
  __all__ = ['mcp']
59
59
 
60
- __version__ = '6.60.0'
60
+ __version__ = '6.61.0'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "loki-mode",
3
- "version": "6.60.0",
3
+ "version": "6.61.0",
4
4
  "description": "Loki Mode by Autonomi - Multi-agent autonomous startup system for Claude Code, Codex CLI, and Gemini CLI",
5
5
  "keywords": [
6
6
  "agent",
package/web-app/server.py CHANGED
@@ -36,7 +36,9 @@ from fastapi.middleware.cors import CORSMiddleware
36
36
  from fastapi.responses import FileResponse, JSONResponse, Response
37
37
  from starlette.responses import StreamingResponse
38
38
  from fastapi.staticfiles import StaticFiles
39
- from pydantic import BaseModel
39
+ import shlex
40
+
41
+ from pydantic import BaseModel, field_validator
40
42
 
41
43
  logger = logging.getLogger("purple-lab")
42
44
 
@@ -46,6 +48,8 @@ logger = logging.getLogger("purple-lab")
46
48
 
47
49
  HOST = os.environ.get("PURPLE_LAB_HOST", "127.0.0.1")
48
50
  PORT = int(os.environ.get("PURPLE_LAB_PORT", "57375"))
51
+ MAX_WS_CLIENTS = int(os.environ.get("PURPLE_LAB_MAX_WS_CLIENTS", "50"))
52
+ MAX_TERMINAL_PTYS = int(os.environ.get("PURPLE_LAB_MAX_TERMINALS", "20"))
49
53
 
50
54
  # Resolve paths
51
55
  SCRIPT_DIR = Path(__file__).resolve().parent
@@ -107,11 +111,15 @@ _cors_origins = (
107
111
  else _default_cors_origins
108
112
  )
109
113
 
114
+ if "*" in _cors_origins:
115
+ logger.warning("CORS wildcard '*' detected -- restricting to localhost for security")
116
+ _cors_origins = _default_cors_origins
117
+
110
118
  app.add_middleware(
111
119
  CORSMiddleware,
112
120
  allow_origins=_cors_origins,
113
- allow_methods=["*"],
114
- allow_headers=["*"],
121
+ allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
122
+ allow_headers=["Content-Type", "Authorization", "X-Requested-With", "Accept"],
115
123
  )
116
124
 
117
125
  # ---------------------------------------------------------------------------
@@ -162,31 +170,36 @@ class SessionState:
162
170
 
163
171
 
164
172
  def _kill_tracked_child_processes() -> None:
165
- """Kill only processes that Purple Lab started, not external loki sessions."""
166
- import subprocess as _sp
173
+ """Kill all tracked child processes and their process groups."""
167
174
  tracked = _get_tracked_child_pids()
168
175
  if not tracked:
169
176
  return
170
177
 
178
+ # SIGTERM to process groups first
171
179
  for pid in tracked:
172
180
  try:
173
- # Kill the entire process tree (children first, then parent)
174
- _sp.run(["pkill", "-TERM", "-P", str(pid)],
175
- capture_output=True, timeout=5)
176
- os.kill(pid, signal.SIGTERM)
181
+ pgid = os.getpgid(pid)
182
+ os.killpg(pgid, signal.SIGTERM)
177
183
  except (ProcessLookupError, PermissionError, OSError):
178
- pass
184
+ try:
185
+ os.kill(pid, signal.SIGTERM)
186
+ except (ProcessLookupError, PermissionError, OSError):
187
+ pass
179
188
 
180
- # Wait briefly then SIGKILL survivors
181
- import time as _time
182
- _time.sleep(2)
189
+ # Wait briefly for graceful shutdown
190
+ time.sleep(2)
191
+
192
+ # SIGKILL anything still running
183
193
  for pid in tracked:
184
194
  try:
185
- _sp.run(["pkill", "-9", "-P", str(pid)],
186
- capture_output=True, timeout=5)
187
- os.kill(pid, signal.SIGKILL)
195
+ os.kill(pid, 0) # Check if still alive
196
+ try:
197
+ pgid = os.getpgid(pid)
198
+ os.killpg(pgid, signal.SIGKILL)
199
+ except (ProcessLookupError, PermissionError, OSError):
200
+ os.kill(pid, signal.SIGKILL)
188
201
  except (ProcessLookupError, PermissionError, OSError):
189
- pass
202
+ pass # Already dead
190
203
 
191
204
  _clear_tracked_pids()
192
205
 
@@ -306,6 +319,16 @@ class SecretRequest(BaseModel):
306
319
  class DevServerStartRequest(BaseModel):
307
320
  command: Optional[str] = None
308
321
 
322
+ @field_validator("command")
323
+ @classmethod
324
+ def validate_command(cls, v: Optional[str]) -> Optional[str]:
325
+ if v is None:
326
+ return v
327
+ dangerous = set(';|`$(){}<>\n\r')
328
+ if any(c in dangerous for c in v):
329
+ raise ValueError("Command contains disallowed shell characters")
330
+ return v.strip()
331
+
309
332
 
310
333
  # ---------------------------------------------------------------------------
311
334
  # File Watcher (watchdog-based, broadcasts changes via WebSocket)
@@ -627,7 +650,8 @@ class DevServerManager:
627
650
  py_file = root / py_entry
628
651
  if py_file.exists():
629
652
  try:
630
- src = py_file.read_text(errors="replace")
653
+ with open(py_file, "r", errors="replace") as f:
654
+ src = f.read(1024)
631
655
  if "fastapi" in src.lower() or "FastAPI" in src:
632
656
  module = py_entry[:-3]
633
657
  return {"command": f"uvicorn {module}:app --reload --port 8000",
@@ -713,15 +737,36 @@ class DevServerManager:
713
737
  expected_port = detected["expected_port"] if detected else 3000
714
738
  framework = detected["framework"] if detected else "unknown"
715
739
 
716
- # Auto-install dependencies if needed
740
+ # Auto-install dependencies before starting the dev server
717
741
  actual_path = Path(actual_dir)
718
742
  needs_npm = (actual_path / "package.json").exists() and not (actual_path / "node_modules").exists()
719
743
  needs_pip = (actual_path / "requirements.txt").exists() and not (actual_path / "venv").exists()
744
+
745
+ build_env = {**os.environ}
746
+ build_env.update(_load_secrets())
747
+
720
748
  if needs_npm:
721
- # Prepend npm install to the command
722
- cmd_str = f"npm install && {cmd_str}"
749
+ try:
750
+ subprocess.run(
751
+ ["npm", "install"],
752
+ cwd=actual_dir,
753
+ capture_output=True,
754
+ timeout=120,
755
+ env=build_env,
756
+ )
757
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
758
+ logger.warning("npm install failed: %s", exc)
759
+
723
760
  if needs_pip:
724
- cmd_str = f"pip install -r requirements.txt && {cmd_str}"
761
+ try:
762
+ subprocess.run(
763
+ [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
764
+ cwd=actual_dir,
765
+ capture_output=True,
766
+ timeout=120,
767
+ )
768
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
769
+ logger.warning("pip install failed: %s", exc)
725
770
 
726
771
  # Check if portless is available and proxy is running
727
772
  use_portless = False
@@ -729,18 +774,17 @@ class DevServerManager:
729
774
  if self._has_portless() and self._ensure_portless_proxy():
730
775
  portless_app_name = self._portless_app_name(session_id)
731
776
  use_portless = True
732
- # Wrap the command with portless
733
- effective_cmd = f"portless {portless_app_name} {cmd_str}"
734
- else:
735
- effective_cmd = cmd_str
736
777
 
737
- build_env = {**os.environ}
738
- build_env.update(_load_secrets())
778
+ # Build command as list (no shell=True needed)
779
+ if use_portless and portless_app_name:
780
+ cmd_parts = ["portless", portless_app_name] + shlex.split(cmd_str)
781
+ else:
782
+ cmd_parts = shlex.split(cmd_str)
739
783
 
740
784
  try:
741
785
  proc = subprocess.Popen(
742
- effective_cmd,
743
- shell=True,
786
+ cmd_parts,
787
+ shell=False,
744
788
  stdout=subprocess.PIPE,
745
789
  stderr=subprocess.STDOUT,
746
790
  stdin=subprocess.DEVNULL,
@@ -753,6 +797,9 @@ class DevServerManager:
753
797
  except Exception as e:
754
798
  return {"status": "error", "message": f"Failed to start: {e}"}
755
799
 
800
+ _track_child_pid(proc.pid)
801
+
802
+ effective_cmd = " ".join(cmd_parts)
756
803
  server_info: dict = {
757
804
  "process": proc,
758
805
  "port": None,
@@ -861,22 +908,36 @@ class DevServerManager:
861
908
  if detected_port:
862
909
  info["port"] = detected_port
863
910
  except Exception:
864
- pass
911
+ logger.error("Dev server monitor failed for session %s", session_id, exc_info=True)
865
912
  finally:
866
913
  # Process exited -- mark as error if it was still starting or running
867
914
  if info.get("status") in ("starting", "running"):
868
915
  info["status"] = "error"
869
- # Auto-fix: trigger error repair if under circuit breaker limit
916
+ # Auto-fix with exponential backoff and circuit breaker
870
917
  attempts = info.get("auto_fix_attempts", 0)
871
- if attempts < 3:
918
+ now = time.time()
919
+ timestamps = info.get("auto_fix_timestamps", [])
920
+ recent = [t for t in timestamps if now - t < 300]
921
+
922
+ if len(recent) >= 3:
923
+ info["auto_fix_status"] = "circuit breaker open (3 failures in 5 min)"
924
+ logger.warning("Auto-fix circuit breaker open for session %s", session_id)
925
+ elif attempts < 3:
872
926
  info["auto_fix_attempts"] = attempts + 1
927
+ timestamps.append(now)
928
+ info["auto_fix_timestamps"] = timestamps
929
+ backoff_seconds = 5 * (3 ** attempts)
873
930
  error_context = "\n".join(info.get("output_lines", [])[-30:])
931
+
932
+ async def _delayed_auto_fix():
933
+ await asyncio.sleep(backoff_seconds)
934
+ await self._auto_fix(session_id, error_context)
935
+
874
936
  try:
875
- asyncio.ensure_future(
876
- self._auto_fix(session_id, error_context)
877
- )
937
+ task = asyncio.ensure_future(_delayed_auto_fix())
938
+ info["_auto_fix_task"] = task
878
939
  except Exception:
879
- pass
940
+ logger.warning("Failed to schedule auto-fix for session %s", session_id, exc_info=True)
880
941
 
881
942
  async def _auto_fix(self, session_id: str, error_context: str) -> None:
882
943
  """Auto-fix a crashed dev server by invoking loki quick with the error."""
@@ -970,6 +1031,11 @@ class DevServerManager:
970
1031
  if not info:
971
1032
  return {"stopped": False, "message": "No dev server running"}
972
1033
 
1034
+ # Cancel any pending auto-fix task
1035
+ fix_task = info.get("_auto_fix_task")
1036
+ if fix_task and not fix_task.done():
1037
+ fix_task.cancel()
1038
+
973
1039
  # For Docker containers, run docker compose down
974
1040
  if info.get("framework") == "docker":
975
1041
  try:
@@ -986,7 +1052,7 @@ class DevServerManager:
986
1052
  cwd=project_dir,
987
1053
  capture_output=True, timeout=30,
988
1054
  )
989
- except Exception:
1055
+ except (ProcessLookupError, PermissionError, OSError):
990
1056
  pass
991
1057
 
992
1058
  proc = info["process"]
@@ -998,12 +1064,12 @@ class DevServerManager:
998
1064
  except (ProcessLookupError, PermissionError, OSError):
999
1065
  try:
1000
1066
  proc.terminate()
1001
- except Exception:
1067
+ except (ProcessLookupError, PermissionError, OSError):
1002
1068
  pass
1003
1069
  else:
1004
1070
  try:
1005
1071
  proc.terminate()
1006
- except Exception:
1072
+ except (ProcessLookupError, PermissionError, OSError):
1007
1073
  pass
1008
1074
  try:
1009
1075
  proc.wait(timeout=5)
@@ -1015,14 +1081,15 @@ class DevServerManager:
1015
1081
  except (ProcessLookupError, PermissionError, OSError):
1016
1082
  try:
1017
1083
  proc.kill()
1018
- except Exception:
1084
+ except (ProcessLookupError, PermissionError, OSError):
1019
1085
  pass
1020
1086
  else:
1021
1087
  try:
1022
1088
  proc.kill()
1023
- except Exception:
1089
+ except (ProcessLookupError, PermissionError, OSError):
1024
1090
  pass
1025
1091
 
1092
+ _untrack_child_pid(proc.pid)
1026
1093
  return {"stopped": True, "message": "Dev server stopped"}
1027
1094
 
1028
1095
  async def status(self, session_id: str) -> dict:
@@ -1132,6 +1199,7 @@ async def _broadcast(msg: dict) -> None:
1132
1199
  try:
1133
1200
  await ws.send_text(data)
1134
1201
  except Exception:
1202
+ logger.debug("WebSocket send failed for client", exc_info=True)
1135
1203
  dead.append(ws)
1136
1204
  for ws in dead:
1137
1205
  session.ws_clients.discard(ws)
@@ -1160,7 +1228,7 @@ async def _read_process_output() -> None:
1160
1228
  "data": {"line": text, "timestamp": time.strftime("%H:%M:%S")},
1161
1229
  })
1162
1230
  except Exception:
1163
- pass
1231
+ logger.error("Process output reader failed", exc_info=True)
1164
1232
  finally:
1165
1233
  # Process ended
1166
1234
  session.running = False
@@ -2669,6 +2737,7 @@ async def chat_session(session_id: str, req: ChatRequest) -> JSONResponse:
2669
2737
  start_new_session=True,
2670
2738
  )
2671
2739
  task.process = proc
2740
+ _track_child_pid(proc.pid)
2672
2741
  loop = asyncio.get_running_loop()
2673
2742
 
2674
2743
  def _read_lines() -> None:
@@ -3877,6 +3946,9 @@ async def _push_state_to_client(ws: WebSocket) -> None:
3877
3946
  @app.websocket("/ws")
3878
3947
  async def websocket_endpoint(ws: WebSocket) -> None:
3879
3948
  """Real-time stream of loki output and events."""
3949
+ if len(session.ws_clients) >= MAX_WS_CLIENTS:
3950
+ await ws.close(code=1013, reason="Too many connections")
3951
+ return
3880
3952
  await ws.accept()
3881
3953
  session.ws_clients.add(ws)
3882
3954
 
@@ -3953,6 +4025,9 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
3953
4025
  reconnect or second browser tab). Only kills the PTY when the *last*
3954
4026
  WebSocket client for this session disconnects.
3955
4027
  """
4028
+ if len(_terminal_ptys) >= MAX_TERMINAL_PTYS and session_id not in _terminal_ptys:
4029
+ await ws.close(code=1013, reason="Too many terminal sessions")
4030
+ return
3956
4031
  await ws.accept()
3957
4032
 
3958
4033
  if not HAS_PEXPECT:
@@ -4090,7 +4165,7 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
4090
4165
  except WebSocketDisconnect:
4091
4166
  pass
4092
4167
  except Exception:
4093
- pass
4168
+ logger.error("Terminal WebSocket error for session %s", session_id, exc_info=True)
4094
4169
  finally:
4095
4170
  reader_task.cancel()
4096
4171
  try: