loki-mode 6.60.0 → 6.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/SKILL.md +2 -2
  2. package/VERSION +1 -1
  3. package/autonomy/app-runner.sh +34 -8
  4. package/autonomy/completion-council.sh +70 -32
  5. package/autonomy/issue-parser.sh +4 -7
  6. package/autonomy/loki +238 -119
  7. package/autonomy/notification-checker.py +49 -23
  8. package/autonomy/run.sh +162 -79
  9. package/autonomy/sandbox.sh +91 -24
  10. package/bin/loki-mode.js +1 -2
  11. package/bin/postinstall.js +10 -4
  12. package/dashboard/__init__.py +1 -1
  13. package/dashboard/control.py +46 -36
  14. package/dashboard/database.py +21 -4
  15. package/dashboard/server.py +107 -78
  16. package/docs/BUG-AUDIT-v6.61.0.md +957 -0
  17. package/docs/INSTALLATION.md +2 -2
  18. package/events/bus.py +129 -28
  19. package/events/bus.ts +41 -27
  20. package/events/emit.sh +1 -1
  21. package/integrations/openclaw/README.md +139 -0
  22. package/integrations/openclaw/SKILL.md +88 -0
  23. package/integrations/openclaw/bridge/__init__.py +1 -0
  24. package/integrations/openclaw/bridge/__main__.py +88 -0
  25. package/integrations/openclaw/bridge/schema_map.py +180 -0
  26. package/integrations/openclaw/bridge/watcher.py +100 -0
  27. package/integrations/openclaw/scripts/format-progress.sh +80 -0
  28. package/integrations/openclaw/scripts/poll-status.sh +74 -0
  29. package/integrations/vibe-kanban.md +289 -0
  30. package/mcp/__init__.py +1 -1
  31. package/mcp/server.py +96 -73
  32. package/memory/consolidation.py +21 -6
  33. package/memory/engine.py +53 -26
  34. package/memory/layers/index_layer.py +16 -3
  35. package/memory/layers/timeline_layer.py +16 -3
  36. package/memory/retrieval.py +4 -1
  37. package/memory/schemas.py +4 -2
  38. package/memory/storage.py +25 -4
  39. package/memory/token_economics.py +9 -2
  40. package/memory/vector_index.py +2 -2
  41. package/package.json +3 -1
  42. package/providers/cline.sh +5 -4
  43. package/providers/codex.sh +27 -5
  44. package/providers/gemini.sh +59 -23
  45. package/providers/loader.sh +3 -2
  46. package/skills/parallel-workflows.md +9 -7
  47. package/state/__init__.py +10 -0
  48. package/state/index.ts +18 -0
  49. package/state/manager.py +1801 -0
  50. package/state/manager.ts +1774 -0
  51. package/state/sqlite_backend.py +188 -0
  52. package/state/test_manager.py +703 -0
  53. package/state/test_manager.ts +366 -0
  54. package/templates/README.md +19 -4
  55. package/templates/dashboard.md +45 -0
  56. package/templates/data-pipeline.md +45 -0
  57. package/templates/game.md +48 -0
  58. package/templates/microservice.md +49 -0
  59. package/templates/npm-library.md +42 -0
  60. package/templates/rest-api.md +170 -33
  61. package/templates/slack-bot.md +48 -0
  62. package/templates/web-scraper.md +45 -0
  63. package/web-app/server.py +360 -191
  64. package/templates/saas-app.md +0 -42
package/web-app/server.py CHANGED
@@ -36,7 +36,9 @@ from fastapi.middleware.cors import CORSMiddleware
36
36
  from fastapi.responses import FileResponse, JSONResponse, Response
37
37
  from starlette.responses import StreamingResponse
38
38
  from fastapi.staticfiles import StaticFiles
39
- from pydantic import BaseModel
39
+ import shlex
40
+
41
+ from pydantic import BaseModel, field_validator
40
42
 
41
43
  logger = logging.getLogger("purple-lab")
42
44
 
@@ -46,6 +48,8 @@ logger = logging.getLogger("purple-lab")
46
48
 
47
49
  HOST = os.environ.get("PURPLE_LAB_HOST", "127.0.0.1")
48
50
  PORT = int(os.environ.get("PURPLE_LAB_PORT", "57375"))
51
+ MAX_WS_CLIENTS = int(os.environ.get("PURPLE_LAB_MAX_WS_CLIENTS", "50"))
52
+ MAX_TERMINAL_PTYS = int(os.environ.get("PURPLE_LAB_MAX_TERMINALS", "20"))
49
53
 
50
54
  # Resolve paths
51
55
  SCRIPT_DIR = Path(__file__).resolve().parent
@@ -92,6 +96,7 @@ async def lifespan(app: FastAPI):
92
96
  except Exception:
93
97
  pass
94
98
  _terminal_ptys.clear()
99
+ _terminal_ws_clients.clear()
95
100
 
96
101
 
97
102
  app = FastAPI(title="Purple Lab", docs_url=None, redoc_url=None, lifespan=lifespan)
@@ -107,11 +112,15 @@ _cors_origins = (
107
112
  else _default_cors_origins
108
113
  )
109
114
 
115
+ if "*" in _cors_origins:
116
+ logger.warning("CORS wildcard '*' detected -- restricting to localhost for security")
117
+ _cors_origins = _default_cors_origins
118
+
110
119
  app.add_middleware(
111
120
  CORSMiddleware,
112
121
  allow_origins=_cors_origins,
113
- allow_methods=["*"],
114
- allow_headers=["*"],
122
+ allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
123
+ allow_headers=["Content-Type", "Authorization", "X-Requested-With", "Accept"],
115
124
  )
116
125
 
117
126
  # ---------------------------------------------------------------------------
@@ -125,11 +134,13 @@ class SessionState:
125
134
  def __init__(self) -> None:
126
135
  self.process: Optional[subprocess.Popen] = None
127
136
  self.running = False
137
+ self.paused = False
128
138
  self.provider = ""
129
139
  self.prd_text = ""
130
140
  self.project_dir = ""
131
141
  self.start_time: float = 0
132
142
  self.log_lines: list[str] = []
143
+ self.log_lines_total: int = 0 # absolute count of all lines ever appended
133
144
  self.ws_clients: set[WebSocket] = set()
134
145
  self._reader_task: Optional[asyncio.Task] = None
135
146
  self._lock = asyncio.Lock()
@@ -154,39 +165,46 @@ class SessionState:
154
165
  def reset(self) -> None:
155
166
  self.process = None
156
167
  self.running = False
168
+ self.paused = False
157
169
  self.provider = ""
158
170
  self.prd_text = ""
159
171
  self.project_dir = ""
160
172
  self.start_time = 0
161
173
  self.log_lines = []
174
+ self.log_lines_total = 0
162
175
 
163
176
 
164
177
  def _kill_tracked_child_processes() -> None:
165
- """Kill only processes that Purple Lab started, not external loki sessions."""
166
- import subprocess as _sp
178
+ """Kill all tracked child processes and their process groups."""
167
179
  tracked = _get_tracked_child_pids()
168
180
  if not tracked:
169
181
  return
170
182
 
183
+ # SIGTERM to process groups first
171
184
  for pid in tracked:
172
185
  try:
173
- # Kill the entire process tree (children first, then parent)
174
- _sp.run(["pkill", "-TERM", "-P", str(pid)],
175
- capture_output=True, timeout=5)
176
- os.kill(pid, signal.SIGTERM)
186
+ pgid = os.getpgid(pid)
187
+ os.killpg(pgid, signal.SIGTERM)
177
188
  except (ProcessLookupError, PermissionError, OSError):
178
- pass
189
+ try:
190
+ os.kill(pid, signal.SIGTERM)
191
+ except (ProcessLookupError, PermissionError, OSError):
192
+ pass
193
+
194
+ # Wait briefly for graceful shutdown
195
+ time.sleep(2)
179
196
 
180
- # Wait briefly then SIGKILL survivors
181
- import time as _time
182
- _time.sleep(2)
197
+ # SIGKILL anything still running
183
198
  for pid in tracked:
184
199
  try:
185
- _sp.run(["pkill", "-9", "-P", str(pid)],
186
- capture_output=True, timeout=5)
187
- os.kill(pid, signal.SIGKILL)
200
+ os.kill(pid, 0) # Check if still alive
201
+ try:
202
+ pgid = os.getpgid(pid)
203
+ os.killpg(pgid, signal.SIGKILL)
204
+ except (ProcessLookupError, PermissionError, OSError):
205
+ os.kill(pid, signal.SIGKILL)
188
206
  except (ProcessLookupError, PermissionError, OSError):
189
- pass
207
+ pass # Already dead
190
208
 
191
209
  _clear_tracked_pids()
192
210
 
@@ -201,27 +219,68 @@ _PURPLE_LAB_PIDS_FILE = SCRIPT_DIR.parent / ".loki" / "purple-lab" / "child-pids
201
219
 
202
220
 
203
221
  def _track_child_pid(pid: int) -> None:
204
- """Record a PID started by Purple Lab so loki web stop can clean it up."""
222
+ """Record a PID started by Purple Lab so loki web stop can clean it up.
223
+
224
+ Uses fcntl.flock for atomic read-modify-write to prevent race conditions.
225
+ """
226
+ import fcntl
205
227
  _PURPLE_LAB_PIDS_FILE.parent.mkdir(parents=True, exist_ok=True)
206
- pids: list[int] = []
207
- if _PURPLE_LAB_PIDS_FILE.exists():
228
+ fd = os.open(str(_PURPLE_LAB_PIDS_FILE), os.O_RDWR | os.O_CREAT)
229
+ try:
230
+ fcntl.flock(fd, fcntl.LOCK_EX)
231
+ f = os.fdopen(fd, "r+")
208
232
  try:
209
- pids = json.loads(_PURPLE_LAB_PIDS_FILE.read_text())
210
- except (json.JSONDecodeError, OSError):
233
+ content = f.read()
234
+ pids = json.loads(content) if content.strip() else []
235
+ except (json.JSONDecodeError, ValueError):
211
236
  pids = []
212
- if pid not in pids:
213
- pids.append(pid)
214
- _PURPLE_LAB_PIDS_FILE.write_text(json.dumps(pids))
237
+ if pid not in pids:
238
+ pids.append(pid)
239
+ f.seek(0)
240
+ f.truncate()
241
+ f.write(json.dumps(pids))
242
+ f.flush()
243
+ except Exception:
244
+ try:
245
+ os.close(fd)
246
+ except OSError:
247
+ pass
248
+ raise
249
+ else:
250
+ f.close() # also releases lock and closes fd
215
251
 
216
252
 
217
253
  def _untrack_child_pid(pid: int) -> None:
218
- """Remove a PID from tracking after it exits."""
254
+ """Remove a PID from tracking after it exits.
255
+
256
+ Uses fcntl.flock for atomic read-modify-write.
257
+ """
258
+ import fcntl
219
259
  if not _PURPLE_LAB_PIDS_FILE.exists():
220
260
  return
221
261
  try:
222
- pids = json.loads(_PURPLE_LAB_PIDS_FILE.read_text())
223
- pids = [p for p in pids if p != pid]
224
- _PURPLE_LAB_PIDS_FILE.write_text(json.dumps(pids))
262
+ fd = os.open(str(_PURPLE_LAB_PIDS_FILE), os.O_RDWR)
263
+ try:
264
+ fcntl.flock(fd, fcntl.LOCK_EX)
265
+ f = os.fdopen(fd, "r+")
266
+ try:
267
+ content = f.read()
268
+ pids = json.loads(content) if content.strip() else []
269
+ except (json.JSONDecodeError, ValueError):
270
+ pids = []
271
+ pids = [p for p in pids if p != pid]
272
+ f.seek(0)
273
+ f.truncate()
274
+ f.write(json.dumps(pids))
275
+ f.flush()
276
+ except Exception:
277
+ try:
278
+ os.close(fd)
279
+ except OSError:
280
+ pass
281
+ raise
282
+ else:
283
+ f.close()
225
284
  except (json.JSONDecodeError, OSError):
226
285
  pass
227
286
 
@@ -306,6 +365,16 @@ class SecretRequest(BaseModel):
306
365
  class DevServerStartRequest(BaseModel):
307
366
  command: Optional[str] = None
308
367
 
368
+ @field_validator("command")
369
+ @classmethod
370
+ def validate_command(cls, v: Optional[str]) -> Optional[str]:
371
+ if v is None:
372
+ return v
373
+ dangerous = set(';|`$(){}<>\n\r')
374
+ if any(c in dangerous for c in v):
375
+ raise ValueError("Command contains disallowed shell characters")
376
+ return v.strip()
377
+
309
378
 
310
379
  # ---------------------------------------------------------------------------
311
380
  # File Watcher (watchdog-based, broadcasts changes via WebSocket)
@@ -478,8 +547,8 @@ class DevServerManager:
478
547
  # Generic "listening on port 3000" or "on port 3000"
479
548
  re.compile(r"listening\s+on\s+(?:port\s+)?(\d+)", re.IGNORECASE),
480
549
  re.compile(r"on\s+port\s+(\d+)", re.IGNORECASE),
481
- # "port 3000" standalone
482
- re.compile(r"port\s+(\d+)", re.IGNORECASE),
550
+ # "listening on port 3000" / "running on port 3000" / "started on port 3000" / "serving on port 3000"
551
+ re.compile(r"(?:listening|running|started|serving)\s+(?:on\s+)?port\s+(\d+)", re.IGNORECASE),
483
552
  # Vite ready message: "ready in 300ms -- http://localhost:5173/"
484
553
  re.compile(r"ready\s+in\s+\d+m?s.*localhost:(\d+)"),
485
554
  # Generic URL patterns (last resort -- broad matches)
@@ -511,7 +580,7 @@ class DevServerManager:
511
580
  clean = re.sub(r"[^a-zA-Z0-9]", "", session_id)
512
581
  return f"lab-{clean[:6].lower()}"
513
582
 
514
- def _ensure_portless_proxy(self) -> bool:
583
+ async def _ensure_portless_proxy(self) -> bool:
515
584
  """Start the portless proxy if not already running.
516
585
 
517
586
  Returns True if the proxy is available, False otherwise.
@@ -537,9 +606,8 @@ class DevServerManager:
537
606
  stderr=subprocess.DEVNULL,
538
607
  stdin=subprocess.DEVNULL,
539
608
  )
540
- # Give it a moment to start
541
- import time as _time
542
- _time.sleep(1)
609
+ # Give it a moment to start (async to avoid blocking the event loop)
610
+ await asyncio.sleep(1)
543
611
  self._portless_proxy_started = True
544
612
  return True
545
613
  except (FileNotFoundError, OSError):
@@ -571,7 +639,10 @@ class DevServerManager:
571
639
  for p in ports:
572
640
  p_str = str(p)
573
641
  if ":" in p_str:
574
- host_port = p_str.split(":")[0]
642
+ # Handle IP:host:container (e.g. "127.0.0.1:8080:80")
643
+ # and host:container (e.g. "8080:80")
644
+ parts = p_str.split(":")
645
+ host_port = parts[-2] # second-to-last is always host port
575
646
  port = int(host_port)
576
647
  break
577
648
  if port != 3000:
@@ -599,8 +670,16 @@ class DevServerManager:
599
670
  if "expo" in deps:
600
671
  return {"command": "npx expo start", "expected_port": 8081, "framework": "expo"}
601
672
  if "dev" in scripts:
602
- port = 5173 if "vite" in deps else 3000
603
- fw = "vite" if "vite" in deps else "next" if "next" in deps else "node"
673
+ # Check Next.js BEFORE Vite -- Next.js projects may also have vite as a dep
674
+ if "next" in deps:
675
+ fw = "next"
676
+ port = 3000
677
+ elif "vite" in deps:
678
+ fw = "vite"
679
+ port = 5173
680
+ else:
681
+ fw = "node"
682
+ port = 3000
604
683
  return {"command": "npm run dev", "expected_port": port, "framework": fw}
605
684
  if "start" in scripts:
606
685
  fw = "next" if "next" in deps else "react" if "react" in deps else "node"
@@ -627,7 +706,8 @@ class DevServerManager:
627
706
  py_file = root / py_entry
628
707
  if py_file.exists():
629
708
  try:
630
- src = py_file.read_text(errors="replace")
709
+ with open(py_file, "r", errors="replace") as f:
710
+ src = f.read(4096)
631
711
  if "fastapi" in src.lower() or "FastAPI" in src:
632
712
  module = py_entry[:-3]
633
713
  return {"command": f"uvicorn {module}:app --reload --port 8000",
@@ -713,34 +793,73 @@ class DevServerManager:
713
793
  expected_port = detected["expected_port"] if detected else 3000
714
794
  framework = detected["framework"] if detected else "unknown"
715
795
 
716
- # Auto-install dependencies if needed
796
+ # Auto-install dependencies before starting the dev server
717
797
  actual_path = Path(actual_dir)
718
798
  needs_npm = (actual_path / "package.json").exists() and not (actual_path / "node_modules").exists()
719
799
  needs_pip = (actual_path / "requirements.txt").exists() and not (actual_path / "venv").exists()
800
+
801
+ build_env = {**os.environ}
802
+ build_env.update(_load_secrets())
803
+
720
804
  if needs_npm:
721
- # Prepend npm install to the command
722
- cmd_str = f"npm install && {cmd_str}"
805
+ try:
806
+ subprocess.run(
807
+ ["npm", "install"],
808
+ cwd=actual_dir,
809
+ capture_output=True,
810
+ timeout=120,
811
+ env=build_env,
812
+ )
813
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
814
+ logger.warning("npm install failed: %s", exc)
815
+
723
816
  if needs_pip:
724
- cmd_str = f"pip install -r requirements.txt && {cmd_str}"
817
+ # Use project venv if available, otherwise create one to avoid
818
+ # installing into the server's own Python environment.
819
+ venv_dir = None
820
+ for venv_name in ("venv", ".venv", "env"):
821
+ candidate = actual_path / venv_name
822
+ if candidate.is_dir() and (candidate / "bin" / "pip").exists():
823
+ venv_dir = candidate
824
+ break
825
+ if venv_dir is None:
826
+ # Create a virtual environment for the project
827
+ try:
828
+ subprocess.run(
829
+ [sys.executable, "-m", "venv", str(actual_path / "venv")],
830
+ capture_output=True, timeout=60,
831
+ )
832
+ venv_dir = actual_path / "venv"
833
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
834
+ logger.warning("venv creation failed: %s", exc)
835
+ pip_executable = str(venv_dir / "bin" / "pip") if venv_dir else "pip"
836
+ try:
837
+ subprocess.run(
838
+ [pip_executable, "install", "-r", "requirements.txt"],
839
+ cwd=actual_dir,
840
+ capture_output=True,
841
+ timeout=120,
842
+ )
843
+ except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
844
+ logger.warning("pip install failed: %s", exc)
725
845
 
726
846
  # Check if portless is available and proxy is running
727
847
  use_portless = False
728
848
  portless_app_name = None
729
- if self._has_portless() and self._ensure_portless_proxy():
849
+ if self._has_portless() and await self._ensure_portless_proxy():
730
850
  portless_app_name = self._portless_app_name(session_id)
731
851
  use_portless = True
732
- # Wrap the command with portless
733
- effective_cmd = f"portless {portless_app_name} {cmd_str}"
734
- else:
735
- effective_cmd = cmd_str
736
852
 
737
- build_env = {**os.environ}
738
- build_env.update(_load_secrets())
853
+ # Build command as list (no shell=True needed)
854
+ if use_portless and portless_app_name:
855
+ cmd_parts = ["portless", portless_app_name] + shlex.split(cmd_str)
856
+ else:
857
+ cmd_parts = shlex.split(cmd_str)
739
858
 
740
859
  try:
741
860
  proc = subprocess.Popen(
742
- effective_cmd,
743
- shell=True,
861
+ cmd_parts,
862
+ shell=False,
744
863
  stdout=subprocess.PIPE,
745
864
  stderr=subprocess.STDOUT,
746
865
  stdin=subprocess.DEVNULL,
@@ -753,6 +872,9 @@ class DevServerManager:
753
872
  except Exception as e:
754
873
  return {"status": "error", "message": f"Failed to start: {e}"}
755
874
 
875
+ _track_child_pid(proc.pid)
876
+
877
+ effective_cmd = " ".join(cmd_parts)
756
878
  server_info: dict = {
757
879
  "process": proc,
758
880
  "port": None,
@@ -856,27 +978,43 @@ class DevServerManager:
856
978
  info["output_lines"].append(text)
857
979
  if len(info["output_lines"]) > 200:
858
980
  info["output_lines"] = info["output_lines"][-200:]
859
- if info["port"] is None:
860
- detected_port = self._parse_port(text)
861
- if detected_port:
862
- info["port"] = detected_port
981
+ detected_port = self._parse_port(text)
982
+ if detected_port:
983
+ info["port"] = detected_port
984
+ # Transition from "starting" to "running" when port is detected
985
+ if info.get("status") == "starting":
986
+ info["status"] = "running"
863
987
  except Exception:
864
- pass
988
+ logger.error("Dev server monitor failed for session %s", session_id, exc_info=True)
865
989
  finally:
866
990
  # Process exited -- mark as error if it was still starting or running
867
991
  if info.get("status") in ("starting", "running"):
868
992
  info["status"] = "error"
869
- # Auto-fix: trigger error repair if under circuit breaker limit
993
+ # Auto-fix with exponential backoff and circuit breaker
870
994
  attempts = info.get("auto_fix_attempts", 0)
871
- if attempts < 3:
995
+ now = time.time()
996
+ timestamps = info.get("auto_fix_timestamps", [])
997
+ recent = [t for t in timestamps if now - t < 300]
998
+
999
+ if len(recent) >= 3:
1000
+ info["auto_fix_status"] = "circuit breaker open (3 failures in 5 min)"
1001
+ logger.warning("Auto-fix circuit breaker open for session %s", session_id)
1002
+ elif attempts < 3:
872
1003
  info["auto_fix_attempts"] = attempts + 1
1004
+ timestamps.append(now)
1005
+ info["auto_fix_timestamps"] = timestamps
1006
+ backoff_seconds = 5 * (3 ** attempts)
873
1007
  error_context = "\n".join(info.get("output_lines", [])[-30:])
1008
+
1009
+ async def _delayed_auto_fix():
1010
+ await asyncio.sleep(backoff_seconds)
1011
+ await self._auto_fix(session_id, error_context)
1012
+
874
1013
  try:
875
- asyncio.ensure_future(
876
- self._auto_fix(session_id, error_context)
877
- )
1014
+ task = asyncio.ensure_future(_delayed_auto_fix())
1015
+ info["_auto_fix_task"] = task
878
1016
  except Exception:
879
- pass
1017
+ logger.warning("Failed to schedule auto-fix for session %s", session_id, exc_info=True)
880
1018
 
881
1019
  async def _auto_fix(self, session_id: str, error_context: str) -> None:
882
1020
  """Auto-fix a crashed dev server by invoking loki quick with the error."""
@@ -907,9 +1045,11 @@ class DevServerManager:
907
1045
  )
908
1046
 
909
1047
  # Save original command before stop() removes the info dict
910
- cmd = info.get("command")
1048
+ cmd = info.get("original_command")
911
1049
 
912
1050
  try:
1051
+ auto_fix_env = {**os.environ}
1052
+ auto_fix_env.update(_load_secrets())
913
1053
  result = await asyncio.get_running_loop().run_in_executor(
914
1054
  None,
915
1055
  lambda: subprocess.run(
@@ -918,7 +1058,7 @@ class DevServerManager:
918
1058
  capture_output=True,
919
1059
  text=True,
920
1060
  timeout=300,
921
- env={**os.environ},
1061
+ env=auto_fix_env,
922
1062
  start_new_session=True,
923
1063
  ),
924
1064
  )
@@ -970,6 +1110,11 @@ class DevServerManager:
970
1110
  if not info:
971
1111
  return {"stopped": False, "message": "No dev server running"}
972
1112
 
1113
+ # Cancel any pending auto-fix task
1114
+ fix_task = info.get("_auto_fix_task")
1115
+ if fix_task and not fix_task.done():
1116
+ fix_task.cancel()
1117
+
973
1118
  # For Docker containers, run docker compose down
974
1119
  if info.get("framework") == "docker":
975
1120
  try:
@@ -986,7 +1131,7 @@ class DevServerManager:
986
1131
  cwd=project_dir,
987
1132
  capture_output=True, timeout=30,
988
1133
  )
989
- except Exception:
1134
+ except (ProcessLookupError, PermissionError, OSError):
990
1135
  pass
991
1136
 
992
1137
  proc = info["process"]
@@ -998,12 +1143,12 @@ class DevServerManager:
998
1143
  except (ProcessLookupError, PermissionError, OSError):
999
1144
  try:
1000
1145
  proc.terminate()
1001
- except Exception:
1146
+ except (ProcessLookupError, PermissionError, OSError):
1002
1147
  pass
1003
1148
  else:
1004
1149
  try:
1005
1150
  proc.terminate()
1006
- except Exception:
1151
+ except (ProcessLookupError, PermissionError, OSError):
1007
1152
  pass
1008
1153
  try:
1009
1154
  proc.wait(timeout=5)
@@ -1015,14 +1160,15 @@ class DevServerManager:
1015
1160
  except (ProcessLookupError, PermissionError, OSError):
1016
1161
  try:
1017
1162
  proc.kill()
1018
- except Exception:
1163
+ except (ProcessLookupError, PermissionError, OSError):
1019
1164
  pass
1020
1165
  else:
1021
1166
  try:
1022
1167
  proc.kill()
1023
- except Exception:
1168
+ except (ProcessLookupError, PermissionError, OSError):
1024
1169
  pass
1025
1170
 
1171
+ _untrack_child_pid(proc.pid)
1026
1172
  return {"stopped": True, "message": "Dev server stopped"}
1027
1173
 
1028
1174
  async def status(self, session_id: str) -> dict:
@@ -1132,6 +1278,7 @@ async def _broadcast(msg: dict) -> None:
1132
1278
  try:
1133
1279
  await ws.send_text(data)
1134
1280
  except Exception:
1281
+ logger.debug("WebSocket send failed for client", exc_info=True)
1135
1282
  dead.append(ws)
1136
1283
  for ws in dead:
1137
1284
  session.ws_clients.discard(ws)
@@ -1152,6 +1299,7 @@ async def _read_process_output() -> None:
1152
1299
  break
1153
1300
  text = line.rstrip("\n")
1154
1301
  session.log_lines.append(text)
1302
+ session.log_lines_total += 1
1155
1303
  # Keep last 5000 lines
1156
1304
  if len(session.log_lines) > 5000:
1157
1305
  session.log_lines = session.log_lines[-5000:]
@@ -1160,10 +1308,11 @@ async def _read_process_output() -> None:
1160
1308
  "data": {"line": text, "timestamp": time.strftime("%H:%M:%S")},
1161
1309
  })
1162
1310
  except Exception:
1163
- pass
1311
+ logger.error("Process output reader failed", exc_info=True)
1164
1312
  finally:
1165
- # Process ended
1166
- session.running = False
1313
+ # Process ended -- acquire lock before mutating state
1314
+ async with session._lock:
1315
+ session.running = False
1167
1316
  await _broadcast({"type": "session_end", "data": {"message": "Session ended"}})
1168
1317
 
1169
1318
 
@@ -1208,7 +1357,18 @@ _SECRETS_FILE = SCRIPT_DIR.parent / ".loki" / "purple-lab" / "secrets.json"
1208
1357
 
1209
1358
  def _load_secrets() -> dict[str, str]:
1210
1359
  """Load secrets from disk, decrypting values if encryption is configured."""
1211
- from crypto import decrypt_value, encryption_available
1360
+ try:
1361
+ from crypto import decrypt_value, encryption_available
1362
+ except ImportError:
1363
+ # crypto module not available -- return raw secrets or empty dict
1364
+ if _SECRETS_FILE.exists():
1365
+ try:
1366
+ data = json.loads(_SECRETS_FILE.read_text())
1367
+ if isinstance(data, dict):
1368
+ return data
1369
+ except (json.JSONDecodeError, OSError):
1370
+ pass
1371
+ return {}
1212
1372
  if _SECRETS_FILE.exists():
1213
1373
  try:
1214
1374
  data = json.loads(_SECRETS_FILE.read_text())
@@ -1298,7 +1458,7 @@ async def start_session(req: StartRequest) -> JSONResponse:
1298
1458
  # Determine project directory
1299
1459
  project_dir = req.projectDir
1300
1460
  if not project_dir:
1301
- project_dir = os.path.join(Path.home(), "purple-lab-projects", f"project-{int(time.time())}")
1461
+ project_dir = os.path.join(Path.home(), "purple-lab-projects", f"project-{int(time.time() * 1000)}")
1302
1462
  os.makedirs(project_dir, exist_ok=True)
1303
1463
 
1304
1464
  # Write PRD to a temp file in the project dir
@@ -1459,6 +1619,8 @@ async def stop_session() -> JSONResponse:
1459
1619
  except Exception:
1460
1620
  pass
1461
1621
  _terminal_ptys.clear()
1622
+ _terminal_ws_clients.clear()
1623
+ _terminal_reader_tasks.clear()
1462
1624
 
1463
1625
  # Kill any orphaned loki-run processes for this project
1464
1626
  if session.project_dir:
@@ -1468,69 +1630,21 @@ async def stop_session() -> JSONResponse:
1468
1630
 
1469
1631
  await _broadcast({"type": "session_end", "data": {"message": "Session stopped by user"}})
1470
1632
 
1471
- return JSONResponse(content={"stopped": True, "message": "Session stopped"})
1472
-
1473
-
1474
-
1475
- if not session.running or session.process is None:
1476
- return
1477
-
1478
- project_dir = session.project_dir
1479
- session.running = False
1480
- await session.cleanup()
1481
-
1482
- proc = session.process
1483
- if proc and proc.poll() is None:
1484
- if sys.platform != "win32":
1485
- try:
1486
- pgid = os.getpgid(proc.pid)
1487
- os.killpg(pgid, signal.SIGTERM)
1488
- except (ProcessLookupError, PermissionError, OSError):
1489
- try:
1490
- proc.terminate()
1491
- except Exception:
1492
- pass
1493
- else:
1494
- try:
1495
- proc.terminate()
1496
- except Exception:
1497
- pass
1498
- try:
1499
- proc.wait(timeout=5)
1500
- except subprocess.TimeoutExpired:
1501
- if sys.platform != "win32":
1502
- try:
1503
- pgid = os.getpgid(proc.pid)
1504
- os.killpg(pgid, signal.SIGKILL)
1505
- except (ProcessLookupError, PermissionError, OSError):
1506
- try:
1507
- proc.kill()
1508
- except Exception:
1509
- pass
1510
- else:
1511
- try:
1512
- proc.kill()
1513
- except Exception:
1514
- pass
1515
- try:
1516
- proc.wait(timeout=3)
1517
- except Exception:
1518
- pass
1633
+ # Reset session state so it can be reused
1634
+ session.reset()
1519
1635
 
1520
- # Kill any orphaned loki-run processes for this project
1521
- if project_dir:
1522
- await asyncio.get_running_loop().run_in_executor(
1523
- None, _kill_tracked_child_processes
1524
- )
1636
+ return JSONResponse(content={"stopped": True, "message": "Session stopped"})
1525
1637
 
1526
1638
 
1527
1639
  @app.get("/api/session/status")
1528
1640
  async def get_status() -> JSONResponse:
1529
1641
  """Get current session status."""
1530
- # Check if process is still alive
1531
- if session.process and session.running:
1642
+ # Check if process is still alive (read-only -- do not mutate session.running
1643
+ # here; that is handled by _read_process_output under the lock)
1644
+ is_running = session.running
1645
+ if session.process and is_running:
1532
1646
  if session.process.poll() is not None:
1533
- session.running = False
1647
+ is_running = False
1534
1648
 
1535
1649
  # Try to read .loki state files for richer status
1536
1650
  loki_dir = _loki_dir()
@@ -1553,11 +1667,11 @@ async def get_status() -> JSONResponse:
1553
1667
  except (json.JSONDecodeError, OSError):
1554
1668
  pass
1555
1669
 
1556
- uptime = time.time() - session.start_time if session.running else 0
1670
+ uptime = time.time() - session.start_time if is_running else 0
1557
1671
 
1558
1672
  return JSONResponse(content={
1559
- "running": session.running,
1560
- "paused": False,
1673
+ "running": is_running,
1674
+ "paused": session.paused,
1561
1675
  "phase": phase,
1562
1676
  "iteration": iteration,
1563
1677
  "complexity": complexity,
@@ -1722,6 +1836,7 @@ async def pause_session() -> JSONResponse:
1722
1836
  return JSONResponse(content={"paused": False, "message": "Process not found"})
1723
1837
  except Exception as e:
1724
1838
  return JSONResponse(content={"paused": False, "message": str(e)})
1839
+ session.paused = True
1725
1840
  await _broadcast({"type": "session_paused", "data": {}})
1726
1841
  return JSONResponse(content={"paused": True})
1727
1842
 
@@ -1737,6 +1852,7 @@ async def resume_session() -> JSONResponse:
1737
1852
  return JSONResponse(content={"resumed": False, "message": "Process not found"})
1738
1853
  except Exception as e:
1739
1854
  return JSONResponse(content={"resumed": False, "message": str(e)})
1855
+ session.paused = False
1740
1856
  await _broadcast({"type": "session_resumed", "data": {}})
1741
1857
  return JSONResponse(content={"resumed": True})
1742
1858
 
@@ -2181,8 +2297,6 @@ async def get_sessions_history() -> JSONResponse:
2181
2297
  session_info["file_count"] = 0
2182
2298
 
2183
2299
  history.append(session_info)
2184
- if history:
2185
- break # Use first directory that has entries
2186
2300
  return JSONResponse(content=history)
2187
2301
 
2188
2302
 
@@ -2196,6 +2310,10 @@ async def delete_session(session_id: str) -> JSONResponse:
2196
2310
  if target is None:
2197
2311
  return JSONResponse(status_code=404, content={"error": "Session not found"})
2198
2312
 
2313
+ # Prevent deleting the currently active session directory
2314
+ if session.project_dir and Path(session.project_dir).resolve() == target.resolve():
2315
+ return JSONResponse(status_code=409, content={"error": "Cannot delete the currently active session. Stop it first."})
2316
+
2199
2317
  # 1. Stop Docker containers for this project (before stopping dev server)
2200
2318
  try:
2201
2319
  for compose_file in ("docker-compose.yml", "docker-compose.yaml", "compose.yml", "compose.yaml"):
@@ -2658,6 +2776,8 @@ async def chat_session(session_id: str, req: ChatRequest) -> JSONResponse:
2658
2776
  # Quick and Standard both use 'loki quick' -- fast, focused changes
2659
2777
  cmd_args = [loki, "quick", full_message + docker_note]
2660
2778
  try:
2779
+ chat_env = {**os.environ}
2780
+ chat_env.update(_load_secrets())
2661
2781
  proc = subprocess.Popen(
2662
2782
  cmd_args,
2663
2783
  stdout=subprocess.PIPE,
@@ -2665,10 +2785,11 @@ async def chat_session(session_id: str, req: ChatRequest) -> JSONResponse:
2665
2785
  stdin=subprocess.DEVNULL,
2666
2786
  text=True,
2667
2787
  cwd=str(target),
2668
- env={**os.environ},
2788
+ env=chat_env,
2669
2789
  start_new_session=True,
2670
2790
  )
2671
2791
  task.process = proc
2792
+ _track_child_pid(proc.pid)
2672
2793
  loop = asyncio.get_running_loop()
2673
2794
 
2674
2795
  def _read_lines() -> None:
@@ -2753,6 +2874,9 @@ async def chat_session(session_id: str, req: ChatRequest) -> JSONResponse:
2753
2874
  task.files_changed = sorted(changed)
2754
2875
  except Exception:
2755
2876
  pass
2877
+ # Untrack the child PID now that the chat process is done
2878
+ if proc is not None:
2879
+ _untrack_child_pid(proc.pid)
2756
2880
  task.complete = True
2757
2881
 
2758
2882
  asyncio.create_task(run_chat())
@@ -2842,8 +2966,8 @@ async def cancel_chat(session_id: str, task_id: str) -> JSONResponse:
2842
2966
  try:
2843
2967
  pgid = os.getpgid(task.process.pid)
2844
2968
  os.killpg(pgid, signal.SIGTERM)
2845
- task.process.wait(timeout=3)
2846
- except (ProcessLookupError, OSError):
2969
+ await asyncio.to_thread(task.process.wait, timeout=3)
2970
+ except (ProcessLookupError, OSError, subprocess.TimeoutExpired):
2847
2971
  pass
2848
2972
  if task.process.poll() is None:
2849
2973
  try:
@@ -2851,7 +2975,10 @@ async def cancel_chat(session_id: str, task_id: str) -> JSONResponse:
2851
2975
  os.killpg(pgid, signal.SIGKILL)
2852
2976
  except (ProcessLookupError, OSError):
2853
2977
  task.process.kill()
2854
- task.process.wait(timeout=5)
2978
+ try:
2979
+ await asyncio.to_thread(task.process.wait, timeout=5)
2980
+ except subprocess.TimeoutExpired:
2981
+ pass
2855
2982
  task.output_lines.append("[cancelled by user]")
2856
2983
  task.returncode = 1
2857
2984
  task.complete = True
@@ -2896,6 +3023,8 @@ async def fix_session(session_id: str) -> JSONResponse:
2896
3023
  return
2897
3024
  proc: Optional[subprocess.Popen] = None
2898
3025
  try:
3026
+ fix_env = {**os.environ}
3027
+ fix_env.update(_load_secrets())
2899
3028
  proc = subprocess.Popen(
2900
3029
  [loki, "quick", fix_message],
2901
3030
  stdout=subprocess.PIPE,
@@ -2903,7 +3032,7 @@ async def fix_session(session_id: str) -> JSONResponse:
2903
3032
  stdin=subprocess.DEVNULL,
2904
3033
  text=True,
2905
3034
  cwd=str(target),
2906
- env={**os.environ},
3035
+ env=fix_env,
2907
3036
  start_new_session=True,
2908
3037
  )
2909
3038
  task.process = proc
@@ -3513,8 +3642,11 @@ async def proxy_websocket(websocket: WebSocket, session_id: str, path: str):
3513
3642
  async def client_to_upstream():
3514
3643
  try:
3515
3644
  while True:
3516
- data = await websocket.receive_text()
3517
- await upstream.send(data)
3645
+ msg = await websocket.receive()
3646
+ if msg.get("text") is not None:
3647
+ await upstream.send(msg["text"])
3648
+ elif msg.get("bytes") is not None:
3649
+ await upstream.send(msg["bytes"])
3518
3650
  except (WebSocketDisconnect, Exception):
3519
3651
  pass
3520
3652
 
@@ -3551,8 +3683,10 @@ async def proxy_websocket(websocket: WebSocket, session_id: str, path: str):
3551
3683
  async def auth_middleware(request: Request, call_next):
3552
3684
  """Enforce JWT auth when database is configured. Skip for public paths."""
3553
3685
  path = request.url.path
3554
- skip_auth_prefixes = ["/health", "/api/auth/", "/ws", "/proxy/"]
3555
- if any(path.startswith(p) for p in skip_auth_prefixes) or not path.startswith("/api/"):
3686
+ skip_auth_prefixes = ["/health", "/api/auth/"]
3687
+ if any(path.startswith(p) for p in skip_auth_prefixes) or not (
3688
+ path.startswith("/api/") or path.startswith("/ws") or path.startswith("/proxy/")
3689
+ ):
3556
3690
  return await call_next(request)
3557
3691
 
3558
3692
  # If no DB configured, skip auth (local mode)
@@ -3777,7 +3911,9 @@ async def _push_state_to_client(ws: WebSocket) -> None:
3777
3911
  Sends only incremental log deltas (new lines since last push) instead
3778
3912
  of the full log buffer each time.
3779
3913
  """
3780
- last_log_index = max(len(session.log_lines) - 100, 0) # backfill handled on connect
3914
+ # Track absolute log offset to handle truncation correctly.
3915
+ # The buffer holds only the last N lines, but log_lines_total counts all.
3916
+ last_abs_index = max(session.log_lines_total - 100, 0) # backfill handled on connect
3781
3917
  while True:
3782
3918
  is_running = (
3783
3919
  session.process is not None
@@ -3787,30 +3923,49 @@ async def _push_state_to_client(ws: WebSocket) -> None:
3787
3923
  interval = 2.0 if is_running else 30.0
3788
3924
 
3789
3925
  # Build status payload (same logic as GET /api/session/status)
3790
- loki_dir = _loki_dir()
3791
- phase = "idle"
3792
- iteration = 0
3793
- complexity = "standard"
3794
- current_task = ""
3795
- pending_tasks = 0
3796
-
3797
- state_file = loki_dir / "state" / "session.json"
3798
- if state_file.exists():
3799
- try:
3800
- with open(state_file) as f:
3801
- state_data = json.load(f)
3802
- phase = state_data.get("phase", phase)
3803
- iteration = state_data.get("iteration", iteration)
3804
- complexity = state_data.get("complexity", complexity)
3805
- current_task = state_data.get("current_task", current_task)
3806
- pending_tasks = state_data.get("pending_tasks", pending_tasks)
3807
- except (json.JSONDecodeError, OSError):
3808
- pass
3926
+ # Use asyncio.to_thread to avoid blocking the event loop on file I/O
3927
+ def _read_state_files():
3928
+ loki_dir = _loki_dir()
3929
+ _phase = "idle"
3930
+ _iteration = 0
3931
+ _complexity = "standard"
3932
+ _current_task = ""
3933
+ _pending_tasks = 0
3934
+ _agents = []
3935
+
3936
+ state_file = loki_dir / "state" / "session.json"
3937
+ if state_file.exists():
3938
+ try:
3939
+ with open(state_file) as f:
3940
+ state_data = json.load(f)
3941
+ _phase = state_data.get("phase", _phase)
3942
+ _iteration = state_data.get("iteration", _iteration)
3943
+ _complexity = state_data.get("complexity", _complexity)
3944
+ _current_task = state_data.get("current_task", _current_task)
3945
+ _pending_tasks = state_data.get("pending_tasks", _pending_tasks)
3946
+ except (json.JSONDecodeError, OSError):
3947
+ pass
3948
+
3949
+ agents_file = loki_dir / "state" / "agents.json"
3950
+ if agents_file.exists():
3951
+ try:
3952
+ with open(agents_file) as f:
3953
+ agents_data = json.load(f)
3954
+ if isinstance(agents_data, list):
3955
+ _agents = agents_data
3956
+ except (json.JSONDecodeError, OSError):
3957
+ pass
3958
+
3959
+ return _phase, _iteration, _complexity, _current_task, _pending_tasks, _agents
3960
+
3961
+ phase, iteration, complexity, current_task, pending_tasks, agents_payload = (
3962
+ await asyncio.to_thread(_read_state_files)
3963
+ )
3809
3964
 
3810
3965
  uptime = time.time() - session.start_time if is_running else 0
3811
3966
  status_payload = {
3812
3967
  "running": session.running,
3813
- "paused": False,
3968
+ "paused": session.paused,
3814
3969
  "phase": phase,
3815
3970
  "iteration": iteration,
3816
3971
  "complexity": complexity,
@@ -3825,22 +3980,15 @@ async def _push_state_to_client(ws: WebSocket) -> None:
3825
3980
  "projectDir": session.project_dir,
3826
3981
  }
3827
3982
 
3828
- # Build agents payload
3829
- agents_payload: list = []
3830
- agents_file = loki_dir / "state" / "agents.json"
3831
- if agents_file.exists():
3832
- try:
3833
- with open(agents_file) as f:
3834
- agents_data = json.load(f)
3835
- if isinstance(agents_data, list):
3836
- agents_payload = agents_data
3837
- except (json.JSONDecodeError, OSError):
3838
- pass
3839
-
3840
- # Build incremental logs payload (only new lines since last push)
3841
- current_len = len(session.log_lines)
3842
- new_lines = session.log_lines[last_log_index:current_len] if current_len > last_log_index else []
3843
- last_log_index = current_len
3983
+ # Build incremental logs payload using absolute offset to handle truncation
3984
+ total_now = session.log_lines_total
3985
+ buf_len = len(session.log_lines)
3986
+ buf_start = total_now - buf_len # absolute index of first item in buffer
3987
+ if last_abs_index < buf_start:
3988
+ last_abs_index = buf_start # skip lines that were truncated away
3989
+ relative_start = last_abs_index - buf_start
3990
+ new_lines = session.log_lines[relative_start:] if relative_start < buf_len else []
3991
+ last_abs_index = total_now
3844
3992
  logs_payload = []
3845
3993
  for line in new_lines:
3846
3994
  level = "info"
@@ -3878,6 +4026,10 @@ async def _push_state_to_client(ws: WebSocket) -> None:
3878
4026
  async def websocket_endpoint(ws: WebSocket) -> None:
3879
4027
  """Real-time stream of loki output and events."""
3880
4028
  await ws.accept()
4029
+ if len(session.ws_clients) >= MAX_WS_CLIENTS:
4030
+ await ws.send_text(json.dumps({"type": "error", "data": {"message": "Too many connections"}}))
4031
+ await ws.close(code=1013, reason="Too many connections")
4032
+ return
3881
4033
  session.ws_clients.add(ws)
3882
4034
 
3883
4035
  # Send current state on connect
@@ -3905,13 +4057,12 @@ async def websocket_endpoint(ws: WebSocket) -> None:
3905
4057
  while True:
3906
4058
  try:
3907
4059
  data = await asyncio.wait_for(ws.receive_text(), timeout=60.0)
3908
- missed_pongs = 0 # any message resets idle counter
3909
4060
  try:
3910
4061
  msg = json.loads(data)
3911
4062
  if msg.get("type") == "ping":
3912
4063
  await ws.send_text(json.dumps({"type": "pong"}))
3913
4064
  elif msg.get("type") == "pong":
3914
- pass # client responded to our ping
4065
+ missed_pongs = 0 # only reset on pong-type messages
3915
4066
  except json.JSONDecodeError:
3916
4067
  pass
3917
4068
  except asyncio.TimeoutError:
@@ -3944,6 +4095,9 @@ async def websocket_endpoint(ws: WebSocket) -> None:
3944
4095
  # Track active WebSocket connections per session for multi-tab awareness
3945
4096
  _terminal_ws_clients: Dict[str, set] = {}
3946
4097
 
4098
+ # Track active PTY reader tasks per session to prevent duplicate readers
4099
+ _terminal_reader_tasks: Dict[str, asyncio.Task] = {}
4100
+
3947
4101
 
3948
4102
  @app.websocket("/ws/terminal/{session_id}")
3949
4103
  async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
@@ -3954,6 +4108,10 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
3954
4108
  WebSocket client for this session disconnects.
3955
4109
  """
3956
4110
  await ws.accept()
4111
+ if len(_terminal_ptys) >= MAX_TERMINAL_PTYS and session_id not in _terminal_ptys:
4112
+ await ws.send_text(json.dumps({"type": "error", "data": {"message": "Too many terminal sessions"}}))
4113
+ await ws.close(code=1013, reason="Too many terminal sessions")
4114
+ return
3957
4115
 
3958
4116
  if not HAS_PEXPECT:
3959
4117
  # Try to install pexpect automatically
@@ -3964,6 +4122,7 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
3964
4122
  capture_output=True, timeout=30,
3965
4123
  )
3966
4124
  import pexpect as _pex # noqa: F811
4125
+ globals()["pexpect"] = _pex
3967
4126
  globals()["HAS_PEXPECT"] = True
3968
4127
  except Exception:
3969
4128
  await ws.send_text(json.dumps({
@@ -4039,6 +4198,8 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
4039
4198
  pass
4040
4199
 
4041
4200
  # ---- Background task: read PTY output and forward to WebSocket ----------
4201
+ # Only create one reader per PTY to avoid race conditions when multiple
4202
+ # tabs connect to the same terminal session.
4042
4203
  async def read_pty_output() -> None:
4043
4204
  loop = asyncio.get_event_loop()
4044
4205
  while True:
@@ -4064,7 +4225,13 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
4064
4225
  except Exception:
4065
4226
  break
4066
4227
 
4067
- reader_task = asyncio.create_task(read_pty_output())
4228
+ existing_reader = _terminal_reader_tasks.get(session_id)
4229
+ if existing_reader is not None and not existing_reader.done():
4230
+ # A reader already exists for this PTY -- reuse it, don't create another
4231
+ reader_task = None
4232
+ else:
4233
+ reader_task = asyncio.create_task(read_pty_output())
4234
+ _terminal_reader_tasks[session_id] = reader_task
4068
4235
 
4069
4236
  try:
4070
4237
  while True:
@@ -4090,13 +4257,15 @@ async def terminal_websocket(ws: WebSocket, session_id: str) -> None:
4090
4257
  except WebSocketDisconnect:
4091
4258
  pass
4092
4259
  except Exception:
4093
- pass
4260
+ logger.error("Terminal WebSocket error for session %s", session_id, exc_info=True)
4094
4261
  finally:
4095
- reader_task.cancel()
4096
- try:
4097
- await reader_task
4098
- except (asyncio.CancelledError, Exception):
4099
- pass
4262
+ if reader_task is not None:
4263
+ reader_task.cancel()
4264
+ try:
4265
+ await reader_task
4266
+ except (asyncio.CancelledError, Exception):
4267
+ pass
4268
+ _terminal_reader_tasks.pop(session_id, None)
4100
4269
 
4101
4270
  # Untrack this client
4102
4271
  clients = _terminal_ws_clients.get(session_id)