loki-mode 7.62.0 → 7.63.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,11 +8,13 @@ incremental codebase migrations with checkpoint/rollback support.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ import contextlib
11
12
  import dataclasses
12
13
  import json
13
14
  import logging
14
15
  import os
15
16
  import re
17
+ import secrets
16
18
  import subprocess
17
19
  import tempfile
18
20
  import threading
@@ -21,6 +23,13 @@ from datetime import datetime, timezone
21
23
  from pathlib import Path
22
24
  from typing import Any, Optional
23
25
 
26
+ try:
27
+ import fcntl # POSIX only (macOS + Linux). Absent on Windows.
28
+ _HAS_FCNTL = True
29
+ except ImportError: # pragma: no cover - non-POSIX fallback
30
+ fcntl = None # type: ignore[assignment]
31
+ _HAS_FCNTL = False
32
+
24
33
  logger = logging.getLogger("loki-migration")
25
34
 
26
35
  LOKI_DATA_DIR = os.environ.get("LOKI_DATA_DIR", os.path.expanduser("~/.loki"))
@@ -172,6 +181,45 @@ def _timestamp_iso() -> str:
172
181
  return datetime.now(timezone.utc).isoformat()
173
182
 
174
183
 
184
+ @contextlib.contextmanager
185
+ def _manifest_file_lock(migration_dir: Path):
186
+ """Cross-process advisory lock around a migration's manifest read-modify-write.
187
+
188
+ Uses an OS file lock (fcntl.flock LOCK_EX) on a dedicated lockfile inside
189
+ the migration directory. A fresh file descriptor is opened on every
190
+ acquisition: flock keys on the open file description, so distinct fds let
191
+ the kernel serialize even threads in the same process (the FastAPI sync
192
+ threadpool deployment) AND separate processes (the server vs. the
193
+ `loki migrate` CLI), which a per-instance threading.Lock cannot do.
194
+
195
+ Only the OUTERMOST read-modify-write entry point should take this lock.
196
+ Nesting two flock-wrapped calls in the same thread would self-deadlock
197
+ (two fds, the second LOCK_EX blocks on the first), so locked writers must
198
+ call the _unlocked internals and never re-enter a flock-wrapped method.
199
+
200
+ On non-POSIX platforms (no fcntl) this degrades to a no-op: the caller's
201
+ in-process threading.Lock still serializes threads in the same process,
202
+ but cross-process exclusion is NOT available. This is an accepted residual
203
+ on Windows only.
204
+ """
205
+ migration_dir.mkdir(parents=True, exist_ok=True)
206
+ lock_path = migration_dir / "manifest.json.lock"
207
+ if not _HAS_FCNTL:
208
+ # Graceful degrade: no OS lock available. In-process callers still rely
209
+ # on their threading.Lock; cross-process safety is unavailable here.
210
+ yield
211
+ return
212
+ fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o644)
213
+ try:
214
+ fcntl.flock(fd, fcntl.LOCK_EX)
215
+ try:
216
+ yield
217
+ finally:
218
+ fcntl.flock(fd, fcntl.LOCK_UN)
219
+ finally:
220
+ os.close(fd)
221
+
222
+
175
223
  # ---------------------------------------------------------------------------
176
224
  # MigrationPipeline
177
225
  # ---------------------------------------------------------------------------
@@ -181,7 +229,17 @@ class MigrationPipeline:
181
229
  """Manages the lifecycle of a codebase migration.
182
230
 
183
231
  All state is persisted under ~/.loki/migrations/<migration_id>/.
184
- Thread-safe for concurrent manifest reads and writes.
232
+
233
+ Concurrency: manifest read-modify-write operations (start_phase,
234
+ advance_phase, save_manifest, update_progress, create_checkpoint) are
235
+ serialized across BOTH threads and processes by an OS file lock
236
+ (see _manifest_file_lock) keyed on the migration directory. This holds
237
+ for the FastAPI sync-endpoint threadpool (each request builds a fresh
238
+ pipeline via load(), so per-instance threading.Lock alone would not
239
+ serialize them) and for the separate `loki migrate` CLI process running
240
+ concurrently with the server. Pure reads use the per-instance lock only.
241
+ On non-POSIX platforms (no fcntl) the file lock degrades to a no-op and
242
+ only in-process serialization remains (see _manifest_file_lock).
185
243
  """
186
244
 
187
245
  def __init__(
@@ -207,7 +265,18 @@ class MigrationPipeline:
207
265
  (self.migration_dir / "checkpoints").mkdir(exist_ok=True)
208
266
 
209
267
  def _generate_migration_id(self) -> str:
210
- """Generate a unique migration ID like mig_20260223_143052_<dirname>."""
268
+ """Generate a unique migration ID like mig_20260223_143052_<dirname>-a1b2c3.
269
+
270
+ The trailing 6-hex-char random suffix prevents collisions when two
271
+ migrations of the same path-basename start in the same second (the
272
+ date_str/time_str are second-resolution). Without it, the two would
273
+ derive the same id and the second create_manifest would overwrite the
274
+ first (the server rate-limiter throttles same-second server starts, but
275
+ the CLI bypasses it). The suffix is appended WITHIN the trailing name
276
+ segment (hyphen-joined), so the load() validation regex
277
+ ^mig_\\d{8}_\\d{6}_[a-zA-Z0-9_-]+$ still matches without modification
278
+ (its trailing group already permits letters, digits, hyphens).
279
+ """
211
280
  dirname = os.path.basename(self.codebase_path)
212
281
  # Sanitize dirname to match validation regex
213
282
  safe_dirname = re.sub(r'[^a-zA-Z0-9_-]', '_', dirname)
@@ -216,7 +285,8 @@ class MigrationPipeline:
216
285
  now = datetime.now(timezone.utc)
217
286
  date_str = now.strftime("%Y%m%d")
218
287
  time_str = now.strftime("%H%M%S")
219
- return f"mig_{date_str}_{time_str}_{safe_dirname}"
288
+ suffix = secrets.token_hex(3) # 6 hex chars, within [a-zA-Z0-9_-]+
289
+ return f"mig_{date_str}_{time_str}_{safe_dirname}-{suffix}"
220
290
 
221
291
  @classmethod
222
292
  def load(cls, migration_id: str) -> 'MigrationPipeline':
@@ -303,9 +373,15 @@ class MigrationPipeline:
303
373
  return self._load_manifest_unlocked()
304
374
 
305
375
  def save_manifest(self, manifest: MigrationManifest) -> None:
306
- """Persist manifest to disk atomically."""
307
- with self._lock:
308
- self._save_manifest_unlocked(manifest)
376
+ """Persist manifest to disk atomically.
377
+
378
+ Outermost RMW writer: takes the cross-process file lock so a concurrent
379
+ save in another thread/process cannot interleave. (create_manifest calls
380
+ through here, so it is covered without taking the lock itself.)
381
+ """
382
+ with _manifest_file_lock(self.migration_dir):
383
+ with self._lock:
384
+ self._save_manifest_unlocked(manifest)
309
385
 
310
386
  # -- Phase gate logic ----------------------------------------------------
311
387
 
@@ -325,17 +401,21 @@ class MigrationPipeline:
325
401
  """
326
402
  if phase not in PHASE_ORDER:
327
403
  raise ValueError(f"Unknown phase: {phase}")
328
- with self._lock:
329
- manifest = self._load_manifest_unlocked()
330
- if phase not in manifest.phases:
331
- manifest.phases[phase] = {"status": "pending", "started_at": "", "completed_at": ""}
332
- current_status = manifest.phases[phase].get("status", "pending")
333
- if current_status == "in_progress":
334
- return # Already started, idempotent
335
- manifest.phases[phase]["status"] = "in_progress"
336
- manifest.phases[phase]["started_at"] = datetime.now(timezone.utc).isoformat()
337
- manifest.phases[phase]["completed_at"] = ""
338
- self._save_manifest_unlocked(manifest)
404
+ # Outermost RMW: file lock serializes across processes/threads; the
405
+ # inner threading.Lock guards instance-local state. Uses _unlocked
406
+ # internals only, so no flock-wrapped method is re-entered (no deadlock).
407
+ with _manifest_file_lock(self.migration_dir):
408
+ with self._lock:
409
+ manifest = self._load_manifest_unlocked()
410
+ if phase not in manifest.phases:
411
+ manifest.phases[phase] = {"status": "pending", "started_at": "", "completed_at": ""}
412
+ current_status = manifest.phases[phase].get("status", "pending")
413
+ if current_status == "in_progress":
414
+ return # Already started, idempotent
415
+ manifest.phases[phase]["status"] = "in_progress"
416
+ manifest.phases[phase]["started_at"] = datetime.now(timezone.utc).isoformat()
417
+ manifest.phases[phase]["completed_at"] = ""
418
+ self._save_manifest_unlocked(manifest)
339
419
 
340
420
  def _check_phase_gate_unlocked(self, from_phase: str, to_phase: str) -> tuple[bool, str]:
341
421
  """Validate phase transition (caller must hold self._lock or ensure safety).
@@ -434,37 +514,42 @@ class MigrationPipeline:
434
514
  phase_idx = PHASE_ORDER.index(phase)
435
515
  next_phase = PHASE_ORDER[phase_idx + 1] if phase_idx + 1 < len(PHASE_ORDER) else None
436
516
 
437
- with self._lock:
438
- # Enforce phase gate if there is a next phase (inside lock for consistency)
439
- if next_phase is not None:
440
- allowed, reason = self._check_phase_gate_unlocked(phase, next_phase)
441
- if not allowed:
442
- raise RuntimeError(f"Phase gate failed: {reason}")
443
-
444
- manifest = self._load_manifest_unlocked()
445
- now = _timestamp_iso()
446
-
447
- # Verify current phase is in_progress before advancing
448
- if phase in manifest.phases:
449
- current_status = manifest.phases[phase].get("status", "pending")
450
- if current_status != "in_progress":
451
- raise RuntimeError(
452
- f"Cannot advance phase '{phase}': status is '{current_status}', expected 'in_progress'"
453
- )
454
-
455
- # Mark current phase completed
456
- if phase in manifest.phases:
457
- manifest.phases[phase]["status"] = "completed"
458
- manifest.phases[phase]["completed_at"] = now
517
+ # Outermost RMW: file lock makes the gate-check + status-check +
518
+ # write a single critical section across processes/threads, so two
519
+ # concurrent advances cannot both read in_progress and both write.
520
+ # Calls _unlocked internals only (no flock-wrapped re-entry).
521
+ with _manifest_file_lock(self.migration_dir):
522
+ with self._lock:
523
+ # Enforce phase gate if there is a next phase (inside lock for consistency)
524
+ if next_phase is not None:
525
+ allowed, reason = self._check_phase_gate_unlocked(phase, next_phase)
526
+ if not allowed:
527
+ raise RuntimeError(f"Phase gate failed: {reason}")
459
528
 
460
- # Start next phase if there is one
461
- if next_phase is not None:
462
- if next_phase not in manifest.phases:
463
- manifest.phases[next_phase] = {"status": "pending", "started_at": "", "completed_at": ""}
464
- manifest.phases[next_phase]["status"] = "in_progress"
465
- manifest.phases[next_phase]["started_at"] = now
529
+ manifest = self._load_manifest_unlocked()
530
+ now = _timestamp_iso()
531
+
532
+ # Verify current phase is in_progress before advancing
533
+ if phase in manifest.phases:
534
+ current_status = manifest.phases[phase].get("status", "pending")
535
+ if current_status != "in_progress":
536
+ raise RuntimeError(
537
+ f"Cannot advance phase '{phase}': status is '{current_status}', expected 'in_progress'"
538
+ )
539
+
540
+ # Mark current phase completed
541
+ if phase in manifest.phases:
542
+ manifest.phases[phase]["status"] = "completed"
543
+ manifest.phases[phase]["completed_at"] = now
544
+
545
+ # Start next phase if there is one
546
+ if next_phase is not None:
547
+ if next_phase not in manifest.phases:
548
+ manifest.phases[next_phase] = {"status": "pending", "started_at": "", "completed_at": ""}
549
+ manifest.phases[next_phase]["status"] = "in_progress"
550
+ manifest.phases[next_phase]["started_at"] = now
466
551
 
467
- self._save_manifest_unlocked(manifest)
552
+ self._save_manifest_unlocked(manifest)
468
553
 
469
554
  result = PhaseResult(
470
555
  phase=phase,
@@ -598,12 +683,15 @@ class MigrationPipeline:
598
683
  logger.error("Failed to create checkpoint tag %s: %s", tag_name, exc.stderr)
599
684
  raise RuntimeError(f"Git tag creation failed: {exc.stderr}") from exc
600
685
 
601
- # Record in manifest (hold lock for entire read-modify-write)
686
+ # Record in manifest (hold file lock + instance lock for the entire
687
+ # read-modify-write so a concurrent advance/start_phase/checkpoint in
688
+ # another process or thread cannot clobber the appended checkpoint).
602
689
  try:
603
- with self._lock:
604
- manifest = self._load_manifest_unlocked()
605
- manifest.checkpoints.append(tag_name)
606
- self._save_manifest_unlocked(manifest)
690
+ with _manifest_file_lock(self.migration_dir):
691
+ with self._lock:
692
+ manifest = self._load_manifest_unlocked()
693
+ manifest.checkpoints.append(tag_name)
694
+ self._save_manifest_unlocked(manifest)
607
695
  except Exception:
608
696
  # Bug 9: rollback git tag if manifest save fails
609
697
  logger.error("Manifest save failed after git tag creation; deleting tag %s", tag_name)
@@ -855,47 +943,52 @@ class MigrationPipeline:
855
943
  Each entry records what happened so the next agent can orient quickly.
856
944
  """
857
945
  progress_path = Path(self.migration_dir) / "progress.md"
858
- manifest = self.load_manifest()
859
-
860
- # Determine current phase
861
- current_phase = "pending"
862
- for phase in PHASE_ORDER:
863
- status = manifest.phases.get(phase, {}).get("status", "pending")
864
- if status == "in_progress":
865
- current_phase = phase
866
- break
867
- elif status == "completed":
868
- current_phase = phase
946
+ # Outermost RMW on progress.md: serialize the read-append-write across
947
+ # processes/threads so concurrent agent sessions cannot lose each
948
+ # other's entries. load_manifest() inside takes only self._lock (a read,
949
+ # no file lock), so this does not re-enter the file lock (no deadlock).
950
+ with _manifest_file_lock(self.migration_dir):
951
+ manifest = self.load_manifest()
952
+
953
+ # Determine current phase
954
+ current_phase = "pending"
955
+ for phase in PHASE_ORDER:
956
+ status = manifest.phases.get(phase, {}).get("status", "pending")
957
+ if status == "in_progress":
958
+ current_phase = phase
959
+ break
960
+ elif status == "completed":
961
+ current_phase = phase
869
962
 
870
- entry = f"""
963
+ entry = f"""
871
964
  ## Session: {_timestamp_iso()}
872
965
  Agent: {agent_id}
873
966
  Phase: {current_phase}
874
967
  Summary: {summary}
875
968
  """
876
- if details:
877
- if details.get("steps_completed"):
878
- entry += f"Steps completed: {details['steps_completed']}\n"
879
- if details.get("tests_passing"):
880
- entry += f"Tests: {details['tests_passing']}\n"
881
- if details.get("notes"):
882
- entry += f"Notes: {details['notes']}\n"
883
-
884
- if progress_path.exists():
885
- existing = progress_path.read_text(encoding="utf-8")
886
- # Keep last 50 entries max, compact older ones
887
- entries = existing.split("\n## Session:")
888
- if len(entries) > 50:
889
- header = entries[0]
890
- recent = entries[-50:]
891
- content = header + "\n## Session:" + "\n## Session:".join(recent)
969
+ if details:
970
+ if details.get("steps_completed"):
971
+ entry += f"Steps completed: {details['steps_completed']}\n"
972
+ if details.get("tests_passing"):
973
+ entry += f"Tests: {details['tests_passing']}\n"
974
+ if details.get("notes"):
975
+ entry += f"Notes: {details['notes']}\n"
976
+
977
+ if progress_path.exists():
978
+ existing = progress_path.read_text(encoding="utf-8")
979
+ # Keep last 50 entries max, compact older ones
980
+ entries = existing.split("\n## Session:")
981
+ if len(entries) > 50:
982
+ header = entries[0]
983
+ recent = entries[-50:]
984
+ content = header + "\n## Session:" + "\n## Session:".join(recent)
985
+ else:
986
+ content = existing
987
+ content += entry
892
988
  else:
893
- content = existing
894
- content += entry
895
- else:
896
- content = f"# Migration Progress\n# Auto-updated after every agent session\n{entry}"
989
+ content = f"# Migration Progress\n# Auto-updated after every agent session\n{entry}"
897
990
 
898
- _atomic_write(progress_path, content)
991
+ _atomic_write(progress_path, content)
899
992
  logger.info("Updated progress.md for agent %s", agent_id)
900
993
 
901
994
  # -- Plan summary --------------------------------------------------------
@@ -8776,6 +8776,12 @@ def advance_migration(migration_id: str, request_body: dict):
8776
8776
  try:
8777
8777
  result = pipeline.advance_phase(from_phase)
8778
8778
  return asdict(result) if hasattr(result, '__dataclass_fields__') else result
8779
+ except (ValueError, RuntimeError) as exc:
8780
+ # advance_phase raises RuntimeError on a failed phase gate or when the
8781
+ # phase is not in_progress (e.g. already advanced). These are client
8782
+ # contract errors, not server faults: map to 409 like the sibling
8783
+ # start_migration_phase endpoint does.
8784
+ raise HTTPException(status_code=409, detail=str(exc))
8779
8785
  except Exception as exc:
8780
8786
  logger.error("Migration advance error: %s", exc)
8781
8787
  raise HTTPException(status_code=500, detail="Failed to advance migration")
@@ -0,0 +1,144 @@
1
+ # FEAT-PRDREUSE-DOCKER-PLAN
2
+
3
+ Implementation plan for the PRD-reuse + Docker feature batch. Anchored to verified file:line references.
4
+ Designed for 4-5 parallel dev agents with zero file overlap. Founder scope locks:
5
+ - Docker dashboard: publish on a HOST PORT and AUTO-OPEN (like local loki start); show BOTH local and docker runs.
6
+ - Image cleanup: after pull, prune ONLY dangling/old asklokesh/loki-mode images NOT in use by a running container.
7
+ - loki stop: ALSO stops/removes the loki-mode docker container for this project (tracked via .loki docker state).
8
+
9
+ ## Context: wave-4 uncommitted edits (build on top, do NOT revert)
10
+ Uncommitted W4 work in: autonomy/run.sh, autonomy/sandbox.sh, autonomy/prd-checklist.sh,
11
+ autonomy/spec-interrogation.sh, dashboard/migration_engine.py, dashboard/server.py,
12
+ loki-ts/src/council/voter_agents.ts, loki-ts/src/runner/build_prompt.ts, loki-ts/src/runner/council.ts.
13
+ Relevant: build_prompt.ts buildPromptForRunner now passes ctx.prdPath (a PATH). PRD-reuse Bun work depends
14
+ on this and must NOT touch build_prompt.ts.
15
+
16
+ ## Existing scaffolding (EXTEND, not duplicate)
17
+ Bash route already implements generated-PRD reuse for the no-file case:
18
+ - .loki/generated-prd.md is the canonical generated-PRD path, byte-locked for parity (run.sh ANALYSIS_INSTRUCTION,
19
+ build_prompt.ts:208, resume lines).
20
+ - decide_generated_prd_action() (run.sh:4892) returns reuse | update | generate | user_owned.
21
+ - persist_prd_signature_if_present() (run.sh:4983) writes .loki/state/prd-signature.json.
22
+ - run_autonomous() (run.sh:~13824) auto-detect block handles the empty prd_path case only.
23
+ - Bun route: runAutonomous (autonomous.ts:230) -> makeContext (autonomous.ts:641) sets ctx.prdPath = opts.prdPath. No reuse/persist.
24
+
25
+ Docker route already has host-aggregating dashboard:
26
+ - cmd_docker (autonomy/loki:~28748) -> docker-run.sh helpers.
27
+ - _loki_docker_register_host running/stopped brackets the blocking run; registers $(pwd) in ~/.loki/dashboard/projects.json.
28
+ - Dashboard /api/projects (server.py:~2623) aggregates local + docker; pid=None reads bind-mounted .loki/session.json.
29
+ - Dashboard Stop for docker works via STOP file (server.py:~2970).
30
+ - cmd_dashboard_start (autonomy/loki:~4038) + cmd_dashboard_open (~3982) are standalone host dashboard entries.
31
+
32
+ ## Design decision locks
33
+ LOCK 1: Canonical PRD path = .loki/generated-prd.md (do NOT invent .loki/prd/current.md). Persist user content INTO it;
34
+ record provenance in .loki/state/prd-signature.json via a new `source` field.
35
+ LOCK 2: User PRDs resolve to reuse/user_owned, NEVER update. Stamp source:"user" at persist; short-circuit
36
+ decide_generated_prd_action to user_owned when source=user (except --fresh-prd). Signature-diff `update` stays scoped to source=generated.
37
+ This answers "update only if needed": user PRD = always as-is; generated PRD = existing signature logic.
38
+ LOCK 3: DOCKER-DASH uses the HOST dashboard, not the published container port. Container mounts only workspace; an
39
+ in-container dashboard sees ONE project and cannot satisfy "shows BOTH." Host dashboard already aggregates both.
40
+ Local loki start runs the dashboard on host port 57374 + auto-opens (run.sh:~10081-10099), so host dashboard IS "like local loki start."
41
+ LOCK 4: Ownership deconflicted by FILE with an interface contract (matrix below).
42
+
43
+ ## FEATURE 1 - FEAT-PRD-REUSE
44
+ Semantics (both routes identical):
45
+ | Run | File arg? | Persisted PRD? | Behavior |
46
+ | 1st | yes | no | use file; persist to .loki/generated-prd.md, source=user |
47
+ | 1st | no | no | codebase-analysis generates .loki/generated-prd.md, source=generated (existing) |
48
+ | 2nd+ | no | yes | continue from persisted PRD (reuse; generated may update on drift, user never) |
49
+ | 2nd+ | yes | yes | new file overwrites .loki/generated-prd.md, source reset to user (brownfield) |
50
+
51
+ Bash (Agent C, autonomy/run.sh ONLY):
52
+ - New persistence branch for explicit user PRD: when prd_path non-empty and not already .loki/generated-prd.md:
53
+ mkdir -p .loki .loki/state; atomic copy prd_path -> .loki/generated-prd.md; write prd-signature.json with
54
+ source:"user", prd_sha (reuse _loki_prd_file_hash run.sh:4869), generated_at, signature (compute_codebase_signature),
55
+ origin_path; repoint prd_path=".loki/generated-prd.md"; export GENERATED_PRD_ACTION="user_owned".
56
+ - Extend decide_generated_prd_action (4892): read source from prd-signature.json. Precedence:
57
+ --fresh-prd/LOKI_PRD_REGEN -> generate > source=user -> user_owned > existing generated logic.
58
+
59
+ Bun (Agent D, loki-ts/src/runner/autonomous.ts + NEW loki-ts/src/runner/prd_reuse.ts; NOT build_prompt.ts):
60
+ - New helper resolvePrdForRun(opts) called at top of runAutonomous (autonomous.ts:233 before makeContext). Mirrors the two bash steps above (user-PRD persistence, then decide_generated_prd_action):
61
+ user file -> copy + persist source:user -> return generated path; empty + generated exists -> decideGeneratedPrdAction
62
+ TS port -> return generated path for reuse/update/user_owned, undefined for generate; empty + none -> undefined.
63
+ Set resolved path onto opts.prdPath before makeContext (autonomous.ts:655). No build_prompt.ts edit.
64
+ - Parity: TS prd-signature.json schema + decideGeneratedPrdAction must match bash exactly.
65
+
66
+ .loki state additions: .loki/generated-prd.md also holds persisted user PRD. prd-signature.json adds
67
+ source ("user"|"generated"), origin_path (when source=user).
68
+
69
+ AC: AC1 user file persists byte-equal + source:user. AC2 no-arg rerun reuses, no codebase analysis. AC3 new file
70
+ overwrites, source is reset to user (even if the persisted PRD was generated), origin_path updates. AC4 no-arg first run still generates source:generated. AC5
71
+ --fresh-prd re-analyzes -> source:generated. AC6 bash/Bun identical source+action (parity test). AC7 no-arg rerun
72
+ after user PRD never enters GENERATED_PRD_UPDATE_MODE even if codebase changed.
73
+
74
+ ## FEATURE 2 - FEAT-DOCKER-DASH (host dashboard auto-open)
75
+ Architecture (LOCK 3): host dashboard, auto-opened.
76
+ Agent A (autonomy/loki, inside cmd_docker, start path): between _loki_docker_register_host running and the blocking run:
77
+ 1. Start/reuse host dashboard via cmd_dashboard_start (idempotent). Port: DASHBOARD_DEFAULT_PORT 57374 with fallback
78
+ (Agent B loki_docker_pick_host_port). 2. Auto-open gated like run.sh:~10088 ([ -t 1 ] && not background && LOKI_NO_AUTO_OPEN!=1)
79
+ via cmd_dashboard_open. 3. Container stays dashboard-OFF (docker-run.sh LOKI_DASHBOARD=false), no container port publish.
80
+ Agent B (autonomy/docker-run.sh): loki_docker_pick_host_port - probe 57374, increment to free port if bound, echo chosen port.
81
+ server.py (Agent E only IF a gap appears): /api/projects already aggregates docker, Stop already handles docker. Default: no change.
82
+
83
+ AC: AC8 loki docker start in TTY starts host dashboard + opens browser. AC9 dashboard lists docker run alongside local.
84
+ AC10 LOKI_NO_AUTO_OPEN=1/non-TTY/--bg no browser. AC11 second docker start reuses dashboard, both runs listed.
85
+
86
+ ## FEATURE 3 - FEAT-DOCKER-PRUNE (scoped image cleanup after pull)
87
+ There is no explicit docker pull today (docker run auto-pulls only if the image is absent), so PRUNE needs an explicit pull added.
88
+ Agent B (docker-run.sh helpers) + Agent A (call site in cmd_docker): loki_docker_pull_and_prune, called from cmd_docker before run (start path):
89
+ 1. docker pull $LOKI_DOCKER_IMAGE (default asklokesh/loki-mode:latest), capture image ID.
90
+ 2. In-use set: docker ps --format '{{.Image}} {{.ImageID}}'.
91
+ 3. Enumerate ONLY asklokesh/loki-mode: docker images --filter 'reference=asklokesh/loki-mode' --format '{{.ID}} ...'
92
+ + dangling: --filter 'reference=asklokesh/loki-mode' --filter 'dangling=true' -q.
93
+ 4. docker rmi each ID NOT the just-pulled :latest AND NOT in-use (best-effort).
94
+ 5. NEVER docker image prune -a. Scope strictly reference=asklokesh/loki-mode.
95
+ 6. Honest output: reclaimed count/bytes or "nothing to reclaim."
96
+ Gate: LOKI_DOCKER_PRUNE=${LOKI_DOCKER_PRUNE:-1} (default on, =0 opt-out; =0 also skips explicit pull).
97
+
98
+ AC: AC12 old asklokesh/loki-mode IDs removed, :latest remains. AC13 in-use image never removed. AC14 non-loki-mode
99
+ image never touched (decoy survives). AC15 LOKI_DOCKER_PRUNE=0 skips; honest output.
100
+
101
+ ## FEATURE 4 - FIX-DOCKER-STOP (loki stop reaps the container)
102
+ Repro: container loki-<sha12> Up but loki stop says "No active session." cmd_stop (autonomy/loki:~2242) only checks .loki.
103
+ Container name deterministic: loki-<sha12 of workspace> (docker-run.sh:~204-214).
104
+ Agent A (autonomy/loki write+read) + Agent B (docker-run.sh helpers):
105
+ Write: before blocking run write .loki/docker/run.json {container, image, project_dir, started_at}; clear after.
106
+ Helpers loki_docker_write_runstate / loki_docker_clear_runstate.
107
+ Read/reap (cmd_stop folder-scoped default, before "No active session"):
108
+ 1. Read .loki/docker/run.json -> container; fallback recompute loki-<sha12 of $(pwd)> (loki_docker_container_name).
109
+ 2. If docker ps -q -f name=^${container}$ non-empty -> docker stop then docker rm (best-effort; --rm may auto-remove).
110
+ 3. Remove run.json. 4. Report reap, no "No active session" when docker run Up.
111
+ 5. loki stop --all: also docker ps -q --filter ancestor=asklokesh/loki-mode -> stop/rm all (machine-wide, parity with --all PID).
112
+ Folder-scoped default stays folder-scoped. Preserves v7.7.30-34 stop-scoping.
113
+
114
+ .loki state additions: .loki/docker/run.json (NEW) {container, image, project_dir, started_at}.
115
+
116
+ AC: AC16 docker start then stop (same folder) stops+removes container, names it, no "No active session". AC17 run.json
117
+ deleted -> still reaps via recomputed name. AC18 stop in folder X does not stop docker run in folder Y. AC19 stop --all
118
+ stops every loki-mode container. AC20 no docker run + no local session -> existing "No active session" (no regression).
119
+
120
+ ## FILE-OWNERSHIP MATRIX (zero overlap)
121
+ - Agent A (Docker orchestration): autonomy/loki ONLY. cmd_docker (dashboard start/open F2, pull+prune F3, write/clear
122
+ run.json F4), cmd_stop (docker reconcile+reap F4). Calls Agent-B helpers by name; uses cmd_dashboard_start/open.
123
+ - Agent B (Docker helpers): autonomy/docker-run.sh ONLY. loki_docker_pick_host_port, loki_docker_pull_and_prune,
124
+ loki_docker_write_runstate, loki_docker_clear_runstate, loki_docker_container_name. No call-site edits in autonomy/loki.
125
+ - Agent C (PRD-reuse bash): autonomy/run.sh ONLY. User-PRD persistence in run_autonomous, extend decide_generated_prd_action.
126
+ - Agent D (PRD-reuse Bun): loki-ts/src/runner/autonomous.ts + NEW loki-ts/src/runner/prd_reuse.ts. MUST NOT edit build_prompt.ts.
127
+ - Agent E (Tests + server.py iff needed): tests/** new files, loki-ts/tests/** new files; server.py only if a real DASH gap appears.
128
+ Do not edit existing W4-touched test files.
129
+
130
+ A<->B: disjoint files, share function-boundary contract. C<->A: PRD-reuse-bash entirely in run_autonomous; cmd_start
131
+ already passes file arg to run.sh. D<->W4: D sets ctx.prdPath upstream; build_prompt.ts read-only for D.
132
+
133
+ Sequencing: B, C, D independent -> parallel immediately. A depends on B helper signatures (contract fixed up-front, A can
134
+ start against signatures). E writes tests against contracts in parallel, finalizes after A-D land.
135
+
136
+ ## RISKS
137
+ R1 (DASH architecture): host dashboard satisfies host-port + auto-open + shows-both; container-port publish breaks shows-both. Host chosen (LOCK 3).
138
+ R2 (host-port conflict): cmd_dashboard_start idempotent; loki_docker_pick_host_port fallback. Container-port publish was already disabled for 57374 collision.
139
+ R3 (PRD update-only-if-needed): LOCK 2 source field. source=user always reuse/user_owned; source=generated existing signature logic. Hand-edited persisted PRD -> still user_owned.
140
+ R4 (prune over-aggression): triple-scoped (reference filter + exclude :latest ID + exclude in-use). AC14 decoy survives. rmi best-effort.
141
+ R5 (pull latency): pull-always v1, gated by LOKI_DOCKER_PRUNE. OQ: pull-always vs only-on-digest-change; recommend pull-always v1.
142
+ R6 (--rm + stop): docker stop triggers auto-removal; docker rm best-effort (already-gone = success).
143
+ OQ1: run.json write BEFORE blocking run, clear AFTER. Deterministic-name fallback covers the window.
144
+ OQ2: confirm Bun runAutonomous reached with opts.prdPath set from same arg as bash; parity test guards decision logic regardless.