loki-mode 7.62.0 → 7.63.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/docker-run.sh +215 -13
- package/autonomy/loki +129 -4
- package/autonomy/prd-checklist.sh +33 -4
- package/autonomy/run.sh +112 -1
- package/autonomy/sandbox.sh +46 -7
- package/autonomy/spec-interrogation.sh +263 -26
- package/dashboard/__init__.py +1 -1
- package/dashboard/migration_engine.py +177 -84
- package/dashboard/server.py +6 -0
- package/docs/FEAT-PRDREUSE-DOCKER-PLAN.md +144 -0
- package/loki-ts/dist/loki.js +140 -139
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
- package/plugins/loki-mode/.claude-plugin/plugin.json +1 -1
|
@@ -8,11 +8,13 @@ incremental codebase migrations with checkpoint/rollback support.
|
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
+
import contextlib
|
|
11
12
|
import dataclasses
|
|
12
13
|
import json
|
|
13
14
|
import logging
|
|
14
15
|
import os
|
|
15
16
|
import re
|
|
17
|
+
import secrets
|
|
16
18
|
import subprocess
|
|
17
19
|
import tempfile
|
|
18
20
|
import threading
|
|
@@ -21,6 +23,13 @@ from datetime import datetime, timezone
|
|
|
21
23
|
from pathlib import Path
|
|
22
24
|
from typing import Any, Optional
|
|
23
25
|
|
|
26
|
+
try:
|
|
27
|
+
import fcntl # POSIX only (macOS + Linux). Absent on Windows.
|
|
28
|
+
_HAS_FCNTL = True
|
|
29
|
+
except ImportError: # pragma: no cover - non-POSIX fallback
|
|
30
|
+
fcntl = None # type: ignore[assignment]
|
|
31
|
+
_HAS_FCNTL = False
|
|
32
|
+
|
|
24
33
|
logger = logging.getLogger("loki-migration")
|
|
25
34
|
|
|
26
35
|
LOKI_DATA_DIR = os.environ.get("LOKI_DATA_DIR", os.path.expanduser("~/.loki"))
|
|
@@ -172,6 +181,45 @@ def _timestamp_iso() -> str:
|
|
|
172
181
|
return datetime.now(timezone.utc).isoformat()
|
|
173
182
|
|
|
174
183
|
|
|
184
|
+
@contextlib.contextmanager
|
|
185
|
+
def _manifest_file_lock(migration_dir: Path):
|
|
186
|
+
"""Cross-process advisory lock around a migration's manifest read-modify-write.
|
|
187
|
+
|
|
188
|
+
Uses an OS file lock (fcntl.flock LOCK_EX) on a dedicated lockfile inside
|
|
189
|
+
the migration directory. A fresh file descriptor is opened on every
|
|
190
|
+
acquisition: flock keys on the open file description, so distinct fds let
|
|
191
|
+
the kernel serialize even threads in the same process (the FastAPI sync
|
|
192
|
+
threadpool deployment) AND separate processes (the server vs. the
|
|
193
|
+
`loki migrate` CLI), which a per-instance threading.Lock cannot do.
|
|
194
|
+
|
|
195
|
+
Only the OUTERMOST read-modify-write entry point should take this lock.
|
|
196
|
+
Nesting two flock-wrapped calls in the same thread would self-deadlock
|
|
197
|
+
(two fds, the second LOCK_EX blocks on the first), so locked writers must
|
|
198
|
+
call the _unlocked internals and never re-enter a flock-wrapped method.
|
|
199
|
+
|
|
200
|
+
On non-POSIX platforms (no fcntl) this degrades to a no-op: the caller's
|
|
201
|
+
in-process threading.Lock still serializes threads in the same process,
|
|
202
|
+
but cross-process exclusion is NOT available. This is an accepted residual
|
|
203
|
+
on Windows only.
|
|
204
|
+
"""
|
|
205
|
+
migration_dir.mkdir(parents=True, exist_ok=True)
|
|
206
|
+
lock_path = migration_dir / "manifest.json.lock"
|
|
207
|
+
if not _HAS_FCNTL:
|
|
208
|
+
# Graceful degrade: no OS lock available. In-process callers still rely
|
|
209
|
+
# on their threading.Lock; cross-process safety is unavailable here.
|
|
210
|
+
yield
|
|
211
|
+
return
|
|
212
|
+
fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o644)
|
|
213
|
+
try:
|
|
214
|
+
fcntl.flock(fd, fcntl.LOCK_EX)
|
|
215
|
+
try:
|
|
216
|
+
yield
|
|
217
|
+
finally:
|
|
218
|
+
fcntl.flock(fd, fcntl.LOCK_UN)
|
|
219
|
+
finally:
|
|
220
|
+
os.close(fd)
|
|
221
|
+
|
|
222
|
+
|
|
175
223
|
# ---------------------------------------------------------------------------
|
|
176
224
|
# MigrationPipeline
|
|
177
225
|
# ---------------------------------------------------------------------------
|
|
@@ -181,7 +229,17 @@ class MigrationPipeline:
|
|
|
181
229
|
"""Manages the lifecycle of a codebase migration.
|
|
182
230
|
|
|
183
231
|
All state is persisted under ~/.loki/migrations/<migration_id>/.
|
|
184
|
-
|
|
232
|
+
|
|
233
|
+
Concurrency: manifest read-modify-write operations (start_phase,
|
|
234
|
+
advance_phase, save_manifest, update_progress, create_checkpoint) are
|
|
235
|
+
serialized across BOTH threads and processes by an OS file lock
|
|
236
|
+
(see _manifest_file_lock) keyed on the migration directory. This holds
|
|
237
|
+
for the FastAPI sync-endpoint threadpool (each request builds a fresh
|
|
238
|
+
pipeline via load(), so per-instance threading.Lock alone would not
|
|
239
|
+
serialize them) and for the separate `loki migrate` CLI process running
|
|
240
|
+
concurrently with the server. Pure reads use the per-instance lock only.
|
|
241
|
+
On non-POSIX platforms (no fcntl) the file lock degrades to a no-op and
|
|
242
|
+
only in-process serialization remains (see _manifest_file_lock).
|
|
185
243
|
"""
|
|
186
244
|
|
|
187
245
|
def __init__(
|
|
@@ -207,7 +265,18 @@ class MigrationPipeline:
|
|
|
207
265
|
(self.migration_dir / "checkpoints").mkdir(exist_ok=True)
|
|
208
266
|
|
|
209
267
|
def _generate_migration_id(self) -> str:
|
|
210
|
-
"""Generate a unique migration ID like mig_20260223_143052_<dirname
|
|
268
|
+
"""Generate a unique migration ID like mig_20260223_143052_<dirname>-a1b2c3.
|
|
269
|
+
|
|
270
|
+
The trailing 6-hex-char random suffix prevents collisions when two
|
|
271
|
+
migrations of the same path-basename start in the same second (the
|
|
272
|
+
date_str/time_str are second-resolution). Without it, the two would
|
|
273
|
+
derive the same id and the second create_manifest would overwrite the
|
|
274
|
+
first (the server rate-limiter throttles same-second server starts, but
|
|
275
|
+
the CLI bypasses it). The suffix is appended WITHIN the trailing name
|
|
276
|
+
segment (hyphen-joined), so the load() validation regex
|
|
277
|
+
^mig_\\d{8}_\\d{6}_[a-zA-Z0-9_-]+$ still matches without modification
|
|
278
|
+
(its trailing group already permits letters, digits, hyphens).
|
|
279
|
+
"""
|
|
211
280
|
dirname = os.path.basename(self.codebase_path)
|
|
212
281
|
# Sanitize dirname to match validation regex
|
|
213
282
|
safe_dirname = re.sub(r'[^a-zA-Z0-9_-]', '_', dirname)
|
|
@@ -216,7 +285,8 @@ class MigrationPipeline:
|
|
|
216
285
|
now = datetime.now(timezone.utc)
|
|
217
286
|
date_str = now.strftime("%Y%m%d")
|
|
218
287
|
time_str = now.strftime("%H%M%S")
|
|
219
|
-
|
|
288
|
+
suffix = secrets.token_hex(3) # 6 hex chars, within [a-zA-Z0-9_-]+
|
|
289
|
+
return f"mig_{date_str}_{time_str}_{safe_dirname}-{suffix}"
|
|
220
290
|
|
|
221
291
|
@classmethod
|
|
222
292
|
def load(cls, migration_id: str) -> 'MigrationPipeline':
|
|
@@ -303,9 +373,15 @@ class MigrationPipeline:
|
|
|
303
373
|
return self._load_manifest_unlocked()
|
|
304
374
|
|
|
305
375
|
def save_manifest(self, manifest: MigrationManifest) -> None:
|
|
306
|
-
"""Persist manifest to disk atomically.
|
|
307
|
-
|
|
308
|
-
|
|
376
|
+
"""Persist manifest to disk atomically.
|
|
377
|
+
|
|
378
|
+
Outermost RMW writer: takes the cross-process file lock so a concurrent
|
|
379
|
+
save in another thread/process cannot interleave. (create_manifest calls
|
|
380
|
+
through here, so it is covered without taking the lock itself.)
|
|
381
|
+
"""
|
|
382
|
+
with _manifest_file_lock(self.migration_dir):
|
|
383
|
+
with self._lock:
|
|
384
|
+
self._save_manifest_unlocked(manifest)
|
|
309
385
|
|
|
310
386
|
# -- Phase gate logic ----------------------------------------------------
|
|
311
387
|
|
|
@@ -325,17 +401,21 @@ class MigrationPipeline:
|
|
|
325
401
|
"""
|
|
326
402
|
if phase not in PHASE_ORDER:
|
|
327
403
|
raise ValueError(f"Unknown phase: {phase}")
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
404
|
+
# Outermost RMW: file lock serializes across processes/threads; the
|
|
405
|
+
# inner threading.Lock guards instance-local state. Uses _unlocked
|
|
406
|
+
# internals only, so no flock-wrapped method is re-entered (no deadlock).
|
|
407
|
+
with _manifest_file_lock(self.migration_dir):
|
|
408
|
+
with self._lock:
|
|
409
|
+
manifest = self._load_manifest_unlocked()
|
|
410
|
+
if phase not in manifest.phases:
|
|
411
|
+
manifest.phases[phase] = {"status": "pending", "started_at": "", "completed_at": ""}
|
|
412
|
+
current_status = manifest.phases[phase].get("status", "pending")
|
|
413
|
+
if current_status == "in_progress":
|
|
414
|
+
return # Already started, idempotent
|
|
415
|
+
manifest.phases[phase]["status"] = "in_progress"
|
|
416
|
+
manifest.phases[phase]["started_at"] = datetime.now(timezone.utc).isoformat()
|
|
417
|
+
manifest.phases[phase]["completed_at"] = ""
|
|
418
|
+
self._save_manifest_unlocked(manifest)
|
|
339
419
|
|
|
340
420
|
def _check_phase_gate_unlocked(self, from_phase: str, to_phase: str) -> tuple[bool, str]:
|
|
341
421
|
"""Validate phase transition (caller must hold self._lock or ensure safety).
|
|
@@ -434,37 +514,42 @@ class MigrationPipeline:
|
|
|
434
514
|
phase_idx = PHASE_ORDER.index(phase)
|
|
435
515
|
next_phase = PHASE_ORDER[phase_idx + 1] if phase_idx + 1 < len(PHASE_ORDER) else None
|
|
436
516
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
if phase in manifest.phases:
|
|
449
|
-
current_status = manifest.phases[phase].get("status", "pending")
|
|
450
|
-
if current_status != "in_progress":
|
|
451
|
-
raise RuntimeError(
|
|
452
|
-
f"Cannot advance phase '{phase}': status is '{current_status}', expected 'in_progress'"
|
|
453
|
-
)
|
|
454
|
-
|
|
455
|
-
# Mark current phase completed
|
|
456
|
-
if phase in manifest.phases:
|
|
457
|
-
manifest.phases[phase]["status"] = "completed"
|
|
458
|
-
manifest.phases[phase]["completed_at"] = now
|
|
517
|
+
# Outermost RMW: file lock makes the gate-check + status-check +
|
|
518
|
+
# write a single critical section across processes/threads, so two
|
|
519
|
+
# concurrent advances cannot both read in_progress and both write.
|
|
520
|
+
# Calls _unlocked internals only (no flock-wrapped re-entry).
|
|
521
|
+
with _manifest_file_lock(self.migration_dir):
|
|
522
|
+
with self._lock:
|
|
523
|
+
# Enforce phase gate if there is a next phase (inside lock for consistency)
|
|
524
|
+
if next_phase is not None:
|
|
525
|
+
allowed, reason = self._check_phase_gate_unlocked(phase, next_phase)
|
|
526
|
+
if not allowed:
|
|
527
|
+
raise RuntimeError(f"Phase gate failed: {reason}")
|
|
459
528
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
manifest.phases
|
|
465
|
-
|
|
529
|
+
manifest = self._load_manifest_unlocked()
|
|
530
|
+
now = _timestamp_iso()
|
|
531
|
+
|
|
532
|
+
# Verify current phase is in_progress before advancing
|
|
533
|
+
if phase in manifest.phases:
|
|
534
|
+
current_status = manifest.phases[phase].get("status", "pending")
|
|
535
|
+
if current_status != "in_progress":
|
|
536
|
+
raise RuntimeError(
|
|
537
|
+
f"Cannot advance phase '{phase}': status is '{current_status}', expected 'in_progress'"
|
|
538
|
+
)
|
|
539
|
+
|
|
540
|
+
# Mark current phase completed
|
|
541
|
+
if phase in manifest.phases:
|
|
542
|
+
manifest.phases[phase]["status"] = "completed"
|
|
543
|
+
manifest.phases[phase]["completed_at"] = now
|
|
544
|
+
|
|
545
|
+
# Start next phase if there is one
|
|
546
|
+
if next_phase is not None:
|
|
547
|
+
if next_phase not in manifest.phases:
|
|
548
|
+
manifest.phases[next_phase] = {"status": "pending", "started_at": "", "completed_at": ""}
|
|
549
|
+
manifest.phases[next_phase]["status"] = "in_progress"
|
|
550
|
+
manifest.phases[next_phase]["started_at"] = now
|
|
466
551
|
|
|
467
|
-
|
|
552
|
+
self._save_manifest_unlocked(manifest)
|
|
468
553
|
|
|
469
554
|
result = PhaseResult(
|
|
470
555
|
phase=phase,
|
|
@@ -598,12 +683,15 @@ class MigrationPipeline:
|
|
|
598
683
|
logger.error("Failed to create checkpoint tag %s: %s", tag_name, exc.stderr)
|
|
599
684
|
raise RuntimeError(f"Git tag creation failed: {exc.stderr}") from exc
|
|
600
685
|
|
|
601
|
-
# Record in manifest (hold lock for entire
|
|
686
|
+
# Record in manifest (hold file lock + instance lock for the entire
|
|
687
|
+
# read-modify-write so a concurrent advance/start_phase/checkpoint in
|
|
688
|
+
# another process or thread cannot clobber the appended checkpoint).
|
|
602
689
|
try:
|
|
603
|
-
with self.
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
690
|
+
with _manifest_file_lock(self.migration_dir):
|
|
691
|
+
with self._lock:
|
|
692
|
+
manifest = self._load_manifest_unlocked()
|
|
693
|
+
manifest.checkpoints.append(tag_name)
|
|
694
|
+
self._save_manifest_unlocked(manifest)
|
|
607
695
|
except Exception:
|
|
608
696
|
# Bug 9: rollback git tag if manifest save fails
|
|
609
697
|
logger.error("Manifest save failed after git tag creation; deleting tag %s", tag_name)
|
|
@@ -855,47 +943,52 @@ class MigrationPipeline:
|
|
|
855
943
|
Each entry records what happened so the next agent can orient quickly.
|
|
856
944
|
"""
|
|
857
945
|
progress_path = Path(self.migration_dir) / "progress.md"
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
#
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
946
|
+
# Outermost RMW on progress.md: serialize the read-append-write across
|
|
947
|
+
# processes/threads so concurrent agent sessions cannot lose each
|
|
948
|
+
# other's entries. load_manifest() inside takes only self._lock (a read,
|
|
949
|
+
# no file lock), so this does not re-enter the file lock (no deadlock).
|
|
950
|
+
with _manifest_file_lock(self.migration_dir):
|
|
951
|
+
manifest = self.load_manifest()
|
|
952
|
+
|
|
953
|
+
# Determine current phase
|
|
954
|
+
current_phase = "pending"
|
|
955
|
+
for phase in PHASE_ORDER:
|
|
956
|
+
status = manifest.phases.get(phase, {}).get("status", "pending")
|
|
957
|
+
if status == "in_progress":
|
|
958
|
+
current_phase = phase
|
|
959
|
+
break
|
|
960
|
+
elif status == "completed":
|
|
961
|
+
current_phase = phase
|
|
869
962
|
|
|
870
|
-
|
|
963
|
+
entry = f"""
|
|
871
964
|
## Session: {_timestamp_iso()}
|
|
872
965
|
Agent: {agent_id}
|
|
873
966
|
Phase: {current_phase}
|
|
874
967
|
Summary: {summary}
|
|
875
968
|
"""
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
969
|
+
if details:
|
|
970
|
+
if details.get("steps_completed"):
|
|
971
|
+
entry += f"Steps completed: {details['steps_completed']}\n"
|
|
972
|
+
if details.get("tests_passing"):
|
|
973
|
+
entry += f"Tests: {details['tests_passing']}\n"
|
|
974
|
+
if details.get("notes"):
|
|
975
|
+
entry += f"Notes: {details['notes']}\n"
|
|
976
|
+
|
|
977
|
+
if progress_path.exists():
|
|
978
|
+
existing = progress_path.read_text(encoding="utf-8")
|
|
979
|
+
# Keep last 50 entries max, compact older ones
|
|
980
|
+
entries = existing.split("\n## Session:")
|
|
981
|
+
if len(entries) > 50:
|
|
982
|
+
header = entries[0]
|
|
983
|
+
recent = entries[-50:]
|
|
984
|
+
content = header + "\n## Session:" + "\n## Session:".join(recent)
|
|
985
|
+
else:
|
|
986
|
+
content = existing
|
|
987
|
+
content += entry
|
|
892
988
|
else:
|
|
893
|
-
content =
|
|
894
|
-
content += entry
|
|
895
|
-
else:
|
|
896
|
-
content = f"# Migration Progress\n# Auto-updated after every agent session\n{entry}"
|
|
989
|
+
content = f"# Migration Progress\n# Auto-updated after every agent session\n{entry}"
|
|
897
990
|
|
|
898
|
-
|
|
991
|
+
_atomic_write(progress_path, content)
|
|
899
992
|
logger.info("Updated progress.md for agent %s", agent_id)
|
|
900
993
|
|
|
901
994
|
# -- Plan summary --------------------------------------------------------
|
package/dashboard/server.py
CHANGED
|
@@ -8776,6 +8776,12 @@ def advance_migration(migration_id: str, request_body: dict):
|
|
|
8776
8776
|
try:
|
|
8777
8777
|
result = pipeline.advance_phase(from_phase)
|
|
8778
8778
|
return asdict(result) if hasattr(result, '__dataclass_fields__') else result
|
|
8779
|
+
except (ValueError, RuntimeError) as exc:
|
|
8780
|
+
# advance_phase raises RuntimeError on a failed phase gate or when the
|
|
8781
|
+
# phase is not in_progress (e.g. already advanced). These are client
|
|
8782
|
+
# contract errors, not server faults: map to 409 like the sibling
|
|
8783
|
+
# start_migration_phase endpoint does.
|
|
8784
|
+
raise HTTPException(status_code=409, detail=str(exc))
|
|
8779
8785
|
except Exception as exc:
|
|
8780
8786
|
logger.error("Migration advance error: %s", exc)
|
|
8781
8787
|
raise HTTPException(status_code=500, detail="Failed to advance migration")
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# FEAT-PRDREUSE-DOCKER-PLAN
|
|
2
|
+
|
|
3
|
+
Implementation plan for the PRD-reuse + Docker feature batch. Anchored to verified file:line.
|
|
4
|
+
Designed for 4-5 parallel dev agents with zero file overlap. Founder scope locks:
|
|
5
|
+
- Docker dashboard: publish on a HOST PORT and AUTO-OPEN (like local loki start); show BOTH local and docker runs.
|
|
6
|
+
- Image cleanup: after pull, prune ONLY dangling/old asklokesh/loki-mode images NOT in use by a running container.
|
|
7
|
+
- loki stop: ALSO stops/removes the loki-mode docker container for this project (tracked via .loki docker state).
|
|
8
|
+
|
|
9
|
+
## Context: wave-4 uncommitted edits (build on top, do NOT revert)
|
|
10
|
+
Uncommitted W4 work in: autonomy/run.sh, autonomy/sandbox.sh, autonomy/prd-checklist.sh,
|
|
11
|
+
autonomy/spec-interrogation.sh, dashboard/migration_engine.py, dashboard/server.py,
|
|
12
|
+
loki-ts/src/council/voter_agents.ts, loki-ts/src/runner/build_prompt.ts, loki-ts/src/runner/council.ts.
|
|
13
|
+
Relevant: build_prompt.ts buildPromptForRunner now passes ctx.prdPath (a PATH). PRD-reuse Bun work depends
|
|
14
|
+
on this and must NOT touch build_prompt.ts.
|
|
15
|
+
|
|
16
|
+
## Existing scaffolding (EXTEND, not duplicate)
|
|
17
|
+
Bash route already implements generated-PRD reuse for the no-file case:
|
|
18
|
+
- .loki/generated-prd.md is the canonical generated-PRD path, byte-locked for parity (run.sh ANALYSIS_INSTRUCTION,
|
|
19
|
+
build_prompt.ts:208, resume lines).
|
|
20
|
+
- decide_generated_prd_action() (run.sh:4892) returns reuse | update | generate | user_owned.
|
|
21
|
+
- persist_prd_signature_if_present() (run.sh:4983) writes .loki/state/prd-signature.json.
|
|
22
|
+
- run_autonomous() (run.sh:~13824) auto-detect block handles the empty prd_path case only.
|
|
23
|
+
- Bun route: runAutonomous (autonomous.ts:230) -> makeContext (autonomous.ts:641) sets ctx.prdPath = opts.prdPath. No reuse/persist.
|
|
24
|
+
|
|
25
|
+
Docker route already has host-aggregating dashboard:
|
|
26
|
+
- cmd_docker (autonomy/loki:~28748) -> docker-run.sh helpers.
|
|
27
|
+
- _loki_docker_register_host running/stopped brackets the blocking run; registers $(pwd) in ~/.loki/dashboard/projects.json.
|
|
28
|
+
- Dashboard /api/projects (server.py:~2623) aggregates local + docker; pid=None reads bind-mounted .loki/session.json.
|
|
29
|
+
- Dashboard Stop for docker works via STOP file (server.py:~2970).
|
|
30
|
+
- cmd_dashboard_start (autonomy/loki:~4038) + cmd_dashboard_open (~3982) are standalone host dashboard entries.
|
|
31
|
+
|
|
32
|
+
## Design decision locks
|
|
33
|
+
LOCK 1: Canonical PRD path = .loki/generated-prd.md (do NOT invent .loki/prd/current.md). Persist user content INTO it;
|
|
34
|
+
record provenance in .loki/state/prd-signature.json via a new `source` field.
|
|
35
|
+
LOCK 2: User PRDs resolve to reuse/user_owned, NEVER update. Stamp source:"user" at persist; short-circuit
|
|
36
|
+
decide_generated_prd_action to user_owned when source=user (except --fresh-prd). Signature-diff `update` stays scoped to source=generated.
|
|
37
|
+
This answers "update only if needed": user PRD = always as-is; generated PRD = existing signature logic.
|
|
38
|
+
LOCK 3: DOCKER-DASH uses the HOST dashboard, not the published container port. Container mounts only workspace; an
|
|
39
|
+
in-container dashboard sees ONE project and cannot satisfy "shows BOTH." Host dashboard already aggregates both.
|
|
40
|
+
Local loki start runs the dashboard on host port 57374 + auto-opens (run.sh:~10081-10099), so host dashboard IS "like local loki start."
|
|
41
|
+
LOCK 4: Ownership deconflicted by FILE with an interface contract (matrix below).
|
|
42
|
+
|
|
43
|
+
## FEATURE 1 - FEAT-PRD-REUSE
|
|
44
|
+
Semantics (both routes identical):
|
|
45
|
+
| Run | File arg? | Persisted PRD? | Behavior |
|
|
46
|
+
| 1st | yes | no | use file; persist to .loki/generated-prd.md, source=user |
|
|
47
|
+
| 1st | no | no | codebase-analysis generates .loki/generated-prd.md, source=generated (existing) |
|
|
48
|
+
| 2nd+ | no | yes | continue from persisted PRD (reuse; generated may update on drift, user never) |
|
|
49
|
+
| 2nd+ | yes | yes | new file overwrites .loki/generated-prd.md, source reset to user (brownfield) |
|
|
50
|
+
|
|
51
|
+
Bash (Agent C, autonomy/run.sh ONLY):
|
|
52
|
+
- New persistence branch for explicit user PRD: when prd_path non-empty and not already .loki/generated-prd.md:
|
|
53
|
+
mkdir -p .loki .loki/state; atomic copy prd_path -> .loki/generated-prd.md; write prd-signature.json with
|
|
54
|
+
source:"user", prd_sha (reuse _loki_prd_file_hash run.sh:4869), generated_at, signature (compute_codebase_signature),
|
|
55
|
+
origin_path; repoint prd_path=".loki/generated-prd.md"; export GENERATED_PRD_ACTION="user_owned".
|
|
56
|
+
- Extend decide_generated_prd_action (4892): read source from prd-signature.json. Precedence:
|
|
57
|
+
--fresh-prd/LOKI_PRD_REGEN -> generate > source=user -> user_owned > existing generated logic.
|
|
58
|
+
|
|
59
|
+
Bun (Agent D, loki-ts/src/runner/autonomous.ts + NEW loki-ts/src/runner/prd_reuse.ts; NOT build_prompt.ts):
|
|
60
|
+
- New helper resolvePrdForRun(opts) called at top of runAutonomous (autonomous.ts:233 before makeContext). Mirrors the two Bash steps above (user-PRD persistence, then decide_generated_prd_action):
|
|
61
|
+
user file -> copy + persist source:user -> return generated path; empty + generated exists -> decideGeneratedPrdAction
|
|
62
|
+
TS port -> return generated path for reuse/update/user_owned, undefined for generate; empty + none -> undefined.
|
|
63
|
+
Set resolved path onto opts.prdPath before makeContext (autonomous.ts:655). No build_prompt.ts edit.
|
|
64
|
+
- Parity: TS prd-signature.json schema + decideGeneratedPrdAction must match bash exactly.
|
|
65
|
+
|
|
66
|
+
.loki state additions: .loki/generated-prd.md also holds persisted user PRD. prd-signature.json adds
|
|
67
|
+
source ("user"|"generated"), origin_path (when source=user).
|
|
68
|
+
|
|
69
|
+
AC: AC1 user file persists byte-equal + source:user. AC2 no-arg rerun reuses, no codebase analysis. AC3 new file
|
|
70
|
+
overwrites, source stays user, origin_path updates. AC4 no-arg first run still generates source:generated. AC5
|
|
71
|
+
--fresh-prd re-analyzes -> source:generated. AC6 bash/Bun identical source+action (parity test). AC7 no-arg rerun
|
|
72
|
+
after user PRD never enters GENERATED_PRD_UPDATE_MODE even if codebase changed.
|
|
73
|
+
|
|
74
|
+
## FEATURE 2 - FEAT-DOCKER-DASH (host dashboard auto-open)
|
|
75
|
+
Architecture (LOCK 3): host dashboard, auto-opened.
|
|
76
|
+
Agent A (autonomy/loki, inside cmd_docker, start path): between _loki_docker_register_host running and the blocking run:
|
|
77
|
+
1. Start/reuse host dashboard via cmd_dashboard_start (idempotent). Port: DASHBOARD_DEFAULT_PORT 57374 with fallback
|
|
78
|
+
(Agent B loki_docker_pick_host_port). 2. Auto-open gated like run.sh:~10088 ([ -t 1 ] && not background && LOKI_NO_AUTO_OPEN!=1)
|
|
79
|
+
via cmd_dashboard_open. 3. Container stays dashboard-OFF (docker-run.sh LOKI_DASHBOARD=false), no container port publish.
|
|
80
|
+
Agent B (autonomy/docker-run.sh): loki_docker_pick_host_port - probe 57374, increment to free port if bound, echo chosen port.
|
|
81
|
+
server.py (Agent E only IF a gap appears): /api/projects already aggregates docker, Stop already handles docker. Default: no change.
|
|
82
|
+
|
|
83
|
+
AC: AC8 loki docker start in TTY starts host dashboard + opens browser. AC9 dashboard lists docker run alongside local.
|
|
84
|
+
AC10 LOKI_NO_AUTO_OPEN=1/non-TTY/--bg no browser. AC11 second docker start reuses dashboard, both runs listed.
|
|
85
|
+
|
|
86
|
+
## FEATURE 3 - FEAT-DOCKER-PRUNE (scoped image cleanup after pull)
|
|
87
|
+
No explicit docker pull today (run auto-pulls only if absent). PRUNE needs explicit pull.
|
|
88
|
+
Agent B (docker-run.sh helpers) + Agent A (call site in cmd_docker): loki_docker_pull_and_prune, called from cmd_docker before run (start path):
|
|
89
|
+
1. docker pull $LOKI_DOCKER_IMAGE (default asklokesh/loki-mode:latest), capture image ID.
|
|
90
|
+
2. In-use set: docker ps --format '{{.Image}} {{.ImageID}}'.
|
|
91
|
+
3. Enumerate ONLY asklokesh/loki-mode: docker images --filter 'reference=asklokesh/loki-mode' --format '{{.ID}} ...'
|
|
92
|
+
+ dangling: --filter 'reference=asklokesh/loki-mode' --filter 'dangling=true' -q.
|
|
93
|
+
4. docker rmi each ID NOT the just-pulled :latest AND NOT in-use (best-effort).
|
|
94
|
+
5. NEVER docker image prune -a. Scope strictly reference=asklokesh/loki-mode.
|
|
95
|
+
6. Honest output: reclaimed count/bytes or "nothing to reclaim."
|
|
96
|
+
Gate: LOKI_DOCKER_PRUNE=${LOKI_DOCKER_PRUNE:-1} (default on, =0 opt-out; =0 also skips explicit pull).
|
|
97
|
+
|
|
98
|
+
AC: AC12 old asklokesh/loki-mode IDs removed, :latest remains. AC13 in-use image never removed. AC14 non-loki-mode
|
|
99
|
+
image never touched (decoy survives). AC15 LOKI_DOCKER_PRUNE=0 skips; honest output.
|
|
100
|
+
|
|
101
|
+
## FEATURE 4 - FIX-DOCKER-STOP (loki stop reaps the container)
|
|
102
|
+
Repro: container loki-<sha12> Up but loki stop says "No active session." cmd_stop (autonomy/loki:~2242) only checks .loki.
|
|
103
|
+
Container name deterministic: loki-<sha12 of workspace> (docker-run.sh:~204-214).
|
|
104
|
+
Agent A (autonomy/loki write+read) + Agent B (docker-run.sh helpers):
|
|
105
|
+
Write: before blocking run write .loki/docker/run.json {container, image, project_dir, started_at}; clear after.
|
|
106
|
+
Helpers loki_docker_write_runstate / loki_docker_clear_runstate.
|
|
107
|
+
Read/reap (cmd_stop folder-scoped default, before "No active session"):
|
|
108
|
+
1. Read .loki/docker/run.json -> container; fallback recompute loki-<sha12 of $(pwd)> (loki_docker_container_name).
|
|
109
|
+
2. If docker ps -q -f name=^${container}$ non-empty -> docker stop then docker rm (best-effort; --rm may auto-remove).
|
|
110
|
+
3. Remove run.json. 4. Report the reap; do not print "No active session" when a docker run is Up.
|
|
111
|
+
5. loki stop --all: also docker ps -q --filter ancestor=asklokesh/loki-mode -> stop/rm all (machine-wide, parity with --all PID).
|
|
112
|
+
Folder-scoped default stays folder-scoped. Preserves v7.7.30-34 stop-scoping.
|
|
113
|
+
|
|
114
|
+
.loki state additions: .loki/docker/run.json (NEW) {container, image, project_dir, started_at}.
|
|
115
|
+
|
|
116
|
+
AC: AC16 docker start then stop (same folder) stops+removes container, names it, no "No active session". AC17 run.json
|
|
117
|
+
deleted -> still reaps via recomputed name. AC18 stop in folder X does not stop docker run in folder Y. AC19 stop --all
|
|
118
|
+
stops every loki-mode container. AC20 no docker run + no local session -> existing "No active session" (no regression).
|
|
119
|
+
|
|
120
|
+
## FILE-OWNERSHIP MATRIX (zero overlap)
|
|
121
|
+
- Agent A (Docker orchestration): autonomy/loki ONLY. cmd_docker (dashboard start/open F2, pull+prune F3, write/clear
|
|
122
|
+
run.json F4), cmd_stop (docker reconcile+reap F4). Calls Agent-B helpers by name; uses cmd_dashboard_start/open.
|
|
123
|
+
- Agent B (Docker helpers): autonomy/docker-run.sh ONLY. loki_docker_pick_host_port, loki_docker_pull_and_prune,
|
|
124
|
+
loki_docker_write_runstate, loki_docker_clear_runstate, loki_docker_container_name. No call-site edits in autonomy/loki.
|
|
125
|
+
- Agent C (PRD-reuse bash): autonomy/run.sh ONLY. User-PRD persistence in run_autonomous, extend decide_generated_prd_action.
|
|
126
|
+
- Agent D (PRD-reuse Bun): loki-ts/src/runner/autonomous.ts + NEW loki-ts/src/runner/prd_reuse.ts. MUST NOT edit build_prompt.ts.
|
|
127
|
+
- Agent E (Tests + server.py iff needed): tests/** new files, loki-ts/tests/** new files; server.py only if a real DASH gap appears.
|
|
128
|
+
Do not edit existing W4-touched test files.
|
|
129
|
+
|
|
130
|
+
A<->B: disjoint files, share function-boundary contract. C<->A: PRD-reuse-bash entirely in run_autonomous; cmd_start
|
|
131
|
+
already passes file arg to run.sh. D<->W4: D sets ctx.prdPath upstream; build_prompt.ts read-only for D.
|
|
132
|
+
|
|
133
|
+
Sequencing: B, C, D independent -> parallel immediately. A depends on B helper signatures (contract fixed up-front, A can
|
|
134
|
+
start against signatures). E writes tests against contracts in parallel, finalizes after A-D land.
|
|
135
|
+
|
|
136
|
+
## RISKS
|
|
137
|
+
R1 (DASH architecture): host dashboard satisfies host-port + auto-open + shows-both; container-port publish breaks shows-both. Host chosen (LOCK 3).
|
|
138
|
+
R2 (host-port conflict): cmd_dashboard_start idempotent; loki_docker_pick_host_port fallback. Container-port publish was already disabled for 57374 collision.
|
|
139
|
+
R3 (PRD update-only-if-needed): LOCK 2 source field. source=user always reuse/user_owned; source=generated existing signature logic. Hand-edited persisted PRD -> still user_owned.
|
|
140
|
+
R4 (prune over-aggression): triple-scoped (reference filter + exclude :latest ID + exclude in-use). AC14 decoy survives. rmi best-effort.
|
|
141
|
+
R5 (pull latency): pull-always v1, gated by LOKI_DOCKER_PRUNE. OQ: pull-always vs only-on-digest-change; recommend pull-always v1.
|
|
142
|
+
R6 (--rm + stop): docker stop triggers auto-removal; docker rm best-effort (already-gone = success).
|
|
143
|
+
OQ1: run.json write BEFORE blocking run, clear AFTER. Deterministic-name fallback covers the window.
|
|
144
|
+
OQ2: confirm Bun runAutonomous reached with opts.prdPath set from same arg as bash; parity test guards decision logic regardless.
|