loki-mode 7.5.11 → 7.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
2
 
3
- # Loki Mode aka Autonomi
3
+ # Loki Mode
4
4
 
5
5
  ### Build the future, faster.
6
6
 
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Multi-agent autonomous startup system. Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product with minimal human intervention. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v7.5.11
6
+ # Loki Mode v7.5.13
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -91,7 +91,7 @@ These rules guide autonomous operation. Test results and code quality always tak
91
91
 
92
92
  ## Model Selection
93
93
 
94
- **Default since v5.3.0 (reaffirmed in v7.5.11):** Haiku disabled for quality. Use `--allow-haiku` or `LOKI_ALLOW_HAIKU=true` to enable.
94
+ **Default since v5.3.0 (reaffirmed in v7.5.13):** Haiku disabled for quality. Use `--allow-haiku` or `LOKI_ALLOW_HAIKU=true` to enable.
95
95
 
96
96
  | Task Type | Tier | Claude (default) | Claude (--allow-haiku) | Codex (GPT-5.3) | Gemini |
97
97
  |-----------|------|------------------|------------------------|------------------|--------|
@@ -333,7 +333,7 @@ See `references/core-workflow.md` for the full RARV-C contract.
333
333
 
334
334
  ---
335
335
 
336
- ## Concurrency and Security Hardening (v7.5.7 - v7.5.11)
336
+ ## Concurrency and Security Hardening (v7.5.7 - v7.5.13)
337
337
 
338
338
  Three back-to-back patches closed cross-process and security gaps. No user-facing behavior change on the default flow; verify via the cited paths.
339
339
 
@@ -342,7 +342,7 @@ Three back-to-back patches closed cross-process and security gaps. No user-facin
342
342
  - **Dashboard auth** now required on `/api/memory/*`, `/api/learning/*`, and `/api/status` in `dashboard/server.py` (previously unauthenticated read paths).
343
343
  - **Bash quoting hardening** across `autonomy/run.sh` and `autonomy/loki` -- variable expansions inside command substitution and `[ ]` tests quoted to prevent word-splitting on paths with spaces.
344
344
 
345
- See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.11] for the per-fix list and reviewer sign-off.
345
+ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and reviewer sign-off.
346
346
 
347
347
  ---
348
348
 
@@ -381,4 +381,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.11] for the per-fix list and r
381
381
 
382
382
  ---
383
383
 
384
- **v7.5.11 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
384
+ **v7.5.13 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 7.5.11
1
+ 7.5.13
@@ -431,6 +431,66 @@ _install_python_deps() {
431
431
  fi
432
432
  }
433
433
 
434
# Resolve the directory containing the compose file.
#
# Arguments: $1 - base directory to search (default: $TARGET_DIR, then ".")
# Globals:   LOKI_COMPOSE_FILE (read) - explicit compose file override; when it
#            names an existing regular file, its directory is used and $1 is
#            not searched.
# Outputs:   the resolved directory on stdout. Falls back to the base
#            directory when no compose file is found (callers should already
#            have verified detection).
_app_runner_compose_dir() {
    local base="${1:-${TARGET_DIR:-.}}"
    # Loop variable is declared local so it does not leak into (or clobber a
    # same-named variable in) the caller's scope.
    local candidate

    if [ -n "${LOKI_COMPOSE_FILE:-}" ] && [ -f "${LOKI_COMPOSE_FILE}" ]; then
        # "--" keeps a path that starts with "-" from being read as an option.
        dirname -- "${LOKI_COMPOSE_FILE}"
        return
    fi

    # First matching file name wins, in this order.
    for candidate in \
        "$base/docker-compose.yml" \
        "$base/docker-compose.yaml" \
        "$base/compose.yml" \
        "$base/compose.yaml"; do
        if [ -f "$candidate" ]; then
            dirname -- "$candidate"
            return
        fi
    done

    printf '%s\n' "$base"
}
455
+
456
# Count containers currently in the "running" state for the compose project.
#
# Polls `docker compose ps` at 1s intervals so containers transitioning from
# Created -> Running are not falsely reported as failed. The state is always
# checked at least once, and no sleep happens after the final check, so a
# timeout of 0 or 1 means "check once and return immediately".
#
# Arguments: $1 - base directory (default: $TARGET_DIR, then ".")
# Globals:   LOKI_COMPOSE_HEALTH_TIMEOUT (read) - max seconds to keep polling
#            (default 30; values that are not 1-9 decimal digits fall back
#            to 30).
# Outputs:   the running-container count on stdout (0 on failure).
# Returns:   always 0; callers decide based on the printed count.
_app_runner_compose_running_count() {
    local base="${1:-${TARGET_DIR:-.}}"
    local compose_dir
    compose_dir=$(_app_runner_compose_dir "$base")

    local timeout="${LOKI_COMPOSE_HEALTH_TIMEOUT:-30}"
    # Digits only, and short enough that the integer comparison below cannot
    # overflow and error out.
    if ! [[ "$timeout" =~ ^[0-9]{1,9}$ ]]; then
        timeout=30
    fi

    local elapsed=0
    local count=0
    local states
    while :; do
        # Structured --format '{{.State}}' lists one state per container, one
        # per line. The cd happens inside the command substitution, so the
        # caller's working directory is untouched.
        states=$(cd "$compose_dir" && docker compose ps --format '{{.State}}' 2>/dev/null || true)
        if [ -n "$states" ]; then
            # Count exact "running" lines only (case-insensitive), stripping
            # CRs first. grep -c prints 0 but exits non-zero when nothing
            # matches, hence the "|| true".
            count=$(printf '%s\n' "$states" | tr -d '\r' | grep -ciE '^running$' || true)
        else
            count=0
        fi
        if [ "${count:-0}" -gt 0 ]; then
            printf '%s\n' "$count"
            return 0
        fi

        elapsed=$(( elapsed + 1 ))
        if [ "$elapsed" -ge "$timeout" ]; then
            break
        fi
        sleep 1
    done

    printf '%s\n' "${count:-0}"
    return 0
}
493
+
434
494
  #===============================================================================
435
495
  # Lifecycle
436
496
  #===============================================================================
@@ -501,15 +561,24 @@ app_runner_start() {
501
561
 
502
562
  # Verify process started
503
563
  if [ "$_APP_RUNNER_IS_DOCKER" = true ] && echo "$_APP_RUNNER_METHOD" | grep -q "docker compose"; then
504
- # Docker compose -d exits immediately; check containers instead of PID
564
+ # Docker compose -d exits immediately; poll for containers in "running"
565
+ # state. Containers may report "Created" briefly before transitioning to
566
+ # "Running", so retry up to ~30 seconds before declaring failure.
505
567
  local running_containers
506
- running_containers=$(cd "${TARGET_DIR:-.}" && { docker compose ps --status running -q 2>/dev/null || docker compose ps 2>/dev/null | grep -ciE 'running|up'; } | wc -l | tr -d ' ')
568
+ running_containers=$(_app_runner_compose_running_count "$dir")
507
569
  if [ "${running_containers:-0}" -gt 0 ]; then
508
570
  _write_app_state "running"
509
571
  log_info "App Runner: docker compose started ($running_containers container(s) running)"
510
572
  return 0
511
573
  else
512
- log_error "App Runner: docker compose containers failed to start"
574
+ # Capture diagnostic output for postmortem
575
+ local compose_dir
576
+ compose_dir=$(_app_runner_compose_dir "$dir")
577
+ local diag
578
+ diag=$(cd "$compose_dir" && docker compose ps 2>&1 || true)
579
+ log_error "App Runner: docker compose containers failed to start (no containers in running state after retries)"
580
+ log_error "App Runner: docker compose ps output:"
581
+ printf '%s\n' "$diag" | while IFS= read -r line; do log_error " $line"; done
513
582
  _APP_RUNNER_CRASH_COUNT=$(( _APP_RUNNER_CRASH_COUNT + 1 ))
514
583
  _write_app_state "failed"
515
584
  return 1
@@ -547,7 +616,9 @@ app_runner_stop() {
547
616
  docker rm "$_APP_RUNNER_DOCKER_CONTAINER" 2>/dev/null || true
548
617
  fi
549
618
  if echo "$_APP_RUNNER_METHOD" | grep -q "docker compose"; then
550
- (cd "${TARGET_DIR:-.}" && docker compose down 2>/dev/null) || true
619
+ local _stop_compose_dir
620
+ _stop_compose_dir=$(_app_runner_compose_dir "${TARGET_DIR:-.}")
621
+ (cd "$_stop_compose_dir" && docker compose down 2>/dev/null) || true
551
622
  fi
552
623
  fi
553
624
 
@@ -619,8 +690,10 @@ app_runner_health_check() {
619
690
 
620
691
  # Docker compose: check containers instead of PID (docker compose up -d exits immediately)
621
692
  if [ "$_APP_RUNNER_IS_DOCKER" = true ] && echo "$_APP_RUNNER_METHOD" | grep -q "docker compose"; then
693
+ # Use a 1-second timeout for health checks (no long retry); start-time
694
+ # retries are handled in app_runner_start.
622
695
  local running_containers
623
- running_containers=$(cd "${TARGET_DIR:-.}" && { docker compose ps --status running -q 2>/dev/null || docker compose ps 2>/dev/null | grep -ciE 'running|up'; } | wc -l | tr -d ' ')
696
+ running_containers=$(LOKI_COMPOSE_HEALTH_TIMEOUT=1 _app_runner_compose_running_count "${TARGET_DIR:-.}")
624
697
  if [ "${running_containers:-0}" -gt 0 ]; then
625
698
  _write_health "true"
626
699
  _write_app_state "running"
@@ -759,7 +832,9 @@ app_runner_cleanup() {
759
832
  docker rm "$_APP_RUNNER_DOCKER_CONTAINER" 2>/dev/null || true
760
833
  fi
761
834
  if echo "$_APP_RUNNER_METHOD" | grep -q "docker compose"; then
762
- (cd "${TARGET_DIR:-.}" && docker compose down 2>/dev/null) || true
835
+ local _stop_compose_dir
836
+ _stop_compose_dir=$(_app_runner_compose_dir "${TARGET_DIR:-.}")
837
+ (cd "$_stop_compose_dir" && docker compose down 2>/dev/null) || true
763
838
  fi
764
839
  fi
765
840
 
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env bash
2
+ # Loki Mode -- portable file locking helper.
3
+ #
4
+ # Why this exists:
5
+ # flock(1) is a Linux util-linux binary not shipped on macOS or BSDs.
6
+ # Bash callers that depend on it either degrade to non-atomic PID checks
7
+ # (race condition) or print a "flock not available" warning. This helper
8
+ # gives every bash caller one cross-platform primitive.
9
+ #
10
+ # Strategy:
11
+ # mkdir() is atomic on all POSIX filesystems -- exactly one concurrent
12
+ # caller wins the create. We use <target>.lockdir as the mutex, write a
13
+ # PID-stamped sentinel inside it for stale detection, and clean up via
14
+ # trap so a killed holder cannot wedge later callers.
15
+ #
16
+ # Public API:
17
+ # safe_acquire_lock <target> [timeout_seconds] -> 0 on acquire, 1 on timeout
18
+ # safe_release_lock <target> -> always 0
19
+ # safe_with_lock <target> <command...> -> runs command under lock,
20
+ # returns command's exit code
21
+ #
22
+ # Stale-lock policy: a lockdir whose sentinel PID is no longer alive AND
23
+ # whose mtime is >30s old is reaped automatically.
24
+ #
25
+ # Acquire timing: poll every 50ms, default ceiling 5s.
26
+
27
+ # Guard against double-source.
28
+ if [ "${__LOKI_LOCK_SH_LOADED:-0}" = "1" ]; then
29
+ return 0 2>/dev/null || true
30
+ fi
31
+ __LOKI_LOCK_SH_LOADED=1
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Internal helpers
35
+ # ---------------------------------------------------------------------------
36
+
37
# _loki_lock_sleep_50ms: portable ~50ms sleep.
# Tries, in order:
#   1. perl's four-argument select() with a 0.05s timeout;
#   2. `sleep 0.05` (fractional seconds; fails on sleep implementations that
#      only accept integers);
#   3. `sleep 1` as the last resort (still correct, just slower).
# A `read -t 0.05 < /dev/null` fallback is deliberately NOT used: reading
# /dev/null hits EOF immediately, so it returns failure without waiting.
# Returns: the status of the first step that succeeds (0), else sleep 1's.
_loki_lock_sleep_50ms() {
    perl -e 'select(undef,undef,undef,0.05)' 2>/dev/null \
        || sleep 0.05 2>/dev/null \
        || sleep 1
}
45
+
46
# _loki_lock_mtime <path>: portable mtime in epoch seconds, "0" on failure.
# Tries the BSD/macOS form (`stat -f%m`) first, then the GNU form
# (`stat -c%Y`). "--" stops option parsing so a path beginning with "-" is
# treated as a file name. Anything that is not a plain run of digits is
# reported as "0" so callers can do integer arithmetic on the result.
# Outputs: one line on stdout. Returns: always 0.
_loki_lock_mtime() {
    local mtime
    mtime=$(stat -f%m -- "$1" 2>/dev/null) \
        || mtime=$(stat -c%Y -- "$1" 2>/dev/null) \
        || mtime=0
    case "$mtime" in
        '' | *[!0-9]*) mtime=0 ;;
    esac
    printf '%s\n' "$mtime"
}
52
+
53
# _loki_lock_is_stale <lockdir>: 0 if reapable, 1 otherwise.
# Stale = lockdir mtime >30s old AND its sentinel PID is not alive. A bare
# lockdir with no sentinel (legacy / partial create) is treated as stale
# after 30s as well.
#
# Never stale:
#   - a lockdir whose mtime cannot be read (reported as 0), e.g. because the
#     holder released it between the caller's failed mkdir and this check.
#     Calling that "stale" would let the caller rm -rf a lock another process
#     has just created.
#   - a lockdir 30s old or younger.
#   - a lockdir whose sentinel holds a positive integer PID that answers
#     `kill -0`.
# A sentinel that is empty or not a positive integer is treated as having no
# live owner, so "0" or "-1" (which `kill -0` would accept as process-group
# targets) cannot wedge the lock forever.
_loki_lock_is_stale() {
    local lockdir="$1"
    local sentinel="$lockdir/owner.pid"
    local now mtime age pid

    mtime=$(_loki_lock_mtime "$lockdir")
    case "$mtime" in
        '' | 0 | *[!0-9]*) return 1 ;;
    esac

    # If date fails, now=0 makes the age negative, i.e. "not stale".
    now=$(date +%s 2>/dev/null || echo 0)
    age=$(( now - mtime ))
    if [ "$age" -le 30 ]; then
        return 1
    fi

    if [ -f "$sentinel" ]; then
        pid=$(cat "$sentinel" 2>/dev/null)
        case "$pid" in
            '' | *[!0-9]*)
                # Unusable sentinel: fall through and report stale.
                ;;
            *)
                if [ "$pid" -gt 0 ] 2>/dev/null && kill -0 "$pid" 2>/dev/null; then
                    return 1
                fi
                ;;
        esac
    fi
    return 0
}
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Public API
76
+ # ---------------------------------------------------------------------------
77
+
78
# safe_acquire_lock <target> [timeout_seconds=5]
# Acquires a mutex on <target>.lockdir. Returns 0 on acquire, 1 on timeout.
#
# Arguments: $1 - path the lock protects; the mutex is "<target>.lockdir".
#            $2 - timeout in whole seconds (default 5; a value that is not a
#                 plain non-negative integer falls back to 5). At least one
#                 acquire attempt is always made.
# Side effects: creates the parent directory of <target> if missing
#            (best-effort) and writes this shell's PID ($$) to
#            <target>.lockdir/owner.pid for stale detection.
#
# The loop is always bounded: stale-lock reaps retry immediately but are
# capped, so a lockdir that can be neither created nor removed (unwritable or
# invalid parent directory) ends in a timeout instead of a busy spin.
safe_acquire_lock() {
    local target="$1"
    local timeout_s="${2:-5}"
    local lockdir="${target}.lockdir"
    local target_dir
    target_dir=$(dirname -- "$target")
    [ -d "$target_dir" ] || mkdir -p -- "$target_dir" 2>/dev/null || true

    case "$timeout_s" in
        '' | *[!0-9]*) timeout_s=5 ;;
    esac

    # 50ms poll interval -> 20 attempts/sec. "10#" forces base 10 so a value
    # such as "08" is not parsed as (invalid) octal.
    local max_attempts=$(( 10#$timeout_s * 20 ))
    if [ "$max_attempts" -lt 1 ]; then
        max_attempts=1
    fi
    local attempts=0
    local reaps=0

    while ! mkdir -- "$lockdir" 2>/dev/null; do
        # Reap a stale lock and retry at once, without sleeping. Reaps are
        # capped so repeated "stale" verdicts cannot loop forever.
        if [ "$reaps" -lt "$max_attempts" ] && _loki_lock_is_stale "$lockdir"; then
            rm -rf -- "$lockdir" 2>/dev/null || true
            reaps=$(( reaps + 1 ))
            continue
        fi
        attempts=$(( attempts + 1 ))
        if [ "$attempts" -ge "$max_attempts" ]; then
            return 1
        fi
        _loki_lock_sleep_50ms
    done

    # Stamp sentinel for stale detection (best-effort).
    echo "$$" > "$lockdir/owner.pid" 2>/dev/null || true
    return 0
}
109
+
110
# safe_release_lock <target>
# Releases the mutex on <target>.lockdir by removing that directory.
# Idempotent: releasing a lock that is not held is a no-op.
#
# Arguments: $1 - the same <target> that was passed to safe_acquire_lock.
# Returns:   always 0.
#
# An empty or missing target is ignored; otherwise it would expand to a bare
# ".lockdir" and remove that entry from the current working directory.
# Ownership is not checked: whoever calls this removes the lockdir.
safe_release_lock() {
    local target="${1:-}"
    if [ -z "$target" ]; then
        return 0
    fi
    # "--" keeps a target that starts with "-" from being read as rm options.
    rm -rf -- "${target}.lockdir" 2>/dev/null || true
    return 0
}
118
+
119
# safe_with_lock <target> <command> [args...]
# Runs <command args...> under an exclusive lock on <target> and returns the
# command's exit code. If the lock cannot be acquired within 5s, returns 1
# without running the command (caller can detect via $?).
#
# While the command runs, EXIT/INT/TERM/HUP traps are installed that release
# the lock, so an exit or signal during the command does not leave the
# lockdir behind. The command runs in the current shell (not a subshell).
#
# Trap handling:
#   - The caller's EXIT, INT, TERM and HUP traps are all captured up front and
#     all restored afterwards; signals that had no trap go back to their
#     default disposition.
#   - <target> is embedded in the trap string via printf %q, so targets
#     containing quotes or other shell metacharacters are safe.
#
# NOTE(review): the temporary signal handlers only release the lock; they do
# not re-raise the signal or invoke the caller's own handler, so a signal
# delivered while the command runs does not by itself terminate this shell.
safe_with_lock() {
    local target="$1"; shift
    if ! safe_acquire_lock "$target" 5; then
        return 1
    fi

    local rc=0
    local _quoted_target _saved_traps
    printf -v _quoted_target '%q' "$target"
    # `trap -p` prints re-evaluable `trap -- '...' SIG` lines for the signals
    # that currently have a trap (or are ignored) and nothing for the rest.
    _saved_traps=$(trap -p EXIT INT TERM HUP 2>/dev/null)

    # shellcheck disable=SC2064  # expand the quoted target now, on purpose
    trap "safe_release_lock $_quoted_target" EXIT INT TERM HUP

    "$@"
    rc=$?
    safe_release_lock "$target"

    # Reset all four to default first, then re-install whatever the caller had.
    trap - EXIT INT TERM HUP
    if [ -n "$_saved_traps" ]; then
        eval "$_saved_traps"
    fi
    return "$rc"
}
package/autonomy/loki CHANGED
@@ -1379,6 +1379,28 @@ cmd_start() {
1379
1379
  fi
1380
1380
  fi
1381
1381
 
1382
+ # v7.5.12 Gap B: Stale-PID detection. Hard-kill (Ctrl+C followed by SIGKILL or
1383
+ # `loki stop`) can leave .loki/loki.pid + .loki/session.lock orphaned. The
1384
+ # next `loki start` then refuses to launch -- or worse, run.sh's downstream
1385
+ # cleanup may treat the stale pid as live. Detect-and-clean here, BEFORE
1386
+ # exec, so the user gets one clear log line instead of mysterious silent
1387
+ # behavior.
1388
+ local _start_loki_dir="${LOKI_DIR:-.loki}"
1389
+ local _start_pid_file="$_start_loki_dir/loki.pid"
1390
+ if [ -f "$_start_pid_file" ]; then
1391
+ local _existing_pid
1392
+ _existing_pid=$(cat "$_start_pid_file" 2>/dev/null | tr -dc '0-9')
1393
+ if [ -n "$_existing_pid" ] && kill -0 "$_existing_pid" 2>/dev/null; then
1394
+ echo -e "${RED}Error: another loki instance is running (pid $_existing_pid).${NC}" >&2
1395
+ echo -e "${YELLOW}Run 'loki stop' first, then retry 'loki start'.${NC}" >&2
1396
+ exit 1
1397
+ fi
1398
+ # PID is stale (file present but process gone). Log + remove + continue.
1399
+ echo -e "${YELLOW}Removing stale pid file ($_start_pid_file, pid=${_existing_pid:-empty} not alive)${NC}" >&2
1400
+ rm -f "$_start_pid_file" 2>/dev/null || true
1401
+ rm -f "$_start_loki_dir/session.lock" 2>/dev/null || true
1402
+ fi
1403
+
1382
1404
  # Determine effective provider for display
1383
1405
  local effective_provider="${provider:-${LOKI_PROVIDER:-claude}}"
1384
1406