loki-mode 7.66.1 → 7.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Autonomous spec-driven build system with a built-in trust layer. It does not call work done until it is verified (RARV-C closure loop, 8 quality gates, completion council, verified-completion evidence gate). Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v7.66.1
6
+ # Loki Mode v7.68.0
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -406,4 +406,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
406
406
 
407
407
  ---
408
408
 
409
- **v7.66.1 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
409
+ **v7.68.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 7.66.1
1
+ 7.68.0
@@ -778,7 +778,12 @@ app_runner_init() {
778
778
  local _project_hash
779
779
  _project_hash=$(echo "$dir" | (md5sum 2>/dev/null || md5 -r 2>/dev/null || echo "$$") | cut -c1-8)
780
780
  _APP_RUNNER_DOCKER_CONTAINER="loki-app-${_project_hash}"
781
- _APP_RUNNER_METHOD="docker build -t loki-app . && docker run -d -p ${_APP_RUNNER_PORT}:${_APP_RUNNER_PORT} --name ${_APP_RUNNER_DOCKER_CONTAINER} loki-app"
781
+ # Hash the image tag the same way the container name is hashed so two
782
+ # Dockerfile-based projects do not clobber each other's image (an
783
+ # unhashed `loki-app` tag would be shared across every project). Build
784
+ # tag and run image arg MUST stay identical.
785
+ local _image_tag="loki-app-${_project_hash}"
786
+ _APP_RUNNER_METHOD="docker build -t ${_image_tag} . && docker run -d -p ${_APP_RUNNER_PORT}:${_APP_RUNNER_PORT} --name ${_APP_RUNNER_DOCKER_CONTAINER} ${_image_tag}"
782
787
  _APP_RUNNER_IS_DOCKER=true
783
788
  _write_detection "dockerfile" "$_APP_RUNNER_METHOD"
784
789
  log_info "App Runner: detected Dockerfile"
@@ -1012,6 +1017,59 @@ _app_runner_compose_running_count() {
1012
1017
  return 0
1013
1018
  }
1014
1019
 
# Decide whether to prepend `exec` to the launched method.
#
# `exec` replaces the bash wrapper with the command, so the captured PID is the
# app itself (PID identity for npm start / python app.py etc.). That is ONLY
# valid for a SINGLE simple command:
#   - A compound method like `docker build ... && docker run ...` must NOT be
#     exec'd: `exec docker build` would replace the shell and the `&& docker
#     run` half would never run (image builds, no container).
#   - A multi-line method has the same problem: everything after the first
#     line would be lost once `exec` replaces the shell.
#   - A method that starts with an environment assignment (`VAR=value cmd`)
#     cannot be exec'd either: `exec VAR=value cmd` makes bash look for a
#     program literally named `VAR=value`.
#
# Detection runs on the METHOD STRING ONLY, never the assembled launch line:
# the assembled line always contains `;` (from the PORT env prefix and the
# pgid `echo $$`), so testing it would mark every method compound and silently
# drop the exec optimization for single commands.
#
# The separator match is purely textual, so `&&` / `||` / `;` inside a quoted
# argument also counts as compound. That errs on the safe side: the command
# still runs in full, only the exec optimization is skipped.
#
# Arguments: $1 - the method (command string) about to be launched
# Outputs:   "exec " for a single simple command, "" (empty) otherwise
# Returns:   0 always
_app_runner_exec_prefix() {
    local method="${1-}"
    # Leading `NAME=` (optionally indented): an environment-assignment prefix.
    local _assign_re='^[[:space:]]*[A-Za-z_][A-Za-z0-9_]*='

    case "$method" in
        *"&&"*|*"||"*|*";"*|*$'\n'*)
            # Compound / multi-line: let bash run the full sequence as a child.
            printf '%s' ""
            return 0
            ;;
    esac

    if [[ "$method" =~ $_assign_re ]]; then
        # `exec VAR=value cmd` would fail; run it as a normal child instead.
        printf '%s' ""
        return 0
    fi

    printf '%s' "exec "
}
1043
+
# Liveness predicate for the Dockerfile (single-image `docker run -d`) path,
# which -- unlike compose -- has a project-hashed container name in
# $_APP_RUNNER_DOCKER_CONTAINER.
#
# The method is a compound `docker build && docker run -d` launched WITHOUT
# exec, so the captured PID is the short-lived bash wrapper: it stays alive for
# the (possibly multi-minute) build, then exits right after `docker run -d`
# detaches. Therefore liveness is:
#   alive = container running OR wrapper PID still alive (build in progress)
#   dead  = wrapper PID dead AND container not running
# This tolerates a slow-but-succeeding build while a genuinely broken
# Dockerfile still reads as dead (wrapper gone, no container).
#
# Globals:   _APP_RUNNER_DOCKER_CONTAINER (read), _APP_RUNNER_PID (read)
# Arguments: none
# Returns:   0 when alive, 1 when dead. Never hard-fails: every probe is
#            guarded for set -u / set -e.
_app_runner_dockerfile_container_running() {
    local _name="${_APP_RUNNER_DOCKER_CONTAINER:-}"
    local _state=""

    # No container name recorded: nothing to probe.
    if [ -z "$_name" ]; then
        return 1
    fi

    if command -v docker >/dev/null 2>&1; then
        # The image is tagged with the same `loki-app-<hash>` name as the
        # container, so restrict the lookup to containers; an untyped
        # `docker inspect` is not limited to one object type.
        _state=$(docker inspect --type=container -f '{{.State.Running}}' "$_name" 2>/dev/null || true)
        if [ "$_state" = "true" ]; then
            return 0
        fi
    fi

    # Container not (yet) running: the build may still be in progress. The
    # wrapper PID being alive is the build-in-progress signal.
    if [ -n "${_APP_RUNNER_PID:-}" ] && kill -0 "$_APP_RUNNER_PID" 2>/dev/null; then
        return 0
    fi

    return 1
}
1072
+
1015
1073
  # Read the RUNTIME published host port of the identified primary web service from
1016
1074
  # `docker compose ps` (the live mapping), as opposed to the config-declared port
1017
1075
  # from `docker compose config`. The config port is correct for fixed mappings
@@ -1127,6 +1185,25 @@ app_runner_start() {
1127
1185
  _port_env_prefix="export PORT=$_APP_RUNNER_PORT HTTP_PORT=$_APP_RUNNER_PORT SERVER_PORT=$_APP_RUNNER_PORT APP_PORT=$_APP_RUNNER_PORT; "
1128
1186
  fi
1129
1187
 
1188
+ # Conditional exec (HIGH-1 fix): only `exec` a SINGLE command. A compound
1189
+ # method (`docker build ... && docker run ...`) must run as a child so BOTH
1190
+ # halves execute -- `exec docker build` would replace the shell and never
1191
+ # reach `&& docker run`. Computed on the method string ONLY (see
1192
+ # _app_runner_exec_prefix), not the assembled launch line.
1193
+ local _exec_prefix
1194
+ _exec_prefix=$(_app_runner_exec_prefix "$_APP_RUNNER_METHOD")
1195
+
1196
+ # Dockerfile path: `docker run --name <hashed>` fails if a stale (exited)
1197
+ # container with that name still exists. This happens on a watchdog restart
1198
+ # (the prior run's container was stopped, not removed) and would make every
1199
+ # auto-restart fail with "name already in use". Remove any stale container
1200
+ # by name before launch. Idempotent and safe when none exists. Compose has no
1201
+ # _APP_RUNNER_DOCKER_CONTAINER, so this is Dockerfile-path only.
1202
+ if [ "$_APP_RUNNER_IS_DOCKER" = true ] && [ -n "${_APP_RUNNER_DOCKER_CONTAINER:-}" ] \
1203
+ && command -v docker >/dev/null 2>&1; then
1204
+ docker rm -f "$_APP_RUNNER_DOCKER_CONTAINER" >/dev/null 2>&1 || true
1205
+ fi
1206
+
1130
1207
  # Start the process in a new process group
1131
1208
  if command -v setsid >/dev/null 2>&1; then
1132
1209
  _APP_RUNNER_HAS_SETSID=true
@@ -1136,7 +1213,7 @@ app_runner_start() {
1136
1213
  # Note: $_APP_RUNNER_METHOD has passed _validate_app_command (whitelist).
1137
1214
  # The `--` after `bash -lc` prevents flag injection if the assembled
1138
1215
  # script string ever begins with a `-`.
1139
- (cd "$dir" && setsid bash -lc -- "$_port_env_prefix"'echo $$ > "'"$_pgid_file"'"; exec '"$_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
1216
+ (cd "$dir" && setsid bash -lc -- "$_port_env_prefix"'echo $$ > "'"$_pgid_file"'"; '"$_exec_prefix$_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
1140
1217
  local _subshell_pid=$!
1141
1218
  # Wait briefly for the pgid file to appear, then read the real PGID
1142
1219
  local _pgid_wait=0
@@ -1154,7 +1231,7 @@ app_runner_start() {
1154
1231
  _APP_RUNNER_HAS_SETSID=false
1155
1232
  # Note: $_APP_RUNNER_METHOD has passed _validate_app_command (whitelist).
1156
1233
  # The `--` after `bash -lc` prevents flag injection.
1157
- (cd "$dir" && bash -lc -- "${_port_env_prefix}exec $_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
1234
+ (cd "$dir" && bash -lc -- "${_port_env_prefix}${_exec_prefix}$_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
1158
1235
  _APP_RUNNER_PID=$!
1159
1236
  fi
1160
1237
  # Register with central PID registry if available
@@ -1212,6 +1289,24 @@ app_runner_start() {
1212
1289
  _write_app_state "failed"
1213
1290
  return 1
1214
1291
  fi
1292
+ elif [ "$_APP_RUNNER_IS_DOCKER" = true ] && [ -n "${_APP_RUNNER_DOCKER_CONTAINER:-}" ]; then
1293
+ # Dockerfile path (HIGH-1): `docker build && docker run -d` is compound, so
1294
+ # it is launched WITHOUT exec and the captured PID is the short-lived bash
1295
+ # wrapper that exits once the detached container is up. Liveness keys on the
1296
+ # container (or the wrapper still building), NOT the wrapper PID -- the same
1297
+ # reasoning as the compose branch above. Port mapping is the fixed
1298
+ # `-p PORT:PORT` from detection and the URL is already set, so no port
1299
+ # reconciliation or PID identity token is needed here.
1300
+ if _app_runner_dockerfile_container_running; then
1301
+ _write_app_state "running"
1302
+ log_info "App Runner: Dockerfile container '$_APP_RUNNER_DOCKER_CONTAINER' starting/running on port $_APP_RUNNER_PORT"
1303
+ return 0
1304
+ else
1305
+ log_error "App Runner: Dockerfile container failed to start (no running container, build wrapper exited)"
1306
+ _APP_RUNNER_CRASH_COUNT=$(( _APP_RUNNER_CRASH_COUNT + 1 ))
1307
+ _write_app_state "failed"
1308
+ return 1
1309
+ fi
1215
1310
  elif kill -0 "$_APP_RUNNER_PID" 2>/dev/null; then
1216
1311
  # Reconcile recorded port with the port the app actually bound (finding
1217
1312
  # #597), so state.json / detection.json / the preview URL point at the
@@ -1456,6 +1551,23 @@ app_runner_health_check() {
1456
1551
  return 0
1457
1552
  fi
1458
1553
 
1554
+ # Dockerfile path (HIGH-1): the detached `docker run -d` container's liveness
1555
+ # is the container running (or the build wrapper still building), NOT the
1556
+ # ephemeral bash wrapper PID. Without this branch the wrapper PID dies after
1557
+ # the build detaches the container and the PID check below would report the
1558
+ # live container as crashed -> watchdog tears it down and rebuilds forever.
1559
+ if [ "$_APP_RUNNER_IS_DOCKER" = true ] && [ -n "${_APP_RUNNER_DOCKER_CONTAINER:-}" ]; then
1560
+ if _app_runner_dockerfile_container_running; then
1561
+ _write_health "true"
1562
+ _write_app_state "running"
1563
+ return 0
1564
+ else
1565
+ _write_health "false"
1566
+ _write_app_state "crashed"
1567
+ return 1
1568
+ fi
1569
+ fi
1570
+
1459
1571
  # Check PID is alive (non-docker-compose methods)
1460
1572
  if ! kill -0 "$_APP_RUNNER_PID" 2>/dev/null; then
1461
1573
  _write_health "false"
@@ -1580,7 +1692,16 @@ app_runner_watchdog() {
1580
1692
  # This is what makes the service-aware health logic actually fire in the
1581
1693
  # live monitoring loop (not just in isolation). On an unhealthy web service
1582
1694
  # it restarts the stack under the same crash-count circuit breaker.
1583
- if [ "$_APP_RUNNER_IS_DOCKER" = true ] && echo "$_APP_RUNNER_METHOD" | grep -q "docker compose"; then
1695
+ # Detached-docker paths (compose stacks AND the Dockerfile `docker run -d`
1696
+ # container) both exit their captured wrapper PID once the container is up, so
1697
+ # `kill -0` is the wrong liveness signal. Delegate to app_runner_health_check,
1698
+ # whose container-aware branches (compose web service / hashed Dockerfile
1699
+ # container) own the real liveness check, under the same crash-count circuit
1700
+ # breaker. Without including the Dockerfile container here, the wrapper PID
1701
+ # would read as dead after the build detaches and the watchdog would tear the
1702
+ # live container down and rebuild forever (the HIGH-1 symptom).
1703
+ if [ "$_APP_RUNNER_IS_DOCKER" = true ] && \
1704
+ { echo "$_APP_RUNNER_METHOD" | grep -q "docker compose" || [ -n "${_APP_RUNNER_DOCKER_CONTAINER:-}" ]; }; then
1584
1705
  if app_runner_health_check; then
1585
1706
  # BUG 3 fix: the breaker is meant to fire on 5 CONSECUTIVE failures.
1586
1707
  # A confirmed-healthy observation clears any accumulated count so a
@@ -1590,7 +1711,7 @@ app_runner_watchdog() {
1590
1711
  return 0
1591
1712
  fi
1592
1713
  _APP_RUNNER_CRASH_COUNT=$(( _APP_RUNNER_CRASH_COUNT + 1 ))
1593
- log_warn "App Runner: compose web service unhealthy (crash #$_APP_RUNNER_CRASH_COUNT)"
1714
+ log_warn "App Runner: docker container unhealthy (crash #$_APP_RUNNER_CRASH_COUNT)"
1594
1715
  if [ "$_APP_RUNNER_CRASH_COUNT" -ge 5 ]; then
1595
1716
  log_error "App Runner: crash limit reached (5), marking as crashed"
1596
1717
  tail -20 "$_APP_RUNNER_DIR/app.log" 2>/dev/null | while IFS= read -r line; do
@@ -1601,9 +1722,9 @@ app_runner_watchdog() {
1601
1722
  fi
1602
1723
  local _c_backoff=$(( 1 << _APP_RUNNER_CRASH_COUNT ))
1603
1724
  [ "$_c_backoff" -gt 30 ] && _c_backoff=30
1604
- log_info "App Runner: restarting compose stack in ${_c_backoff}s..."
1725
+ log_info "App Runner: restarting docker app in ${_c_backoff}s..."
1605
1726
  sleep "$_c_backoff"
1606
- app_runner_start || log_warn "App Runner: compose auto-restart failed"
1727
+ app_runner_start || log_warn "App Runner: docker auto-restart failed"
1607
1728
  return 0
1608
1729
  fi
1609
1730
 
package/autonomy/loki CHANGED
@@ -2204,6 +2204,72 @@ _kill_pid() {
2204
2204
  fi
2205
2205
  }
2206
2206
 
# Print the pid of every process whose process-group id equals $1, one per
# line. Prints nothing (and still succeeds) when `ps` is unavailable or fails.
_stop_group_member_pids() {
    ps -axo pid=,pgid= 2>/dev/null | awk -v g="$1" '$2==g{print $1}' || true
}

# v7.7.34 group-kill, factored (loki-stop-F1). Reaps the orchestrator's whole
# process group via the recorded pgid so the autonomous agent (claude/codex/
# aider), which shares the orchestrator group and would otherwise reparent to
# init and keep editing files, is killed atomically. Shared by the no-arg
# `loki stop` path and the by-id path (`loki stop <session-id>`) so both reap
# identically.
#
# Each pgid file is read, validated (numeric, > 1, NOT this shell's own group)
# and group-killed (TERM, 1s grace, KILL); the file is removed afterwards.
# Spaces, tabs and carriage returns are stripped before validation so a
# CRLF-terminated or padded file is still honoured; a file with more than one
# line stays invalid and is skipped.
#
# A protected-pid conflict (dashboard pid files / pids registered under
# $LOKI_DIR/pids that share the group) downgrades the kill to a per-pid kill
# that excludes the protected pids, so a group-kill never tears down the
# dashboard.
#
# Every external probe and every kill is `|| true` guarded, so the function is
# safe under `set -euo pipefail` (a failing `ps`, an already-dead member, or an
# empty group all return non-zero legitimately).
#
# Globals:   LOKI_DIR (read), HOME (read)
# Arguments: one or more pgid-file paths
# Returns:   0 always
_stop_group_by_pgid_files() {
    local _stop_pgid_file _spgid _my_pgid _protected _pf _gp _conflict _members

    # Loop-invariant: this shell's own process group. Left empty when `ps`
    # cannot report it, in which case the own-group guard below cannot fire.
    _my_pgid=$(ps -o pgid= -p $$ 2>/dev/null | tr -d ' \t\r' || true)

    for _stop_pgid_file in "$@"; do
        [ -f "$_stop_pgid_file" ] || continue

        _spgid=$({ tr -d ' \t\r' < "$_stop_pgid_file"; } 2>/dev/null || true)
        case "$_spgid" in ''|*[!0-9]*) continue ;; esac
        [ "$_spgid" -gt 1 ] 2>/dev/null || continue
        [ "$_spgid" = "$_my_pgid" ] && continue   # never kill our own group

        # Collect protected pids as a space-delimited set (" 12 34 ") so that
        # membership is a simple substring match on " <pid> ".
        _protected=" "
        if [ -d "$LOKI_DIR/pids" ]; then
            for _pf in "$LOKI_DIR/pids"/*.json; do
                [ -f "$_pf" ] || continue
                _pf=${_pf##*/}
                _protected="${_protected}${_pf%.json} "
            done
        fi
        for _pf in "$LOKI_DIR/dashboard/dashboard.pid" "${HOME}/.loki/dashboard/dashboard.pid"; do
            [ -f "$_pf" ] || continue
            _protected="${_protected}$({ tr -d ' \t\r' < "$_pf"; } 2>/dev/null || true) "
        done

        # Does any protected pid share this group?
        _members=$(_stop_group_member_pids "$_spgid")
        _conflict=0
        for _gp in $_members; do
            case "$_protected" in *" $_gp "*) _conflict=1; break ;; esac
        done

        if [ "$_conflict" = "1" ]; then
            # Per-pid kill of group members EXCLUDING protected pids (and us).
            for _gp in $_members; do
                case "$_protected" in *" $_gp "*) continue ;; esac
                [ "$_gp" = "$$" ] && continue
                kill -TERM "$_gp" 2>/dev/null || true
            done
            sleep 1
            # Re-scan: membership may have changed during the grace period.
            for _gp in $(_stop_group_member_pids "$_spgid"); do
                case "$_protected" in *" $_gp "*) continue ;; esac
                [ "$_gp" = "$$" ] && continue
                kill -KILL "$_gp" 2>/dev/null || true
            done
        else
            kill -TERM -- -"$_spgid" 2>/dev/null || true
            sleep 1
            kill -KILL -- -"$_spgid" 2>/dev/null || true
        fi

        rm -f -- "$_stop_pgid_file" 2>/dev/null || true
    done

    return 0
}
2272
+
2207
2273
  # Stop a specific session by its session ID
2208
2274
  _stop_session_by_id() {
2209
2275
  local sid="$1"
@@ -2296,6 +2362,29 @@ cmd_stop() {
2296
2362
  # Stop a specific session by ID
2297
2363
  if [ -n "$target_session" ]; then
2298
2364
  if is_session_running "$target_session"; then
2365
+ # loki-stop-F1: group-kill FIRST (v7.7.34 discipline), scoped to THIS
2366
+ # session's recorded pgid. Without this the by-id path reaped only the
2367
+ # orchestrator pid (via _stop_session_by_id -> _kill_pid), leaving the
2368
+ # autonomous agent (claude/codex/aider) -- which shares the
2369
+ # orchestrator's process group -- to reparent to init and keep editing
2370
+ # files. That is exactly the v7.7.34 orphaned-agent bug, reopened on
2371
+ # the by-id path. The pgid is session-scoped: run.sh writes it next to
2372
+ # the session pid as ${pid_file%.pid}.pgid, so we only ever touch THIS
2373
+ # session's group (modern sessions/<id>/loki.pgid + legacy
2374
+ # run-<id>.pgid), never a sibling session or another folder.
2375
+ #
2376
+ # Deliberately NOT mirrored from the no-arg path: the docker reap,
2377
+ # session.json->stopped, and dashboard registry mark are folder/global
2378
+ # side effects (the docker container is named by workspace sha with no
2379
+ # per-session container; registry.mark_project_stopped marks the whole
2380
+ # project; session.json is the folder-level skill session). Firing them
2381
+ # on a by-id stop would mismark the project / kill a docker run while a
2382
+ # sibling session in the same folder is still building. The group-kill
2383
+ # alone closes the stated orphaned-agent hole; folder-global teardown
2384
+ # stays on the no-arg / --all paths.
2385
+ _stop_group_by_pgid_files \
2386
+ "$LOKI_DIR/sessions/$target_session/loki.pgid" \
2387
+ "$LOKI_DIR/run-${target_session}.pgid"
2299
2388
  _stop_session_by_id "$target_session"
2300
2389
  echo "Stopped session: $target_session"
2301
2390
  else
@@ -2381,56 +2470,9 @@ cmd_stop() {
2381
2470
  # session leader), so signaling the whole group reaps the orchestrator
2382
2471
  # AND the agent child atomically. Killing only the orchestrator pid lets
2383
2472
  # the agent reparent to init and keep editing files -- the reported bug.
2384
- # Guards: only a numeric pgid > 1 that is NOT this shell's own group.
2385
- local _stop_pgid_file
2386
- for _stop_pgid_file in "$LOKI_DIR/loki.pgid" "$LOKI_DIR/run.pgid"; do
2387
- [ -f "$_stop_pgid_file" ] || continue
2388
- local _spgid
2389
- _spgid=$(cat "$_stop_pgid_file" 2>/dev/null | tr -d ' ')
2390
- case "$_spgid" in ''|*[!0-9]*) continue ;; esac
2391
- [ "$_spgid" -gt 1 ] 2>/dev/null || continue
2392
- local _my_pgid
2393
- _my_pgid=$(ps -o pgid= -p $$ 2>/dev/null | tr -d ' ')
2394
- [ "$_spgid" = "$_my_pgid" ] && continue # never kill our own group
2395
- # Collect protected pids (dashboard, app-runner, registered pids) so
2396
- # a group-kill never takes down the dashboard if it happens to share
2397
- # the orchestrator group. Mirrors the dashboard Python route.
2398
- local _protected=" "
2399
- local _pf
2400
- if [ -d "$LOKI_DIR/pids" ]; then
2401
- for _pf in "$LOKI_DIR/pids"/*.json; do
2402
- [ -f "$_pf" ] || continue
2403
- _protected="${_protected}$(basename "$_pf" .json) "
2404
- done
2405
- fi
2406
- for _pf in "$LOKI_DIR/dashboard/dashboard.pid" "${HOME}/.loki/dashboard/dashboard.pid"; do
2407
- [ -f "$_pf" ] && _protected="${_protected}$(cat "$_pf" 2>/dev/null | tr -d ' ') "
2408
- done
2409
- # Does any protected pid share this group?
2410
- local _conflict=0 _gp
2411
- for _gp in $(ps -axo pid=,pgid= 2>/dev/null | awk -v g="$_spgid" '$2==g{print $1}'); do
2412
- case "$_protected" in *" $_gp "*) _conflict=1; break ;; esac
2413
- done
2414
- if [ "$_conflict" = "1" ]; then
2415
- # Per-pid kill of group members EXCLUDING protected pids.
2416
- for _gp in $(ps -axo pid=,pgid= 2>/dev/null | awk -v g="$_spgid" '$2==g{print $1}'); do
2417
- case "$_protected" in *" $_gp "*) continue ;; esac
2418
- [ "$_gp" = "$$" ] && continue
2419
- kill -TERM "$_gp" 2>/dev/null || true
2420
- done
2421
- sleep 1
2422
- for _gp in $(ps -axo pid=,pgid= 2>/dev/null | awk -v g="$_spgid" '$2==g{print $1}'); do
2423
- case "$_protected" in *" $_gp "*) continue ;; esac
2424
- [ "$_gp" = "$$" ] && continue
2425
- kill -KILL "$_gp" 2>/dev/null || true
2426
- done
2427
- else
2428
- kill -TERM -- -"$_spgid" 2>/dev/null || true
2429
- sleep 1
2430
- kill -KILL -- -"$_spgid" 2>/dev/null || true
2431
- fi
2432
- rm -f "$_stop_pgid_file" 2>/dev/null || true
2433
- done
2473
+ # Factored into _stop_group_by_pgid_files (loki-stop-F1) so the by-id stop
2474
+ # path performs identical reaping. Here we pass the GLOBAL pgid files.
2475
+ _stop_group_by_pgid_files "$LOKI_DIR/loki.pgid" "$LOKI_DIR/run.pgid"
2434
2476
 
2435
2477
  local killed_pid=""
2436
2478
  for pid_file in "$LOKI_DIR/loki.pid" "$LOKI_DIR/run.pid"; do
@@ -3029,7 +3071,7 @@ cmd_status_json() {
3029
3071
  local dashboard_port="${LOKI_DASHBOARD_PORT:-57374}"
3030
3072
  local env_provider="${LOKI_PROVIDER:-claude}"
3031
3073
 
3032
- python3 -c "
3074
+ if ! python3 -c "
3033
3075
  import json, os, sys, time
3034
3076
 
3035
3077
  skill_dir = sys.argv[1]
@@ -3317,9 +3359,14 @@ if os.path.isfile(gate_count_file):
3317
3359
  result['phase1'] = phase1
3318
3360
 
3319
3361
  print(json.dumps(result, indent=2))
3320
- " "$skill_dir" "$loki_dir" "$dashboard_port" "$env_provider"
3321
-
3322
- if [ $? -ne 0 ]; then
3362
+ " "$skill_dir" "$loki_dir" "$dashboard_port" "$env_provider"; then
3363
+ # WAVE8 loki-F2: under `set -euo pipefail` a bare python3 call aborts
3364
+ # the whole function on non-zero exit, so the old post-call
3365
+ # `if [ $? -ne 0 ]` fallback was DEAD code -- a missing/broken python3
3366
+ # crashed `loki status --json` instead of degrading. Guarding the call
3367
+ # with `if ! ...; then` catches the non-zero exit and emits the honest
3368
+ # error object. (Most malformed state files already degrade internally
3369
+ # via per-file try/except; this covers the interpreter-failure case.)
3323
3370
  echo '{"error": "Failed to generate JSON status. Ensure python3 is available."}' >&2
3324
3371
  return 1
3325
3372
  fi
@@ -12443,18 +12490,27 @@ for f in data.get('frictions', []):
12443
12490
 
12444
12491
  # Initialize or update healing progress
12445
12492
  if [[ ! -f "$heal_dir/healing-progress.json" ]] || [ "$do_resume" != "true" ]; then
12493
+ # WAVE8 loki-est: pass codebase/phase/strict/out-path via env instead of
12494
+ # interpolating raw bash into the python source. A codebase path or phase
12495
+ # containing an apostrophe made this a SyntaxError; under `|| true` the
12496
+ # progress file was then silently never written (and the later
12497
+ # prev_phase read would fail), breaking healing resume.
12498
+ LOKI_HEAL_CODEBASE="$codebase_path" \
12499
+ LOKI_HEAL_PHASE_VAL="$phase" \
12500
+ LOKI_HEAL_STRICT_VAL="$strict" \
12501
+ LOKI_HEAL_OUT="$heal_dir/healing-progress.json" \
12446
12502
  python3 -c "
12447
- import json
12503
+ import json, os
12448
12504
  from datetime import datetime
12449
12505
  progress = {
12450
- 'codebase': '$codebase_path',
12506
+ 'codebase': os.environ.get('LOKI_HEAL_CODEBASE', ''),
12451
12507
  'started': datetime.now().isoformat(),
12452
- 'current_phase': '$phase',
12453
- 'strict_mode': $( [ "$strict" = "true" ] && echo "True" || echo "False" ),
12508
+ 'current_phase': os.environ.get('LOKI_HEAL_PHASE_VAL', ''),
12509
+ 'strict_mode': os.environ.get('LOKI_HEAL_STRICT_VAL', '') == 'true',
12454
12510
  'components': [],
12455
12511
  'overall_health': 0.0
12456
12512
  }
12457
- with open('$heal_dir/healing-progress.json', 'w') as f:
12513
+ with open(os.environ['LOKI_HEAL_OUT'], 'w') as f:
12458
12514
  json.dump(progress, f, indent=2)
12459
12515
  " || true
12460
12516
  fi
@@ -12462,7 +12518,7 @@ with open('$heal_dir/healing-progress.json', 'w') as f:
12462
12518
  # BUG-HEAL-004: Validate phase gate when resuming from a previous phase
12463
12519
  if [ "$do_resume" = "true" ] && [[ -f "$heal_dir/healing-progress.json" ]] && type hook_healing_phase_gate &>/dev/null; then
12464
12520
  local prev_phase
12465
- prev_phase=$(python3 -c "import json; print(json.load(open('$heal_dir/healing-progress.json')).get('current_phase', 'archaeology'))" 2>/dev/null || echo "archaeology")
12521
+ prev_phase=$(LOKI_HEAL_PROG="$heal_dir/healing-progress.json" python3 -c "import json, os; print(json.load(open(os.environ['LOKI_HEAL_PROG'])).get('current_phase', 'archaeology'))" 2>/dev/null || echo "archaeology")
12466
12522
  if [[ "$prev_phase" != "$phase" ]]; then
12467
12523
  local gate_result
12468
12524
  if ! gate_result=$(hook_healing_phase_gate "$prev_phase" "$phase" 2>&1); then
@@ -22792,35 +22848,39 @@ cmd_onboard() {
22792
22848
  fi
22793
22849
  # Extract metadata from package.json
22794
22850
  if command -v python3 &>/dev/null; then
22851
+ # WAVE8 loki-est (same class as cmd_explain): pass the repo path via
22852
+ # env (os.environ) instead of interpolating into the python source.
22853
+ # A path with an apostrophe made each heredoc a SyntaxError, silently
22854
+ # dropping the package.json name/version/description under `|| true`.
22795
22855
  local pkg_name
22796
- pkg_name=$(python3 -c "
22797
- import json, sys
22856
+ pkg_name=$(LOKI_ONB_PKG="$target_path/package.json" python3 -c "
22857
+ import json, os
22798
22858
  try:
22799
- d = json.load(open('$target_path/package.json'))
22859
+ d = json.load(open(os.environ['LOKI_ONB_PKG']))
22800
22860
  print(d.get('name', ''))
22801
22861
  except: pass
22802
22862
  " 2>/dev/null || true)
22803
22863
  if [ -n "$pkg_name" ]; then
22804
22864
  project_name="$pkg_name"
22805
22865
  fi
22806
- project_description=$(python3 -c "
22807
- import json, sys
22866
+ project_description=$(LOKI_ONB_PKG="$target_path/package.json" python3 -c "
22867
+ import json, os
22808
22868
  try:
22809
- d = json.load(open('$target_path/package.json'))
22869
+ d = json.load(open(os.environ['LOKI_ONB_PKG']))
22810
22870
  print(d.get('description', ''))
22811
22871
  except: pass
22812
22872
  " 2>/dev/null || true)
22813
- project_version=$(python3 -c "
22814
- import json, sys
22873
+ project_version=$(LOKI_ONB_PKG="$target_path/package.json" python3 -c "
22874
+ import json, os
22815
22875
  try:
22816
- d = json.load(open('$target_path/package.json'))
22876
+ d = json.load(open(os.environ['LOKI_ONB_PKG']))
22817
22877
  print(d.get('version', ''))
22818
22878
  except: pass
22819
22879
  " 2>/dev/null || true)
22820
- entry_points=$(python3 -c "
22821
- import json, sys
22880
+ entry_points=$(LOKI_ONB_PKG="$target_path/package.json" python3 -c "
22881
+ import json, os
22822
22882
  try:
22823
- d = json.load(open('$target_path/package.json'))
22883
+ d = json.load(open(os.environ['LOKI_ONB_PKG']))
22824
22884
  main = d.get('main', '')
22825
22885
  if main: print(main)
22826
22886
  scripts = d.get('scripts', {})
@@ -23200,10 +23260,11 @@ $imports"
23200
23260
  if [ -f "$target_path/package.json" ]; then
23201
23261
  if command -v python3 &>/dev/null; then
23202
23262
  local scripts_json
23203
- scripts_json=$(python3 -c "
23204
- import json
23263
+ # WAVE8 loki-est: env-passed path (see cmd_onboard metadata block).
23264
+ scripts_json=$(LOKI_ONB_PKG="$target_path/package.json" python3 -c "
23265
+ import json, os
23205
23266
  try:
23206
- d = json.load(open('$target_path/package.json'))
23267
+ d = json.load(open(os.environ['LOKI_ONB_PKG']))
23207
23268
  s = d.get('scripts', {})
23208
23269
  for k in ['build', 'dev', 'start', 'test', 'lint', 'format', 'check', 'typecheck']:
23209
23270
  if k in s:
@@ -23595,10 +23656,14 @@ cmd_explain() {
23595
23656
 
23596
23657
  if command -v python3 &>/dev/null; then
23597
23658
  local pkg_meta
23598
- pkg_meta=$(python3 -c "
23599
- import json
23659
+ # WAVE8 loki-est: pass the path via env (os.environ) instead of
23660
+ # interpolating into the python source -- a repo path containing an
23661
+ # apostrophe broke the string literal and dropped all package.json
23662
+ # metadata.
23663
+ pkg_meta=$(LOKI_EXP_PKG="$target_path/package.json" python3 -c "
23664
+ import json, os
23600
23665
  try:
23601
- d = json.load(open('$target_path/package.json'))
23666
+ d = json.load(open(os.environ['LOKI_EXP_PKG']))
23602
23667
  print(d.get('name', ''))
23603
23668
  print(d.get('description', ''))
23604
23669
  print(d.get('version', ''))
@@ -23827,17 +23892,67 @@ $devdeps_list"
23827
23892
 
23828
23893
  # --- JSON output ---
23829
23894
  if [ "$output_json" = true ]; then
23895
+ # WAVE8 loki-est: pass every value via the environment and read it with
23896
+ # os.environ instead of interpolating raw bash into the python source.
23897
+ # Interpolation broke on any apostrophe/quote/newline in a project name,
23898
+ # version, description, or path (e.g. a dir named `my'app`), silently
23899
+ # degrading real analysis to `{"error": "JSON generation failed"}`.
23900
+ # env-passing is injection-proof and keeps the same output shape.
23901
+ LOKI_EXP_NAME="$project_name" \
23902
+ LOKI_EXP_DESC="$project_description" \
23903
+ LOKI_EXP_VERSION="$project_version" \
23904
+ LOKI_EXP_PATH="$target_path" \
23905
+ LOKI_EXP_LANGUAGES="$languages" \
23906
+ LOKI_EXP_FRAMEWORKS="$frameworks" \
23907
+ LOKI_EXP_BUILD="$build_system" \
23908
+ LOKI_EXP_PKGMGR="$package_manager" \
23909
+ LOKI_EXP_TESTFW="$test_framework" \
23910
+ LOKI_EXP_CI="$ci_system" \
23911
+ LOKI_EXP_PATTERNS="$detected_patterns" \
23912
+ LOKI_EXP_TOTAL="$total_files" \
23913
+ LOKI_EXP_SRC="$src_count" \
23914
+ LOKI_EXP_TEST="$test_count" \
23915
+ LOKI_EXP_DOC="$doc_count" \
23916
+ LOKI_EXP_CONFIG="$config_count" \
23917
+ LOKI_EXP_BUILDCMD="$build_cmd" \
23918
+ LOKI_EXP_RUNCMD="$run_cmd" \
23919
+ LOKI_EXP_TESTCMD="$test_cmd" \
23920
+ LOKI_EXP_LINTCMD="$lint_cmd" \
23921
+ LOKI_EXP_ENTRY="$major_files" \
23922
+ LOKI_EXP_MONOREPO="$is_monorepo" \
23923
+ LOKI_EXP_DOCKER="$has_docker" \
23830
23924
  python3 -c "
23831
- import json
23925
+ import json, os
23926
+
23927
+ def _s(name):
23928
+ return os.environ.get(name, '')
23929
+
23930
+ def _list(name):
23931
+ v = _s(name).strip()
23932
+ return v.split() if v else []
23933
+
23934
+ def _opt(name):
23935
+ v = _s(name).strip()
23936
+ return v or None
23937
+
23938
+ def _int(name):
23939
+ try:
23940
+ return int(_s(name).strip())
23941
+ except (ValueError, TypeError):
23942
+ return 0
23943
+
23944
+ def _bool(name):
23945
+ return _s(name).strip() == 'true'
23946
+
23832
23947
  data = {
23833
- 'project': {'name': '$project_name', 'description': '''$(echo "$project_description" | sed "s/'/\\\\'/g")''', 'version': '$project_version', 'path': '$target_path'},
23834
- 'stack': {'languages': '${languages}'.split() if '${languages}'.strip() else [], 'frameworks': '${frameworks}'.split() if '${frameworks}'.strip() else [], 'build_system': '$build_system' or None, 'package_manager': '$package_manager' or None, 'test_framework': '${test_framework}'.split() if '${test_framework}'.strip() else [], 'ci': '${ci_system}'.strip() or None},
23835
- 'patterns': '${detected_patterns}'.split() if '${detected_patterns}'.strip() else [],
23836
- 'files': {'total': $total_files, 'source': $src_count, 'test': $test_count, 'docs': $doc_count, 'config': $config_count},
23837
- 'commands': {'build': '${build_cmd}' or None, 'run': '${run_cmd}' or None, 'test': '${test_cmd}' or None, 'lint': '${lint_cmd}' or None},
23838
- 'entry_points': '${major_files}'.split() if '${major_files}'.strip() else [],
23839
- 'monorepo': $( [ "$is_monorepo" = true ] && echo "True" || echo "False" ),
23840
- 'has_docker': $( [ "$has_docker" = true ] && echo "True" || echo "False" )
23948
+ 'project': {'name': _s('LOKI_EXP_NAME'), 'description': _s('LOKI_EXP_DESC'), 'version': _s('LOKI_EXP_VERSION'), 'path': _s('LOKI_EXP_PATH')},
23949
+ 'stack': {'languages': _list('LOKI_EXP_LANGUAGES'), 'frameworks': _list('LOKI_EXP_FRAMEWORKS'), 'build_system': _opt('LOKI_EXP_BUILD'), 'package_manager': _opt('LOKI_EXP_PKGMGR'), 'test_framework': _list('LOKI_EXP_TESTFW'), 'ci': _opt('LOKI_EXP_CI')},
23950
+ 'patterns': _list('LOKI_EXP_PATTERNS'),
23951
+ 'files': {'total': _int('LOKI_EXP_TOTAL'), 'source': _int('LOKI_EXP_SRC'), 'test': _int('LOKI_EXP_TEST'), 'docs': _int('LOKI_EXP_DOC'), 'config': _int('LOKI_EXP_CONFIG')},
23952
+ 'commands': {'build': _opt('LOKI_EXP_BUILDCMD'), 'run': _opt('LOKI_EXP_RUNCMD'), 'test': _opt('LOKI_EXP_TESTCMD'), 'lint': _opt('LOKI_EXP_LINTCMD')},
23953
+ 'entry_points': _list('LOKI_EXP_ENTRY'),
23954
+ 'monorepo': _bool('LOKI_EXP_MONOREPO'),
23955
+ 'has_docker': _bool('LOKI_EXP_DOCKER')
23841
23956
  }
23842
23957
  print(json.dumps(data, indent=2))
23843
23958
  " 2>/dev/null || echo '{"error": "JSON generation failed"}'