loki-mode 5.49.0 → 5.49.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -121
- package/SKILL.md +3 -3
- package/VERSION +1 -1
- package/autonomy/CONSTITUTION.md +4 -4
- package/autonomy/app-runner.sh +9 -0
- package/autonomy/loki +107 -0
- package/autonomy/run.sh +170 -4
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +172 -20
- package/dashboard/static/index.html +1 -1
- package/docs/COMPARISON.md +15 -15
- package/docs/COMPETITIVE-ANALYSIS.md +4 -4
- package/docs/INSTALLATION.md +20 -12
- package/docs/alternative-installations.md +145 -0
- package/docs/auto-claude-comparison.md +1 -1
- package/docs/cursor-comparison.md +7 -7
- package/docs/thick2thin.md +2 -2
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
- package/references/agent-types.md +2 -2
- package/references/agents.md +1 -1
- package/references/competitive-analysis.md +1 -1
- package/references/core-workflow.md +1 -1
- package/skills/00-index.md +1 -1
- package/skills/agents.md +3 -3
- package/skills/artifacts.md +1 -1
- package/skills/parallel-workflows.md +1 -1
- package/skills/quality-gates.md +4 -2
package/autonomy/run.sh
CHANGED
|
@@ -667,6 +667,146 @@ log_error() { echo -e "${RED}[ERROR]${NC} $*"; }
|
|
|
667
667
|
log_step() { echo -e "${CYAN}[STEP]${NC} $*"; }
|
|
668
668
|
log_debug() { [[ "${LOKI_DEBUG:-}" == "true" ]] && echo -e "${CYAN}[DEBUG]${NC} $*" || true; }
|
|
669
669
|
|
|
670
|
+
#===============================================================================
|
|
671
|
+
# Process Registry (PID Supervisor)
|
|
672
|
+
# Central registry of all spawned child processes for reliable cleanup
|
|
673
|
+
#===============================================================================
|
|
674
|
+
|
|
675
|
+
# Directory holding one "<pid>.json" entry per registered child process.
# Stays empty until init_pid_registry runs; the registry helpers call
# init_pid_registry themselves when they find it empty.
PID_REGISTRY_DIR=""

# Initialize the PID registry directory.
# Points PID_REGISTRY_DIR at "<TARGET_DIR>/.loki/pids" (TARGET_DIR defaults to
# the current directory when unset or empty) and creates it if missing.
# Returns: the exit status of mkdir.
init_pid_registry() {
    PID_REGISTRY_DIR="${TARGET_DIR:-.}/.loki/pids"
    mkdir -p "$PID_REGISTRY_DIR"
}
|
|
682
|
+
|
|
683
|
+
# Parse a top-level field from a JSON registry entry and print its value.
# Usage: _parse_json_field <file> <field>
#
# Uses python3 when available (a real JSON parser; prints an empty line when
# the field is absent). Otherwise falls back to sed, which only understands
# the flat, single-line objects written by register_pid:
#   - <field> must be a plain identifier (it is spliced into a regex);
#   - string values are returned without their surrounding quotes;
#   - values containing an escaped quote, a comma or "}" are truncated there;
#   - nothing is printed when the field is absent.
_parse_json_field() {
    local file="$1" field="$2"
    if command -v python3 >/dev/null 2>&1; then
        python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get(sys.argv[2],''))" "$file" "$field" 2>/dev/null
    else
        # Portable POSIX BRE (no GNU-only \s). -n with /p prints only lines in
        # which the field was actually found, and the optional opening quote is
        # consumed before the capture so quoted values are not emptied out.
        sed -n 's/.*"'"$field"'"[[:space:]]*:[[:space:]]*"\{0,1\}\([^",}]*\).*/\1/p' "$file" 2>/dev/null | head -n 1
    fi
}
|
|
694
|
+
|
|
695
|
+
# Register a spawned process in the central registry.
# Usage:   register_pid <pid> <label> [<extra_info>]
# Example: register_pid $! "dashboard" "port=57374"
#
# Writes "<PID_REGISTRY_DIR>/<pid>.json" containing a single-line JSON object
# with the keys pid, label, started (UTC, ISO-8601), ppid (this shell's $$)
# and extra. The registry directory is initialized on demand.
#
# A pid that is not purely decimal digits is ignored with a warning on stderr
# (return 0, so an errexit caller is not aborted): it would otherwise be
# spliced unquoted into both the file name and the JSON.
register_pid() {
    local pid="$1"
    local label="${2:-}"
    local extra="${3:-}"

    case "$pid" in
        ''|*[!0-9]*)
            echo "register_pid: ignoring non-numeric pid '$pid'" >&2
            return 0
            ;;
    esac

    # JSON-escape: backslash first, then double quote. Control characters
    # (newline, CR, tab, ...) are illegal inside JSON strings, so drop them.
    label="${label//\\/\\\\}"
    label="${label//\"/\\\"}"
    label="${label//[[:cntrl:]]/}"
    extra="${extra//\\/\\\\}"
    extra="${extra//\"/\\\"}"
    extra="${extra//[[:cntrl:]]/}"

    [ -z "$PID_REGISTRY_DIR" ] && init_pid_registry

    local entry_file="$PID_REGISTRY_DIR/${pid}.json"
    local started
    started="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

    # Write to a sibling temp file and rename, so a concurrent reader never
    # sees a half-written entry. The ".tmp" suffix keeps it out of "*.json"
    # globs used by the registry scanners.
    printf '{"pid":%s,"label":"%s","started":"%s","ppid":%s,"extra":"%s"}\n' \
        "$pid" "$label" "$started" "$$" "$extra" > "$entry_file.tmp" \
        && mv -f "$entry_file.tmp" "$entry_file"
}
|
|
714
|
+
|
|
715
|
+
# Unregister a process from the registry (called on clean shutdown).
# Usage: unregister_pid <pid>
#
# Removes "<PID_REGISTRY_DIR>/<pid>.json" if it exists; a missing entry is not
# an error. A pid that is not purely decimal digits is ignored (return 0):
# the value becomes part of an rm path, and at least one caller passes a value
# read back from a pid file, so "../"-style input must never reach rm.
unregister_pid() {
    local pid="$1"

    case "$pid" in
        ''|*[!0-9]*) return 0 ;;
    esac

    [ -z "$PID_REGISTRY_DIR" ] && init_pid_registry
    rm -f "$PID_REGISTRY_DIR/${pid}.json" 2>/dev/null
}
|
|
722
|
+
|
|
723
|
+
# Kill a registered process with SIGTERM -> wait -> SIGKILL escalation.
# Usage: kill_registered_pid <pid>
#
# If the process is alive it is sent SIGTERM, polled every 0.5s for up to
# 2 seconds (4 polls), and sent SIGKILL if it is still alive afterwards.
# The registry entry is removed in every case via unregister_pid.
#
# Non-numeric input is ignored (return 0). A numeric pid of 0 is never
# signalled -- "kill 0" would target the caller's whole process group -- but
# its registry entry is still removed.
kill_registered_pid() {
    local pid="$1"

    case "$pid" in
        ''|*[!0-9]*) return 0 ;;
    esac

    if [ "$pid" -gt 0 ] 2>/dev/null && kill -0 "$pid" 2>/dev/null; then
        kill "$pid" 2>/dev/null || true

        # Wait up to 2 seconds for graceful exit
        local waited=0
        while [ "$waited" -lt 4 ] && kill -0 "$pid" 2>/dev/null; do
            sleep 0.5
            waited=$((waited + 1))
        done

        # Escalate to SIGKILL if still alive
        if kill -0 "$pid" 2>/dev/null; then
            kill -9 "$pid" 2>/dev/null || true
        fi
    fi

    unregister_pid "$pid"
}
|
|
742
|
+
|
|
743
|
+
# Scan the registry for orphaned processes and kill them.
# Called on startup and by `loki cleanup`.
#
# For every "<pid>.json" entry in PID_REGISTRY_DIR:
#   - non-numeric file names are skipped;
#   - if the process is dead, the stale entry is deleted;
#   - if the process is alive and its recorded parent ("ppid" field) is a
#     different, no-longer-running process, it is treated as an orphan and
#     killed via kill_registered_pid;
#   - entries whose ppid is missing/non-numeric, equals this shell's $$, or
#     refers to a live process are left untouched.
#
# Output: the number of orphans killed, on stdout (warnings go to stderr, so
#         callers can safely capture stdout).
# NOTE(review): liveness is judged by pid alone; a recycled pid would be
# indistinguishable from the originally registered process.
cleanup_orphan_pids() {
    [ -z "$PID_REGISTRY_DIR" ] && init_pid_registry

    # Declared local so the loop does not leak/clobber globals in the caller.
    local entry_file pid ppid_val label
    local orphan_count=0

    if [ ! -d "$PID_REGISTRY_DIR" ]; then
        echo "0"
        return 0
    fi

    for entry_file in "$PID_REGISTRY_DIR"/*.json; do
        # An unmatched glob stays literal; skip it (and anything not a file).
        [ -f "$entry_file" ] || continue
        pid=$(basename "$entry_file" .json)

        # Skip non-numeric filenames
        case "$pid" in
            ''|*[!0-9]*) continue ;;
        esac

        if kill -0 "$pid" 2>/dev/null; then
            # Process is alive -- check if its parent session is dead
            ppid_val=""
            ppid_val=$(_parse_json_field "$entry_file" "ppid") || true

            # Validate ppid_val is numeric before using with kill
            case "$ppid_val" in ''|*[!0-9]*) ppid_val="" ;; esac

            if [ -n "$ppid_val" ] && [ "$ppid_val" != "$$" ]; then
                if ! kill -0 "$ppid_val" 2>/dev/null; then
                    # Parent is dead -- this is an orphan
                    label=""
                    label=$(_parse_json_field "$entry_file" "label") || label=""
                    # The parser can succeed yet print nothing; keep the log readable.
                    label="${label:-unknown}"
                    log_warn "Killing orphaned process: PID=$pid label=$label (parent $ppid_val is dead)" >&2
                    kill_registered_pid "$pid"
                    orphan_count=$((orphan_count + 1))
                fi
            fi
        else
            # Process is dead -- clean up stale registry entry
            rm -f "$entry_file" 2>/dev/null
        fi
    done

    echo "$orphan_count"
}
|
|
790
|
+
|
|
791
|
+
# Kill ALL registered processes (used during full shutdown).
# Walks every "<pid>.json" entry in PID_REGISTRY_DIR and hands each numeric
# pid to kill_registered_pid, which terminates the process and removes the
# entry. Files whose name is not purely numeric are skipped. Returns 0 when
# the registry directory does not exist.
kill_all_registered() {
    [ -z "$PID_REGISTRY_DIR" ] && init_pid_registry

    if [ ! -d "$PID_REGISTRY_DIR" ]; then
        return 0
    fi

    # Declared local so the loop does not leak/clobber globals in the caller.
    local entry_file pid
    for entry_file in "$PID_REGISTRY_DIR"/*.json; do
        # An unmatched glob stays literal; skip it (and anything not a file).
        [ -f "$entry_file" ] || continue
        pid=$(basename "$entry_file" .json)
        case "$pid" in
            ''|*[!0-9]*) continue ;;
        esac
        kill_registered_pid "$pid"
    done
}
|
|
809
|
+
|
|
670
810
|
#===============================================================================
|
|
671
811
|
# Event Emission (Dashboard Integration)
|
|
672
812
|
# Writes events to .loki/events.jsonl for dashboard consumption
|
|
@@ -1688,6 +1828,7 @@ create_worktree() {
|
|
|
1688
1828
|
) &
|
|
1689
1829
|
# Capture install PID for cleanup on exit
|
|
1690
1830
|
WORKTREE_INSTALL_PIDS+=($!)
|
|
1831
|
+
register_pid "$!" "worktree-install" "stream=$stream_name"
|
|
1691
1832
|
|
|
1692
1833
|
log_info "Created worktree: $worktree_path"
|
|
1693
1834
|
return 0
|
|
@@ -1796,6 +1937,7 @@ spawn_worktree_session() {
|
|
|
1796
1937
|
|
|
1797
1938
|
local pid=$!
|
|
1798
1939
|
WORKTREE_PIDS[$stream_name]=$pid
|
|
1940
|
+
register_pid "$pid" "worktree-session" "stream=$stream_name"
|
|
1799
1941
|
|
|
1800
1942
|
log_info "Session spawned: $stream_name (PID: $pid)"
|
|
1801
1943
|
return 0
|
|
@@ -2002,6 +2144,7 @@ cleanup_parallel_streams() {
|
|
|
2002
2144
|
if kill -0 "$pid" 2>/dev/null; then
|
|
2003
2145
|
kill "$pid" 2>/dev/null || true
|
|
2004
2146
|
fi
|
|
2147
|
+
unregister_pid "$pid"
|
|
2005
2148
|
done
|
|
2006
2149
|
WORKTREE_INSTALL_PIDS=()
|
|
2007
2150
|
|
|
@@ -2012,6 +2155,7 @@ cleanup_parallel_streams() {
|
|
|
2012
2155
|
log_step "Stopping session: $stream"
|
|
2013
2156
|
kill "$pid" 2>/dev/null || true
|
|
2014
2157
|
fi
|
|
2158
|
+
unregister_pid "$pid"
|
|
2015
2159
|
done
|
|
2016
2160
|
|
|
2017
2161
|
# Wait for all to finish
|
|
@@ -2620,8 +2764,8 @@ write_dashboard_state() {
|
|
|
2620
2764
|
# Get complexity tier
|
|
2621
2765
|
local complexity="${DETECTED_COMPLEXITY:-standard}"
|
|
2622
2766
|
|
|
2623
|
-
# Get RARV cycle step (
|
|
2624
|
-
local rarv_step=$((ITERATION_COUNT % 4))
|
|
2767
|
+
# Get RARV cycle step from actual phase tracking (falls back to iteration-based)
|
|
2768
|
+
local rarv_step=${RARV_CURRENT_STEP:-$((ITERATION_COUNT % 4))}
|
|
2625
2769
|
local rarv_stages='["reason", "act", "reflect", "verify"]'
|
|
2626
2770
|
|
|
2627
2771
|
# Get memory system stats (if available)
|
|
@@ -2634,9 +2778,9 @@ write_dashboard_state() {
|
|
|
2634
2778
|
[ -d ".loki/memory/skills" ] && procedural_count=$(find ".loki/memory/skills" -type f -name "*.json" 2>/dev/null | wc -l | tr -d ' ')
|
|
2635
2779
|
|
|
2636
2780
|
# Get quality gates status (if available)
|
|
2637
|
-
local quality_gates='
|
|
2781
|
+
local quality_gates='null'
|
|
2638
2782
|
if [ -f ".loki/state/quality-gates.json" ]; then
|
|
2639
|
-
quality_gates=$(cat ".loki/state/quality-gates.json" 2>/dev/null || echo
|
|
2783
|
+
quality_gates=$(cat ".loki/state/quality-gates.json" 2>/dev/null || echo 'null')
|
|
2640
2784
|
fi
|
|
2641
2785
|
|
|
2642
2786
|
# Get Completion Council state (v5.25.0)
|
|
@@ -3037,6 +3181,7 @@ start_status_monitor() {
|
|
|
3037
3181
|
done
|
|
3038
3182
|
) &
|
|
3039
3183
|
STATUS_MONITOR_PID=$!
|
|
3184
|
+
register_pid "$STATUS_MONITOR_PID" "status-monitor"
|
|
3040
3185
|
|
|
3041
3186
|
log_info "Status monitor started"
|
|
3042
3187
|
log_info "Monitor progress: ${CYAN}watch -n 2 cat .loki/STATUS.txt${NC}"
|
|
@@ -3046,6 +3191,7 @@ stop_status_monitor() {
|
|
|
3046
3191
|
if [ -n "$STATUS_MONITOR_PID" ]; then
|
|
3047
3192
|
kill "$STATUS_MONITOR_PID" 2>/dev/null || true
|
|
3048
3193
|
wait "$STATUS_MONITOR_PID" 2>/dev/null || true
|
|
3194
|
+
unregister_pid "$STATUS_MONITOR_PID"
|
|
3049
3195
|
fi
|
|
3050
3196
|
stop_resource_monitor
|
|
3051
3197
|
}
|
|
@@ -3621,6 +3767,7 @@ start_resource_monitor() {
|
|
|
3621
3767
|
done
|
|
3622
3768
|
) &
|
|
3623
3769
|
RESOURCE_MONITOR_PID=$!
|
|
3770
|
+
register_pid "$RESOURCE_MONITOR_PID" "resource-monitor"
|
|
3624
3771
|
|
|
3625
3772
|
log_info "Resource monitor started (CPU threshold: ${RESOURCE_CPU_THRESHOLD}%, Memory threshold: ${RESOURCE_MEM_THRESHOLD}%)"
|
|
3626
3773
|
log_info "Check status: ${CYAN}cat .loki/state/resources.json${NC}"
|
|
@@ -3630,6 +3777,7 @@ stop_resource_monitor() {
|
|
|
3630
3777
|
if [ -n "$RESOURCE_MONITOR_PID" ]; then
|
|
3631
3778
|
kill "$RESOURCE_MONITOR_PID" 2>/dev/null || true
|
|
3632
3779
|
wait "$RESOURCE_MONITOR_PID" 2>/dev/null || true
|
|
3780
|
+
unregister_pid "$RESOURCE_MONITOR_PID"
|
|
3633
3781
|
fi
|
|
3634
3782
|
}
|
|
3635
3783
|
|
|
@@ -4720,12 +4868,14 @@ BUILD_PROMPT
|
|
|
4720
4868
|
esac
|
|
4721
4869
|
) &
|
|
4722
4870
|
pids+=($!)
|
|
4871
|
+
register_pid "$!" "code-reviewer" "name=$reviewer_name"
|
|
4723
4872
|
done
|
|
4724
4873
|
|
|
4725
4874
|
# Wait for all reviewers to complete
|
|
4726
4875
|
log_info "Waiting for $reviewer_count reviewers to complete (blind review)..."
|
|
4727
4876
|
for pid in "${pids[@]}"; do
|
|
4728
4877
|
wait "$pid" || true
|
|
4878
|
+
unregister_pid "$pid"
|
|
4729
4879
|
done
|
|
4730
4880
|
|
|
4731
4881
|
log_info "All reviewers complete. Aggregating verdicts..."
|
|
@@ -5191,6 +5341,7 @@ start_dashboard() {
|
|
|
5191
5341
|
LOKI_TLS_CERT="${LOKI_TLS_CERT:-}" LOKI_TLS_KEY="${LOKI_TLS_KEY:-}" \
|
|
5192
5342
|
LOKI_SKILL_DIR="${skill_dir}" PYTHONPATH="${skill_dir}" nohup "$python_cmd" -m dashboard.server > "$log_file" 2>&1 &
|
|
5193
5343
|
DASHBOARD_PID=$!
|
|
5344
|
+
register_pid "$DASHBOARD_PID" "dashboard" "port=${DASHBOARD_PORT:-57374}"
|
|
5194
5345
|
|
|
5195
5346
|
# Save PID for later cleanup
|
|
5196
5347
|
mkdir -p .loki/dashboard
|
|
@@ -5224,6 +5375,7 @@ stop_dashboard() {
|
|
|
5224
5375
|
if [ -n "$DASHBOARD_PID" ]; then
|
|
5225
5376
|
kill "$DASHBOARD_PID" 2>/dev/null || true
|
|
5226
5377
|
wait "$DASHBOARD_PID" 2>/dev/null || true
|
|
5378
|
+
unregister_pid "$DASHBOARD_PID"
|
|
5227
5379
|
fi
|
|
5228
5380
|
|
|
5229
5381
|
# Also try PID file
|
|
@@ -5231,6 +5383,7 @@ stop_dashboard() {
|
|
|
5231
5383
|
local saved_pid=$(cat ".loki/dashboard/dashboard.pid" 2>/dev/null)
|
|
5232
5384
|
if [ -n "$saved_pid" ]; then
|
|
5233
5385
|
kill "$saved_pid" 2>/dev/null || true
|
|
5386
|
+
unregister_pid "$saved_pid"
|
|
5234
5387
|
fi
|
|
5235
5388
|
rm -f ".loki/dashboard/dashboard.pid"
|
|
5236
5389
|
fi
|
|
@@ -7121,6 +7274,7 @@ cleanup() {
|
|
|
7121
7274
|
fi
|
|
7122
7275
|
stop_dashboard
|
|
7123
7276
|
stop_status_monitor
|
|
7277
|
+
kill_all_registered
|
|
7124
7278
|
rm -f "$loki_dir/loki.pid" 2>/dev/null
|
|
7125
7279
|
if [ -f "$loki_dir/session.json" ]; then
|
|
7126
7280
|
_LOKI_SESSION_FILE="$loki_dir/session.json" python3 -c "
|
|
@@ -7148,6 +7302,7 @@ except (json.JSONDecodeError, OSError): pass
|
|
|
7148
7302
|
fi
|
|
7149
7303
|
stop_dashboard
|
|
7150
7304
|
stop_status_monitor
|
|
7305
|
+
kill_all_registered
|
|
7151
7306
|
rm -f .loki/loki.pid .loki/PAUSE 2>/dev/null
|
|
7152
7307
|
# Mark session.json as stopped
|
|
7153
7308
|
if [ -f ".loki/session.json" ]; then
|
|
@@ -7335,6 +7490,7 @@ main() {
|
|
|
7335
7490
|
LOKI_RUNNING_FROM_TEMP='' nohup "$original_script" "${cmd_args[@]}" > "$log_file" 2>&1 &
|
|
7336
7491
|
local bg_pid=$!
|
|
7337
7492
|
echo "$bg_pid" > "$pid_file"
|
|
7493
|
+
register_pid "$bg_pid" "background-session" "log=$log_file"
|
|
7338
7494
|
|
|
7339
7495
|
echo ""
|
|
7340
7496
|
echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
|
@@ -7457,6 +7613,14 @@ main() {
|
|
|
7457
7613
|
# Write PID file for ALL modes (foreground + background)
|
|
7458
7614
|
echo "$$" > "$pid_file"
|
|
7459
7615
|
|
|
7616
|
+
# Initialize PID registry and clean up orphans from previous sessions
|
|
7617
|
+
init_pid_registry
|
|
7618
|
+
local orphan_count
|
|
7619
|
+
orphan_count=$(cleanup_orphan_pids)
|
|
7620
|
+
if [ "$orphan_count" -gt 0 ]; then
|
|
7621
|
+
log_warn "Killed $orphan_count orphaned process(es) from previous session"
|
|
7622
|
+
fi
|
|
7623
|
+
|
|
7460
7624
|
# Copy skill files to .loki/skills/ - makes CLI self-contained
|
|
7461
7625
|
# No need to install Claude Code skill separately
|
|
7462
7626
|
copy_skill_files
|
|
@@ -7536,10 +7700,12 @@ main() {
|
|
|
7536
7700
|
run_autonomous "$PRD_PATH"
|
|
7537
7701
|
) &
|
|
7538
7702
|
local main_pid=$!
|
|
7703
|
+
register_pid "$main_pid" "parallel-main" ""
|
|
7539
7704
|
|
|
7540
7705
|
# Run parallel orchestrator
|
|
7541
7706
|
run_parallel_orchestrator &
|
|
7542
7707
|
local orchestrator_pid=$!
|
|
7708
|
+
register_pid "$orchestrator_pid" "parallel-orchestrator" ""
|
|
7543
7709
|
|
|
7544
7710
|
# Wait for main session (orchestrator continues watching)
|
|
7545
7711
|
wait $main_pid || result=$?
|
package/dashboard/__init__.py
CHANGED
package/dashboard/server.py
CHANGED
|
@@ -404,7 +404,20 @@ async def get_status() -> StatusResponse:
|
|
|
404
404
|
iteration = state.get("iteration", 0)
|
|
405
405
|
complexity = state.get("complexity", "standard")
|
|
406
406
|
mode = state.get("mode", "")
|
|
407
|
-
|
|
407
|
+
# Count only agents with alive PIDs (not raw array length)
|
|
408
|
+
agents_list = state.get("agents", [])
|
|
409
|
+
running_agents = 0
|
|
410
|
+
for agent in agents_list:
|
|
411
|
+
agent_pid = agent.get("pid")
|
|
412
|
+
if agent_pid:
|
|
413
|
+
try:
|
|
414
|
+
os.kill(int(agent_pid), 0)
|
|
415
|
+
running_agents += 1
|
|
416
|
+
except (OSError, ValueError, TypeError):
|
|
417
|
+
pass
|
|
418
|
+
else:
|
|
419
|
+
# No PID field -- count as running (legacy data)
|
|
420
|
+
running_agents += 1
|
|
408
421
|
|
|
409
422
|
tasks = state.get("tasks", {})
|
|
410
423
|
pending_tasks = len(tasks.get("pending", []))
|
|
@@ -3138,23 +3151,112 @@ async def get_github_sync_log(
|
|
|
3138
3151
|
# =============================================================================
|
|
3139
3152
|
|
|
3140
3153
|
|
|
3154
|
+
def _resolve_process_state(pid: Optional[int], last_status: str = "",
|
|
3155
|
+
started: str = "", heartbeat: str = "",
|
|
3156
|
+
stale_threshold: int = 30) -> dict[str, Any]:
|
|
3157
|
+
"""Resolve process state with honest labels.
|
|
3158
|
+
|
|
3159
|
+
States:
|
|
3160
|
+
RUNNING - PID alive AND heartbeat < stale_threshold seconds
|
|
3161
|
+
STALE - PID alive BUT no heartbeat update in > stale_threshold seconds
|
|
3162
|
+
COMPLETED - last_status marked done/completed and PID exited
|
|
3163
|
+
FAILED - last_status marked failed OR PID exited non-zero
|
|
3164
|
+
CRASHED - PID dead BUT last_status was 'running'
|
|
3165
|
+
UNKNOWN - No PID, no status, or conflicting data
|
|
3166
|
+
|
|
3167
|
+
Returns dict with: state, pid_alive, started, last_heartbeat, duration_seconds
|
|
3168
|
+
"""
|
|
3169
|
+
now = datetime.now(timezone.utc)
|
|
3170
|
+
pid_alive = False
|
|
3171
|
+
if pid is not None:
|
|
3172
|
+
try:
|
|
3173
|
+
os.kill(pid, 0)
|
|
3174
|
+
pid_alive = True
|
|
3175
|
+
except (OSError, ValueError, TypeError):
|
|
3176
|
+
pass
|
|
3177
|
+
|
|
3178
|
+
# Parse timestamps
|
|
3179
|
+
started_dt = None
|
|
3180
|
+
heartbeat_dt = None
|
|
3181
|
+
if started:
|
|
3182
|
+
try:
|
|
3183
|
+
started_dt = datetime.fromisoformat(started.replace("Z", "+00:00"))
|
|
3184
|
+
if started_dt.tzinfo is None:
|
|
3185
|
+
started_dt = started_dt.replace(tzinfo=timezone.utc)
|
|
3186
|
+
except (ValueError, AttributeError):
|
|
3187
|
+
pass
|
|
3188
|
+
if heartbeat:
|
|
3189
|
+
try:
|
|
3190
|
+
heartbeat_dt = datetime.fromisoformat(heartbeat.replace("Z", "+00:00"))
|
|
3191
|
+
if heartbeat_dt.tzinfo is None:
|
|
3192
|
+
heartbeat_dt = heartbeat_dt.replace(tzinfo=timezone.utc)
|
|
3193
|
+
except (ValueError, AttributeError):
|
|
3194
|
+
pass
|
|
3195
|
+
|
|
3196
|
+
# Calculate duration
|
|
3197
|
+
duration_seconds = None
|
|
3198
|
+
if started_dt:
|
|
3199
|
+
duration_seconds = round((now - started_dt).total_seconds())
|
|
3200
|
+
|
|
3201
|
+
# Calculate heartbeat age
|
|
3202
|
+
heartbeat_age = None
|
|
3203
|
+
if heartbeat_dt:
|
|
3204
|
+
heartbeat_age = round((now - heartbeat_dt).total_seconds())
|
|
3205
|
+
|
|
3206
|
+
# Resolve state
|
|
3207
|
+
normalized = last_status.lower().strip() if last_status else ""
|
|
3208
|
+
if pid_alive:
|
|
3209
|
+
if heartbeat_age is not None and heartbeat_age > stale_threshold:
|
|
3210
|
+
state = "STALE"
|
|
3211
|
+
else:
|
|
3212
|
+
state = "RUNNING"
|
|
3213
|
+
else:
|
|
3214
|
+
if normalized in ("done", "completed", "complete", "success"):
|
|
3215
|
+
state = "COMPLETED"
|
|
3216
|
+
elif normalized in ("failed", "error", "errored"):
|
|
3217
|
+
state = "FAILED"
|
|
3218
|
+
elif normalized in ("running", "active", "in_progress", "starting"):
|
|
3219
|
+
state = "CRASHED"
|
|
3220
|
+
elif pid is None:
|
|
3221
|
+
state = "UNKNOWN"
|
|
3222
|
+
else:
|
|
3223
|
+
# PID dead, unknown last status
|
|
3224
|
+
state = "CRASHED" if normalized == "" else "UNKNOWN"
|
|
3225
|
+
|
|
3226
|
+
result: dict[str, Any] = {
|
|
3227
|
+
"state": state,
|
|
3228
|
+
"pid_alive": pid_alive,
|
|
3229
|
+
}
|
|
3230
|
+
if started:
|
|
3231
|
+
result["started"] = started
|
|
3232
|
+
if heartbeat:
|
|
3233
|
+
result["last_heartbeat"] = heartbeat
|
|
3234
|
+
if heartbeat_age is not None:
|
|
3235
|
+
result["heartbeat_age_seconds"] = heartbeat_age
|
|
3236
|
+
if duration_seconds is not None:
|
|
3237
|
+
result["duration_seconds"] = duration_seconds
|
|
3238
|
+
return result
|
|
3239
|
+
|
|
3240
|
+
|
|
3141
3241
|
@app.get("/api/health/processes")
|
|
3142
3242
|
async def get_process_health(token: Optional[dict] = Depends(auth.get_current_token)):
|
|
3143
|
-
"""Get health status of all loki processes (dashboard, session, agents).
|
|
3243
|
+
"""Get health status of all loki processes (dashboard, session, agents).
|
|
3244
|
+
|
|
3245
|
+
Returns honest state labels: RUNNING, STALE, COMPLETED, FAILED, CRASHED, UNKNOWN.
|
|
3246
|
+
Every entry includes timestamps (started, last_heartbeat, duration_seconds).
|
|
3247
|
+
"""
|
|
3144
3248
|
result: dict[str, Any] = {"dashboard": None, "session": None, "agents": []}
|
|
3145
3249
|
|
|
3146
3250
|
loki_dir = _get_loki_dir()
|
|
3251
|
+
now_iso = datetime.now(timezone.utc).isoformat()
|
|
3147
3252
|
|
|
3148
3253
|
# Dashboard PID
|
|
3149
3254
|
dpid_file = loki_dir / "dashboard" / "dashboard.pid"
|
|
3150
3255
|
if dpid_file.exists():
|
|
3151
3256
|
try:
|
|
3152
3257
|
dpid = int(dpid_file.read_text().strip())
|
|
3153
|
-
|
|
3154
|
-
|
|
3155
|
-
result["dashboard"] = {"pid": dpid, "status": "alive"}
|
|
3156
|
-
except OSError:
|
|
3157
|
-
result["dashboard"] = {"pid": dpid, "status": "dead"}
|
|
3258
|
+
state_info = _resolve_process_state(dpid, last_status="running")
|
|
3259
|
+
result["dashboard"] = {"pid": dpid, **state_info}
|
|
3158
3260
|
except (ValueError, OSError):
|
|
3159
3261
|
pass
|
|
3160
3262
|
|
|
@@ -3163,14 +3265,23 @@ async def get_process_health(token: Optional[dict] = Depends(auth.get_current_to
|
|
|
3163
3265
|
if spid_file.exists():
|
|
3164
3266
|
try:
|
|
3165
3267
|
spid = int(spid_file.read_text().strip())
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
result["session"] = {"pid": spid, "status": "alive"}
|
|
3169
|
-
except OSError:
|
|
3170
|
-
result["session"] = {"pid": spid, "status": "dead"}
|
|
3268
|
+
state_info = _resolve_process_state(spid, last_status="running")
|
|
3269
|
+
result["session"] = {"pid": spid, **state_info}
|
|
3171
3270
|
except (ValueError, OSError):
|
|
3172
3271
|
pass
|
|
3173
3272
|
|
|
3273
|
+
# Read dashboard-state.json for heartbeat timestamp
|
|
3274
|
+
state_file = loki_dir / "dashboard-state.json"
|
|
3275
|
+
state_heartbeat = ""
|
|
3276
|
+
if state_file.exists():
|
|
3277
|
+
try:
|
|
3278
|
+
st = os.stat(state_file)
|
|
3279
|
+
state_heartbeat = datetime.fromtimestamp(
|
|
3280
|
+
st.st_mtime, tz=timezone.utc
|
|
3281
|
+
).isoformat()
|
|
3282
|
+
except OSError:
|
|
3283
|
+
pass
|
|
3284
|
+
|
|
3174
3285
|
# Agent PIDs
|
|
3175
3286
|
agents_file = loki_dir / "state" / "agents.json"
|
|
3176
3287
|
if agents_file.exists():
|
|
@@ -3178,24 +3289,65 @@ async def get_process_health(token: Optional[dict] = Depends(auth.get_current_to
|
|
|
3178
3289
|
agents = json.loads(agents_file.read_text())
|
|
3179
3290
|
for agent in agents:
|
|
3180
3291
|
pid = agent.get("pid")
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
3187
|
-
|
|
3292
|
+
pid_int = int(pid) if pid else None
|
|
3293
|
+
agent_status = agent.get("status", "")
|
|
3294
|
+
agent_started = agent.get("started", "")
|
|
3295
|
+
agent_heartbeat = agent.get("heartbeat", state_heartbeat)
|
|
3296
|
+
state_info = _resolve_process_state(
|
|
3297
|
+
pid_int,
|
|
3298
|
+
last_status=agent_status,
|
|
3299
|
+
started=agent_started,
|
|
3300
|
+
heartbeat=agent_heartbeat,
|
|
3301
|
+
)
|
|
3188
3302
|
result["agents"].append({
|
|
3189
3303
|
"id": agent.get("id", ""),
|
|
3190
3304
|
"name": agent.get("name", ""),
|
|
3191
3305
|
"pid": pid,
|
|
3192
|
-
|
|
3306
|
+
**state_info,
|
|
3193
3307
|
})
|
|
3194
3308
|
except Exception:
|
|
3195
3309
|
pass
|
|
3196
3310
|
|
|
3311
|
+
# PID registry (central process supervisor)
|
|
3312
|
+
pids_dir = loki_dir / "pids"
|
|
3313
|
+
registered: list[dict[str, Any]] = []
|
|
3314
|
+
if pids_dir.exists():
|
|
3315
|
+
for entry_file in sorted(pids_dir.glob("*.json")):
|
|
3316
|
+
try:
|
|
3317
|
+
pid_str = entry_file.stem
|
|
3318
|
+
pid = int(pid_str)
|
|
3319
|
+
entry = json.loads(entry_file.read_text())
|
|
3320
|
+
entry_started = entry.get("started", "")
|
|
3321
|
+
entry_heartbeat = entry.get("heartbeat", "")
|
|
3322
|
+
# Use file mtime as heartbeat fallback
|
|
3323
|
+
if not entry_heartbeat:
|
|
3324
|
+
try:
|
|
3325
|
+
st = os.stat(entry_file)
|
|
3326
|
+
entry_heartbeat = datetime.fromtimestamp(
|
|
3327
|
+
st.st_mtime, tz=timezone.utc
|
|
3328
|
+
).isoformat()
|
|
3329
|
+
except OSError:
|
|
3330
|
+
pass
|
|
3331
|
+
entry_status = entry.get("status", "running")
|
|
3332
|
+
state_info = _resolve_process_state(
|
|
3333
|
+
pid,
|
|
3334
|
+
last_status=entry_status,
|
|
3335
|
+
started=entry_started,
|
|
3336
|
+
heartbeat=entry_heartbeat,
|
|
3337
|
+
)
|
|
3338
|
+
registered.append({
|
|
3339
|
+
"pid": pid,
|
|
3340
|
+
"label": entry.get("label", "unknown"),
|
|
3341
|
+
"ppid": entry.get("ppid"),
|
|
3342
|
+
**state_info,
|
|
3343
|
+
})
|
|
3344
|
+
except (ValueError, json.JSONDecodeError, OSError):
|
|
3345
|
+
continue
|
|
3346
|
+
result["registered_processes"] = registered
|
|
3347
|
+
|
|
3197
3348
|
watchdog_enabled = os.environ.get("LOKI_WATCHDOG", "false").lower() == "true"
|
|
3198
3349
|
result["watchdog_enabled"] = watchdog_enabled
|
|
3350
|
+
result["checked_at"] = now_iso
|
|
3199
3351
|
|
|
3200
3352
|
return result
|
|
3201
3353
|
|
|
@@ -4774,7 +4774,7 @@ var LokiDashboard=(()=>{var X=Object.defineProperty;var gt=Object.getOwnProperty
|
|
|
4774
4774
|
<p>Checklist not initialized</p>
|
|
4775
4775
|
<p class="hint">The PRD checklist will be created during the first iteration when a PRD is provided.</p>
|
|
4776
4776
|
</div>
|
|
4777
|
-
`}_attachEventListeners(){let t=this.shadowRoot;t&&(t.querySelectorAll(".category-header[data-category]").forEach(e=>{e.addEventListener("click",()=>this._toggleCategory(e.dataset.category))}),t.querySelectorAll("button[data-waive-id]").forEach(e=>{e.addEventListener("click",a=>{a.stopPropagation(),this._waiveItem(e.dataset.waiveId)})}),t.querySelectorAll("button[data-unwaive-id]").forEach(e=>{e.addEventListener("click",a=>{a.stopPropagation(),this._unwaiveItem(e.dataset.unwaiveId)})}))}_escapeHtml(t){return t?String(t).replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,"""):""}};customElements.define("loki-checklist-viewer",G);var ht={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"Not Started",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting...",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1}},J=class extends c{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._loading=!1,this._error=null,this._api=null,this._pollInterval=null,this._status=null,this._logs=[],this._lastDataHash=null,this._lastLogsHash=null}connectedCallback(){super.connectedCallback(),this._setupApi(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(t,e,a){e!==a&&(t==="api-url"&&this._api&&(this._api.baseUrl=a,this._loadData()),t==="theme"&&this._applyTheme())}_setupApi(){let 
t=this.getAttribute("api-url")||window.location.origin;this._api=u({baseUrl:t})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let[t,e]=await Promise.all([this._api.getAppRunnerStatus(),this._api.getAppRunnerLogs()]),a=JSON.stringify({status:t?.status,port:t?.port,restarts:t?.restart_count,url:t?.url}),i=JSON.stringify(e?.lines?.slice(-5)||[]),s=i!==this._lastLogsHash;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._lastLogsHash=i,this._status=t,this._logs=e?.lines||[],this._error=null,this.render(),this._scrollLogsToBottom()}catch(t){this._error||(this._error=`Failed to load app status: ${t.message}`,this.render())}}_scrollLogsToBottom(){let t=this.shadowRoot;if(!t)return;let e=t.querySelector(".log-area");e&&(e.scrollTop=e.scrollHeight)}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(t){this._error=`Restart failed: ${t.message}`,this.render()}}async _handleStop(){try{await this._api.stopApp(),this._loadData()}catch(t){this._error=`Stop failed: ${t.message}`,this.render()}}_formatUptime(t){if(!t)return"--";let e=new Date(t),i=Math.floor((new Date-e)/1e3);if(i<60)return`${i}s`;if(i<3600)return`${Math.floor(i/60)}m ${i%60}s`;let s=Math.floor(i/3600),r=Math.floor(i%3600/60);return`${s}h ${r}m`}_isValidUrl(t){if(!t)return!1;try{let e=new URL(t);return e.protocol==="http:"||e.protocol==="https:"}catch{return!1}}_getStyles(){return`
|
|
4777
|
+
`}_attachEventListeners(){let t=this.shadowRoot;t&&(t.querySelectorAll(".category-header[data-category]").forEach(e=>{e.addEventListener("click",()=>this._toggleCategory(e.dataset.category))}),t.querySelectorAll("button[data-waive-id]").forEach(e=>{e.addEventListener("click",a=>{a.stopPropagation(),this._waiveItem(e.dataset.waiveId)})}),t.querySelectorAll("button[data-unwaive-id]").forEach(e=>{e.addEventListener("click",a=>{a.stopPropagation(),this._unwaiveItem(e.dataset.unwaiveId)})}))}_escapeHtml(t){return t?String(t).replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,"""):""}};customElements.define("loki-checklist-viewer",G);var ht={not_initialized:{color:"var(--loki-text-muted, #71717a)",label:"Not Started",pulse:!1},starting:{color:"var(--loki-yellow, #ca8a04)",label:"Starting...",pulse:!0},running:{color:"var(--loki-green, #16a34a)",label:"Running",pulse:!0},stale:{color:"var(--loki-yellow, #ca8a04)",label:"Stale",pulse:!1},completed:{color:"var(--loki-text-muted, #a1a1aa)",label:"Completed",pulse:!1},failed:{color:"var(--loki-red, #dc2626)",label:"Failed",pulse:!1},crashed:{color:"var(--loki-red, #dc2626)",label:"Crashed",pulse:!1},stopped:{color:"var(--loki-text-muted, #a1a1aa)",label:"Stopped",pulse:!1},unknown:{color:"var(--loki-text-muted, #71717a)",label:"Unknown",pulse:!1}},J=class extends c{static get observedAttributes(){return["api-url","theme"]}constructor(){super(),this._loading=!1,this._error=null,this._api=null,this._pollInterval=null,this._status=null,this._logs=[],this._lastDataHash=null,this._lastLogsHash=null}connectedCallback(){super.connectedCallback(),this._setupApi(),this._loadData(),this._startPolling()}disconnectedCallback(){super.disconnectedCallback(),this._stopPolling()}attributeChangedCallback(t,e,a){e!==a&&(t==="api-url"&&this._api&&(this._api.baseUrl=a,this._loadData()),t==="theme"&&this._applyTheme())}_setupApi(){let 
t=this.getAttribute("api-url")||window.location.origin;this._api=u({baseUrl:t})}_startPolling(){this._pollInterval=setInterval(()=>this._loadData(),3e3),this._visibilityHandler=()=>{document.hidden?this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null):this._pollInterval||(this._loadData(),this._pollInterval=setInterval(()=>this._loadData(),3e3))},document.addEventListener("visibilitychange",this._visibilityHandler)}_stopPolling(){this._pollInterval&&(clearInterval(this._pollInterval),this._pollInterval=null),this._visibilityHandler&&(document.removeEventListener("visibilitychange",this._visibilityHandler),this._visibilityHandler=null)}async _loadData(){try{let[t,e]=await Promise.all([this._api.getAppRunnerStatus(),this._api.getAppRunnerLogs()]),a=JSON.stringify({status:t?.status,port:t?.port,restarts:t?.restart_count,url:t?.url}),i=JSON.stringify(e?.lines?.slice(-5)||[]),s=i!==this._lastLogsHash;if(a===this._lastDataHash&&!s)return;this._lastDataHash=a,this._lastLogsHash=i,this._status=t,this._logs=e?.lines||[],this._error=null,this.render(),this._scrollLogsToBottom()}catch(t){this._error||(this._error=`Failed to load app status: ${t.message}`,this.render())}}_scrollLogsToBottom(){let t=this.shadowRoot;if(!t)return;let e=t.querySelector(".log-area");e&&(e.scrollTop=e.scrollHeight)}async _handleRestart(){try{await this._api.restartApp(),this._loadData()}catch(t){this._error=`Restart failed: ${t.message}`,this.render()}}async _handleStop(){try{await this._api.stopApp(),this._loadData()}catch(t){this._error=`Stop failed: ${t.message}`,this.render()}}_formatUptime(t){if(!t)return"--";let e=new Date(t),i=Math.floor((new Date-e)/1e3);if(i<60)return`${i}s`;if(i<3600)return`${Math.floor(i/60)}m ${i%60}s`;let s=Math.floor(i/3600),r=Math.floor(i%3600/60);return`${s}h ${r}m`}_isValidUrl(t){if(!t)return!1;try{let e=new URL(t);return e.protocol==="http:"||e.protocol==="https:"}catch{return!1}}_getStyles(){return`
|
|
4778
4778
|
.app-status {
|
|
4779
4779
|
padding: 16px;
|
|
4780
4780
|
font-family: var(--loki-font-family, system-ui, -apple-system, sans-serif);
|