loki-mode 6.60.0 → 6.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/SKILL.md +2 -2
  2. package/VERSION +1 -1
  3. package/autonomy/app-runner.sh +34 -8
  4. package/autonomy/completion-council.sh +70 -32
  5. package/autonomy/issue-parser.sh +4 -7
  6. package/autonomy/loki +238 -119
  7. package/autonomy/notification-checker.py +49 -23
  8. package/autonomy/run.sh +162 -79
  9. package/autonomy/sandbox.sh +91 -24
  10. package/bin/loki-mode.js +1 -2
  11. package/bin/postinstall.js +10 -4
  12. package/dashboard/__init__.py +1 -1
  13. package/dashboard/control.py +46 -36
  14. package/dashboard/database.py +21 -4
  15. package/dashboard/server.py +107 -78
  16. package/docs/BUG-AUDIT-v6.61.0.md +957 -0
  17. package/docs/INSTALLATION.md +2 -2
  18. package/events/bus.py +129 -28
  19. package/events/bus.ts +41 -27
  20. package/events/emit.sh +1 -1
  21. package/integrations/openclaw/README.md +139 -0
  22. package/integrations/openclaw/SKILL.md +88 -0
  23. package/integrations/openclaw/bridge/__init__.py +1 -0
  24. package/integrations/openclaw/bridge/__main__.py +88 -0
  25. package/integrations/openclaw/bridge/schema_map.py +180 -0
  26. package/integrations/openclaw/bridge/watcher.py +100 -0
  27. package/integrations/openclaw/scripts/format-progress.sh +80 -0
  28. package/integrations/openclaw/scripts/poll-status.sh +74 -0
  29. package/integrations/vibe-kanban.md +289 -0
  30. package/mcp/__init__.py +1 -1
  31. package/mcp/server.py +96 -73
  32. package/memory/consolidation.py +21 -6
  33. package/memory/engine.py +53 -26
  34. package/memory/layers/index_layer.py +16 -3
  35. package/memory/layers/timeline_layer.py +16 -3
  36. package/memory/retrieval.py +4 -1
  37. package/memory/schemas.py +4 -2
  38. package/memory/storage.py +25 -4
  39. package/memory/token_economics.py +9 -2
  40. package/memory/vector_index.py +2 -2
  41. package/package.json +3 -1
  42. package/providers/cline.sh +5 -4
  43. package/providers/codex.sh +27 -5
  44. package/providers/gemini.sh +59 -23
  45. package/providers/loader.sh +3 -2
  46. package/skills/parallel-workflows.md +9 -7
  47. package/state/__init__.py +10 -0
  48. package/state/index.ts +18 -0
  49. package/state/manager.py +1801 -0
  50. package/state/manager.ts +1774 -0
  51. package/state/sqlite_backend.py +188 -0
  52. package/state/test_manager.py +703 -0
  53. package/state/test_manager.ts +366 -0
  54. package/templates/README.md +19 -4
  55. package/templates/dashboard.md +45 -0
  56. package/templates/data-pipeline.md +45 -0
  57. package/templates/game.md +48 -0
  58. package/templates/microservice.md +49 -0
  59. package/templates/npm-library.md +42 -0
  60. package/templates/rest-api.md +170 -33
  61. package/templates/slack-bot.md +48 -0
  62. package/templates/web-scraper.md +45 -0
  63. package/web-app/server.py +360 -191
  64. package/templates/saas-app.md +0 -42
package/autonomy/run.sh CHANGED
@@ -182,6 +182,8 @@ if [[ -z "${LOKI_RUNNING_FROM_TEMP:-}" ]] && [[ "${BASH_SOURCE[0]}" == "${0}" ]]
182
182
  TEMP_SCRIPT="${TEMP_SCRIPT}.sh"
183
183
  cp "${BASH_SOURCE[0]}" "$TEMP_SCRIPT"
184
184
  chmod 700 "$TEMP_SCRIPT"
185
+ # BUG-XC-011: Set trap BEFORE exec so the temp file gets cleaned up
186
+ trap "rm -f '$TEMP_SCRIPT'" EXIT
185
187
  export LOKI_RUNNING_FROM_TEMP=1
186
188
  export LOKI_ORIGINAL_SCRIPT_DIR="$SCRIPT_DIR"
187
189
  export LOKI_ORIGINAL_PROJECT_DIR="$PROJECT_DIR"
@@ -1684,15 +1686,20 @@ import_github_issues() {
1684
1686
  created_at: $created
1685
1687
  }')
1686
1688
 
1687
- # Append to pending.json (bare array format) with temp file cleanup on error
1689
+ # BUG-XC-010: Create temp file in same directory as target (avoids cross-filesystem mv)
1690
+ # and use flock for queue locking
1688
1691
  local temp_file
1689
- temp_file=$(mktemp)
1690
- if jq ". += [$task_json]" "$pending_file" > "$temp_file" && mv "$temp_file" "$pending_file"; then
1691
- log_info "Imported issue #$number: $title"
1692
- task_count=$((task_count + 1))
1693
- else
1694
- log_warn "Failed to import issue #$number"
1695
- fi
1692
+ temp_file=$(mktemp ".loki/queue/pending.json.tmp.XXXXXX")
1693
+ local lockfile=".loki/queue/.pending.lock"
1694
+ (
1695
+ flock -w 5 200 2>/dev/null || true
1696
+ if jq ". += [$task_json]" "$pending_file" > "$temp_file" && mv "$temp_file" "$pending_file"; then
1697
+ log_info "Imported issue #$number: $title"
1698
+ task_count=$((task_count + 1))
1699
+ else
1700
+ log_warn "Failed to import issue #$number"
1701
+ fi
1702
+ ) 200>"$lockfile"
1696
1703
  rm -f "$temp_file"
1697
1704
  done < <(echo "$issues" | jq -c '.[]')
1698
1705
 
@@ -1930,10 +1937,15 @@ export_tasks_to_github() {
1930
1937
  desc=$(echo "$task" | jq -r '.description // ""')
1931
1938
 
1932
1939
  log_info "Creating issue: $title"
1940
+ # BUG-GH-009: Check if label exists before using --label; skip if absent
1941
+ local label_flag=""
1942
+ if gh label list --repo "$repo" 2>/dev/null | grep -q "loki-mode"; then
1943
+ label_flag="--label loki-mode"
1944
+ fi
1933
1945
  gh issue create --repo "$repo" \
1934
1946
  --title "$title" \
1935
1947
  --body "$desc" \
1936
- --label "loki-mode" \
1948
+ $label_flag \
1937
1949
  2>/dev/null || log_warn "Failed to create issue: $title"
1938
1950
  done
1939
1951
  }
@@ -2848,10 +2860,11 @@ init_loki_dir() {
2848
2860
  mkdir -p .loki/rules
2849
2861
  mkdir -p .loki/signals
2850
2862
 
2851
- # Initialize queue files if they don't exist
2863
+ # BUG-XC-008: Initialize queue files only if missing or invalid JSON
2852
2864
  for queue in pending in-progress completed failed dead-letter; do
2853
- if [ ! -f ".loki/queue/${queue}.json" ]; then
2854
- echo "[]" > ".loki/queue/${queue}.json"
2865
+ local qfile=".loki/queue/${queue}.json"
2866
+ if [ ! -f "$qfile" ] || ! python3 -c "import json; json.load(open('$qfile'))" 2>/dev/null; then
2867
+ echo "[]" > "$qfile"
2855
2868
  fi
2856
2869
  done
2857
2870
 
@@ -3450,23 +3463,28 @@ EOF
3450
3463
  )
3451
3464
 
3452
3465
  # Add to in-progress queue
3466
+ # BUG-XC-003: Use flock for atomic queue modification
3453
3467
  local in_progress_file=".loki/queue/in-progress.json"
3454
- if [ -f "$in_progress_file" ]; then
3455
- local existing=$(cat "$in_progress_file")
3456
- if [ "$existing" = "[]" ] || [ -z "$existing" ]; then
3457
- echo "[$task_json]" > "$in_progress_file"
3458
- else
3459
- # Append to existing array
3460
- echo "$existing" | python3 -c "
3468
+ local lockfile=".loki/queue/.in-progress.lock"
3469
+ (
3470
+ flock -w 5 200 2>/dev/null || true
3471
+ if [ -f "$in_progress_file" ]; then
3472
+ local existing=$(cat "$in_progress_file")
3473
+ if [ "$existing" = "[]" ] || [ -z "$existing" ]; then
3474
+ echo "[$task_json]" > "$in_progress_file"
3475
+ else
3476
+ # Append to existing array
3477
+ echo "$existing" | python3 -c "
3461
3478
  import sys, json
3462
3479
  data = json.load(sys.stdin)
3463
3480
  data.append($task_json)
3464
3481
  print(json.dumps(data, indent=2))
3465
3482
  " > "$in_progress_file" 2>/dev/null || echo "[$task_json]" > "$in_progress_file"
3483
+ fi
3484
+ else
3485
+ echo "[$task_json]" > "$in_progress_file"
3466
3486
  fi
3467
- else
3468
- echo "[$task_json]" > "$in_progress_file"
3469
- fi
3487
+ ) 200>"$lockfile"
3470
3488
 
3471
3489
  # Update current-task.json
3472
3490
  echo "$task_json" > .loki/queue/current-task.json
@@ -6146,7 +6164,8 @@ create_checkpoint() {
6146
6164
  mkdir -p "$cp_dir"
6147
6165
 
6148
6166
  # Copy critical state files (lightweight -- not full .loki/)
6149
- for f in state/orchestrator.json queue/pending.json queue/completed.json queue/in-progress.json queue/current-task.json; do
6167
+ # BUG-ST-009: Include autonomy-state.json in checkpoint backup
6168
+ for f in state/orchestrator.json autonomy-state.json queue/pending.json queue/completed.json queue/in-progress.json queue/current-task.json; do
6150
6169
  if [ -f ".loki/$f" ]; then
6151
6170
  local target_dir="$cp_dir/$(dirname "$f")"
6152
6171
  mkdir -p "$target_dir"
@@ -6191,9 +6210,11 @@ CPEOF
6191
6210
  cp_count=$(find "$checkpoint_dir" -maxdepth 1 -type d -name "cp-*" 2>/dev/null | wc -l | tr -d ' ')
6192
6211
  if [ "$cp_count" -gt 50 ]; then
6193
6212
  local to_remove=$((cp_count - 50))
6213
+ # BUG-ST-012: Sort by basename epoch suffix, not full path with extra dashes
6194
6214
  find "$checkpoint_dir" -maxdepth 1 -type d -name "cp-*" 2>/dev/null \
6195
- | sort -t'-' -k3 -n \
6215
+ | while read -r p; do basename "$p"; done | sort -t'-' -k3 -n \
6196
6216
  | head -n "$to_remove" | while read -r old_cp; do
6217
+ old_cp="${checkpoint_dir}/${old_cp}"
6197
6218
  rm -rf "$old_cp" 2>/dev/null || true
6198
6219
  done
6199
6220
  # Rebuild index atomically from remaining checkpoints (sorted by epoch)
@@ -6552,7 +6573,9 @@ is_child_process_signal() {
6552
6573
 
6553
6574
  calculate_wait() {
6554
6575
  local retry="$1"
6555
- local wait_time=$((BASE_WAIT * (2 ** retry)))
6576
+ # BUG-RUN-004: Cap exponent to prevent overflow at retry>=34
6577
+ local exp=$((retry > 30 ? 30 : retry))
6578
+ local wait_time=$((BASE_WAIT * (2 ** exp)))
6556
6579
 
6557
6580
  # Add jitter (0-30 seconds)
6558
6581
  local jitter=$((RANDOM % 30))
@@ -6637,14 +6660,16 @@ update_failover_state() {
6637
6660
 
6638
6661
  [ ! -f "$failover_file" ] && return 1
6639
6662
 
6640
- python3 << PYEOF 2>/dev/null || true
6663
+ # BUG-RUN-008: Use single-quoted heredoc to prevent shell injection; pass vars via env
6664
+ _FAILOVER_KEY="$key" _FAILOVER_VALUE="$value" _FAILOVER_FILE="$failover_file" \
6665
+ python3 << 'PYEOF' 2>/dev/null || true
6641
6666
  import json, os
6642
- fpath = os.path.join(os.environ.get('TARGET_DIR', '.'), '.loki/state/failover.json')
6667
+ fpath = os.environ['_FAILOVER_FILE']
6643
6668
  try:
6644
6669
  with open(fpath) as f:
6645
6670
  d = json.load(f)
6646
- key = "$key"
6647
- value = "$value"
6671
+ key = os.environ['_FAILOVER_KEY']
6672
+ value = os.environ['_FAILOVER_VALUE']
6648
6673
  # Handle type conversion
6649
6674
  if value == "null":
6650
6675
  d[key] = None
@@ -7255,18 +7280,8 @@ load_ledger_context() {
7255
7280
  fi
7256
7281
  }
7257
7282
 
7258
- # Load recent handoffs for context
7259
- load_handoff_context() {
7260
- local handoff_content=""
7261
-
7262
- # Find most recent handoff (last 24 hours)
7263
- local recent_handoff=$(find .loki/memory/handoffs -name "*.md" -mtime -1 2>/dev/null | head -1)
7264
-
7265
- if [ -n "$recent_handoff" ] && [ -f "$recent_handoff" ]; then
7266
- handoff_content=$(cat "$recent_handoff" | head -80)
7267
- echo "$handoff_content"
7268
- fi
7269
- }
7283
+ # BUG-RUN-006: Removed duplicate load_handoff_context() (dead definition)
7284
+ # The active definition is below, after write_structured_handoff()
7270
7285
 
7271
7286
  # Write structured handoff document (v5.49.0)
7272
7287
  # Produces both JSON (machine-readable) and markdown (human-readable) handoffs
@@ -7773,9 +7788,12 @@ save_state() {
7773
7788
  local status="$2"
7774
7789
  local exit_code="$3"
7775
7790
 
7776
- cat > ".loki/autonomy-state.json" << EOF
7791
+ # BUG-XC-004: Atomic write via temp file + mv
7792
+ local state_tmp=".loki/autonomy-state.json.tmp.$$"
7793
+ cat > "$state_tmp" << EOF
7777
7794
  {
7778
7795
  "retryCount": $retry_count,
7796
+ "iterationCount": $ITERATION_COUNT,
7779
7797
  "status": "$status",
7780
7798
  "lastExitCode": $exit_code,
7781
7799
  "lastRun": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
@@ -7785,15 +7803,18 @@ save_state() {
7785
7803
  "baseWait": $BASE_WAIT
7786
7804
  }
7787
7805
  EOF
7806
+ mv -f "$state_tmp" ".loki/autonomy-state.json"
7788
7807
  }
7789
7808
 
7790
7809
  load_state() {
7791
7810
  if [ -f ".loki/autonomy-state.json" ]; then
7792
7811
  if command -v python3 &> /dev/null; then
7793
- # Load both retry count and status from previous session
7812
+ # Load retry count, iteration count, and status from previous session
7794
7813
  local prev_status
7795
7814
  prev_status=$(python3 -c "import json; print(json.load(open('.loki/autonomy-state.json')).get('status', 'unknown'))" 2>/dev/null || echo "unknown")
7796
7815
  RETRY_COUNT=$(python3 -c "import json; print(json.load(open('.loki/autonomy-state.json')).get('retryCount', 0))" 2>/dev/null || echo "0")
7816
+ # BUG-RUN-003: Restore ITERATION_COUNT from persisted state
7817
+ ITERATION_COUNT=$(python3 -c "import json; print(json.load(open('.loki/autonomy-state.json')).get('iterationCount', 0))" 2>/dev/null || echo "0")
7797
7818
 
7798
7819
  # Reset retry count if previous session ended in a terminal state
7799
7820
  # This allows new sessions to start fresh after failures
@@ -8338,6 +8359,8 @@ if os.path.exists(pending_path):
8338
8359
 
8339
8360
  # Convert BMAD stories to queue task format (with deduplication)
8340
8361
  existing_ids = {t.get("id") for t in existing if isinstance(t, dict)}
8362
+ # BUG-ADP-005: Track added count separately from total stories
8363
+ added_count = 0
8341
8364
  for i, story in enumerate(stories):
8342
8365
  if not isinstance(story, dict):
8343
8366
  continue
@@ -8358,23 +8381,26 @@ for i, story in enumerate(stories):
8358
8381
  if acceptance:
8359
8382
  task["acceptance_criteria"] = acceptance
8360
8383
  existing.append(task)
8384
+ added_count += 1
8361
8385
 
8362
8386
  # Write updated pending queue
8363
8387
  with open(pending_path, "w") as f:
8364
8388
  json.dump(existing, f, indent=2)
8365
8389
 
8366
- msg = f"Added {len(stories)} BMAD stories to task queue"
8390
+ msg = f"Added {added_count} BMAD stories to task queue"
8367
8391
  if skipped_count > 0:
8368
8392
  msg += f" (skipped {skipped_count} completed)"
8369
8393
  print(msg)
8370
8394
  BMAD_QUEUE_EOF
8371
8395
 
8372
- if [[ $? -ne 0 ]]; then
8396
+ local bmad_exit=$?
8397
+ if [[ $bmad_exit -ne 0 ]]; then
8373
8398
  log_warn "Failed to populate BMAD queue (python3 error)"
8399
+ # BUG-RUN-012: Do NOT touch marker file on failure -- allow retry on next run
8374
8400
  return 0
8375
8401
  fi
8376
8402
 
8377
- # Mark as populated so we don't re-add on restart
8403
+ # Mark as populated so we don't re-add on restart (only on success)
8378
8404
  touch ".loki/queue/.bmad-populated"
8379
8405
  log_info "BMAD queue population complete"
8380
8406
  }
@@ -8464,12 +8490,19 @@ try:
8464
8490
  except (json.JSONDecodeError, FileNotFoundError):
8465
8491
  pass
8466
8492
 
8493
+ # BUG-RUN-005: Add deduplication check (like BMAD and MiroFish queue functions)
8494
+ existing_ids = {t.get("id") for t in existing if isinstance(t, dict)}
8495
+ added_count = 0
8496
+
8467
8497
  # Convert OpenSpec tasks to queue format (skip completed tasks)
8468
8498
  for task in openspec_tasks:
8469
8499
  if task.get("status") == "completed":
8470
8500
  continue
8501
+ task_id = task.get("id", "openspec-unknown")
8502
+ if task_id in existing_ids:
8503
+ continue
8471
8504
  queue_entry = {
8472
- "id": task.get("id", "openspec-unknown"),
8505
+ "id": task_id,
8473
8506
  "title": task.get("title", "Untitled"),
8474
8507
  "description": f"[OpenSpec] {task.get('group', 'General')}: {task.get('title', '')}",
8475
8508
  "priority": task.get("priority", "medium"),
@@ -8481,11 +8514,12 @@ for task in openspec_tasks:
8481
8514
  }
8482
8515
  }
8483
8516
  existing.append(queue_entry)
8517
+ added_count += 1
8484
8518
 
8485
8519
  with open(pending_path, "w") as f:
8486
8520
  json.dump(existing, f, indent=2)
8487
8521
 
8488
- pending_count = sum(1 for t in openspec_tasks if t.get('status') != 'completed')
8522
+ pending_count = added_count
8489
8523
  if pending_count == 0:
8490
8524
  print("WARNING: All OpenSpec tasks are already marked as completed. No tasks added to queue.", file=sys.stderr)
8491
8525
  print("Check your tasks.md file -- all checkboxes are checked.", file=sys.stderr)
@@ -8795,6 +8829,11 @@ run_autonomous() {
8795
8829
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
8796
8830
  echo ""
8797
8831
 
8832
+ # BUG-RUN-001/RUN-002: Per-iteration output file for scoped checks
8833
+ # (completion promise and rate limit detection should not scan stale daily logs)
8834
+ local iter_output
8835
+ iter_output=$(mktemp ".loki/logs/iter-output-XXXXXX")
8836
+
8798
8837
  # Log start time (to both archival and dashboard logs)
8799
8838
  echo "=== Session started at $(date) ===" | tee -a "$log_file" "$agent_log"
8800
8839
  echo "=== Provider: ${PROVIDER_NAME:-claude} ===" | tee -a "$log_file" "$agent_log"
@@ -8839,7 +8878,7 @@ run_autonomous() {
8839
8878
  { LOKI_CURRENT_MODEL="$tier_param" \
8840
8879
  claude --dangerously-skip-permissions --model "$tier_param" -p "$prompt" \
8841
8880
  --output-format stream-json --verbose 2>&1 | \
8842
- tee -a "$log_file" "$agent_log" | \
8881
+ tee -a "$log_file" "$agent_log" "$iter_output" | \
8843
8882
  python3 -u -c '
8844
8883
  import sys
8845
8884
  import json
@@ -9055,7 +9094,7 @@ if __name__ == "__main__":
9055
9094
  { LOKI_CODEX_REASONING_EFFORT="$tier_param" \
9056
9095
  CODEX_MODEL_REASONING_EFFORT="$tier_param" \
9057
9096
  codex exec --full-auto \
9058
- "$prompt" 2>&1 | tee -a "$log_file" "$agent_log"; \
9097
+ "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
9059
9098
  } && exit_code=0 || exit_code=$?
9060
9099
  ;;
9061
9100
 
@@ -9070,13 +9109,14 @@ if __name__ == "__main__":
9070
9109
  # Try primary model, fallback on rate limit
9071
9110
  local tmp_output
9072
9111
  tmp_output=$(mktemp)
9073
- { gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>&1 | tee "$tmp_output" | tee -a "$log_file" "$agent_log"; \
9074
- } && exit_code=0 || exit_code=$?
9112
+ # BUG-RUN-011/RUN-013: Use PIPESTATUS[0] for primary invocation too
9113
+ gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>&1 | tee "$tmp_output" | tee -a "$log_file" "$agent_log" "$iter_output"
9114
+ exit_code=${PIPESTATUS[0]}
9075
9115
 
9076
9116
  if [[ $exit_code -ne 0 ]] && grep -qiE "(rate.?limit|429|quota|resource.?exhausted)" "$tmp_output"; then
9077
9117
  log_warn "Rate limit hit on $model, falling back to $fallback"
9078
9118
  echo "[loki] Fallback to $fallback due to rate limit" >> "$log_file"
9079
- gemini --approval-mode=yolo --model "$fallback" "$prompt" < /dev/null 2>&1 | tee -a "$log_file" "$agent_log"
9119
+ gemini --approval-mode=yolo --model "$fallback" "$prompt" < /dev/null 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"
9080
9120
  exit_code=${PIPESTATUS[0]}
9081
9121
  fi
9082
9122
  rm -f "$tmp_output"
@@ -9086,14 +9126,14 @@ if __name__ == "__main__":
9086
9126
  # Cline: Tier 2 - near-full mode with subagents and MCP
9087
9127
  echo "[loki] Cline model: ${LOKI_CLINE_MODEL:-default}, tier: $tier_param" >> "$log_file"
9088
9128
  echo "[loki] Cline model: ${LOKI_CLINE_MODEL:-default}, tier: $tier_param" >> "$agent_log"
9089
- { invoke_cline "$prompt" 2>&1 | tee -a "$log_file" "$agent_log"; \
9129
+ { invoke_cline "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
9090
9130
  } && exit_code=0 || exit_code=$?
9091
9131
  ;;
9092
9132
  aider)
9093
9133
  # Aider: Tier 3 - degraded mode, 18+ providers
9094
9134
  echo "[loki] Aider model: ${AIDER_DEFAULT_MODEL:-${LOKI_AIDER_MODEL:-claude-sonnet-4-5-20250929}}, tier: $tier_param" >> "$log_file"
9095
9135
  echo "[loki] Aider model: ${AIDER_DEFAULT_MODEL:-${LOKI_AIDER_MODEL:-claude-sonnet-4-5-20250929}}, tier: $tier_param" >> "$agent_log"
9096
- { invoke_aider "$prompt" 2>&1 | tee -a "$log_file" "$agent_log"; \
9136
+ { invoke_aider "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
9097
9137
  } && exit_code=0 || exit_code=$?
9098
9138
  ;;
9099
9139
 
@@ -9221,17 +9261,21 @@ if __name__ == "__main__":
9221
9261
  else
9222
9262
  local cr_count
9223
9263
  cr_count=$(track_gate_failure "code_review")
9264
+ # BUG-QG-007: Always append to gate_failures regardless of escalation tier
9265
+ # BUG-RUN-009: Write PAUSE to .loki/PAUSE (not .loki/signals/PAUSE)
9224
9266
  if [ "$cr_count" -ge "$GATE_PAUSE_LIMIT" ]; then
9225
9267
  log_error "Gate escalation: code_review failed $cr_count times (>= $GATE_PAUSE_LIMIT) - forcing PAUSE for human intervention"
9226
9268
  echo "PAUSE" > "${TARGET_DIR:-.}/.loki/signals/GATE_ESCALATION"
9227
9269
  echo "code_review gate failed $cr_count consecutive times" >> "${TARGET_DIR:-.}/.loki/signals/GATE_ESCALATION"
9228
- touch "${TARGET_DIR:-.}/.loki/signals/PAUSE"
9270
+ touch "${TARGET_DIR:-.}/.loki/PAUSE"
9271
+ gate_failures="${gate_failures}code_review_PAUSED,"
9229
9272
  elif [ "$cr_count" -ge "$GATE_ESCALATE_LIMIT" ]; then
9230
9273
  log_warn "Gate escalation: code_review failed $cr_count times (>= $GATE_ESCALATE_LIMIT) - escalating"
9231
9274
  echo "ESCALATE" > "${TARGET_DIR:-.}/.loki/signals/GATE_ESCALATION"
9232
9275
  gate_failures="${gate_failures}code_review_ESCALATED,"
9233
9276
  elif [ "$cr_count" -ge "$GATE_CLEAR_LIMIT" ]; then
9234
9277
  log_warn "Gate cleared: code_review failed $cr_count times (>= $GATE_CLEAR_LIMIT) - passing gate this iteration, counter continues"
9278
+ gate_failures="${gate_failures}code_review,"
9235
9279
  else
9236
9280
  gate_failures="${gate_failures}code_review,"
9237
9281
  log_warn "Code review BLOCKED ($cr_count consecutive) - Critical/High findings"
@@ -9257,20 +9301,21 @@ if __name__ == "__main__":
9257
9301
  auto_capture_episode "$ITERATION_COUNT" "$exit_code" "${rarv_phase:-iteration}" \
9258
9302
  "${prd_path:-codebase-analysis}" "$duration" "$log_file"
9259
9303
 
9304
+ # BUG-QG-008: Track iteration for convergence regardless of exit code
9305
+ if type council_track_iteration &>/dev/null; then
9306
+ council_track_iteration "$log_file"
9307
+ fi
9308
+
9260
9309
  # Check for success - ONLY stop on explicit completion promise
9261
9310
  # There's never a "complete" product - always improvements, bugs, features
9262
9311
  if [ $exit_code -eq 0 ]; then
9263
9312
  # Episode trace already captured by auto_capture_episode above (v6.15.0)
9264
9313
 
9265
- # Track iteration for Completion Council convergence detection
9266
- if type council_track_iteration &>/dev/null; then
9267
- council_track_iteration "$log_file"
9268
- fi
9269
-
9270
9314
  # Perpetual mode: NEVER stop, always continue
9271
9315
  if [ "$PERPETUAL_MODE" = "true" ]; then
9272
9316
  log_info "Perpetual mode: Ignoring exit, continuing immediately..."
9273
- ((retry++))
9317
+ # BUG-RUN-010: Reset retry counter on success (only count failures)
9318
+ retry=0
9274
9319
  continue # Immediately start next iteration, no wait
9275
9320
  fi
9276
9321
 
@@ -9285,11 +9330,13 @@ if __name__ == "__main__":
9285
9330
  run_memory_consolidation
9286
9331
  notify_all_complete
9287
9332
  save_state $retry "council_approved" 0
9333
+ rm -f "$iter_output" 2>/dev/null
9288
9334
  return 0
9289
9335
  fi
9290
9336
 
9291
9337
  # Only stop if EXPLICIT completion promise text was output
9292
- if [ -n "$COMPLETION_PROMISE" ] && check_completion_promise "$log_file"; then
9338
+ # BUG-RUN-001: Use per-iteration output, not stale daily log
9339
+ if [ -n "$COMPLETION_PROMISE" ] && check_completion_promise "$iter_output"; then
9293
9340
  echo ""
9294
9341
  log_header "COMPLETION PROMISE FULFILLED: $COMPLETION_PROMISE"
9295
9342
  log_info "Explicit completion promise detected in output."
@@ -9298,6 +9345,7 @@ if __name__ == "__main__":
9298
9345
  run_memory_consolidation
9299
9346
  notify_all_complete
9300
9347
  save_state $retry "completion_promise_fulfilled" 0
9348
+ rm -f "$iter_output" 2>/dev/null
9301
9349
  return 0
9302
9350
  fi
9303
9351
 
@@ -9312,7 +9360,8 @@ if __name__ == "__main__":
9312
9360
 
9313
9361
  # SUCCESS exit - continue IMMEDIATELY to next iteration (no wait!)
9314
9362
  log_step "Starting next iteration..."
9315
- ((retry++))
9363
+ # BUG-RUN-010: Reset retry counter on success (only count failures)
9364
+ retry=0
9316
9365
  continue # Immediately start next iteration, no exponential backoff
9317
9366
  fi
9318
9367
 
@@ -9323,7 +9372,8 @@ if __name__ == "__main__":
9323
9372
  create_checkpoint "iteration-${ITERATION_COUNT} failed (exit=$exit_code)" "iteration-${ITERATION_COUNT}-fail"
9324
9373
 
9325
9374
  # Handle retry - check for rate limit first
9326
- local rate_limit_wait=$(detect_rate_limit "$log_file")
9375
+ # BUG-RUN-002: Use per-iteration output, not stale daily log
9376
+ local rate_limit_wait=$(detect_rate_limit "$iter_output")
9327
9377
  local wait_time
9328
9378
 
9329
9379
  if [ $rate_limit_wait -gt 0 ]; then
@@ -9357,11 +9407,16 @@ if __name__ == "__main__":
9357
9407
  while [ $remaining -gt 0 ]; do
9358
9408
  local human_remaining=$(format_duration $remaining)
9359
9409
  printf "\r${YELLOW}Resuming in ${human_remaining}...${NC} "
9360
- sleep $interval
9361
- remaining=$((remaining - interval))
9410
+ # BUG-RUN-007: Prevent timer from overshooting into negative
9411
+ local sleep_time=$((remaining < interval ? remaining : interval))
9412
+ sleep $sleep_time
9413
+ remaining=$((remaining - sleep_time))
9362
9414
  done
9363
9415
  echo ""
9364
9416
 
9417
+ # Clean up per-iteration output file
9418
+ rm -f "$iter_output" 2>/dev/null
9419
+
9365
9420
  ((retry++))
9366
9421
  done
9367
9422
 
@@ -9581,7 +9636,16 @@ EOF
9581
9636
  # Cleanup Handler (with Ctrl+C pause support)
9582
9637
  #===============================================================================
9583
9638
 
9639
+ # BUG-XC-007: Guard against re-entrant signal handler execution
9640
+ _CLEANUP_IN_PROGRESS=0
9641
+
9584
9642
  cleanup() {
9643
+ # Prevent re-entrant execution
9644
+ if [ "$_CLEANUP_IN_PROGRESS" -eq 1 ]; then
9645
+ return
9646
+ fi
9647
+ _CLEANUP_IN_PROGRESS=1
9648
+
9585
9649
  # Block further signals during critical cleanup operations
9586
9650
  trap '' INT TERM
9587
9651
 
@@ -9606,13 +9670,19 @@ cleanup() {
9606
9670
  rm -f "$loki_dir/sessions/${LOKI_SESSION_ID}/loki.pid" 2>/dev/null
9607
9671
  fi
9608
9672
  if [ -f "$loki_dir/session.json" ]; then
9673
+ # BUG-ST-008: Atomic session.json update via temp file + mv
9609
9674
  _LOKI_SESSION_FILE="$loki_dir/session.json" python3 -c "
9610
- import json, os
9675
+ import json, os, tempfile
9611
9676
  sf = os.environ['_LOKI_SESSION_FILE']
9612
9677
  try:
9613
- with open(sf, 'r+') as f:
9614
- d = json.load(f); d['status'] = 'stopped'
9615
- f.seek(0); f.truncate(); json.dump(d, f)
9678
+ with open(sf) as f:
9679
+ d = json.load(f)
9680
+ d['status'] = 'stopped'
9681
+ sd = os.path.dirname(sf)
9682
+ fd, tmp = tempfile.mkstemp(dir=sd, suffix='.json')
9683
+ with os.fdopen(fd, 'w') as f:
9684
+ json.dump(d, f)
9685
+ os.replace(tmp, sf)
9616
9686
  except (json.JSONDecodeError, OSError): pass
9617
9687
  " 2>/dev/null || true
9618
9688
  fi
@@ -9638,13 +9708,19 @@ except (json.JSONDecodeError, OSError): pass
9638
9708
  fi
9639
9709
  # Mark session.json as stopped
9640
9710
  if [ -f "$loki_dir/session.json" ]; then
9711
+ # BUG-ST-008: Atomic session.json update via temp file + mv
9641
9712
  _LOKI_SESSION_FILE="$loki_dir/session.json" python3 -c "
9642
- import json, os
9713
+ import json, os, tempfile
9643
9714
  sf = os.environ['_LOKI_SESSION_FILE']
9644
9715
  try:
9645
- with open(sf, 'r+') as f:
9646
- d = json.load(f); d['status'] = 'stopped'
9647
- f.seek(0); f.truncate(); json.dump(d, f)
9716
+ with open(sf) as f:
9717
+ d = json.load(f)
9718
+ d['status'] = 'stopped'
9719
+ sd = os.path.dirname(sf)
9720
+ fd, tmp = tempfile.mkstemp(dir=sd, suffix='.json')
9721
+ with os.fdopen(fd, 'w') as f:
9722
+ json.dump(d, f)
9723
+ os.replace(tmp, sf)
9648
9724
  except (json.JSONDecodeError, OSError): pass
9649
9725
  " 2>/dev/null || true
9650
9726
  fi
@@ -9655,6 +9731,7 @@ except (json.JSONDecodeError, OSError): pass
9655
9731
  fi
9656
9732
 
9657
9733
  # Re-enable signals for pause mode
9734
+ _CLEANUP_IN_PROGRESS=0
9658
9735
  trap cleanup INT TERM
9659
9736
 
9660
9737
  # Check if this signal was caused by a child process dying (e.g., dashboard)
@@ -10232,13 +10309,19 @@ main() {
10232
10309
  fi
10233
10310
  # Mark session.json as stopped
10234
10311
  if [ -f "$loki_dir/session.json" ]; then
10312
+ # BUG-ST-008: Atomic session.json update via temp file + mv
10235
10313
  _LOKI_SESSION_FILE="$loki_dir/session.json" python3 -c "
10236
- import json, os
10314
+ import json, os, tempfile
10237
10315
  sf = os.environ['_LOKI_SESSION_FILE']
10238
10316
  try:
10239
- with open(sf, 'r+') as f:
10240
- d = json.load(f); d['status'] = 'stopped'
10241
- f.seek(0); f.truncate(); json.dump(d, f)
10317
+ with open(sf) as f:
10318
+ d = json.load(f)
10319
+ d['status'] = 'stopped'
10320
+ sd = os.path.dirname(sf)
10321
+ fd, tmp = tempfile.mkstemp(dir=sd, suffix='.json')
10322
+ with os.fdopen(fd, 'w') as f:
10323
+ json.dump(d, f)
10324
+ os.replace(tmp, sf)
10242
10325
  except (json.JSONDecodeError, OSError): pass
10243
10326
  " 2>/dev/null || true
10244
10327
  fi