loki-mode 6.71.1 → 6.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +9 -1
  2. package/SKILL.md +2 -2
  3. package/VERSION +1 -1
  4. package/autonomy/hooks/migration-hooks.sh +26 -0
  5. package/autonomy/loki +429 -92
  6. package/autonomy/run.sh +219 -38
  7. package/dashboard/__init__.py +1 -1
  8. package/dashboard/server.py +101 -19
  9. package/docs/INSTALLATION.md +20 -11
  10. package/docs/bug-fixes/agent-01-cli-fixes.md +101 -0
  11. package/docs/bug-fixes/agent-02-purplelab-fixes.md +88 -0
  12. package/docs/bug-fixes/agent-03-dashboard-fixes.md +119 -0
  13. package/docs/bug-fixes/agent-04-memory-fixes.md +105 -0
  14. package/docs/bug-fixes/agent-05-provider-fixes.md +86 -0
  15. package/docs/bug-fixes/agent-06-integration-fixes.md +101 -0
  16. package/docs/bug-fixes/agent-07-dash-run-fixes.md +101 -0
  17. package/docs/bug-fixes/agent-08-docker-fixes.md +164 -0
  18. package/docs/bug-fixes/agent-09-e2e-build-fixes.md +69 -0
  19. package/docs/bug-fixes/agent-10-e2e-fullstack-fixes.md +102 -0
  20. package/docs/bug-fixes/agent-11-e2e-session-fixes.md +70 -0
  21. package/docs/bug-fixes/agent-12-scenario-fixes.md +120 -0
  22. package/docs/bug-fixes/agent-13-enterprise-fixes.md +143 -0
  23. package/docs/bug-fixes/agent-14-uat-newuser-fixes.md +88 -0
  24. package/docs/bug-fixes/agent-15-uat-poweruser-fixes.md +132 -0
  25. package/docs/bug-fixes/agent-19-code-review.md +316 -0
  26. package/docs/bug-fixes/agent-20-architecture-review.md +331 -0
  27. package/docs/competitive/bolt-new-analysis.md +579 -0
  28. package/docs/competitive/emergence-others-analysis.md +605 -0
  29. package/docs/competitive/replit-lovable-analysis.md +622 -0
  30. package/docs/test-scenarios/edge-cases.md +813 -0
  31. package/docs/test-scenarios/enterprise-scenarios.md +732 -0
  32. package/mcp/__init__.py +1 -1
  33. package/mcp/server.py +49 -5
  34. package/memory/consolidation.py +33 -0
  35. package/memory/embeddings.py +10 -1
  36. package/memory/engine.py +83 -38
  37. package/memory/retrieval.py +36 -0
  38. package/memory/storage.py +56 -4
  39. package/memory/token_economics.py +14 -2
  40. package/memory/vector_index.py +36 -7
  41. package/package.json +1 -1
  42. package/providers/gemini.sh +89 -2
  43. package/templates/README.md +1 -1
  44. package/templates/cli-tool.md +30 -0
  45. package/templates/dashboard.md +4 -0
  46. package/templates/data-pipeline.md +4 -0
  47. package/templates/discord-bot.md +47 -0
  48. package/templates/game.md +4 -0
  49. package/templates/microservice.md +4 -0
  50. package/templates/npm-library.md +4 -0
  51. package/templates/rest-api-auth.md +50 -20
  52. package/templates/rest-api.md +15 -0
  53. package/templates/saas-starter.md +1 -1
  54. package/templates/slack-bot.md +36 -0
  55. package/templates/static-landing-page.md +9 -1
  56. package/templates/web-scraper.md +4 -0
  57. package/web-app/dist/assets/Badge-CeBkFjo6.js +1 -0
  58. package/web-app/dist/assets/Button-yuhqo8Fq.js +1 -0
  59. package/web-app/dist/assets/{Card-B1bV4syB.js → Card-BG17vsX0.js} +1 -1
  60. package/web-app/dist/assets/{HomePage-CZTV6Nea.js → HomePage-BMSQ7Apj.js} +3 -3
  61. package/web-app/dist/assets/{LoginPage-D4UdURJc.js → LoginPage-aH_6iolg.js} +1 -1
  62. package/web-app/dist/assets/{NotFoundPage-CCLSeL6j.js → NotFoundPage-Di8cNtB1.js} +1 -1
  63. package/web-app/dist/assets/ProjectPage-BtRssmw9.js +285 -0
  64. package/web-app/dist/assets/ProjectsPage-B-FTFagc.js +6 -0
  65. package/web-app/dist/assets/{SettingsPage-Xuv8EfAg.js → SettingsPage-DIJPBla4.js} +1 -1
  66. package/web-app/dist/assets/TeamsPage--19fNX7w.js +36 -0
  67. package/web-app/dist/assets/TemplatesPage-ChUQNOOv.js +11 -0
  68. package/web-app/dist/assets/TerminalOutput-Dwrzecyl.js +31 -0
  69. package/web-app/dist/assets/activity-BNRWeu9N.js +6 -0
  70. package/web-app/dist/assets/{arrow-left-CaGtolHc.js → arrow-left-Ce6g1_YE.js} +1 -1
  71. package/web-app/dist/assets/circle-alert-LIndawHL.js +11 -0
  72. package/web-app/dist/assets/clock-Bpj4VPlP.js +6 -0
  73. package/web-app/dist/assets/{external-link-CazyUyav.js → external-link-BhhdF0iQ.js} +1 -1
  74. package/web-app/dist/assets/folder-open-CM2LgfxI.js +11 -0
  75. package/web-app/dist/assets/index-8-KpWWq7.css +1 -0
  76. package/web-app/dist/assets/index-kPDW4e_b.js +236 -0
  77. package/web-app/dist/assets/lock-sAk3Xe54.js +16 -0
  78. package/web-app/dist/assets/search-CR-2i9by.js +6 -0
  79. package/web-app/dist/assets/server-DuFh4ymA.js +26 -0
  80. package/web-app/dist/assets/trash-2-BmkkT8V_.js +11 -0
  81. package/web-app/dist/index.html +2 -2
  82. package/web-app/server.py +1321 -53
  83. package/web-app/dist/assets/Badge-CBUx2PjL.js +0 -6
  84. package/web-app/dist/assets/Button-DsRiznlh.js +0 -21
  85. package/web-app/dist/assets/ProjectPage-D0w_X9tG.js +0 -237
  86. package/web-app/dist/assets/ProjectsPage-ByYxDlKC.js +0 -16
  87. package/web-app/dist/assets/TemplatesPage-BKWN07mc.js +0 -1
  88. package/web-app/dist/assets/TerminalOutput-Dj98V8Z-.js +0 -51
  89. package/web-app/dist/assets/clock-C_CDmobx.js +0 -11
  90. package/web-app/dist/assets/index-D452pFGl.css +0 -1
  91. package/web-app/dist/assets/index-Df4_kgLY.js +0 -196
package/autonomy/run.sh CHANGED
@@ -947,8 +947,9 @@ emit_event_json() {
947
947
  else
948
948
  json_data+=","
949
949
  fi
950
- # Quote string values, leave numbers/booleans as-is
951
- if [[ "$value" =~ ^[0-9]+$ ]] || [[ "$value" =~ ^(true|false|null)$ ]]; then
950
+ # Quote string values, leave numbers/booleans/floats as-is
951
+ # BUG-NEW-004: Also match floats (e.g., cost=3.14) not just integers
952
+ if [[ "$value" =~ ^[0-9]+\.?[0-9]*$ ]] || [[ "$value" =~ ^(true|false|null)$ ]]; then
952
953
  json_data+="\"$key\":$value"
953
954
  else
954
955
  # Escape backslashes, quotes, and special chars in value
@@ -1364,6 +1365,9 @@ get_phase_names() {
1364
1365
 
1365
1366
  # Global tier for current iteration (set by get_rarv_tier)
1366
1367
  CURRENT_TIER="development"
1368
+ # Export for provider helper functions (e.g., gemini.sh:provider_get_current_model)
1369
+ LOKI_CURRENT_TIER="$CURRENT_TIER"
1370
+ export LOKI_CURRENT_TIER
1367
1371
 
1368
1372
  # Get the appropriate tier based on RARV cycle step
1369
1373
  # Args: iteration_count (defaults to ITERATION_COUNT)
@@ -2159,6 +2163,17 @@ create_worktree() {
2159
2163
  return 0
2160
2164
  else
2161
2165
  log_error "Failed to create worktree: $stream_name"
2166
+ # BUG-PU-001: Clean up partial worktree on creation failure
2167
+ if [ -d "$worktree_path" ]; then
2168
+ git -C "$TARGET_DIR" worktree remove "$worktree_path" --force 2>/dev/null || \
2169
+ rm -rf "$worktree_path" 2>/dev/null || true
2170
+ fi
2171
+ # Clean up any orphaned branch created during the attempt
2172
+ if [ -n "$branch_name" ]; then
2173
+ git -C "$TARGET_DIR" branch -D "$branch_name" 2>/dev/null || true
2174
+ else
2175
+ git -C "$TARGET_DIR" branch -D "parallel-${stream_name}" 2>/dev/null || true
2176
+ fi
2162
2177
  return 1
2163
2178
  fi
2164
2179
  }
@@ -3026,7 +3041,15 @@ invoke_gemini() {
3026
3041
  local prompt="$1"
3027
3042
  shift
3028
3043
 
3029
- local model="${PROVIDER_MODEL:-${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}}"
3044
+ # BUG-PROV-001/006 fix: Use dynamic model resolution instead of frozen PROVIDER_MODEL.
3045
+ # provider_get_current_model() resolves based on LOKI_CURRENT_TIER at runtime.
3046
+ # Falls back to provider_get_tier_param if available, then to GEMINI_DEFAULT_PRO.
3047
+ local model
3048
+ if type provider_get_current_model &>/dev/null; then
3049
+ model=$(provider_get_current_model)
3050
+ else
3051
+ model="${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}"
3052
+ fi
3030
3053
  local fallback="${PROVIDER_MODEL_FALLBACK:-${GEMINI_DEFAULT_FLASH:-gemini-3-flash-preview}}"
3031
3054
 
3032
3055
  # Create temp file for output to preserve streaming while checking for rate limit
@@ -3057,7 +3080,13 @@ invoke_gemini_capture() {
3057
3080
  local prompt="$1"
3058
3081
  shift
3059
3082
 
3060
- local model="${PROVIDER_MODEL:-${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}}"
3083
+ # BUG-PROV-001/006 fix: Use dynamic model resolution instead of frozen PROVIDER_MODEL
3084
+ local model
3085
+ if type provider_get_current_model &>/dev/null; then
3086
+ model=$(provider_get_current_model)
3087
+ else
3088
+ model="${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}"
3089
+ fi
3061
3090
  local fallback="${PROVIDER_MODEL_FALLBACK:-${GEMINI_DEFAULT_FLASH:-gemini-3-flash-preview}}"
3062
3091
  local output
3063
3092
 
@@ -3249,15 +3278,21 @@ set_phase() {
3249
3278
 
3250
3279
  log_info "Phase changed: $current_phase -> $new_phase"
3251
3280
 
3252
- # Update orchestrator state
3281
+ # Update orchestrator state (atomic via temp file + mv)
3282
+ # BUG ARCH-001 fix: prevent state corruption if process is killed mid-write
3253
3283
  if [ -f "$orch_file" ]; then
3254
3284
  python3 -c "
3255
- import json, sys
3256
- with open(sys.argv[1], 'r') as f:
3285
+ import json, sys, os, tempfile
3286
+ orch_file = sys.argv[1]
3287
+ new_phase = sys.argv[2]
3288
+ with open(orch_file, 'r') as f:
3257
3289
  data = json.load(f)
3258
- data['currentPhase'] = sys.argv[2]
3259
- with open(sys.argv[1], 'w') as f:
3290
+ data['currentPhase'] = new_phase
3291
+ orch_dir = os.path.dirname(orch_file)
3292
+ fd, tmp = tempfile.mkstemp(dir=orch_dir, suffix='.json')
3293
+ with os.fdopen(fd, 'w') as f:
3260
3294
  json.dump(data, f, indent=2)
3295
+ os.replace(tmp, orch_file)
3261
3296
  " "$orch_file" "$new_phase" 2>/dev/null || true
3262
3297
  fi
3263
3298
  fi
@@ -3627,8 +3662,10 @@ print(json.dumps(data, indent=2))
3627
3662
  fi
3628
3663
  ) 200>"$lockfile"
3629
3664
 
3630
- # Update current-task.json
3631
- echo "$task_json" > .loki/queue/current-task.json
3665
+ # BUG-ST-014: Atomic current-task.json update via temp file + mv
3666
+ local ct_tmp=".loki/queue/current-task.json.tmp.$$"
3667
+ echo "$task_json" > "$ct_tmp"
3668
+ mv -f "$ct_tmp" .loki/queue/current-task.json
3632
3669
  }
3633
3670
 
3634
3671
  # Track iteration completion - move task to completed queue
@@ -3811,8 +3848,10 @@ except:
3811
3848
  " 2>/dev/null || true
3812
3849
  fi
3813
3850
 
3814
- # Clear current-task.json
3815
- echo "{}" > .loki/queue/current-task.json
3851
+ # BUG-ST-014: Atomic current-task.json clear via temp file + mv
3852
+ local ct_tmp=".loki/queue/current-task.json.tmp.$$"
3853
+ echo "{}" > "$ct_tmp"
3854
+ mv -f "$ct_tmp" .loki/queue/current-task.json
3816
3855
 
3817
3856
  # Write-back completed BMAD stories to source artifacts (v6.29.0)
3818
3857
  if [ "$exit_code" = "0" ]; then
@@ -5526,20 +5565,22 @@ enforce_test_coverage() {
5526
5565
 
5527
5566
  # JavaScript/TypeScript
5528
5567
  if [ -f "${TARGET_DIR:-.}/package.json" ]; then
5568
+ # BUG-EC-014: Wrap test runners with timeout to prevent hanging indefinitely
5569
+ local gate_timeout="${LOKI_GATE_TIMEOUT:-300}" # 5 minutes default
5529
5570
  if grep -q '"vitest"' "${TARGET_DIR:-.}/package.json" 2>/dev/null; then
5530
5571
  test_runner="vitest"
5531
5572
  local output
5532
- output=$(cd "${TARGET_DIR:-.}" && npx vitest run --reporter=json 2>&1) || test_passed=false
5573
+ output=$(cd "${TARGET_DIR:-.}" && timeout "$gate_timeout" npx vitest run --reporter=json 2>&1) || test_passed=false
5533
5574
  details="vitest: $(echo "$output" | tail -3 | tr '\n' ' ')"
5534
5575
  elif grep -q '"jest"' "${TARGET_DIR:-.}/package.json" 2>/dev/null; then
5535
5576
  test_runner="jest"
5536
5577
  local output
5537
- output=$(cd "${TARGET_DIR:-.}" && npx jest --passWithNoTests --forceExit 2>&1) || test_passed=false
5578
+ output=$(cd "${TARGET_DIR:-.}" && timeout "$gate_timeout" npx jest --passWithNoTests --forceExit 2>&1) || test_passed=false
5538
5579
  details="jest: $(echo "$output" | tail -3 | tr '\n' ' ')"
5539
5580
  elif grep -q '"mocha"' "${TARGET_DIR:-.}/package.json" 2>/dev/null; then
5540
5581
  test_runner="mocha"
5541
5582
  local output
5542
- output=$(cd "${TARGET_DIR:-.}" && npx mocha 2>&1) || test_passed=false
5583
+ output=$(cd "${TARGET_DIR:-.}" && timeout "$gate_timeout" npx mocha 2>&1) || test_passed=false
5543
5584
  details="mocha: $(echo "$output" | tail -3 | tr '\n' ' ')"
5544
5585
  fi
5545
5586
  fi
@@ -6859,16 +6900,32 @@ except Exception:
6859
6900
  PYEOF
6860
6901
  }
6861
6902
 
6862
- # Check provider health: API key exists + CLI installed
6903
+ # Check provider health: CLI installed + authentication available
6863
6904
  # Returns: 0 if healthy, 1 if unhealthy
6905
+ # BUG-PROV-003 fix: Claude Code supports OAuth sessions in addition to API keys.
6906
+ # Checking only for ANTHROPIC_API_KEY incorrectly marks OAuth users as unhealthy,
6907
+ # causing unnecessary failover to degraded providers. Now also checks for OAuth
6908
+ # session files and `claude auth status` as fallback.
6864
6909
  check_provider_health() {
6865
6910
  local provider="$1"
6866
6911
 
6867
- # Check CLI is installed
6912
+ # Check CLI is installed and authentication is available
6868
6913
  case "$provider" in
6869
6914
  claude)
6870
6915
  command -v claude &>/dev/null || return 1
6871
- [ -n "${ANTHROPIC_API_KEY:-}" ] || return 1
6916
+ # Accept API key OR OAuth session (Claude Code supports both)
6917
+ if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
6918
+ return 0
6919
+ fi
6920
+ # Check for OAuth session files (~/.claude/ stores sessions)
6921
+ if [ -d "${HOME}/.claude" ] && [ -f "${HOME}/.claude/.credentials.json" ]; then
6922
+ return 0
6923
+ fi
6924
+ # Last resort: ask the CLI if it has valid auth
6925
+ if claude auth status &>/dev/null 2>&1; then
6926
+ return 0
6927
+ fi
6928
+ return 1
6872
6929
  ;;
6873
6930
  codex)
6874
6931
  command -v codex &>/dev/null || return 1
@@ -6876,7 +6933,15 @@ check_provider_health() {
6876
6933
  ;;
6877
6934
  gemini)
6878
6935
  command -v gemini &>/dev/null || return 1
6879
- [ -n "${GOOGLE_API_KEY:-${GEMINI_API_KEY:-}}" ] || return 1
6936
+ # BUG-PROV-003: Also accept GEMINI_API_KEY and gcloud ADC
6937
+ if [ -n "${GOOGLE_API_KEY:-}" ] || [ -n "${GEMINI_API_KEY:-}" ]; then
6938
+ return 0
6939
+ fi
6940
+ # Check for gcloud Application Default Credentials
6941
+ if [ -f "${HOME}/.config/gcloud/application_default_credentials.json" ]; then
6942
+ return 0
6943
+ fi
6944
+ return 1
6880
6945
  ;;
6881
6946
  cline)
6882
6947
  command -v cline &>/dev/null || return 1
@@ -6944,7 +7009,13 @@ attempt_provider_failover() {
6944
7009
  update_failover_health "$provider" "healthy"
6945
7010
 
6946
7011
  # Update runtime provider vars
7012
+ # BUG-PROV-008 fix: Update BOTH PROVIDER_NAME and LOKI_PROVIDER.
7013
+ # Without this, subprocesses and the MCP server (which read LOKI_PROVIDER)
7014
+ # continue using the old provider name, causing provider-specific behavior
7015
+ # in child processes to use the wrong config.
6947
7016
  PROVIDER_NAME="$provider"
7017
+ LOKI_PROVIDER="$provider"
7018
+ export LOKI_PROVIDER
6948
7019
 
6949
7020
  emit_event_json "provider_failover" \
6950
7021
  "from=$current" \
@@ -6996,7 +7067,10 @@ check_primary_recovery() {
6996
7067
  update_failover_state "currentProvider" "$primary"
6997
7068
  update_failover_health "$primary" "healthy"
6998
7069
 
7070
+ # BUG-PROV-008 fix: Update BOTH PROVIDER_NAME and LOKI_PROVIDER on recovery
6999
7071
  PROVIDER_NAME="$primary"
7072
+ LOKI_PROVIDER="$primary"
7073
+ export LOKI_PROVIDER
7000
7074
 
7001
7075
  emit_event_json "provider_recovery" \
7002
7076
  "from=$current" \
@@ -7935,6 +8009,9 @@ save_state() {
7935
8009
  local status="$2"
7936
8010
  local exit_code="$3"
7937
8011
 
8012
+ # BUG-ST-013: Ensure .loki directory exists (defensive -- may be called from signal handler)
8013
+ mkdir -p .loki 2>/dev/null || true
8014
+
7938
8015
  # BUG-XC-004: Atomic write via temp file + mv
7939
8016
  local state_tmp=".loki/autonomy-state.json.tmp.$$"
7940
8017
  cat > "$state_tmp" << EOF
@@ -7954,8 +8031,44 @@ EOF
7954
8031
  }
7955
8032
 
7956
8033
  load_state() {
8034
+ # BUG-EP-015: Clean up orphaned temp files from kill -9 crashes
8035
+ # These are left behind when the process is killed during atomic writes
8036
+ find .loki/ -maxdepth 1 -name "*.tmp.*" -mmin +5 -delete 2>/dev/null || true
8037
+ find .loki/state/ -name "*.tmp.*" -mmin +5 -delete 2>/dev/null || true
8038
+
7957
8039
  if [ -f ".loki/autonomy-state.json" ]; then
7958
8040
  if command -v python3 &> /dev/null; then
8041
+ # BUG-ST-006: Validate checkpoint integrity before loading state
8042
+ local state_valid
8043
+ state_valid=$(python3 -c "
8044
+ import json, sys
8045
+ try:
8046
+ with open('.loki/autonomy-state.json') as f:
8047
+ d = json.load(f)
8048
+ # Validate required fields exist and have sane types
8049
+ rc = d.get('retryCount', 0)
8050
+ ic = d.get('iterationCount', 0)
8051
+ status = d.get('status', 'unknown')
8052
+ if not isinstance(rc, (int, float)) or not isinstance(ic, (int, float)):
8053
+ print('invalid')
8054
+ sys.exit(0)
8055
+ if rc < 0 or ic < 0:
8056
+ print('invalid')
8057
+ sys.exit(0)
8058
+ print('valid')
8059
+ except (json.JSONDecodeError, KeyError, TypeError, OSError):
8060
+ print('invalid')
8061
+ " 2>/dev/null || echo "invalid")
8062
+
8063
+ if [ "$state_valid" != "valid" ]; then
8064
+ log_warn "State file corrupted or invalid - starting fresh"
8065
+ RETRY_COUNT=0
8066
+ ITERATION_COUNT=0
8067
+ # Back up corrupted state file for diagnosis
8068
+ mv ".loki/autonomy-state.json" ".loki/autonomy-state.json.corrupt.$(date +%s)" 2>/dev/null || true
8069
+ return
8070
+ fi
8071
+
7959
8072
  # Load retry count, iteration count, and status from previous session
7960
8073
  local prev_status
7961
8074
  prev_status=$(python3 -c "import json; print(json.load(open('.loki/autonomy-state.json')).get('status', 'unknown'))" 2>/dev/null || echo "unknown")
@@ -9280,16 +9393,9 @@ run_autonomous() {
9280
9393
  fi
9281
9394
 
9282
9395
  while [ $retry -lt $MAX_RETRIES ]; do
9283
- # Increment iteration count
9284
- ((ITERATION_COUNT++))
9285
-
9286
- # Check max iterations
9287
- if check_max_iterations; then
9288
- save_state $retry "max_iterations_reached" 0
9289
- return 0
9290
- fi
9291
-
9292
- # Check for human intervention (PAUSE, HUMAN_INPUT.md, STOP)
9396
+ # Check for human intervention BEFORE incrementing iteration count
9397
+ # BUG-ST-010: Moved pause/stop checks before ITERATION_COUNT increment
9398
+ # to prevent spurious count increases when resuming from pause
9293
9399
  check_human_intervention
9294
9400
  local intervention_result=$?
9295
9401
  case $intervention_result in
@@ -9304,6 +9410,15 @@ run_autonomous() {
9304
9410
  continue # Will hit PAUSE check on next iteration
9305
9411
  fi
9306
9412
 
9413
+ # Increment iteration count (after pause/stop checks to avoid spurious increments)
9414
+ ((ITERATION_COUNT++))
9415
+
9416
+ # Check max iterations
9417
+ if check_max_iterations; then
9418
+ save_state $retry "max_iterations_reached" 0
9419
+ return 0
9420
+ fi
9421
+
9307
9422
  # Watchdog: periodic process health check (opt-in via LOKI_WATCHDOG=true)
9308
9423
  if [[ "$WATCHDOG_ENABLED" == "true" ]]; then
9309
9424
  local now_epoch
@@ -9375,6 +9490,11 @@ run_autonomous() {
9375
9490
 
9376
9491
  # Dynamic tier selection based on RARV cycle phase
9377
9492
  CURRENT_TIER=$(get_rarv_tier "$ITERATION_COUNT")
9493
+ # NEW BUG FIX: Export LOKI_CURRENT_TIER so provider helper functions
9494
+ # (e.g., gemini.sh:provider_get_current_model) can resolve the correct model.
9495
+ # Without this, LOKI_CURRENT_TIER is always empty and defaults to "planning".
9496
+ LOKI_CURRENT_TIER="$CURRENT_TIER"
9497
+ export LOKI_CURRENT_TIER
9378
9498
  local rarv_phase=$(get_rarv_phase_name "$ITERATION_COUNT")
9379
9499
  local tier_param=$(get_provider_tier_param "$CURRENT_TIER")
9380
9500
  echo "=== RARV Phase: $rarv_phase, Tier: $CURRENT_TIER ($tier_param) ===" | tee -a "$log_file" "$agent_log"
@@ -9635,25 +9755,46 @@ if __name__ == "__main__":
9635
9755
  gemini)
9636
9756
  # Gemini: Degraded mode - no stream-json, no agent tracking
9637
9757
  # Uses invoke_gemini helper for rate limit fallback to flash model
9638
- local model="${PROVIDER_MODEL:-${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}}"
9758
+ # BUG-PROV-001 fix: Use tier_param (resolved model) instead of frozen PROVIDER_MODEL
9759
+ # tier_param is computed above via get_provider_tier_param() -> resolve_model_for_tier()
9760
+ # which returns the correct model name for the current RARV tier
9761
+ local model="$tier_param"
9639
9762
  local fallback="${PROVIDER_MODEL_FALLBACK:-${GEMINI_DEFAULT_FLASH:-gemini-3-flash-preview}}"
9640
- echo "[loki] Gemini model: $model (fallback: $fallback), tier: $tier_param" >> "$log_file"
9641
- echo "[loki] Gemini model: $model (fallback: $fallback), tier: $tier_param" >> "$agent_log"
9763
+ echo "[loki] Gemini model: $model (fallback: $fallback), tier: $CURRENT_TIER" >> "$log_file"
9764
+ echo "[loki] Gemini model: $model (fallback: $fallback), tier: $CURRENT_TIER" >> "$agent_log"
9765
+
9766
+ # BUG-PROV-003: Resolve API key (supports GEMINI_API_KEY alias and ADC)
9767
+ if type _gemini_resolve_api_key &>/dev/null; then
9768
+ _gemini_resolve_api_key || true
9769
+ fi
9642
9770
 
9643
- # Try primary model, fallback on rate limit
9644
- local tmp_output
9771
+ # Try primary model, fallback on rate limit or auth error
9772
+ local tmp_output tmp_stderr
9645
9773
  tmp_output=$(mktemp)
9774
+ tmp_stderr=$(mktemp)
9646
9775
  # BUG-RUN-011/RUN-013: Use PIPESTATUS[0] for primary invocation too
9647
- gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>&1 | tee "$tmp_output" | tee -a "$log_file" "$agent_log" "$iter_output"
9776
+ gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>"$tmp_stderr" | tee "$tmp_output" | tee -a "$log_file" "$agent_log" "$iter_output"
9648
9777
  exit_code=${PIPESTATUS[0]}
9649
9778
 
9650
- if [[ $exit_code -ne 0 ]] && grep -qiE "(rate.?limit|429|quota|resource.?exhausted)" "$tmp_output"; then
9779
+ # BUG-PROV-003: Handle auth errors with API key rotation
9780
+ if [[ $exit_code -ne 0 ]] && grep -qiE "(401|403|unauthorized|forbidden|invalid.?api.?key|permission.?denied)" "$tmp_stderr" 2>/dev/null; then
9781
+ if type _gemini_rotate_api_key &>/dev/null && _gemini_rotate_api_key; then
9782
+ log_warn "Auth error on Gemini, rotated to next API key"
9783
+ rm -f "$tmp_output" "$tmp_stderr"
9784
+ tmp_output=$(mktemp)
9785
+ tmp_stderr=$(mktemp)
9786
+ gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>"$tmp_stderr" | tee "$tmp_output" | tee -a "$log_file" "$agent_log" "$iter_output"
9787
+ exit_code=${PIPESTATUS[0]}
9788
+ fi
9789
+ fi
9790
+
9791
+ if [[ $exit_code -ne 0 ]] && grep -qiE "(rate.?limit|429|quota|resource.?exhausted)" "$tmp_stderr" "$tmp_output" 2>/dev/null; then
9651
9792
  log_warn "Rate limit hit on $model, falling back to $fallback"
9652
9793
  echo "[loki] Fallback to $fallback due to rate limit" >> "$log_file"
9653
9794
  gemini --approval-mode=yolo --model "$fallback" "$prompt" < /dev/null 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"
9654
9795
  exit_code=${PIPESTATUS[0]}
9655
9796
  fi
9656
- rm -f "$tmp_output"
9797
+ rm -f "$tmp_output" "$tmp_stderr"
9657
9798
  ;;
9658
9799
 
9659
9800
  cline)
@@ -9688,6 +9829,13 @@ if __name__ == "__main__":
9688
9829
  local duration=$((end_time - start_time))
9689
9830
 
9690
9831
  log_info "${PROVIDER_DISPLAY_NAME:-Claude} exited with code $exit_code after ${duration}s"
9832
+
9833
+ # BUG-EC-013: Detect empty provider output (0 bytes = no work done)
9834
+ if [ -f "$iter_output" ] && [ ! -s "$iter_output" ] && [ $exit_code -eq 0 ]; then
9835
+ log_warn "Provider returned empty output (0 bytes) despite exit code 0 -- treating as error"
9836
+ exit_code=1
9837
+ fi
9838
+
9691
9839
  save_state $retry "exited" $exit_code
9692
9840
 
9693
9841
  # Auto-track iteration completion (for dashboard task queue)
@@ -9775,6 +9923,16 @@ if __name__ == "__main__":
9775
9923
  log_warn "Static analysis FAILED ($sa_count consecutive) - findings injected into next iteration"
9776
9924
  fi
9777
9925
  fi
9926
+ # BUG-ST-002: Check pause signal between quality gates
9927
+ if [ -f "${TARGET_DIR:-.}/.loki/PAUSE" ] || [ -f "${TARGET_DIR:-.}/.loki/STOP" ]; then
9928
+ log_warn "Pause/stop signal detected between quality gates - deferring remaining gates"
9929
+ # Store partial gate failures before breaking out
9930
+ if [ -n "$gate_failures" ]; then
9931
+ echo "$gate_failures" > "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt"
9932
+ fi
9933
+ # Let the main loop handle the pause/stop on next iteration
9934
+ continue
9935
+ fi
9778
9936
  # Test coverage gate
9779
9937
  if [ "${PHASE_UNIT_TESTS:-true}" = "true" ]; then
9780
9938
  log_info "Quality gate: test coverage..."
@@ -9787,6 +9945,14 @@ if __name__ == "__main__":
9787
9945
  log_warn "Test coverage gate FAILED ($tc_count consecutive) - must pass next iteration"
9788
9946
  fi
9789
9947
  fi
9948
+ # BUG-ST-002: Check pause signal between quality gates (after test coverage)
9949
+ if [ -f "${TARGET_DIR:-.}/.loki/PAUSE" ] || [ -f "${TARGET_DIR:-.}/.loki/STOP" ]; then
9950
+ log_warn "Pause/stop signal detected between quality gates - deferring remaining gates"
9951
+ if [ -n "$gate_failures" ]; then
9952
+ echo "$gate_failures" > "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt"
9953
+ fi
9954
+ continue
9955
+ fi
9790
9956
  # Code review gate (upgraded from advisory, with escalation)
9791
9957
  if [ "$PHASE_CODE_REVIEW" = "true" ] && [ "$ITERATION_COUNT" -gt 0 ]; then
9792
9958
  log_info "Quality gate: code review..."
@@ -9850,6 +10016,8 @@ if __name__ == "__main__":
9850
10016
  log_info "Perpetual mode: Ignoring exit, continuing immediately..."
9851
10017
  # BUG-RUN-010: Reset retry counter on success (only count failures)
9852
10018
  retry=0
10019
+ # BUG-NEW-003/E2E-005: Clean up per-iteration output before continuing
10020
+ rm -f "$iter_output" 2>/dev/null
9853
10021
  continue # Immediately start next iteration, no wait
9854
10022
  fi
9855
10023
 
@@ -9896,6 +10064,8 @@ if __name__ == "__main__":
9896
10064
  log_step "Starting next iteration..."
9897
10065
  # BUG-RUN-010: Reset retry counter on success (only count failures)
9898
10066
  retry=0
10067
+ # BUG-NEW-003/E2E-005: Clean up per-iteration output before continuing
10068
+ rm -f "$iter_output" 2>/dev/null
9899
10069
  continue # Immediately start next iteration, no exponential backoff
9900
10070
  fi
9901
10071
 
@@ -10109,9 +10279,18 @@ check_human_intervention() {
10109
10279
 
10110
10280
  # Handle pause state - wait for resume
10111
10281
  handle_pause() {
10282
+ # BUG-ST-007: Guard against concurrent pause handler execution
10283
+ if [ "${_PAUSE_IN_PROGRESS:-0}" -eq 1 ]; then
10284
+ return 0
10285
+ fi
10286
+ _PAUSE_IN_PROGRESS=1
10287
+
10112
10288
  PAUSED=true
10113
10289
  local loki_dir="${TARGET_DIR:-.}/.loki"
10114
10290
 
10291
+ # Save state before pausing so it persists across potential crashes
10292
+ save_state ${RETRY_COUNT:-0} "paused" 0
10293
+
10115
10294
  log_header "Execution Paused"
10116
10295
  echo ""
10117
10296
  log_info "To resume: Remove .loki/PAUSE or press Enter"
@@ -10141,6 +10320,7 @@ EOF
10141
10320
  if [ -f "$loki_dir/STOP" ]; then
10142
10321
  rm -f "$loki_dir/STOP" "$loki_dir/PAUSED.md"
10143
10322
  PAUSED=false
10323
+ _PAUSE_IN_PROGRESS=0
10144
10324
  return 1
10145
10325
  fi
10146
10326
 
@@ -10163,6 +10343,7 @@ EOF
10163
10343
  rm -f "$loki_dir/PAUSED.md"
10164
10344
  log_info "Resuming execution..."
10165
10345
  PAUSED=false
10346
+ _PAUSE_IN_PROGRESS=0
10166
10347
  return 0
10167
10348
  }
10168
10349
 
package/dashboard/__init__.py CHANGED
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "6.71.1"
10
+ __version__ = "6.72.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try: