loki-mode 6.71.1 → 6.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -1
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/hooks/migration-hooks.sh +26 -0
- package/autonomy/loki +429 -92
- package/autonomy/run.sh +219 -38
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +101 -19
- package/docs/INSTALLATION.md +20 -11
- package/docs/bug-fixes/agent-01-cli-fixes.md +101 -0
- package/docs/bug-fixes/agent-02-purplelab-fixes.md +88 -0
- package/docs/bug-fixes/agent-03-dashboard-fixes.md +119 -0
- package/docs/bug-fixes/agent-04-memory-fixes.md +105 -0
- package/docs/bug-fixes/agent-05-provider-fixes.md +86 -0
- package/docs/bug-fixes/agent-06-integration-fixes.md +101 -0
- package/docs/bug-fixes/agent-07-dash-run-fixes.md +101 -0
- package/docs/bug-fixes/agent-08-docker-fixes.md +164 -0
- package/docs/bug-fixes/agent-09-e2e-build-fixes.md +69 -0
- package/docs/bug-fixes/agent-10-e2e-fullstack-fixes.md +102 -0
- package/docs/bug-fixes/agent-11-e2e-session-fixes.md +70 -0
- package/docs/bug-fixes/agent-12-scenario-fixes.md +120 -0
- package/docs/bug-fixes/agent-13-enterprise-fixes.md +143 -0
- package/docs/bug-fixes/agent-14-uat-newuser-fixes.md +88 -0
- package/docs/bug-fixes/agent-15-uat-poweruser-fixes.md +132 -0
- package/docs/bug-fixes/agent-19-code-review.md +316 -0
- package/docs/bug-fixes/agent-20-architecture-review.md +331 -0
- package/docs/competitive/bolt-new-analysis.md +579 -0
- package/docs/competitive/emergence-others-analysis.md +605 -0
- package/docs/competitive/replit-lovable-analysis.md +622 -0
- package/docs/test-scenarios/edge-cases.md +813 -0
- package/docs/test-scenarios/enterprise-scenarios.md +732 -0
- package/mcp/__init__.py +1 -1
- package/mcp/server.py +49 -5
- package/memory/consolidation.py +33 -0
- package/memory/embeddings.py +10 -1
- package/memory/engine.py +83 -38
- package/memory/retrieval.py +36 -0
- package/memory/storage.py +56 -4
- package/memory/token_economics.py +14 -2
- package/memory/vector_index.py +36 -7
- package/package.json +1 -1
- package/providers/gemini.sh +89 -2
- package/templates/README.md +1 -1
- package/templates/cli-tool.md +30 -0
- package/templates/dashboard.md +4 -0
- package/templates/data-pipeline.md +4 -0
- package/templates/discord-bot.md +47 -0
- package/templates/game.md +4 -0
- package/templates/microservice.md +4 -0
- package/templates/npm-library.md +4 -0
- package/templates/rest-api-auth.md +50 -20
- package/templates/rest-api.md +15 -0
- package/templates/saas-starter.md +1 -1
- package/templates/slack-bot.md +36 -0
- package/templates/static-landing-page.md +9 -1
- package/templates/web-scraper.md +4 -0
- package/web-app/dist/assets/Badge-CeBkFjo6.js +1 -0
- package/web-app/dist/assets/Button-yuhqo8Fq.js +1 -0
- package/web-app/dist/assets/{Card-B1bV4syB.js → Card-BG17vsX0.js} +1 -1
- package/web-app/dist/assets/{HomePage-CZTV6Nea.js → HomePage-BMSQ7Apj.js} +3 -3
- package/web-app/dist/assets/{LoginPage-D4UdURJc.js → LoginPage-aH_6iolg.js} +1 -1
- package/web-app/dist/assets/{NotFoundPage-CCLSeL6j.js → NotFoundPage-Di8cNtB1.js} +1 -1
- package/web-app/dist/assets/ProjectPage-BtRssmw9.js +285 -0
- package/web-app/dist/assets/ProjectsPage-B-FTFagc.js +6 -0
- package/web-app/dist/assets/{SettingsPage-Xuv8EfAg.js → SettingsPage-DIJPBla4.js} +1 -1
- package/web-app/dist/assets/TeamsPage--19fNX7w.js +36 -0
- package/web-app/dist/assets/TemplatesPage-ChUQNOOv.js +11 -0
- package/web-app/dist/assets/TerminalOutput-Dwrzecyl.js +31 -0
- package/web-app/dist/assets/activity-BNRWeu9N.js +6 -0
- package/web-app/dist/assets/{arrow-left-CaGtolHc.js → arrow-left-Ce6g1_YE.js} +1 -1
- package/web-app/dist/assets/circle-alert-LIndawHL.js +11 -0
- package/web-app/dist/assets/clock-Bpj4VPlP.js +6 -0
- package/web-app/dist/assets/{external-link-CazyUyav.js → external-link-BhhdF0iQ.js} +1 -1
- package/web-app/dist/assets/folder-open-CM2LgfxI.js +11 -0
- package/web-app/dist/assets/index-8-KpWWq7.css +1 -0
- package/web-app/dist/assets/index-kPDW4e_b.js +236 -0
- package/web-app/dist/assets/lock-sAk3Xe54.js +16 -0
- package/web-app/dist/assets/search-CR-2i9by.js +6 -0
- package/web-app/dist/assets/server-DuFh4ymA.js +26 -0
- package/web-app/dist/assets/trash-2-BmkkT8V_.js +11 -0
- package/web-app/dist/index.html +2 -2
- package/web-app/server.py +1321 -53
- package/web-app/dist/assets/Badge-CBUx2PjL.js +0 -6
- package/web-app/dist/assets/Button-DsRiznlh.js +0 -21
- package/web-app/dist/assets/ProjectPage-D0w_X9tG.js +0 -237
- package/web-app/dist/assets/ProjectsPage-ByYxDlKC.js +0 -16
- package/web-app/dist/assets/TemplatesPage-BKWN07mc.js +0 -1
- package/web-app/dist/assets/TerminalOutput-Dj98V8Z-.js +0 -51
- package/web-app/dist/assets/clock-C_CDmobx.js +0 -11
- package/web-app/dist/assets/index-D452pFGl.css +0 -1
- package/web-app/dist/assets/index-Df4_kgLY.js +0 -196
package/autonomy/run.sh
CHANGED
|
@@ -947,8 +947,9 @@ emit_event_json() {
|
|
|
947
947
|
else
|
|
948
948
|
json_data+=","
|
|
949
949
|
fi
|
|
950
|
-
# Quote string values, leave numbers/booleans as-is
|
|
951
|
-
|
|
950
|
+
# Quote string values, leave numbers/booleans/floats as-is
|
|
951
|
+
# BUG-NEW-004: Also match floats (e.g., cost=3.14) not just integers
|
|
952
|
+
if [[ "$value" =~ ^[0-9]+\.?[0-9]*$ ]] || [[ "$value" =~ ^(true|false|null)$ ]]; then
|
|
952
953
|
json_data+="\"$key\":$value"
|
|
953
954
|
else
|
|
954
955
|
# Escape backslashes, quotes, and special chars in value
|
|
@@ -1364,6 +1365,9 @@ get_phase_names() {
|
|
|
1364
1365
|
|
|
1365
1366
|
# Global tier for current iteration (set by get_rarv_tier)
|
|
1366
1367
|
CURRENT_TIER="development"
|
|
1368
|
+
# Export for provider helper functions (e.g., gemini.sh:provider_get_current_model)
|
|
1369
|
+
LOKI_CURRENT_TIER="$CURRENT_TIER"
|
|
1370
|
+
export LOKI_CURRENT_TIER
|
|
1367
1371
|
|
|
1368
1372
|
# Get the appropriate tier based on RARV cycle step
|
|
1369
1373
|
# Args: iteration_count (defaults to ITERATION_COUNT)
|
|
@@ -2159,6 +2163,17 @@ create_worktree() {
|
|
|
2159
2163
|
return 0
|
|
2160
2164
|
else
|
|
2161
2165
|
log_error "Failed to create worktree: $stream_name"
|
|
2166
|
+
# BUG-PU-001: Clean up partial worktree on creation failure
|
|
2167
|
+
if [ -d "$worktree_path" ]; then
|
|
2168
|
+
git -C "$TARGET_DIR" worktree remove "$worktree_path" --force 2>/dev/null || \
|
|
2169
|
+
rm -rf "$worktree_path" 2>/dev/null || true
|
|
2170
|
+
fi
|
|
2171
|
+
# Clean up any orphaned branch created during the attempt
|
|
2172
|
+
if [ -n "$branch_name" ]; then
|
|
2173
|
+
git -C "$TARGET_DIR" branch -D "$branch_name" 2>/dev/null || true
|
|
2174
|
+
else
|
|
2175
|
+
git -C "$TARGET_DIR" branch -D "parallel-${stream_name}" 2>/dev/null || true
|
|
2176
|
+
fi
|
|
2162
2177
|
return 1
|
|
2163
2178
|
fi
|
|
2164
2179
|
}
|
|
@@ -3026,7 +3041,15 @@ invoke_gemini() {
|
|
|
3026
3041
|
local prompt="$1"
|
|
3027
3042
|
shift
|
|
3028
3043
|
|
|
3029
|
-
|
|
3044
|
+
# BUG-PROV-001/006 fix: Use dynamic model resolution instead of frozen PROVIDER_MODEL.
|
|
3045
|
+
# provider_get_current_model() resolves based on LOKI_CURRENT_TIER at runtime.
|
|
3046
|
+
# Falls back to provider_get_tier_param if available, then to GEMINI_DEFAULT_PRO.
|
|
3047
|
+
local model
|
|
3048
|
+
if type provider_get_current_model &>/dev/null; then
|
|
3049
|
+
model=$(provider_get_current_model)
|
|
3050
|
+
else
|
|
3051
|
+
model="${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}"
|
|
3052
|
+
fi
|
|
3030
3053
|
local fallback="${PROVIDER_MODEL_FALLBACK:-${GEMINI_DEFAULT_FLASH:-gemini-3-flash-preview}}"
|
|
3031
3054
|
|
|
3032
3055
|
# Create temp file for output to preserve streaming while checking for rate limit
|
|
@@ -3057,7 +3080,13 @@ invoke_gemini_capture() {
|
|
|
3057
3080
|
local prompt="$1"
|
|
3058
3081
|
shift
|
|
3059
3082
|
|
|
3060
|
-
|
|
3083
|
+
# BUG-PROV-001/006 fix: Use dynamic model resolution instead of frozen PROVIDER_MODEL
|
|
3084
|
+
local model
|
|
3085
|
+
if type provider_get_current_model &>/dev/null; then
|
|
3086
|
+
model=$(provider_get_current_model)
|
|
3087
|
+
else
|
|
3088
|
+
model="${GEMINI_DEFAULT_PRO:-gemini-3-pro-preview}"
|
|
3089
|
+
fi
|
|
3061
3090
|
local fallback="${PROVIDER_MODEL_FALLBACK:-${GEMINI_DEFAULT_FLASH:-gemini-3-flash-preview}}"
|
|
3062
3091
|
local output
|
|
3063
3092
|
|
|
@@ -3249,15 +3278,21 @@ set_phase() {
|
|
|
3249
3278
|
|
|
3250
3279
|
log_info "Phase changed: $current_phase -> $new_phase"
|
|
3251
3280
|
|
|
3252
|
-
# Update orchestrator state
|
|
3281
|
+
# Update orchestrator state (atomic via temp file + mv)
|
|
3282
|
+
# BUG ARCH-001 fix: prevent state corruption if process is killed mid-write
|
|
3253
3283
|
if [ -f "$orch_file" ]; then
|
|
3254
3284
|
python3 -c "
|
|
3255
|
-
import json, sys
|
|
3256
|
-
|
|
3285
|
+
import json, sys, os, tempfile
|
|
3286
|
+
orch_file = sys.argv[1]
|
|
3287
|
+
new_phase = sys.argv[2]
|
|
3288
|
+
with open(orch_file, 'r') as f:
|
|
3257
3289
|
data = json.load(f)
|
|
3258
|
-
data['currentPhase'] =
|
|
3259
|
-
|
|
3290
|
+
data['currentPhase'] = new_phase
|
|
3291
|
+
orch_dir = os.path.dirname(orch_file)
|
|
3292
|
+
fd, tmp = tempfile.mkstemp(dir=orch_dir, suffix='.json')
|
|
3293
|
+
with os.fdopen(fd, 'w') as f:
|
|
3260
3294
|
json.dump(data, f, indent=2)
|
|
3295
|
+
os.replace(tmp, orch_file)
|
|
3261
3296
|
" "$orch_file" "$new_phase" 2>/dev/null || true
|
|
3262
3297
|
fi
|
|
3263
3298
|
fi
|
|
@@ -3627,8 +3662,10 @@ print(json.dumps(data, indent=2))
|
|
|
3627
3662
|
fi
|
|
3628
3663
|
) 200>"$lockfile"
|
|
3629
3664
|
|
|
3630
|
-
#
|
|
3631
|
-
|
|
3665
|
+
# BUG-ST-014: Atomic current-task.json update via temp file + mv
|
|
3666
|
+
local ct_tmp=".loki/queue/current-task.json.tmp.$$"
|
|
3667
|
+
echo "$task_json" > "$ct_tmp"
|
|
3668
|
+
mv -f "$ct_tmp" .loki/queue/current-task.json
|
|
3632
3669
|
}
|
|
3633
3670
|
|
|
3634
3671
|
# Track iteration completion - move task to completed queue
|
|
@@ -3811,8 +3848,10 @@ except:
|
|
|
3811
3848
|
" 2>/dev/null || true
|
|
3812
3849
|
fi
|
|
3813
3850
|
|
|
3814
|
-
#
|
|
3815
|
-
|
|
3851
|
+
# BUG-ST-014: Atomic current-task.json clear via temp file + mv
|
|
3852
|
+
local ct_tmp=".loki/queue/current-task.json.tmp.$$"
|
|
3853
|
+
echo "{}" > "$ct_tmp"
|
|
3854
|
+
mv -f "$ct_tmp" .loki/queue/current-task.json
|
|
3816
3855
|
|
|
3817
3856
|
# Write-back completed BMAD stories to source artifacts (v6.29.0)
|
|
3818
3857
|
if [ "$exit_code" = "0" ]; then
|
|
@@ -5526,20 +5565,22 @@ enforce_test_coverage() {
|
|
|
5526
5565
|
|
|
5527
5566
|
# JavaScript/TypeScript
|
|
5528
5567
|
if [ -f "${TARGET_DIR:-.}/package.json" ]; then
|
|
5568
|
+
# BUG-EC-014: Wrap test runners with timeout to prevent hanging indefinitely
|
|
5569
|
+
local gate_timeout="${LOKI_GATE_TIMEOUT:-300}" # 5 minutes default
|
|
5529
5570
|
if grep -q '"vitest"' "${TARGET_DIR:-.}/package.json" 2>/dev/null; then
|
|
5530
5571
|
test_runner="vitest"
|
|
5531
5572
|
local output
|
|
5532
|
-
output=$(cd "${TARGET_DIR:-.}" && npx vitest run --reporter=json 2>&1) || test_passed=false
|
|
5573
|
+
output=$(cd "${TARGET_DIR:-.}" && timeout "$gate_timeout" npx vitest run --reporter=json 2>&1) || test_passed=false
|
|
5533
5574
|
details="vitest: $(echo "$output" | tail -3 | tr '\n' ' ')"
|
|
5534
5575
|
elif grep -q '"jest"' "${TARGET_DIR:-.}/package.json" 2>/dev/null; then
|
|
5535
5576
|
test_runner="jest"
|
|
5536
5577
|
local output
|
|
5537
|
-
output=$(cd "${TARGET_DIR:-.}" && npx jest --passWithNoTests --forceExit 2>&1) || test_passed=false
|
|
5578
|
+
output=$(cd "${TARGET_DIR:-.}" && timeout "$gate_timeout" npx jest --passWithNoTests --forceExit 2>&1) || test_passed=false
|
|
5538
5579
|
details="jest: $(echo "$output" | tail -3 | tr '\n' ' ')"
|
|
5539
5580
|
elif grep -q '"mocha"' "${TARGET_DIR:-.}/package.json" 2>/dev/null; then
|
|
5540
5581
|
test_runner="mocha"
|
|
5541
5582
|
local output
|
|
5542
|
-
output=$(cd "${TARGET_DIR:-.}" && npx mocha 2>&1) || test_passed=false
|
|
5583
|
+
output=$(cd "${TARGET_DIR:-.}" && timeout "$gate_timeout" npx mocha 2>&1) || test_passed=false
|
|
5543
5584
|
details="mocha: $(echo "$output" | tail -3 | tr '\n' ' ')"
|
|
5544
5585
|
fi
|
|
5545
5586
|
fi
|
|
@@ -6859,16 +6900,32 @@ except Exception:
|
|
|
6859
6900
|
PYEOF
|
|
6860
6901
|
}
|
|
6861
6902
|
|
|
6862
|
-
# Check provider health:
|
|
6903
|
+
# Check provider health: CLI installed + authentication available
|
|
6863
6904
|
# Returns: 0 if healthy, 1 if unhealthy
|
|
6905
|
+
# BUG-PROV-003 fix: Claude Code supports OAuth sessions in addition to API keys.
|
|
6906
|
+
# Checking only for ANTHROPIC_API_KEY incorrectly marks OAuth users as unhealthy,
|
|
6907
|
+
# causing unnecessary failover to degraded providers. Now also checks for OAuth
|
|
6908
|
+
# session files and `claude auth status` as fallback.
|
|
6864
6909
|
check_provider_health() {
|
|
6865
6910
|
local provider="$1"
|
|
6866
6911
|
|
|
6867
|
-
# Check CLI is installed
|
|
6912
|
+
# Check CLI is installed and authentication is available
|
|
6868
6913
|
case "$provider" in
|
|
6869
6914
|
claude)
|
|
6870
6915
|
command -v claude &>/dev/null || return 1
|
|
6871
|
-
|
|
6916
|
+
# Accept API key OR OAuth session (Claude Code supports both)
|
|
6917
|
+
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
|
|
6918
|
+
return 0
|
|
6919
|
+
fi
|
|
6920
|
+
# Check for OAuth session files (~/.claude/ stores sessions)
|
|
6921
|
+
if [ -d "${HOME}/.claude" ] && [ -f "${HOME}/.claude/.credentials.json" ]; then
|
|
6922
|
+
return 0
|
|
6923
|
+
fi
|
|
6924
|
+
# Last resort: ask the CLI if it has valid auth
|
|
6925
|
+
if claude auth status &>/dev/null 2>&1; then
|
|
6926
|
+
return 0
|
|
6927
|
+
fi
|
|
6928
|
+
return 1
|
|
6872
6929
|
;;
|
|
6873
6930
|
codex)
|
|
6874
6931
|
command -v codex &>/dev/null || return 1
|
|
@@ -6876,7 +6933,15 @@ check_provider_health() {
|
|
|
6876
6933
|
;;
|
|
6877
6934
|
gemini)
|
|
6878
6935
|
command -v gemini &>/dev/null || return 1
|
|
6879
|
-
|
|
6936
|
+
# BUG-PROV-003: Also accept GEMINI_API_KEY and gcloud ADC
|
|
6937
|
+
if [ -n "${GOOGLE_API_KEY:-}" ] || [ -n "${GEMINI_API_KEY:-}" ]; then
|
|
6938
|
+
return 0
|
|
6939
|
+
fi
|
|
6940
|
+
# Check for gcloud Application Default Credentials
|
|
6941
|
+
if [ -f "${HOME}/.config/gcloud/application_default_credentials.json" ]; then
|
|
6942
|
+
return 0
|
|
6943
|
+
fi
|
|
6944
|
+
return 1
|
|
6880
6945
|
;;
|
|
6881
6946
|
cline)
|
|
6882
6947
|
command -v cline &>/dev/null || return 1
|
|
@@ -6944,7 +7009,13 @@ attempt_provider_failover() {
|
|
|
6944
7009
|
update_failover_health "$provider" "healthy"
|
|
6945
7010
|
|
|
6946
7011
|
# Update runtime provider vars
|
|
7012
|
+
# BUG-PROV-008 fix: Update BOTH PROVIDER_NAME and LOKI_PROVIDER.
|
|
7013
|
+
# Without this, subprocesses and the MCP server (which read LOKI_PROVIDER)
|
|
7014
|
+
# continue using the old provider name, causing provider-specific behavior
|
|
7015
|
+
# in child processes to use the wrong config.
|
|
6947
7016
|
PROVIDER_NAME="$provider"
|
|
7017
|
+
LOKI_PROVIDER="$provider"
|
|
7018
|
+
export LOKI_PROVIDER
|
|
6948
7019
|
|
|
6949
7020
|
emit_event_json "provider_failover" \
|
|
6950
7021
|
"from=$current" \
|
|
@@ -6996,7 +7067,10 @@ check_primary_recovery() {
|
|
|
6996
7067
|
update_failover_state "currentProvider" "$primary"
|
|
6997
7068
|
update_failover_health "$primary" "healthy"
|
|
6998
7069
|
|
|
7070
|
+
# BUG-PROV-008 fix: Update BOTH PROVIDER_NAME and LOKI_PROVIDER on recovery
|
|
6999
7071
|
PROVIDER_NAME="$primary"
|
|
7072
|
+
LOKI_PROVIDER="$primary"
|
|
7073
|
+
export LOKI_PROVIDER
|
|
7000
7074
|
|
|
7001
7075
|
emit_event_json "provider_recovery" \
|
|
7002
7076
|
"from=$current" \
|
|
@@ -7935,6 +8009,9 @@ save_state() {
|
|
|
7935
8009
|
local status="$2"
|
|
7936
8010
|
local exit_code="$3"
|
|
7937
8011
|
|
|
8012
|
+
# BUG-ST-013: Ensure .loki directory exists (defensive -- may be called from signal handler)
|
|
8013
|
+
mkdir -p .loki 2>/dev/null || true
|
|
8014
|
+
|
|
7938
8015
|
# BUG-XC-004: Atomic write via temp file + mv
|
|
7939
8016
|
local state_tmp=".loki/autonomy-state.json.tmp.$$"
|
|
7940
8017
|
cat > "$state_tmp" << EOF
|
|
@@ -7954,8 +8031,44 @@ EOF
|
|
|
7954
8031
|
}
|
|
7955
8032
|
|
|
7956
8033
|
load_state() {
|
|
8034
|
+
# BUG-EP-015: Clean up orphaned temp files from kill -9 crashes
|
|
8035
|
+
# These are left behind when the process is killed during atomic writes
|
|
8036
|
+
find .loki/ -maxdepth 1 -name "*.tmp.*" -mmin +5 -delete 2>/dev/null || true
|
|
8037
|
+
find .loki/state/ -name "*.tmp.*" -mmin +5 -delete 2>/dev/null || true
|
|
8038
|
+
|
|
7957
8039
|
if [ -f ".loki/autonomy-state.json" ]; then
|
|
7958
8040
|
if command -v python3 &> /dev/null; then
|
|
8041
|
+
# BUG-ST-006: Validate checkpoint integrity before loading state
|
|
8042
|
+
local state_valid
|
|
8043
|
+
state_valid=$(python3 -c "
|
|
8044
|
+
import json, sys
|
|
8045
|
+
try:
|
|
8046
|
+
with open('.loki/autonomy-state.json') as f:
|
|
8047
|
+
d = json.load(f)
|
|
8048
|
+
# Validate required fields exist and have sane types
|
|
8049
|
+
rc = d.get('retryCount', 0)
|
|
8050
|
+
ic = d.get('iterationCount', 0)
|
|
8051
|
+
status = d.get('status', 'unknown')
|
|
8052
|
+
if not isinstance(rc, (int, float)) or not isinstance(ic, (int, float)):
|
|
8053
|
+
print('invalid')
|
|
8054
|
+
sys.exit(0)
|
|
8055
|
+
if rc < 0 or ic < 0:
|
|
8056
|
+
print('invalid')
|
|
8057
|
+
sys.exit(0)
|
|
8058
|
+
print('valid')
|
|
8059
|
+
except (json.JSONDecodeError, KeyError, TypeError, OSError):
|
|
8060
|
+
print('invalid')
|
|
8061
|
+
" 2>/dev/null || echo "invalid")
|
|
8062
|
+
|
|
8063
|
+
if [ "$state_valid" != "valid" ]; then
|
|
8064
|
+
log_warn "State file corrupted or invalid - starting fresh"
|
|
8065
|
+
RETRY_COUNT=0
|
|
8066
|
+
ITERATION_COUNT=0
|
|
8067
|
+
# Back up corrupted state file for diagnosis
|
|
8068
|
+
mv ".loki/autonomy-state.json" ".loki/autonomy-state.json.corrupt.$(date +%s)" 2>/dev/null || true
|
|
8069
|
+
return
|
|
8070
|
+
fi
|
|
8071
|
+
|
|
7959
8072
|
# Load retry count, iteration count, and status from previous session
|
|
7960
8073
|
local prev_status
|
|
7961
8074
|
prev_status=$(python3 -c "import json; print(json.load(open('.loki/autonomy-state.json')).get('status', 'unknown'))" 2>/dev/null || echo "unknown")
|
|
@@ -9280,16 +9393,9 @@ run_autonomous() {
|
|
|
9280
9393
|
fi
|
|
9281
9394
|
|
|
9282
9395
|
while [ $retry -lt $MAX_RETRIES ]; do
|
|
9283
|
-
#
|
|
9284
|
-
|
|
9285
|
-
|
|
9286
|
-
# Check max iterations
|
|
9287
|
-
if check_max_iterations; then
|
|
9288
|
-
save_state $retry "max_iterations_reached" 0
|
|
9289
|
-
return 0
|
|
9290
|
-
fi
|
|
9291
|
-
|
|
9292
|
-
# Check for human intervention (PAUSE, HUMAN_INPUT.md, STOP)
|
|
9396
|
+
# Check for human intervention BEFORE incrementing iteration count
|
|
9397
|
+
# BUG-ST-010: Moved pause/stop checks before ITERATION_COUNT increment
|
|
9398
|
+
# to prevent spurious count increases when resuming from pause
|
|
9293
9399
|
check_human_intervention
|
|
9294
9400
|
local intervention_result=$?
|
|
9295
9401
|
case $intervention_result in
|
|
@@ -9304,6 +9410,15 @@ run_autonomous() {
|
|
|
9304
9410
|
continue # Will hit PAUSE check on next iteration
|
|
9305
9411
|
fi
|
|
9306
9412
|
|
|
9413
|
+
# Increment iteration count (after pause/stop checks to avoid spurious increments)
|
|
9414
|
+
((ITERATION_COUNT++))
|
|
9415
|
+
|
|
9416
|
+
# Check max iterations
|
|
9417
|
+
if check_max_iterations; then
|
|
9418
|
+
save_state $retry "max_iterations_reached" 0
|
|
9419
|
+
return 0
|
|
9420
|
+
fi
|
|
9421
|
+
|
|
9307
9422
|
# Watchdog: periodic process health check (opt-in via LOKI_WATCHDOG=true)
|
|
9308
9423
|
if [[ "$WATCHDOG_ENABLED" == "true" ]]; then
|
|
9309
9424
|
local now_epoch
|
|
@@ -9375,6 +9490,11 @@ run_autonomous() {
|
|
|
9375
9490
|
|
|
9376
9491
|
# Dynamic tier selection based on RARV cycle phase
|
|
9377
9492
|
CURRENT_TIER=$(get_rarv_tier "$ITERATION_COUNT")
|
|
9493
|
+
# NEW BUG FIX: Export LOKI_CURRENT_TIER so provider helper functions
|
|
9494
|
+
# (e.g., gemini.sh:provider_get_current_model) can resolve the correct model.
|
|
9495
|
+
# Without this, LOKI_CURRENT_TIER is always empty and defaults to "planning".
|
|
9496
|
+
LOKI_CURRENT_TIER="$CURRENT_TIER"
|
|
9497
|
+
export LOKI_CURRENT_TIER
|
|
9378
9498
|
local rarv_phase=$(get_rarv_phase_name "$ITERATION_COUNT")
|
|
9379
9499
|
local tier_param=$(get_provider_tier_param "$CURRENT_TIER")
|
|
9380
9500
|
echo "=== RARV Phase: $rarv_phase, Tier: $CURRENT_TIER ($tier_param) ===" | tee -a "$log_file" "$agent_log"
|
|
@@ -9635,25 +9755,46 @@ if __name__ == "__main__":
|
|
|
9635
9755
|
gemini)
|
|
9636
9756
|
# Gemini: Degraded mode - no stream-json, no agent tracking
|
|
9637
9757
|
# Uses invoke_gemini helper for rate limit fallback to flash model
|
|
9638
|
-
|
|
9758
|
+
# BUG-PROV-001 fix: Use tier_param (resolved model) instead of frozen PROVIDER_MODEL
|
|
9759
|
+
# tier_param is computed above via get_provider_tier_param() -> resolve_model_for_tier()
|
|
9760
|
+
# which returns the correct model name for the current RARV tier
|
|
9761
|
+
local model="$tier_param"
|
|
9639
9762
|
local fallback="${PROVIDER_MODEL_FALLBACK:-${GEMINI_DEFAULT_FLASH:-gemini-3-flash-preview}}"
|
|
9640
|
-
echo "[loki] Gemini model: $model (fallback: $fallback), tier: $
|
|
9641
|
-
echo "[loki] Gemini model: $model (fallback: $fallback), tier: $
|
|
9763
|
+
echo "[loki] Gemini model: $model (fallback: $fallback), tier: $CURRENT_TIER" >> "$log_file"
|
|
9764
|
+
echo "[loki] Gemini model: $model (fallback: $fallback), tier: $CURRENT_TIER" >> "$agent_log"
|
|
9765
|
+
|
|
9766
|
+
# BUG-PROV-003: Resolve API key (supports GEMINI_API_KEY alias and ADC)
|
|
9767
|
+
if type _gemini_resolve_api_key &>/dev/null; then
|
|
9768
|
+
_gemini_resolve_api_key || true
|
|
9769
|
+
fi
|
|
9642
9770
|
|
|
9643
|
-
# Try primary model, fallback on rate limit
|
|
9644
|
-
local tmp_output
|
|
9771
|
+
# Try primary model, fallback on rate limit or auth error
|
|
9772
|
+
local tmp_output tmp_stderr
|
|
9645
9773
|
tmp_output=$(mktemp)
|
|
9774
|
+
tmp_stderr=$(mktemp)
|
|
9646
9775
|
# BUG-RUN-011/RUN-013: Use PIPESTATUS[0] for primary invocation too
|
|
9647
|
-
gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2
|
|
9776
|
+
gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>"$tmp_stderr" | tee "$tmp_output" | tee -a "$log_file" "$agent_log" "$iter_output"
|
|
9648
9777
|
exit_code=${PIPESTATUS[0]}
|
|
9649
9778
|
|
|
9650
|
-
|
|
9779
|
+
# BUG-PROV-003: Handle auth errors with API key rotation
|
|
9780
|
+
if [[ $exit_code -ne 0 ]] && grep -qiE "(401|403|unauthorized|forbidden|invalid.?api.?key|permission.?denied)" "$tmp_stderr" 2>/dev/null; then
|
|
9781
|
+
if type _gemini_rotate_api_key &>/dev/null && _gemini_rotate_api_key; then
|
|
9782
|
+
log_warn "Auth error on Gemini, rotated to next API key"
|
|
9783
|
+
rm -f "$tmp_output" "$tmp_stderr"
|
|
9784
|
+
tmp_output=$(mktemp)
|
|
9785
|
+
tmp_stderr=$(mktemp)
|
|
9786
|
+
gemini --approval-mode=yolo --model "$model" "$prompt" < /dev/null 2>"$tmp_stderr" | tee "$tmp_output" | tee -a "$log_file" "$agent_log" "$iter_output"
|
|
9787
|
+
exit_code=${PIPESTATUS[0]}
|
|
9788
|
+
fi
|
|
9789
|
+
fi
|
|
9790
|
+
|
|
9791
|
+
if [[ $exit_code -ne 0 ]] && grep -qiE "(rate.?limit|429|quota|resource.?exhausted)" "$tmp_stderr" "$tmp_output" 2>/dev/null; then
|
|
9651
9792
|
log_warn "Rate limit hit on $model, falling back to $fallback"
|
|
9652
9793
|
echo "[loki] Fallback to $fallback due to rate limit" >> "$log_file"
|
|
9653
9794
|
gemini --approval-mode=yolo --model "$fallback" "$prompt" < /dev/null 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"
|
|
9654
9795
|
exit_code=${PIPESTATUS[0]}
|
|
9655
9796
|
fi
|
|
9656
|
-
rm -f "$tmp_output"
|
|
9797
|
+
rm -f "$tmp_output" "$tmp_stderr"
|
|
9657
9798
|
;;
|
|
9658
9799
|
|
|
9659
9800
|
cline)
|
|
@@ -9688,6 +9829,13 @@ if __name__ == "__main__":
|
|
|
9688
9829
|
local duration=$((end_time - start_time))
|
|
9689
9830
|
|
|
9690
9831
|
log_info "${PROVIDER_DISPLAY_NAME:-Claude} exited with code $exit_code after ${duration}s"
|
|
9832
|
+
|
|
9833
|
+
# BUG-EC-013: Detect empty provider output (0 bytes = no work done)
|
|
9834
|
+
if [ -f "$iter_output" ] && [ ! -s "$iter_output" ] && [ $exit_code -eq 0 ]; then
|
|
9835
|
+
log_warn "Provider returned empty output (0 bytes) despite exit code 0 -- treating as error"
|
|
9836
|
+
exit_code=1
|
|
9837
|
+
fi
|
|
9838
|
+
|
|
9691
9839
|
save_state $retry "exited" $exit_code
|
|
9692
9840
|
|
|
9693
9841
|
# Auto-track iteration completion (for dashboard task queue)
|
|
@@ -9775,6 +9923,16 @@ if __name__ == "__main__":
|
|
|
9775
9923
|
log_warn "Static analysis FAILED ($sa_count consecutive) - findings injected into next iteration"
|
|
9776
9924
|
fi
|
|
9777
9925
|
fi
|
|
9926
|
+
# BUG-ST-002: Check pause signal between quality gates
|
|
9927
|
+
if [ -f "${TARGET_DIR:-.}/.loki/PAUSE" ] || [ -f "${TARGET_DIR:-.}/.loki/STOP" ]; then
|
|
9928
|
+
log_warn "Pause/stop signal detected between quality gates - deferring remaining gates"
|
|
9929
|
+
# Store partial gate failures before breaking out
|
|
9930
|
+
if [ -n "$gate_failures" ]; then
|
|
9931
|
+
echo "$gate_failures" > "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt"
|
|
9932
|
+
fi
|
|
9933
|
+
# Let the main loop handle the pause/stop on next iteration
|
|
9934
|
+
continue
|
|
9935
|
+
fi
|
|
9778
9936
|
# Test coverage gate
|
|
9779
9937
|
if [ "${PHASE_UNIT_TESTS:-true}" = "true" ]; then
|
|
9780
9938
|
log_info "Quality gate: test coverage..."
|
|
@@ -9787,6 +9945,14 @@ if __name__ == "__main__":
|
|
|
9787
9945
|
log_warn "Test coverage gate FAILED ($tc_count consecutive) - must pass next iteration"
|
|
9788
9946
|
fi
|
|
9789
9947
|
fi
|
|
9948
|
+
# BUG-ST-002: Check pause signal between quality gates (after test coverage)
|
|
9949
|
+
if [ -f "${TARGET_DIR:-.}/.loki/PAUSE" ] || [ -f "${TARGET_DIR:-.}/.loki/STOP" ]; then
|
|
9950
|
+
log_warn "Pause/stop signal detected between quality gates - deferring remaining gates"
|
|
9951
|
+
if [ -n "$gate_failures" ]; then
|
|
9952
|
+
echo "$gate_failures" > "${TARGET_DIR:-.}/.loki/quality/gate-failures.txt"
|
|
9953
|
+
fi
|
|
9954
|
+
continue
|
|
9955
|
+
fi
|
|
9790
9956
|
# Code review gate (upgraded from advisory, with escalation)
|
|
9791
9957
|
if [ "$PHASE_CODE_REVIEW" = "true" ] && [ "$ITERATION_COUNT" -gt 0 ]; then
|
|
9792
9958
|
log_info "Quality gate: code review..."
|
|
@@ -9850,6 +10016,8 @@ if __name__ == "__main__":
|
|
|
9850
10016
|
log_info "Perpetual mode: Ignoring exit, continuing immediately..."
|
|
9851
10017
|
# BUG-RUN-010: Reset retry counter on success (only count failures)
|
|
9852
10018
|
retry=0
|
|
10019
|
+
# BUG-NEW-003/E2E-005: Clean up per-iteration output before continuing
|
|
10020
|
+
rm -f "$iter_output" 2>/dev/null
|
|
9853
10021
|
continue # Immediately start next iteration, no wait
|
|
9854
10022
|
fi
|
|
9855
10023
|
|
|
@@ -9896,6 +10064,8 @@ if __name__ == "__main__":
|
|
|
9896
10064
|
log_step "Starting next iteration..."
|
|
9897
10065
|
# BUG-RUN-010: Reset retry counter on success (only count failures)
|
|
9898
10066
|
retry=0
|
|
10067
|
+
# BUG-NEW-003/E2E-005: Clean up per-iteration output before continuing
|
|
10068
|
+
rm -f "$iter_output" 2>/dev/null
|
|
9899
10069
|
continue # Immediately start next iteration, no exponential backoff
|
|
9900
10070
|
fi
|
|
9901
10071
|
|
|
@@ -10109,9 +10279,18 @@ check_human_intervention() {
|
|
|
10109
10279
|
|
|
10110
10280
|
# Handle pause state - wait for resume
|
|
10111
10281
|
handle_pause() {
|
|
10282
|
+
# BUG-ST-007: Guard against concurrent pause handler execution
|
|
10283
|
+
if [ "${_PAUSE_IN_PROGRESS:-0}" -eq 1 ]; then
|
|
10284
|
+
return 0
|
|
10285
|
+
fi
|
|
10286
|
+
_PAUSE_IN_PROGRESS=1
|
|
10287
|
+
|
|
10112
10288
|
PAUSED=true
|
|
10113
10289
|
local loki_dir="${TARGET_DIR:-.}/.loki"
|
|
10114
10290
|
|
|
10291
|
+
# Save state before pausing so it persists across potential crashes
|
|
10292
|
+
save_state ${RETRY_COUNT:-0} "paused" 0
|
|
10293
|
+
|
|
10115
10294
|
log_header "Execution Paused"
|
|
10116
10295
|
echo ""
|
|
10117
10296
|
log_info "To resume: Remove .loki/PAUSE or press Enter"
|
|
@@ -10141,6 +10320,7 @@ EOF
|
|
|
10141
10320
|
if [ -f "$loki_dir/STOP" ]; then
|
|
10142
10321
|
rm -f "$loki_dir/STOP" "$loki_dir/PAUSED.md"
|
|
10143
10322
|
PAUSED=false
|
|
10323
|
+
_PAUSE_IN_PROGRESS=0
|
|
10144
10324
|
return 1
|
|
10145
10325
|
fi
|
|
10146
10326
|
|
|
@@ -10163,6 +10343,7 @@ EOF
|
|
|
10163
10343
|
rm -f "$loki_dir/PAUSED.md"
|
|
10164
10344
|
log_info "Resuming execution..."
|
|
10165
10345
|
PAUSED=false
|
|
10346
|
+
_PAUSE_IN_PROGRESS=0
|
|
10166
10347
|
return 0
|
|
10167
10348
|
}
|
|
10168
10349
|
|