loki-mode 5.37.0 → 5.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/loki +133 -0
- package/autonomy/run.sh +156 -0
- package/autonomy/sandbox.sh +2 -2
- package/dashboard/__init__.py +1 -1
- package/dashboard/audit.py +154 -0
- package/dashboard/auth.py +84 -5
- package/dashboard/server.py +220 -4
- package/docs/INSTALLATION.md +1 -1
- package/package.json +1 -1
- package/providers/gemini.sh +4 -2
package/SKILL.md
CHANGED
|
@@ -3,7 +3,7 @@ name: loki-mode
|
|
|
3
3
|
description: Multi-agent autonomous startup system. Triggers on "Loki Mode". Takes PRD to deployed product with zero human intervention. Requires --dangerously-skip-permissions flag.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# Loki Mode v5.37.0
|
|
6
|
+
# Loki Mode v5.38.0
|
|
7
7
|
|
|
8
8
|
**You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
|
|
9
9
|
|
|
@@ -260,4 +260,4 @@ The following features are documented in skill modules but not yet fully automat
|
|
|
260
260
|
| Quality gates 3-reviewer system | Implemented (v5.35.0) | 5 specialist reviewers in `skills/quality-gates.md`; execution in run.sh |
|
|
261
261
|
| Benchmarks (HumanEval, SWE-bench) | Infrastructure only | Runner scripts and datasets exist in `benchmarks/`; no published results |
|
|
262
262
|
|
|
263
|
-
**v5.
|
|
263
|
+
**v5.38.0 | feat: branch protection, prometheus metrics, log integrity, OpenClaw bridge | ~260 lines core**
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
5.37.0
|
|
1
|
+
5.38.0
|
package/autonomy/loki
CHANGED
|
@@ -321,6 +321,7 @@ show_help() {
|
|
|
321
321
|
echo " memory [cmd] Cross-project learnings (list|show|search|stats)"
|
|
322
322
|
echo " compound [cmd] Knowledge compounding (list|show|search|run|stats)"
|
|
323
323
|
echo " council [cmd] Completion council (status|verdicts|convergence|force-review|report)"
|
|
324
|
+
echo " metrics Prometheus/OpenMetrics metrics from dashboard"
|
|
324
325
|
echo " dogfood Show self-development statistics"
|
|
325
326
|
echo " secrets [cmd] API key status and validation (status|validate)"
|
|
326
327
|
echo " reset [target] Reset session state (all|retries|failed)"
|
|
@@ -4309,6 +4310,12 @@ main() {
|
|
|
4309
4310
|
watchdog)
|
|
4310
4311
|
cmd_watchdog "$@"
|
|
4311
4312
|
;;
|
|
4313
|
+
audit)
|
|
4314
|
+
cmd_audit "$@"
|
|
4315
|
+
;;
|
|
4316
|
+
metrics)
|
|
4317
|
+
cmd_metrics "$@"
|
|
4318
|
+
;;
|
|
4312
4319
|
version|--version|-v)
|
|
4313
4320
|
cmd_version
|
|
4314
4321
|
;;
|
|
@@ -4326,6 +4333,87 @@ main() {
|
|
|
4326
4333
|
esac
|
|
4327
4334
|
}
|
|
4328
4335
|
|
|
4336
|
+
# Agent action audit log
|
|
4337
|
+
cmd_audit() {
|
|
4338
|
+
local subcommand="${1:-help}"
|
|
4339
|
+
local audit_file="$LOKI_DIR/logs/agent-audit.jsonl"
|
|
4340
|
+
|
|
4341
|
+
case "$subcommand" in
|
|
4342
|
+
log)
|
|
4343
|
+
if [ ! -f "$audit_file" ]; then
|
|
4344
|
+
echo -e "${YELLOW}No audit log found at $audit_file${NC}"
|
|
4345
|
+
echo "Agent action auditing records entries during loki sessions."
|
|
4346
|
+
exit 0
|
|
4347
|
+
fi
|
|
4348
|
+
local lines="${2:-50}"
|
|
4349
|
+
echo -e "${BOLD}Agent Audit Log${NC} (last $lines entries)"
|
|
4350
|
+
echo "---"
|
|
4351
|
+
tail -n "$lines" "$audit_file" | while IFS= read -r line; do
|
|
4352
|
+
if command -v python3 &>/dev/null; then
|
|
4353
|
+
python3 -c "
|
|
4354
|
+
import json, sys
|
|
4355
|
+
try:
|
|
4356
|
+
e = json.loads(sys.argv[1])
|
|
4357
|
+
ts = e.get('timestamp', '?')
|
|
4358
|
+
act = e.get('action', '?')
|
|
4359
|
+
desc = e.get('description', '')
|
|
4360
|
+
it = e.get('iteration', 0)
|
|
4361
|
+
print(f' [{ts}] [{act}] iter={it} {desc}')
|
|
4362
|
+
except: print(f' {sys.argv[1]}')
|
|
4363
|
+
" "$line" 2>/dev/null || echo " $line"
|
|
4364
|
+
else
|
|
4365
|
+
echo " $line"
|
|
4366
|
+
fi
|
|
4367
|
+
done
|
|
4368
|
+
;;
|
|
4369
|
+
count)
|
|
4370
|
+
if [ ! -f "$audit_file" ]; then
|
|
4371
|
+
echo -e "${YELLOW}No audit log found${NC}"
|
|
4372
|
+
exit 0
|
|
4373
|
+
fi
|
|
4374
|
+
echo -e "${BOLD}Agent Action Counts${NC}"
|
|
4375
|
+
echo "---"
|
|
4376
|
+
if command -v python3 &>/dev/null; then
|
|
4377
|
+
python3 -c "
|
|
4378
|
+
import json, sys
|
|
4379
|
+
from collections import Counter
|
|
4380
|
+
counts = Counter()
|
|
4381
|
+
for line in open(sys.argv[1]):
|
|
4382
|
+
try:
|
|
4383
|
+
e = json.loads(line)
|
|
4384
|
+
counts[e.get('action', 'unknown')] += 1
|
|
4385
|
+
except: pass
|
|
4386
|
+
for action, count in sorted(counts.items(), key=lambda x: -x[1]):
|
|
4387
|
+
print(f' {action:25s} {count}')
|
|
4388
|
+
print(f' {\"---\":25s} ---')
|
|
4389
|
+
print(f' {\"TOTAL\":25s} {sum(counts.values())}')
|
|
4390
|
+
" "$audit_file" 2>/dev/null
|
|
4391
|
+
else
|
|
4392
|
+
echo " python3 required for count summary"
|
|
4393
|
+
fi
|
|
4394
|
+
;;
|
|
4395
|
+
--help|-h|help)
|
|
4396
|
+
echo -e "${BOLD}loki audit${NC} - Agent action audit log"
|
|
4397
|
+
echo ""
|
|
4398
|
+
echo "Usage: loki audit <subcommand>"
|
|
4399
|
+
echo ""
|
|
4400
|
+
echo "Subcommands:"
|
|
4401
|
+
echo " log [N] Show last N audit log entries (default: 50)"
|
|
4402
|
+
echo " count Count actions by type"
|
|
4403
|
+
echo " help Show this help"
|
|
4404
|
+
echo ""
|
|
4405
|
+
echo "The agent audit log records actions taken during Loki sessions,"
|
|
4406
|
+
echo "including CLI invocations, git commits, and session lifecycle events."
|
|
4407
|
+
echo "Log file: $audit_file"
|
|
4408
|
+
;;
|
|
4409
|
+
*)
|
|
4410
|
+
echo -e "${RED}Unknown audit subcommand: $subcommand${NC}"
|
|
4411
|
+
echo "Run 'loki audit help' for usage."
|
|
4412
|
+
exit 1
|
|
4413
|
+
;;
|
|
4414
|
+
esac
|
|
4415
|
+
}
|
|
4416
|
+
|
|
4329
4417
|
# Reset session state
|
|
4330
4418
|
cmd_reset() {
|
|
4331
4419
|
require_jq
|
|
@@ -6962,6 +7050,51 @@ for line in sys.stdin:
|
|
|
6962
7050
|
esac
|
|
6963
7051
|
}
|
|
6964
7052
|
|
|
7053
|
+
# Fetch and display Prometheus metrics from dashboard
|
|
7054
|
+
cmd_metrics() {
|
|
7055
|
+
local subcommand="${1:-}"
|
|
7056
|
+
local port="${LOKI_DASHBOARD_PORT:-57374}"
|
|
7057
|
+
local host="127.0.0.1"
|
|
7058
|
+
|
|
7059
|
+
case "$subcommand" in
|
|
7060
|
+
help|--help|-h)
|
|
7061
|
+
echo -e "${BOLD}loki metrics${NC} - Prometheus/OpenMetrics metrics"
|
|
7062
|
+
echo ""
|
|
7063
|
+
echo "Usage: loki metrics [options]"
|
|
7064
|
+
echo ""
|
|
7065
|
+
echo "Fetches metrics from the dashboard API in Prometheus/OpenMetrics format."
|
|
7066
|
+
echo "The dashboard must be running (loki dashboard start or loki serve)."
|
|
7067
|
+
echo ""
|
|
7068
|
+
echo "Options:"
|
|
7069
|
+
echo " help Show this help"
|
|
7070
|
+
echo ""
|
|
7071
|
+
echo "Environment:"
|
|
7072
|
+
echo " LOKI_DASHBOARD_PORT Dashboard port (default: 57374)"
|
|
7073
|
+
echo ""
|
|
7074
|
+
echo "Examples:"
|
|
7075
|
+
echo " loki metrics # Display all metrics"
|
|
7076
|
+
echo " loki metrics | grep loki_cost_usd # Filter specific metric"
|
|
7077
|
+
;;
|
|
7078
|
+
"")
|
|
7079
|
+
# Fetch metrics from dashboard
|
|
7080
|
+
local url="http://${host}:${port}/metrics"
|
|
7081
|
+
local response
|
|
7082
|
+
response=$(curl -sf "$url" 2>/dev/null)
|
|
7083
|
+
if [ $? -ne 0 ]; then
|
|
7084
|
+
echo -e "${RED}Error: Could not connect to dashboard at ${url}${NC}"
|
|
7085
|
+
echo "Make sure the dashboard is running: loki serve"
|
|
7086
|
+
exit 1
|
|
7087
|
+
fi
|
|
7088
|
+
echo "$response"
|
|
7089
|
+
;;
|
|
7090
|
+
*)
|
|
7091
|
+
echo -e "${RED}Unknown metrics command: $subcommand${NC}"
|
|
7092
|
+
echo "Run 'loki metrics help' for usage."
|
|
7093
|
+
exit 1
|
|
7094
|
+
;;
|
|
7095
|
+
esac
|
|
7096
|
+
}
|
|
7097
|
+
|
|
6965
7098
|
# Output shell completion scripts
|
|
6966
7099
|
cmd_completions() {
|
|
6967
7100
|
local shell="${1:-bash}"
|
package/autonomy/run.sh
CHANGED
|
@@ -135,6 +135,11 @@
|
|
|
135
135
|
# LOKI_PROMPT_INJECTION - Enable HUMAN_INPUT.md processing (default: false)
|
|
136
136
|
# Set to "true" only in trusted environments
|
|
137
137
|
#
|
|
138
|
+
# Branch Protection (agent isolation):
|
|
139
|
+
# LOKI_BRANCH_PROTECTION - Create feature branch for agent changes (default: false)
|
|
140
|
+
# Agent works on loki/session-<timestamp>-<pid> branch
|
|
141
|
+
# Creates PR on session end if gh CLI is available
|
|
142
|
+
#
|
|
138
143
|
# Process Supervision (opt-in):
|
|
139
144
|
# LOKI_WATCHDOG - Enable process health monitoring (default: false)
|
|
140
145
|
# LOKI_WATCHDOG_INTERVAL - Check interval in seconds (default: 30)
|
|
@@ -1800,6 +1805,7 @@ merge_feature() {
|
|
|
1800
1805
|
if resolve_conflicts_with_ai "$feature"; then
|
|
1801
1806
|
# AI resolved conflicts, commit the merge
|
|
1802
1807
|
git commit -m "feat: Merge $feature (AI-resolved conflicts)"
|
|
1808
|
+
audit_agent_action "git_commit" "Committed changes" "merge=$feature,resolution=ai"
|
|
1803
1809
|
log_info "Merged with AI conflict resolution: $feature"
|
|
1804
1810
|
else
|
|
1805
1811
|
# AI resolution failed, abort merge
|
|
@@ -3403,6 +3409,145 @@ EOF
|
|
|
3403
3409
|
echo "$log_entry" >> "$audit_file"
|
|
3404
3410
|
}
|
|
3405
3411
|
|
|
3412
|
+
#===============================================================================
|
|
3413
|
+
# Branch Protection for Agent Changes
|
|
3414
|
+
#===============================================================================
|
|
3415
|
+
|
|
3416
|
+
setup_agent_branch() {
|
|
3417
|
+
# Create an isolated feature branch for agent changes.
|
|
3418
|
+
# This prevents agents from committing directly to the main branch.
|
|
3419
|
+
# Controlled by LOKI_BRANCH_PROTECTION env var (default: false).
|
|
3420
|
+
local branch_protection="${LOKI_BRANCH_PROTECTION:-false}"
|
|
3421
|
+
|
|
3422
|
+
if [ "$branch_protection" != "true" ]; then
|
|
3423
|
+
log_info "Branch protection disabled (LOKI_BRANCH_PROTECTION=${branch_protection})"
|
|
3424
|
+
return 0
|
|
3425
|
+
fi
|
|
3426
|
+
|
|
3427
|
+
# Ensure we are inside a git repository
|
|
3428
|
+
if ! git rev-parse --is-inside-work-tree &>/dev/null; then
|
|
3429
|
+
log_warn "Not a git repository - skipping branch protection"
|
|
3430
|
+
return 0
|
|
3431
|
+
fi
|
|
3432
|
+
|
|
3433
|
+
local timestamp
|
|
3434
|
+
timestamp=$(date +%s)
|
|
3435
|
+
local branch_name="loki/session-${timestamp}-$$"
|
|
3436
|
+
|
|
3437
|
+
log_info "Branch protection enabled - creating agent branch: $branch_name"
|
|
3438
|
+
|
|
3439
|
+
# Create and checkout the feature branch
|
|
3440
|
+
if ! git checkout -b "$branch_name" 2>/dev/null; then
|
|
3441
|
+
log_error "Failed to create agent branch: $branch_name"
|
|
3442
|
+
return 1
|
|
3443
|
+
fi
|
|
3444
|
+
|
|
3445
|
+
# Store the branch name for later use (PR creation, cleanup)
|
|
3446
|
+
mkdir -p .loki/state
|
|
3447
|
+
echo "$branch_name" > .loki/state/agent-branch.txt
|
|
3448
|
+
|
|
3449
|
+
log_info "Agent branch created: $branch_name"
|
|
3450
|
+
audit_log "BRANCH_PROTECTION" "branch=$branch_name"
|
|
3451
|
+
echo "$branch_name"
|
|
3452
|
+
}
|
|
3453
|
+
|
|
3454
|
+
create_session_pr() {
|
|
3455
|
+
# Push the agent branch and create a PR if gh CLI is available.
|
|
3456
|
+
# Called during session cleanup to submit agent changes for review.
|
|
3457
|
+
local branch_file=".loki/state/agent-branch.txt"
|
|
3458
|
+
|
|
3459
|
+
if [ ! -f "$branch_file" ]; then
|
|
3460
|
+
# No agent branch was created (branch protection was off)
|
|
3461
|
+
return 0
|
|
3462
|
+
fi
|
|
3463
|
+
|
|
3464
|
+
local branch_name
|
|
3465
|
+
branch_name=$(cat "$branch_file" 2>/dev/null)
|
|
3466
|
+
|
|
3467
|
+
if [ -z "$branch_name" ]; then
|
|
3468
|
+
return 0
|
|
3469
|
+
fi
|
|
3470
|
+
|
|
3471
|
+
log_info "Pushing agent branch: $branch_name"
|
|
3472
|
+
|
|
3473
|
+
# Check if there are any commits on this branch beyond the base
|
|
3474
|
+
local commit_count
|
|
3475
|
+
commit_count=$(git rev-list --count HEAD ^"$(git merge-base HEAD main 2>/dev/null || echo HEAD)" 2>/dev/null || echo "0")
|
|
3476
|
+
|
|
3477
|
+
if [ "$commit_count" = "0" ]; then
|
|
3478
|
+
log_info "No commits on agent branch - skipping PR creation"
|
|
3479
|
+
return 0
|
|
3480
|
+
fi
|
|
3481
|
+
|
|
3482
|
+
# Push the branch
|
|
3483
|
+
if ! git push -u origin "$branch_name" 2>/dev/null; then
|
|
3484
|
+
log_warn "Failed to push agent branch: $branch_name"
|
|
3485
|
+
return 1
|
|
3486
|
+
fi
|
|
3487
|
+
|
|
3488
|
+
# Create PR if gh CLI is available
|
|
3489
|
+
if command -v gh &>/dev/null; then
|
|
3490
|
+
local pr_url
|
|
3491
|
+
pr_url=$(gh pr create \
|
|
3492
|
+
--title "Loki Mode: Agent session changes ($branch_name)" \
|
|
3493
|
+
--body "Automated changes from Loki Mode agent session.
|
|
3494
|
+
|
|
3495
|
+
Branch: \`$branch_name\`
|
|
3496
|
+
Session PID: $$
|
|
3497
|
+
Created: $(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
|
3498
|
+
--head "$branch_name" 2>/dev/null) || true
|
|
3499
|
+
|
|
3500
|
+
if [ -n "$pr_url" ]; then
|
|
3501
|
+
log_info "PR created: $pr_url"
|
|
3502
|
+
audit_log "PR_CREATED" "branch=$branch_name,url=$pr_url"
|
|
3503
|
+
else
|
|
3504
|
+
log_warn "Failed to create PR - branch pushed to: $branch_name"
|
|
3505
|
+
fi
|
|
3506
|
+
else
|
|
3507
|
+
log_info "gh CLI not available - branch pushed to: $branch_name"
|
|
3508
|
+
log_info "Create a PR manually for branch: $branch_name"
|
|
3509
|
+
fi
|
|
3510
|
+
}
|
|
3511
|
+
|
|
3512
|
+
#===============================================================================
|
|
3513
|
+
# Agent Action Auditing
|
|
3514
|
+
#===============================================================================
|
|
3515
|
+
|
|
3516
|
+
audit_agent_action() {
|
|
3517
|
+
# Record agent actions to a JSONL audit trail.
|
|
3518
|
+
# Fire-and-forget: errors are silently ignored to avoid blocking execution.
|
|
3519
|
+
# Args: action_type, description, [details]
|
|
3520
|
+
local action_type="${1:-unknown}"
|
|
3521
|
+
local description="${2:-}"
|
|
3522
|
+
local details="${3:-}"
|
|
3523
|
+
local audit_file=".loki/logs/agent-audit.jsonl"
|
|
3524
|
+
|
|
3525
|
+
(
|
|
3526
|
+
mkdir -p .loki/logs 2>/dev/null
|
|
3527
|
+
|
|
3528
|
+
# Requires python3 for JSON formatting; skip silently if unavailable
|
|
3529
|
+
command -v python3 &>/dev/null || exit 0
|
|
3530
|
+
|
|
3531
|
+
local timestamp
|
|
3532
|
+
timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
|
|
3533
|
+
local iter="${ITERATION_COUNT:-0}"
|
|
3534
|
+
local pid="$$"
|
|
3535
|
+
|
|
3536
|
+
python3 -c "
|
|
3537
|
+
import json, sys
|
|
3538
|
+
entry = {
|
|
3539
|
+
'timestamp': sys.argv[1],
|
|
3540
|
+
'action': sys.argv[2],
|
|
3541
|
+
'description': sys.argv[3],
|
|
3542
|
+
'details': sys.argv[4],
|
|
3543
|
+
'iteration': int(sys.argv[5]),
|
|
3544
|
+
'pid': int(sys.argv[6])
|
|
3545
|
+
}
|
|
3546
|
+
print(json.dumps(entry))
|
|
3547
|
+
" "$timestamp" "$action_type" "$description" "$details" "$iter" "$pid" >> "$audit_file" 2>/dev/null
|
|
3548
|
+
) &
|
|
3549
|
+
}
|
|
3550
|
+
|
|
3406
3551
|
check_staged_autonomy() {
|
|
3407
3552
|
# In staged autonomy mode, write plan and wait for approval
|
|
3408
3553
|
local plan_file="$1"
|
|
@@ -5870,6 +6015,9 @@ run_autonomous() {
|
|
|
5870
6015
|
log_info "RARV Phase: $rarv_phase -> Tier: $CURRENT_TIER ($tier_param)"
|
|
5871
6016
|
|
|
5872
6017
|
set +e
|
|
6018
|
+
# Audit: record CLI invocation
|
|
6019
|
+
audit_agent_action "cli_invoke" "Starting iteration $ITERATION_COUNT" "provider=${PROVIDER_NAME:-claude},tier=$CURRENT_TIER"
|
|
6020
|
+
|
|
5873
6021
|
# Provider-specific invocation with dynamic tier selection
|
|
5874
6022
|
case "${PROVIDER_NAME:-claude}" in
|
|
5875
6023
|
claude)
|
|
@@ -6763,8 +6911,12 @@ main() {
|
|
|
6763
6911
|
load_solutions_context "general development"
|
|
6764
6912
|
fi
|
|
6765
6913
|
|
|
6914
|
+
# Setup agent branch protection (isolates agent changes to a feature branch)
|
|
6915
|
+
setup_agent_branch
|
|
6916
|
+
|
|
6766
6917
|
# Log session start for audit
|
|
6767
6918
|
audit_log "SESSION_START" "prd=$PRD_PATH,dashboard=$ENABLE_DASHBOARD,staged_autonomy=$STAGED_AUTONOMY,parallel=$PARALLEL_MODE"
|
|
6919
|
+
audit_agent_action "session_start" "Session started" "prd=$PRD_PATH,provider=${PROVIDER_NAME:-claude}"
|
|
6768
6920
|
|
|
6769
6921
|
# Emit session start event for dashboard
|
|
6770
6922
|
emit_event_json "session_start" \
|
|
@@ -6859,6 +7011,10 @@ main() {
|
|
|
6859
7011
|
--context "{\"provider\":\"${PROVIDER_NAME:-claude}\",\"iterations\":$ITERATION_COUNT,\"exit_code\":$result}"
|
|
6860
7012
|
fi
|
|
6861
7013
|
|
|
7014
|
+
# Create PR from agent branch if branch protection was enabled
|
|
7015
|
+
create_session_pr
|
|
7016
|
+
audit_agent_action "session_stop" "Session ended" "result=$result,iterations=$ITERATION_COUNT"
|
|
7017
|
+
|
|
6862
7018
|
# Cleanup
|
|
6863
7019
|
stop_dashboard
|
|
6864
7020
|
stop_status_monitor
|
package/autonomy/sandbox.sh
CHANGED
|
@@ -820,8 +820,8 @@ start_sandbox() {
|
|
|
820
820
|
"--security-opt=no-new-privileges:true"
|
|
821
821
|
"--cap-drop=ALL"
|
|
822
822
|
"--cap-add=CHOWN"
|
|
823
|
-
|
|
824
|
-
|
|
823
|
+
# SETUID/SETGID intentionally omitted: container runs as non-root (UID 1000)
|
|
824
|
+
# and should not be able to change UID/GID, which would undermine isolation
|
|
825
825
|
|
|
826
826
|
# Network
|
|
827
827
|
"--network=$SANDBOX_NETWORK"
|
package/dashboard/__init__.py
CHANGED
package/dashboard/audit.py
CHANGED
|
@@ -5,10 +5,20 @@ Enabled by default. Disable with LOKI_AUDIT_DISABLED=true environment variable.
|
|
|
5
5
|
Legacy env var LOKI_ENTERPRISE_AUDIT=true always enables audit (backward compat).
|
|
6
6
|
|
|
7
7
|
Audit logs: ~/.loki/dashboard/audit/
|
|
8
|
+
|
|
9
|
+
Syslog forwarding (optional):
|
|
10
|
+
Set LOKI_AUDIT_SYSLOG_HOST to enable forwarding to a centralized syslog server.
|
|
11
|
+
LOKI_AUDIT_SYSLOG_PORT defaults to 514.
|
|
12
|
+
LOKI_AUDIT_SYSLOG_PROTO defaults to "udp" (also supports "tcp").
|
|
8
13
|
"""
|
|
9
14
|
|
|
15
|
+
import hashlib
|
|
10
16
|
import json
|
|
17
|
+
import logging
|
|
18
|
+
import logging.handlers
|
|
11
19
|
import os
|
|
20
|
+
import socket
|
|
21
|
+
import sys
|
|
12
22
|
from datetime import datetime, timezone
|
|
13
23
|
from pathlib import Path
|
|
14
24
|
from typing import Any, Optional
|
|
@@ -25,12 +35,58 @@ AUDIT_DIR = Path.home() / ".loki" / "dashboard" / "audit"
|
|
|
25
35
|
MAX_LOG_SIZE_MB = int(os.environ.get("LOKI_AUDIT_MAX_SIZE_MB", "10"))
|
|
26
36
|
MAX_LOG_FILES = int(os.environ.get("LOKI_AUDIT_MAX_FILES", "10"))
|
|
27
37
|
|
|
38
|
+
# Syslog forwarding (optional, off by default)
|
|
39
|
+
_SYSLOG_HOST = os.environ.get("LOKI_AUDIT_SYSLOG_HOST", "").strip()
|
|
40
|
+
_SYSLOG_PORT = int(os.environ.get("LOKI_AUDIT_SYSLOG_PORT", "514"))
|
|
41
|
+
_SYSLOG_PROTO = os.environ.get("LOKI_AUDIT_SYSLOG_PROTO", "udp").lower().strip()
|
|
42
|
+
|
|
43
|
+
# Integrity chain hashing (tamper-evident logging)
|
|
44
|
+
# Disable with LOKI_AUDIT_NO_INTEGRITY=true
|
|
45
|
+
INTEGRITY_ENABLED = os.environ.get("LOKI_AUDIT_NO_INTEGRITY", "").lower() not in ("true", "1", "yes")
|
|
46
|
+
_last_hash: str = "0" * 64 # Genesis hash
|
|
47
|
+
|
|
48
|
+
# Actions considered security-relevant (logged at WARNING level in syslog)
|
|
49
|
+
_SECURITY_ACTIONS = frozenset({
|
|
50
|
+
"delete", "kill", "stop", "login", "logout",
|
|
51
|
+
"create_token", "revoke_token",
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
_syslog_handler: logging.handlers.SysLogHandler | None = None
|
|
55
|
+
SYSLOG_ENABLED: bool = False
|
|
56
|
+
|
|
57
|
+
if _SYSLOG_HOST:
|
|
58
|
+
try:
|
|
59
|
+
_socktype = socket.SOCK_STREAM if _SYSLOG_PROTO == "tcp" else socket.SOCK_DGRAM
|
|
60
|
+
_syslog_handler = logging.handlers.SysLogHandler(
|
|
61
|
+
address=(_SYSLOG_HOST, _SYSLOG_PORT),
|
|
62
|
+
facility=logging.handlers.SysLogHandler.LOG_LOCAL0,
|
|
63
|
+
socktype=_socktype,
|
|
64
|
+
)
|
|
65
|
+
_syslog_handler.setFormatter(logging.Formatter("loki-audit: %(message)s"))
|
|
66
|
+
SYSLOG_ENABLED = True
|
|
67
|
+
except Exception as _exc:
|
|
68
|
+
print(
|
|
69
|
+
f"[loki-audit] WARNING: Failed to configure syslog handler "
|
|
70
|
+
f"({_SYSLOG_HOST}:{_SYSLOG_PORT}/{_SYSLOG_PROTO}): {_exc}",
|
|
71
|
+
file=sys.stderr,
|
|
72
|
+
)
|
|
73
|
+
_syslog_handler = None
|
|
74
|
+
|
|
28
75
|
|
|
29
76
|
def _ensure_audit_dir() -> None:
|
|
30
77
|
"""Ensure the audit directory exists."""
|
|
31
78
|
AUDIT_DIR.mkdir(parents=True, exist_ok=True)
|
|
32
79
|
|
|
33
80
|
|
|
81
|
+
def _compute_chain_hash(entry_json: str, prev_hash: str) -> str:
|
|
82
|
+
"""Compute a SHA-256 chain hash linking this entry to the previous one.
|
|
83
|
+
|
|
84
|
+
Each hash depends on the previous entry's hash, creating a tamper-evident
|
|
85
|
+
chain. If any entry is modified, all subsequent hashes will be invalid.
|
|
86
|
+
"""
|
|
87
|
+
return hashlib.sha256((prev_hash + entry_json).encode("utf-8")).hexdigest()
|
|
88
|
+
|
|
89
|
+
|
|
34
90
|
def _get_current_log_file() -> Path:
|
|
35
91
|
"""Get the current audit log file (date-based)."""
|
|
36
92
|
_ensure_audit_dir()
|
|
@@ -68,6 +124,30 @@ def _cleanup_old_logs() -> None:
|
|
|
68
124
|
oldest.unlink()
|
|
69
125
|
|
|
70
126
|
|
|
127
|
+
def _forward_to_syslog(entry: dict) -> None:
|
|
128
|
+
"""Forward an audit entry to syslog if configured. Fire-and-forget."""
|
|
129
|
+
if _syslog_handler is None:
|
|
130
|
+
return
|
|
131
|
+
try:
|
|
132
|
+
message = json.dumps(entry, separators=(",", ":"))
|
|
133
|
+
action = entry.get("action", "")
|
|
134
|
+
is_security = action in _SECURITY_ACTIONS or not entry.get("success", True)
|
|
135
|
+
level = logging.WARNING if is_security else logging.INFO
|
|
136
|
+
record = logging.LogRecord(
|
|
137
|
+
name="loki-audit",
|
|
138
|
+
level=level,
|
|
139
|
+
pathname="",
|
|
140
|
+
lineno=0,
|
|
141
|
+
msg=message,
|
|
142
|
+
args=(),
|
|
143
|
+
exc_info=None,
|
|
144
|
+
)
|
|
145
|
+
_syslog_handler.emit(record)
|
|
146
|
+
except Exception:
|
|
147
|
+
# Fire-and-forget: never block the main audit write path
|
|
148
|
+
pass
|
|
149
|
+
|
|
150
|
+
|
|
71
151
|
def log_event(
|
|
72
152
|
action: str,
|
|
73
153
|
resource_type: str,
|
|
@@ -115,12 +195,22 @@ def log_event(
|
|
|
115
195
|
"details": details or {},
|
|
116
196
|
}
|
|
117
197
|
|
|
198
|
+
# Tamper-evident chain hash
|
|
199
|
+
global _last_hash
|
|
200
|
+
if INTEGRITY_ENABLED:
|
|
201
|
+
entry_json = json.dumps(entry, sort_keys=True, default=str)
|
|
202
|
+
entry["_integrity_hash"] = _compute_chain_hash(entry_json, _last_hash)
|
|
203
|
+
_last_hash = entry["_integrity_hash"]
|
|
204
|
+
|
|
118
205
|
log_file = _get_current_log_file()
|
|
119
206
|
_rotate_logs_if_needed(log_file)
|
|
120
207
|
|
|
121
208
|
with open(log_file, "a") as f:
|
|
122
209
|
f.write(json.dumps(entry) + "\n")
|
|
123
210
|
|
|
211
|
+
# Forward to syslog if configured
|
|
212
|
+
_forward_to_syslog(entry)
|
|
213
|
+
|
|
124
214
|
return entry
|
|
125
215
|
|
|
126
216
|
|
|
@@ -257,3 +347,67 @@ def get_audit_summary(days: int = 7) -> dict:
|
|
|
257
347
|
def is_audit_enabled() -> bool:
|
|
258
348
|
"""Check if audit logging is enabled (on by default, disable with LOKI_AUDIT_DISABLED=true)."""
|
|
259
349
|
return ENTERPRISE_AUDIT_ENABLED
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def verify_log_integrity(log_file: str) -> dict:
|
|
353
|
+
"""Verify the integrity chain of a JSONL audit log file.
|
|
354
|
+
|
|
355
|
+
Reads each line, recomputes the chain hash from the genesis hash,
|
|
356
|
+
and compares it to the stored _integrity_hash. If any entry has been
|
|
357
|
+
tampered with, all subsequent hashes will also fail to match.
|
|
358
|
+
|
|
359
|
+
Args:
|
|
360
|
+
log_file: Path to the JSONL audit log file to verify.
|
|
361
|
+
|
|
362
|
+
Returns:
|
|
363
|
+
A dict with:
|
|
364
|
+
- valid (bool): True if the entire chain is intact.
|
|
365
|
+
- entries_checked (int): Number of entries verified.
|
|
366
|
+
- first_tampered_line (int | None): 1-based line number of the
|
|
367
|
+
first entry where the hash chain broke, or None if valid.
|
|
368
|
+
"""
|
|
369
|
+
prev_hash = "0" * 64 # Genesis hash
|
|
370
|
+
entries_checked = 0
|
|
371
|
+
|
|
372
|
+
try:
|
|
373
|
+
with open(log_file, "r") as f:
|
|
374
|
+
for line_num, line in enumerate(f, start=1):
|
|
375
|
+
line = line.strip()
|
|
376
|
+
if not line:
|
|
377
|
+
continue
|
|
378
|
+
|
|
379
|
+
try:
|
|
380
|
+
entry = json.loads(line)
|
|
381
|
+
except json.JSONDecodeError:
|
|
382
|
+
return {
|
|
383
|
+
"valid": False,
|
|
384
|
+
"entries_checked": entries_checked,
|
|
385
|
+
"first_tampered_line": line_num,
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
stored_hash = entry.pop("_integrity_hash", None)
|
|
389
|
+
if stored_hash is None:
|
|
390
|
+
# Entry has no integrity hash -- chain is broken
|
|
391
|
+
return {
|
|
392
|
+
"valid": False,
|
|
393
|
+
"entries_checked": entries_checked,
|
|
394
|
+
"first_tampered_line": line_num,
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
entry_json = json.dumps(entry, sort_keys=True, default=str)
|
|
398
|
+
expected_hash = _compute_chain_hash(entry_json, prev_hash)
|
|
399
|
+
|
|
400
|
+
if stored_hash != expected_hash:
|
|
401
|
+
return {
|
|
402
|
+
"valid": False,
|
|
403
|
+
"entries_checked": entries_checked,
|
|
404
|
+
"first_tampered_line": line_num,
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
prev_hash = stored_hash
|
|
408
|
+
entries_checked += 1
|
|
409
|
+
|
|
410
|
+
except FileNotFoundError:
|
|
411
|
+
return {"valid": True, "entries_checked": 0, "first_tampered_line": None}
|
|
412
|
+
|
|
413
|
+
return {"valid": True, "entries_checked": entries_checked, "first_tampered_line": None}
|
package/dashboard/auth.py
CHANGED
|
@@ -35,6 +35,23 @@ OIDC_CLIENT_ID = os.environ.get("LOKI_OIDC_CLIENT_ID", "")
|
|
|
35
35
|
OIDC_AUDIENCE = os.environ.get("LOKI_OIDC_AUDIENCE", "") # Usually same as client_id
|
|
36
36
|
OIDC_ENABLED = bool(OIDC_ISSUER and OIDC_CLIENT_ID)
|
|
37
37
|
|
|
38
|
+
# Role-to-scope mapping (predefined roles)
|
|
39
|
+
ROLES = {
|
|
40
|
+
"admin": ["*"], # Full access
|
|
41
|
+
"operator": ["control", "read", "write"], # Start/stop/pause, view+edit dashboard
|
|
42
|
+
"viewer": ["read"], # Read-only dashboard access
|
|
43
|
+
"auditor": ["read", "audit"], # Read dashboard + audit logs
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
# Scope hierarchy: higher scopes implicitly grant lower ones (single-level lookup).
|
|
47
|
+
# * -> control -> write -> read
|
|
48
|
+
# Each scope explicitly lists ALL scopes it grants (no transitive resolution).
|
|
49
|
+
_SCOPE_HIERARCHY = {
|
|
50
|
+
"*": {"control", "write", "read", "audit", "admin"},
|
|
51
|
+
"control": {"write", "read"},
|
|
52
|
+
"write": {"read"},
|
|
53
|
+
}
|
|
54
|
+
|
|
38
55
|
if OIDC_ENABLED:
|
|
39
56
|
import logging as _logging
|
|
40
57
|
_logging.getLogger("loki.auth").warning(
|
|
@@ -98,10 +115,39 @@ def _constant_time_compare(a: str, b: str) -> bool:
|
|
|
98
115
|
return secrets.compare_digest(a.encode(), b.encode())
|
|
99
116
|
|
|
100
117
|
|
|
118
|
+
def resolve_scopes(role_or_scopes) -> list[str]:
|
|
119
|
+
"""Resolve a role name or scope list into a concrete list of scopes.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
role_or_scopes: Either a role name (str), a single scope (str),
|
|
123
|
+
or a list of scopes.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
List of scope strings.
|
|
127
|
+
"""
|
|
128
|
+
if isinstance(role_or_scopes, list):
|
|
129
|
+
return role_or_scopes
|
|
130
|
+
if isinstance(role_or_scopes, str):
|
|
131
|
+
if role_or_scopes in ROLES:
|
|
132
|
+
return list(ROLES[role_or_scopes])
|
|
133
|
+
return [role_or_scopes]
|
|
134
|
+
return ["*"]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def list_roles() -> dict[str, list[str]]:
|
|
138
|
+
"""Return the predefined role-to-scope mapping.
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
Dict mapping role names to their scope lists.
|
|
142
|
+
"""
|
|
143
|
+
return dict(ROLES)
|
|
144
|
+
|
|
145
|
+
|
|
101
146
|
def generate_token(
|
|
102
147
|
name: str,
|
|
103
148
|
scopes: Optional[list[str]] = None,
|
|
104
149
|
expires_days: Optional[int] = None,
|
|
150
|
+
role: Optional[str] = None,
|
|
105
151
|
) -> dict:
|
|
106
152
|
"""
|
|
107
153
|
Generate a new API token.
|
|
@@ -110,12 +156,16 @@ def generate_token(
|
|
|
110
156
|
name: Human-readable name for the token
|
|
111
157
|
scopes: Optional list of permission scopes (default: all)
|
|
112
158
|
expires_days: Optional expiration in days (None = never expires)
|
|
159
|
+
role: Optional role name (admin, operator, viewer, auditor).
|
|
160
|
+
If provided, scopes are resolved from the role.
|
|
161
|
+
Cannot be combined with explicit scopes.
|
|
113
162
|
|
|
114
163
|
Returns:
|
|
115
164
|
Dict with token info (includes raw token - only shown once)
|
|
116
165
|
|
|
117
166
|
Raises:
|
|
118
|
-
ValueError: If name is empty/too long
|
|
167
|
+
ValueError: If name is empty/too long, expires_days is invalid,
|
|
168
|
+
or role is unrecognized
|
|
119
169
|
"""
|
|
120
170
|
# Validate inputs
|
|
121
171
|
if not name or not name.strip():
|
|
@@ -124,9 +174,19 @@ def generate_token(
|
|
|
124
174
|
raise ValueError("Token name too long (max 255 characters)")
|
|
125
175
|
if expires_days is not None and expires_days <= 0:
|
|
126
176
|
raise ValueError("expires_days must be positive (or None for no expiration)")
|
|
177
|
+
if role is not None and role not in ROLES:
|
|
178
|
+
raise ValueError(
|
|
179
|
+
f"Unknown role '{role}'. Valid roles: {', '.join(ROLES.keys())}"
|
|
180
|
+
)
|
|
127
181
|
|
|
128
182
|
name = name.strip()
|
|
129
183
|
|
|
184
|
+
# Resolve scopes: role takes precedence if provided
|
|
185
|
+
if role is not None:
|
|
186
|
+
resolved_scopes = resolve_scopes(role)
|
|
187
|
+
else:
|
|
188
|
+
resolved_scopes = scopes
|
|
189
|
+
|
|
130
190
|
# Generate secure random token
|
|
131
191
|
raw_token = f"loki_{secrets.token_urlsafe(32)}"
|
|
132
192
|
token_hash, token_salt = _hash_token(raw_token)
|
|
@@ -150,12 +210,14 @@ def generate_token(
|
|
|
150
210
|
"name": name,
|
|
151
211
|
"hash": token_hash,
|
|
152
212
|
"salt": token_salt,
|
|
153
|
-
"scopes":
|
|
213
|
+
"scopes": resolved_scopes or ["*"],
|
|
154
214
|
"created_at": datetime.now(timezone.utc).isoformat(),
|
|
155
215
|
"expires_at": expires_at,
|
|
156
216
|
"last_used": None,
|
|
157
217
|
"revoked": False,
|
|
158
218
|
}
|
|
219
|
+
if role is not None:
|
|
220
|
+
token_entry["role"] = role
|
|
159
221
|
|
|
160
222
|
tokens["tokens"][token_id] = token_entry
|
|
161
223
|
_save_tokens(tokens)
|
|
@@ -247,6 +309,8 @@ def list_tokens(include_revoked: bool = False) -> list[dict]:
|
|
|
247
309
|
"last_used": token.get("last_used"),
|
|
248
310
|
"revoked": token.get("revoked", False),
|
|
249
311
|
}
|
|
312
|
+
if "role" in token:
|
|
313
|
+
safe_token["role"] = token["role"]
|
|
250
314
|
result.append(safe_token)
|
|
251
315
|
|
|
252
316
|
return result
|
|
@@ -297,17 +361,32 @@ def validate_token(raw_token: str) -> Optional[dict]:
|
|
|
297
361
|
|
|
298
362
|
def has_scope(token_info: dict, required_scope: str) -> bool:
    """
    Check if a token has a required scope, respecting scope hierarchy.

    Hierarchy (higher scopes implicitly grant lower ones):
        * -> control -> write -> read
        * also grants audit, admin, and all other scopes

    Args:
        token_info: Token metadata from validate_token
        required_scope: The scope to check

    Returns:
        True if token has the scope (directly, via the "*" wildcard, or via
        the scope hierarchy); False otherwise, including when the token
        carries no scopes at all.
    """
    # A stored ``"scopes": null`` must behave like "no scopes" instead of
    # raising TypeError on the membership tests below.
    scopes = token_info.get("scopes") or []

    # Direct match, or the wildcard. The wildcard is checked explicitly so it
    # also grants scopes that are not enumerated in the hierarchy table, as
    # the contract above promises ("all other scopes").
    if required_scope in scopes or "*" in scopes:
        return True

    # Check hierarchy: does any held scope implicitly grant the required one?
    return any(
        required_scope in _SCOPE_HIERARCHY.get(scope, ())
        for scope in scopes
    )
|
|
311
390
|
|
|
312
391
|
|
|
313
392
|
# ---------------------------------------------------------------------------
|
package/dashboard/server.py
CHANGED
|
@@ -26,6 +26,7 @@ from fastapi import (
|
|
|
26
26
|
WebSocketDisconnect,
|
|
27
27
|
)
|
|
28
28
|
from fastapi.middleware.cors import CORSMiddleware
|
|
29
|
+
from fastapi.responses import PlainTextResponse
|
|
29
30
|
from pydantic import BaseModel, Field
|
|
30
31
|
from sqlalchemy import select, update, delete
|
|
31
32
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
@@ -246,7 +247,13 @@ app = FastAPI(
|
|
|
246
247
|
# Add CORS middleware - restricted to localhost by default.
|
|
247
248
|
# Set LOKI_DASHBOARD_CORS to override (comma-separated origins).
|
|
248
249
|
_cors_default = "http://localhost:57374,http://127.0.0.1:57374"
|
|
249
|
-
|
|
250
|
+
_cors_raw = os.environ.get("LOKI_DASHBOARD_CORS", _cors_default)
|
|
251
|
+
if _cors_raw.strip() == "*":
|
|
252
|
+
logger.warning(
|
|
253
|
+
"LOKI_DASHBOARD_CORS is set to '*' -- all origins are allowed. "
|
|
254
|
+
"This is insecure for production deployments."
|
|
255
|
+
)
|
|
256
|
+
_cors_origins = _cors_raw.split(",")
|
|
250
257
|
app.add_middleware(
|
|
251
258
|
CORSMiddleware,
|
|
252
259
|
allow_origins=[o.strip() for o in _cors_origins if o.strip()],
|
|
@@ -827,7 +834,36 @@ async def move_task(
|
|
|
827
834
|
# WebSocket endpoint
|
|
828
835
|
@app.websocket("/ws")
|
|
829
836
|
async def websocket_endpoint(websocket: WebSocket) -> None:
|
|
830
|
-
"""WebSocket endpoint for real-time updates.
|
|
837
|
+
"""WebSocket endpoint for real-time updates.
|
|
838
|
+
|
|
839
|
+
When enterprise auth or OIDC is enabled, a valid token must be passed
|
|
840
|
+
as a query parameter: ``/ws?token=loki_xxx`` (or a JWT for OIDC).
|
|
841
|
+
Browsers cannot send Authorization headers on WebSocket upgrade
|
|
842
|
+
requests, so query-parameter auth is the standard approach.
|
|
843
|
+
"""
|
|
844
|
+
# --- WebSocket authentication gate ---
|
|
845
|
+
# NOTE: Query-parameter auth is used because browsers cannot send
|
|
846
|
+
# Authorization headers on WS upgrade. Tokens may appear in reverse
|
|
847
|
+
# proxy access logs -- configure log sanitization for /ws in production.
|
|
848
|
+
# FastAPI Depends() is not supported on @app.websocket() routes.
|
|
849
|
+
if auth.is_enterprise_mode() or auth.is_oidc_mode():
|
|
850
|
+
ws_token: Optional[str] = websocket.query_params.get("token")
|
|
851
|
+
if not ws_token:
|
|
852
|
+
await websocket.close(code=1008) # Policy Violation
|
|
853
|
+
return
|
|
854
|
+
|
|
855
|
+
token_info: Optional[dict] = None
|
|
856
|
+
# Try OIDC first for JWT-style tokens
|
|
857
|
+
if auth.is_oidc_mode() and not ws_token.startswith("loki_"):
|
|
858
|
+
token_info = auth.validate_oidc_token(ws_token)
|
|
859
|
+
# Fall back to enterprise token auth
|
|
860
|
+
if token_info is None and auth.is_enterprise_mode():
|
|
861
|
+
token_info = auth.validate_token(ws_token)
|
|
862
|
+
|
|
863
|
+
if token_info is None:
|
|
864
|
+
await websocket.close(code=1008) # Policy Violation
|
|
865
|
+
return
|
|
866
|
+
|
|
831
867
|
await manager.connect(websocket)
|
|
832
868
|
try:
|
|
833
869
|
# Send initial connection confirmation
|
|
@@ -1043,7 +1079,7 @@ async def get_auth_info():
|
|
|
1043
1079
|
class TokenCreateRequest(BaseModel):
    """Schema for creating a token.

    Request body model: ``name`` is required, ``scopes`` and ``expires_days``
    are optional and default to ``None``.
    """
    # Required display name; length is bounded to 1..255 characters by the Field constraints.
    name: str = Field(..., min_length=1, max_length=255, description="Human-readable token name")
    # Intended shape is a list of scope strings. It is typed ``Any`` only so the
    # annotation evaluates on Python 3.8, which means the element type is not
    # validated here.
    # NOTE(review): a bare string or other JSON value is accepted as-is by this
    # model -- confirm the consumer validates the shape before storing it.
    scopes: Optional[Any] = Field(None, description="Permission scopes (default: ['*'] for all)")  # list[str], Any for Python 3.8
    # Optional lifetime in days; ``gt=0`` rejects zero and negative values.
    expires_days: Optional[int] = Field(None, gt=0, description="Days until expiration (must be positive)")
|
|
1048
1084
|
|
|
1049
1085
|
|
|
@@ -1051,7 +1087,7 @@ class TokenResponse(BaseModel):
|
|
|
1051
1087
|
"""Schema for token response."""
|
|
1052
1088
|
id: str
|
|
1053
1089
|
name: str
|
|
1054
|
-
scopes: list[str]
|
|
1090
|
+
scopes: Any # list[str], Any for Python 3.8
|
|
1055
1091
|
created_at: str
|
|
1056
1092
|
expires_at: Optional[str]
|
|
1057
1093
|
last_used: Optional[str]
|
|
@@ -2584,6 +2620,186 @@ async def get_process_health(token: Optional[dict] = Depends(auth.get_current_to
|
|
|
2584
2620
|
return result
|
|
2585
2621
|
|
|
2586
2622
|
|
|
2623
|
+
# =============================================================================
|
|
2624
|
+
# Prometheus / OpenMetrics Endpoint
|
|
2625
|
+
# =============================================================================
|
|
2626
|
+
|
|
2627
|
+
|
|
2628
|
+
def _metrics_load_json(path):
    """Return the parsed JSON document at *path*, or None if it cannot be read or parsed."""
    try:
        return json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None


def _metrics_number(value, default=0):
    """Return *value* as a number suitable for a sample line, else *default*."""
    if isinstance(value, bool):
        # bool is an int subclass but would render as "True"/"False".
        return default
    if isinstance(value, (int, float)):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _metrics_len(container, key):
    """Return len(container[key]) for a sized collection, or 0 when absent or malformed."""
    items = container.get(key) if isinstance(container, dict) else None
    return len(items) if isinstance(items, (list, tuple, dict)) else 0


def _metrics_pid_alive(pid_file) -> bool:
    """Return True if *pid_file* names a process that currently exists."""
    try:
        pid = int(pid_file.read_text().strip())
    except (OSError, ValueError):
        return False
    if pid <= 0:
        # kill(0, ...) and kill(-n, ...) address process groups, so a corrupt
        # PID file would otherwise always look like a live session.
        return False
    try:
        os.kill(pid, 0)  # signal 0: existence/permission probe only
    except PermissionError:
        # EPERM means the process exists but is owned by another user.
        return True
    except (OSError, OverflowError):
        return False
    return True


def _metrics_agent_counts(agents):
    """Return (total, active) for a list of agent records; (0, 0) for anything else."""
    if not isinstance(agents, list):
        return 0, 0
    active = sum(
        1 for agent in agents
        if isinstance(agent, dict) and agent.get("status") == "active"
    )
    return len(agents), active


def _metrics_estimated_cost(efficiency_dir) -> float:
    """Sum the cost of every ``*.json`` record in *efficiency_dir*, skipping bad files."""
    total = 0.0
    try:
        # Sorted so the floating-point sum is deterministic across scrapes.
        eff_files = sorted(efficiency_dir.glob("*.json"))
    except OSError:
        return total

    for eff_file in eff_files:
        record = _metrics_load_json(eff_file)
        if not isinstance(record, dict):
            continue  # unreadable, invalid, or not a JSON object
        try:
            cost = record.get("cost_usd")
            if cost is not None:
                total += float(cost)
            else:
                # No recorded cost: estimate it from the token counts.
                model = record.get("model", "sonnet")
                if not isinstance(model, str):
                    model = "sonnet"
                total += _calculate_model_cost(
                    model.lower(),
                    record.get("input_tokens", 0),
                    record.get("output_tokens", 0),
                )
        except (KeyError, TypeError, ValueError):
            # One malformed record must not discard the remaining files.
            continue
    return total


def _metrics_count_events(events_file) -> int:
    """Count the non-blank lines of *events_file* without loading it into memory."""
    try:
        # Binary mode: line counting needs no decoding, so undecodable bytes
        # cannot fail the scrape.
        with events_file.open("rb") as fh:
            return sum(1 for raw in fh if raw.strip())
    except OSError:
        return 0


def _metrics_uptime_seconds(started_at) -> float:
    """Return seconds elapsed since the ISO-8601 timestamp *started_at*, or 0.0."""
    if not isinstance(started_at, str) or not started_at:
        return 0.0
    try:
        start_dt = datetime.fromisoformat(started_at.replace("Z", "+00:00"))
        elapsed = (datetime.now(timezone.utc) - start_dt).total_seconds()
    except (ValueError, TypeError):
        # ValueError: not ISO-8601. TypeError: a timestamp without an offset
        # cannot be subtracted from the timezone-aware "now".
        return 0.0
    return max(elapsed, 0.0)


def _metrics_section(lines, name, help_text, metric_type, samples):
    """Append one metric family (HELP, TYPE, samples, blank separator) to *lines*."""
    lines.append(f"# HELP {name} {help_text}")
    lines.append(f"# TYPE {name} {metric_type}")
    lines.extend(samples)
    lines.append("")


def _build_metrics_text() -> str:
    """Build Prometheus/OpenMetrics format metrics text from .loki/ flat files.

    Every data source is read on a best-effort basis: a missing, unreadable
    or malformed file contributes a zero/default value instead of failing the
    scrape.

    Returns:
        The exposition text. When the loki directory does not exist this is
        the single comment line ``# loki_up 0`` (a comment, not a sample).
    """
    lines = []  # type: list[str] -- comment-style for Python 3.8
    loki_dir = _get_loki_dir()

    # Validate LOKI_DIR exists before attempting to read metrics
    if not loki_dir.is_dir():
        return "# loki_up 0\n"

    # -- Read dashboard-state.json (primary data source) ----------------------
    state = _metrics_load_json(loki_dir / "dashboard-state.json")
    if not isinstance(state, dict):
        state = {}

    # 1. loki_session_status (gauge) ------------------------------------------
    mode = state.get("mode", "")
    if mode == "paused":
        status_val = 2
    elif mode in ("autonomous", "running"):
        status_val = 1
    else:
        # No usable mode in the state file: fall back to the PID file.
        status_val = 1 if _metrics_pid_alive(loki_dir / "loki.pid") else 0
    _metrics_section(
        lines,
        "loki_session_status",
        "Current session status (0=stopped, 1=running, 2=paused)",
        "gauge",
        [f"loki_session_status {status_val}"],
    )

    # 2. loki_iteration_current (gauge) ---------------------------------------
    iteration = _metrics_number(state.get("iteration", 0))
    _metrics_section(
        lines,
        "loki_iteration_current",
        "Current iteration number",
        "gauge",
        [f"loki_iteration_current {iteration}"],
    )

    # 3. loki_iteration_max (gauge) -------------------------------------------
    try:
        max_iterations = int(os.environ.get("LOKI_MAX_ITERATIONS", "1000"))
    except ValueError:
        # A malformed override falls back to the built-in default.
        max_iterations = 1000
    _metrics_section(
        lines,
        "loki_iteration_max",
        "Maximum configured iterations",
        "gauge",
        [f"loki_iteration_max {max_iterations}"],
    )

    # 4. loki_tasks_total (gauge, label: status) ------------------------------
    tasks = state.get("tasks", {})
    _metrics_section(
        lines,
        "loki_tasks_total",
        "Number of tasks by status",
        "gauge",
        [
            f'loki_tasks_total{{status="pending"}} {_metrics_len(tasks, "pending")}',
            f'loki_tasks_total{{status="in_progress"}} {_metrics_len(tasks, "inProgress")}',
            f'loki_tasks_total{{status="completed"}} {_metrics_len(tasks, "completed")}',
            f'loki_tasks_total{{status="failed"}} {_metrics_len(tasks, "failed")}',
        ],
    )

    # 5. loki_agents_active (gauge) -------------------------------------------
    # 6. loki_agents_total (gauge) --------------------------------------------
    agents_total, agents_active = _metrics_agent_counts(
        _metrics_load_json(loki_dir / "state" / "agents.json")
    )
    # Fallback to dashboard-state.json agents
    if agents_total == 0:
        agents_total, agents_active = _metrics_agent_counts(state.get("agents", []))
    _metrics_section(
        lines,
        "loki_agents_active",
        "Number of currently active agents",
        "gauge",
        [f"loki_agents_active {agents_active}"],
    )
    _metrics_section(
        lines,
        "loki_agents_total",
        "Total number of agents registered",
        "gauge",
        [f"loki_agents_total {agents_total}"],
    )

    # 7. loki_cost_usd (gauge) ------------------------------------------------
    estimated_cost = _metrics_estimated_cost(loki_dir / "metrics" / "efficiency")
    _metrics_section(
        lines,
        "loki_cost_usd",
        "Estimated total cost in USD",
        "gauge",
        [f"loki_cost_usd {round(estimated_cost, 6)}"],
    )

    # 8. loki_events_total (counter) ------------------------------------------
    events_count = _metrics_count_events(loki_dir / "events.jsonl")
    _metrics_section(
        lines,
        "loki_events_total",
        "Total number of events recorded",
        "counter",
        [f"loki_events_total {events_count}"],
    )

    # 9. loki_uptime_seconds (gauge) ------------------------------------------
    uptime_seconds = _metrics_uptime_seconds(state.get("startedAt", ""))
    _metrics_section(
        lines,
        "loki_uptime_seconds",
        "Seconds since session started",
        "gauge",
        [f"loki_uptime_seconds {round(uptime_seconds, 1)}"],
    )

    return "\n".join(lines) + "\n"
|
|
2795
|
+
|
|
2796
|
+
|
|
2797
|
+
@app.get("/metrics", response_class=PlainTextResponse)
async def prometheus_metrics():
    """Prometheus/OpenMetrics compatible metrics endpoint.

    Returns:
        The plain-text exposition document built by ``_build_metrics_text``.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    # The builder does synchronous file reads; run it on the default executor
    # so a scrape does not stall the event loop for other requests.
    # run_in_executor is used rather than asyncio.to_thread to stay compatible
    # with Python 3.8.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _build_metrics_text)
|
|
2801
|
+
|
|
2802
|
+
|
|
2587
2803
|
# =============================================================================
|
|
2588
2804
|
# Static File Serving (Production/Docker)
|
|
2589
2805
|
# =============================================================================
|
package/docs/INSTALLATION.md
CHANGED
package/package.json
CHANGED
package/providers/gemini.sh
CHANGED
|
@@ -49,7 +49,7 @@ PROVIDER_MAX_PARALLEL=1
|
|
|
49
49
|
|
|
50
50
|
# Model Configuration
|
|
51
51
|
# Gemini CLI supports --model flag to specify model
|
|
52
|
-
# Primary: gemini-3-pro-preview (
|
|
52
|
+
# Primary: gemini-3-pro-preview (preview names - may change when GA is released)
|
|
53
53
|
# Fallback: gemini-3-flash-preview (for rate limit scenarios)
|
|
54
54
|
PROVIDER_MODEL="gemini-3-pro-preview"
|
|
55
55
|
PROVIDER_MODEL_FALLBACK="gemini-3-flash-preview"
|
|
@@ -69,7 +69,9 @@ PROVIDER_TASK_MODEL_VALUES=()
|
|
|
69
69
|
# Context and Limits
|
|
70
70
|
PROVIDER_CONTEXT_WINDOW=1000000 # Gemini 3 has 1M context
|
|
71
71
|
PROVIDER_MAX_OUTPUT_TOKENS=65536
|
|
72
|
-
|
|
72
|
+
# Rate limit varies by tier: Free=5-15 RPM, Tier1=150+ RPM, Tier2=500+ RPM
|
|
73
|
+
# Default to conservative free-tier value; override with LOKI_GEMINI_RPM env var
|
|
74
|
+
PROVIDER_RATE_LIMIT_RPM="${LOKI_GEMINI_RPM:-15}"
|
|
73
75
|
|
|
74
76
|
# Cost (USD per 1K tokens, approximate for Gemini 3 Pro)
|
|
75
77
|
PROVIDER_COST_INPUT_PLANNING=0.00125
|