agent-control-plane 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +305 -7
- package/hooks/pr-reconcile-hooks.sh +12 -1
- package/package.json +13 -9
- package/tools/bin/adapter-capabilities.sh +84 -0
- package/tools/bin/adapter-interface.sh +97 -0
- package/tools/bin/claude-adapter.sh +73 -0
- package/tools/bin/codex-adapter.sh +123 -0
- package/tools/bin/flow-runtime-doctor.sh +67 -0
- package/tools/bin/heartbeat-safe-auto.sh +161 -0
- package/tools/bin/kilo-adapter.sh +108 -0
- package/tools/bin/ollama-adapter.sh +160 -0
- package/tools/bin/openclaw-adapter.sh +69 -0
- package/tools/bin/opencode-adapter.sh +98 -0
- package/tools/bin/pi-adapter.sh +95 -0
- package/tools/bin/render-flow-config.sh +98 -0
- package/tools/bin/run-with-adapter.sh +34 -0
- package/tools/bin/sync-shared-agent-home.sh +23 -0
- package/tools/dashboard/__pycache__/server.cpython-311.pyc +0 -0
- package/tools/dashboard/app-v2.js +1120 -0
- package/tools/dashboard/app.js +129 -38
- package/tools/dashboard/index-inline.html +1533 -0
- package/tools/dashboard/index-v2.html +45 -0
- package/tools/dashboard/server.py +64 -15
- package/tools/dashboard/styles.css +595 -521
- package/tools/bin/profile-activate.sh +0 -109
- package/tools/bin/profile-adopt.sh +0 -225
- package/tools/bin/profile-smoke.sh +0 -461
- package/tools/bin/test-smoke.sh +0 -119
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# codex-adapter.sh
|
|
3
|
+
# Adapter implementation for Codex (Claude CLI)
|
|
4
|
+
# Implements: adapter-interface.sh
|
|
5
|
+
|
|
6
|
+
set -euo pipefail
|
|
7
|
+
|
|
8
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
9
|
+
source "${SCRIPT_DIR}/adapter-interface.sh"
|
|
10
|
+
source "${SCRIPT_DIR}/adapter-capabilities.sh"
|
|
11
|
+
|
|
12
|
+
# Codex adapter metadata
|
|
13
|
+
ADAPTER_ID="codex"
|
|
14
|
+
ADAPTER_NAME="Codex (Claude CLI)"
|
|
15
|
+
ADAPTER_TYPE="cloud-api"
|
|
16
|
+
ADAPTER_VERSION="1.0.0"
|
|
17
|
+
ADAPTER_MODEL="${CODEX_MODEL:-sonnet}"
|
|
18
|
+
ADAPTER_BASE_URL=""
|
|
19
|
+
|
|
20
|
+
# Codex capabilities
|
|
21
|
+
ADAPTER_CAP_CLOUD_API=true
|
|
22
|
+
ADAPTER_CAP_STREAMING=true
|
|
23
|
+
ADAPTER_CAP_JSON_OUTPUT=true
|
|
24
|
+
ADAPTER_CAP_MAX_TIMEOUT=900
|
|
25
|
+
|
|
26
|
+
# Print adapter metadata on stdout as key=value lines, one field per line,
# taken from the ADAPTER_* globals set at the top of this file.
adapter_info() {
  printf '%s\n' \
    "id=${ADAPTER_ID}" \
    "name=${ADAPTER_NAME}" \
    "type=${ADAPTER_TYPE}" \
    "version=${ADAPTER_VERSION}" \
    "model=${ADAPTER_MODEL}" \
    "base_url=${ADAPTER_BASE_URL}"
}
|
|
37
|
+
|
|
38
|
+
# Health check for the Codex adapter.
#
# Outputs: one status line per finding on stdout (ERROR / WARN / OK).
# Returns: 1 when the claude CLI is not on PATH, 0 otherwise.
adapter_health_check() {
  command -v claude >/dev/null 2>&1 || {
    echo "ERROR: claude CLI not found in PATH"
    return 1
  }

  # A missing API key is only a warning: the user might use OAuth instead.
  if ! [[ -n "${ANTHROPIC_API_KEY:-}" || -n "${OPENROUTER_API_KEY:-}" ]]; then
    echo "WARN: No ANTHROPIC_API_KEY or OPENROUTER_API_KEY found"
  fi

  echo "OK: Codex adapter healthy (claude CLI available)"
  return 0
}
|
|
54
|
+
|
|
55
|
+
# Run a task using codex (claude CLI).
#
# Arguments:
#   $1 - mode (required for interface parity; not used by this adapter)
#   $2 - session name, used only in progress messages
#   $3 - worktree directory; the function cd's into it, so the caller's
#        working directory changes
#   $4 - prompt file; its contents are passed to claude as the prompt argument
# Environment:
#   CLAUDE_PERMISSION_MODE  (default: acceptEdits)
#   CLAUDE_TIMEOUT_SECONDS  (default: 900)
#   ADAPTER_MODEL           model passed to --model
# Outputs: progress lines plus claude's combined stdout/stderr on stdout.
# Returns: 0 on success; 1 if the prompt file is missing or empty, the
#          worktree cannot be entered, or claude fails or times out.
adapter_run() {
  local mode="${1:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local session="${2:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local worktree="${3:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local prompt_file="${4:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"

  # Validate the prompt file up front, as the kilo and ollama adapters do.
  if [[ ! -f "${prompt_file}" ]]; then
    echo "ERROR: Prompt file not found: ${prompt_file}"
    return 1
  fi
  if [[ ! -s "${prompt_file}" ]]; then
    echo "ERROR: Prompt file is empty: ${prompt_file}"
    return 1
  fi

  local permission_mode="${CLAUDE_PERMISSION_MODE:-acceptEdits}"
  local timeout_seconds="${CLAUDE_TIMEOUT_SECONDS:-900}"

  # Pick whichever time limiter exists: GNU timeout, coreutils' gtimeout
  # (Homebrew on macOS), or a perl alarm. With none available the command
  # runs without a limit.
  local -a limiter=()
  if command -v timeout >/dev/null 2>&1; then
    limiter=(timeout "${timeout_seconds}")
  elif command -v gtimeout >/dev/null 2>&1; then
    limiter=(gtimeout "${timeout_seconds}")
  elif command -v perl >/dev/null 2>&1; then
    limiter=(perl -e 'alarm shift; exec { $ARGV[0] } @ARGV' "${timeout_seconds}")
  fi

  echo "Codex adapter: Running session ${session} with model ${ADAPTER_MODEL}"

  # Read the prompt before changing directory so a relative path still resolves.
  local prompt
  prompt="$(cat "${prompt_file}")" || return 1

  cd "${worktree}" || return 1

  # ${arr[@]+...} keeps an empty array safe under `set -u` on bash < 4.4.
  if ! ${limiter[@]+"${limiter[@]}"} claude \
    --permission-mode "${permission_mode}" \
    --model "${ADAPTER_MODEL}" \
    --print \
    "${prompt}" 2>&1; then
    echo "ERROR: Codex run failed or timed out after ${timeout_seconds}s"
    return 1
  fi

  echo "Codex adapter: Session ${session} completed"
  return 0
}
|
|
88
|
+
|
|
89
|
+
# Report the status of a session.
#
# Arguments:
#   $1 - runs root directory
#   $2 - session name (sub-directory of the runs root)
# Outputs: one word on stdout: NOT_FOUND, the OUTCOME value recorded in
#          result.env, RUNNING, or UNKNOWN.
# Returns: 1 when the run directory does not exist, 0 otherwise.
adapter_status() {
  local runs_root="${1:?usage: adapter_status RUNS_ROOT SESSION}"
  local session="${2:?usage: adapter_status RUNS_ROOT SESSION}"
  local run_dir="${runs_root}/${session}"

  if [[ ! -d "${run_dir}" ]]; then
    echo "NOT_FOUND"
    return 1
  fi

  # Check for result file. It is sourced inside a subshell with OUTCOME
  # cleared first, so a stale OUTCOME in the caller's environment is never
  # reported as this run's result and the file's variables do not leak out.
  if [[ -f "${run_dir}/result.env" ]]; then
    local outcome
    outcome="$(
      unset OUTCOME
      # shellcheck source=/dev/null
      source "${run_dir}/result.env" >/dev/null 2>&1
      printf '%s' "${OUTCOME:-}"
    )" || true
    echo "${outcome:-UNKNOWN}"
    return 0
  fi

  # Check if a claude process is running for this session.
  # NOTE(review): this matches command lines of the form claude.*SESSION, but
  # adapter_run in this file does not pass the session name to claude —
  # confirm what is expected to match here.
  if pgrep -f "claude.*${session}" >/dev/null 2>&1; then
    echo "RUNNING"
    return 0
  fi

  echo "UNKNOWN"
  return 0
}
|
|
116
|
+
|
|
117
|
+
# Self-register: validate this adapter implements required functions
|
|
118
|
+
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
|
119
|
+
# Running directly - print info
|
|
120
|
+
adapter_info
|
|
121
|
+
echo "---"
|
|
122
|
+
adapter_health_check
|
|
123
|
+
fi
|
|
@@ -86,9 +86,47 @@ printf 'RUNTIME_COMPAT_SKILL_DIR=%s\n' "${RUNTIME_COMPAT_SKILL_DIR}"
|
|
|
86
86
|
printf 'RUNTIME_COMPAT_EXISTS=%s\n' "${runtime_compat_exists}"
|
|
87
87
|
printf 'WORKFLOW_CATALOG=%s\n' "${CATALOG_FILE}"
|
|
88
88
|
printf 'WORKFLOW_CATALOG_EXISTS=%s\n' "${catalog_exists}"
|
|
89
|
+
# Check timeout command (needed for scheduler cross-platform).
# The result is stored in TIMEOUT_CMD as well as printed, because the
# cross-platform tips later in this script test "${TIMEOUT_CMD}"; printing
# alone would leave that variable unset.
if command -v timeout &>/dev/null; then
  TIMEOUT_CMD="timeout"
elif command -v gtimeout &>/dev/null; then
  TIMEOUT_CMD="gtimeout (from coreutils)"
else
  TIMEOUT_CMD="missing (install coreutils for timeout command)"
fi
printf 'TIMEOUT_CMD=%s\n' "${TIMEOUT_CMD}"
|
|
89
97
|
printf 'DOCTOR_STATUS=%s\n' "${status}"
|
|
90
98
|
|
|
99
|
+
# Provide clear next steps based on state
|
|
100
|
+
printf '\n=== NEXT STEPS ===\n'
|
|
101
|
+
if [[ "${status}" == "ok" ]]; then
|
|
102
|
+
printf '✓ All checks passed! No action required.\n'
|
|
103
|
+
printf 'Run ACP: bash %s/tools/bin/setup.sh --profile-id <id>\n' "${FLOW_SKILL_DIR}"
|
|
104
|
+
elif [[ "${status}" == "needs-sync" ]]; then
|
|
105
|
+
printf 'Status: NEEDS-SYNC\n'
|
|
106
|
+
printf 'Run sync to fix issues:\n'
|
|
107
|
+
printf ' bash %q %q %q\n' "${SYNC_SCRIPT}" "${SHARED_AGENT_HOME}" "${RUNTIME_HOME}"
|
|
108
|
+
printf '\nOr run setup with resume:\n'
|
|
109
|
+
printf ' bash %s/tools/bin/setup.sh --resume\n' "${FLOW_SKILL_DIR}"
|
|
110
|
+
if [[ -n "${PROFILE_SELECTION_HINT}" ]]; then
|
|
111
|
+
printf '\nProfile selection hint: %s\n' "${PROFILE_SELECTION_HINT}"
|
|
112
|
+
fi
|
|
113
|
+
else
|
|
114
|
+
printf 'Status: %s\n' "${status}"
|
|
115
|
+
printf 'Check the output above for details.\n'
|
|
116
|
+
fi
|
|
117
|
+
|
|
118
|
+
# Cross-platform tips
|
|
119
|
+
if [[ "${TIMEOUT_CMD}" == *"missing"* ]]; then
|
|
120
|
+
printf '\n⚠ Cross-Platform Tip: Install coreutils for timeout command:\n'
|
|
121
|
+
if [[ "$(uname -s)" == "Darwin" ]]; then
|
|
122
|
+
printf ' macOS: brew install coreutils\n'
|
|
123
|
+
else
|
|
124
|
+
printf ' Linux: sudo apt-get install coreutils (usually pre-installed)\n'
|
|
125
|
+
fi
|
|
126
|
+
fi
|
|
127
|
+
|
|
91
128
|
if [[ -n "${PROFILE_SELECTION_HINT}" ]]; then
|
|
129
|
+
printf '\n=== PROFILE SELECTION ===\n'
|
|
92
130
|
printf 'PROFILE_SELECTION_NEXT_STEP=ACP_PROJECT_ID=<id> bash %s/tools/bin/render-flow-config.sh\n' "${FLOW_SKILL_DIR}"
|
|
93
131
|
fi
|
|
94
132
|
|
|
@@ -99,3 +137,32 @@ if [[ "${status}" != "ok" ]]; then
|
|
|
99
137
|
printf ' bash %q %q %q\n' "${SYNC_SCRIPT}" "${SHARED_AGENT_HOME}" "${RUNTIME_HOME}"
|
|
100
138
|
printf '\nOr run: bash %s/tools/bin/setup.sh --resume\n' "${FLOW_SKILL_DIR}"
|
|
101
139
|
fi
|
|
140
|
+
|
|
141
|
+
# Cross-Platform Dependencies Check
# Reports each required tool as available or MISSING; for a missing tool an
# install hint is printed where one is known (there is none for git).
printf '\n=== CROSS-PLATFORM DEPENDENCIES ===\n'
for cmd in rsync git python3 jq curl; do
  if command -v "$cmd" &>/dev/null; then
    printf '✓ %s: available\n' "$cmd"
    continue
  fi

  printf '✗ %s: MISSING\n' "$cmd"
  case "$cmd" in
    python3)
      printf ' Install Python 3 from https://python.org\n'
      ;;
    rsync|jq|curl)
      # Package-manager hint depends on the platform reported by uname.
      if [[ "$(uname -s)" == "Darwin" ]]; then
        printf ' macOS: brew install %s\n' "$cmd"
      else
        printf ' Linux: sudo apt-get install %s\n' "$cmd"
      fi
      ;;
  esac
done
|
|
@@ -75,8 +75,167 @@ QUOTA_LOCK_DIR="${STATE_ROOT}/quota-preflight.lock"
|
|
|
75
75
|
QUOTA_PID_FILE="${QUOTA_LOCK_DIR}/pid"
|
|
76
76
|
python_bin="$(flow_resolve_python_bin || true)"
|
|
77
77
|
|
|
78
|
+
# Stale lock detection and cleanup.
#
# Inspects the heartbeat-loop and quota-preflight lock directories under
# STATE_ROOT and removes a lock when either
#   * its recorded PID is alive but its parent PID is 1 (treated as orphaned), or
#   * its recorded PID is not alive and the pid file is older than the limit.
#
# Globals:   STATE_ROOT (read)
# Arguments: $1 - maximum lock age in seconds (default 1800 = 30 minutes)
# Outputs:   a warning line on stdout and a log_event record per removed lock
cleanup_stale_locks() {
  local max_age_seconds="${1:-1800}"
  local lock_dir pid_file pid parent_pid mtime lock_age
  local -a lock_dirs=(
    "${STATE_ROOT}/heartbeat-loop.lock"
    "${STATE_ROOT}/quota-preflight.lock"
  )

  for lock_dir in "${lock_dirs[@]}"; do
    pid_file="${lock_dir}/pid"
    [[ -f "${pid_file}" ]] || continue

    pid="$(tr -d '[:space:]' < "${pid_file}" 2>/dev/null || true)"

    # Only a positive integer is a usable PID; anything else is handled like
    # a dead process and falls through to the age check.
    if [[ "${pid}" =~ ^[1-9][0-9]*$ ]] && kill -0 "${pid}" 2>/dev/null; then
      # Process is still running, check if parent is init (orphan).
      # `|| true`: the process may exit between kill -0 and ps; that race
      # must not abort a script running under `set -e` / `pipefail`.
      # NOTE(review): a live process whose parent is PID 1 is not necessarily
      # abandoned (e.g. one started by a service manager) — confirm that
      # removing its lock is intended.
      parent_pid="$(ps -p "${pid}" -o ppid= 2>/dev/null | tr -d '[:space:]' || true)"
      if [[ "${parent_pid}" == "1" ]]; then
        log_event "stale_lock_detected" "type" "orphan" "pid" "$pid" "lock_dir" "$lock_dir"
        echo "Warning: Removing orphan lock (PID $pid, lock: $lock_dir)"
        rm -rf -- "${lock_dir:?}"
      fi
      continue
    fi

    # Process not running, check lock age. GNU stat (-c %Y) is tried first:
    # on GNU systems `stat -f` means --file-system, prints filesystem details
    # to stdout and fails, which would corrupt the captured value if the BSD
    # form (-f %m) came first.
    mtime="$(stat -c %Y "${pid_file}" 2>/dev/null \
      || stat -f %m "${pid_file}" 2>/dev/null \
      || echo "0")"
    [[ "${mtime}" =~ ^[0-9]+$ ]] || mtime=0
    lock_age=$(( $(date +%s) - mtime ))

    if [[ "${lock_age}" -gt "${max_age_seconds}" ]]; then
      log_event "stale_lock_detected" "type" "timeout" "pid" "$pid" "age_seconds" "$lock_age" "lock_dir" "$lock_dir"
      echo "Warning: Removing stale lock (PID $pid, age: ${lock_age}s, lock: $lock_dir)"
      rm -rf -- "${lock_dir:?}"
    fi
  done
}
|
|
112
|
+
|
|
113
|
+
# Structured logging for scheduler observability
|
|
114
|
+
LOG_FILE="${STATE_ROOT}/scheduler-events.jsonl"
|
|
115
|
+
mkdir -p "$(dirname "${LOG_FILE}")"
|
|
116
|
+
|
|
117
|
+
# Escape a string for use inside a JSON string literal and store it in the
# variable named by $1 (backslash, double quote, newline, CR and tab).
_log_event_escape() {
  local _escaped="$2"
  _escaped=${_escaped//\\/\\\\}
  _escaped=${_escaped//\"/\\\"}
  _escaped=${_escaped//$'\n'/\\n}
  _escaped=${_escaped//$'\r'/\\r}
  _escaped=${_escaped//$'\t'/\\t}
  printf -v "$1" '%s' "${_escaped}"
}

# Append one JSON object (a JSONL record) to LOG_FILE.
#
# Globals:   LOG_FILE (appended to)
# Arguments: $1   - event name
#            $2.. - alternating KEY VALUE pairs; every value is written as a
#                   JSON string. A trailing key without a value is written
#                   with an empty string instead of failing.
log_event() {
  local event_type="$1"
  shift
  local timestamp
  timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"

  local event_json key_json value_json
  _log_event_escape event_json "${event_type}"

  local extra_fields=""
  while [[ $# -gt 0 ]]; do
    _log_event_escape key_json "$1"
    _log_event_escape value_json "${2-}"
    extra_fields="${extra_fields}, \"${key_json}\": \"${value_json}\""
    # `shift 2` fails without shifting when one argument is left, which would
    # otherwise spin forever (or abort under `set -e`).
    if [[ $# -ge 2 ]]; then
      shift 2
    else
      shift
    fi
  done

  printf '{"timestamp": "%s", "event": "%s", "pid": %s%s}\n' \
    "${timestamp}" "${event_json}" "$$" "${extra_fields}" >> "${LOG_FILE}"
}
|
|
129
|
+
|
|
130
|
+
# Health check: monitor system resources.
#
# Samples CPU (1-minute load average / core count), memory and the disk usage
# of the filesystem holding STATE_ROOT, each as an integer percentage, logs
# them through log_event and logs a resource_warning for CPU > 80,
# memory > 90 or disk > 90. A metric that cannot be measured on this
# platform is reported as 0.
#
# Globals: STATE_ROOT (read)
# Returns: 1 if any warning threshold was exceeded, 0 otherwise. Callers
#          running under `set -e` must invoke this in a conditional context.
check_system_resources() {
  local cpu_usage=0 mem_usage=0 disk_usage=0
  local warn=0

  # CPU usage (1-min load average / number of cores)
  if command -v nproc >/dev/null 2>&1 && command -v awk >/dev/null 2>&1; then
    local load_1min cores
    load_1min="$(awk '{print $1}' /proc/loadavg 2>/dev/null || echo "0")"
    cores="$(nproc 2>/dev/null || echo "1")"
    cpu_usage="$(awk -v load="${load_1min}" -v cores="${cores}" \
      'BEGIN { if (cores > 0) printf "%.0f", (load / cores) * 100; else printf "0" }' \
      2>/dev/null || echo "0")"
  fi

  # Memory usage
  if command -v free >/dev/null 2>&1; then
    mem_usage="$(free | awk '/Mem:/ && $2 > 0 {printf "%.0f", ($3/$2)*100}' 2>/dev/null || echo "0")"
  elif [[ -f /proc/meminfo ]]; then
    mem_usage="$(awk '
      /^MemTotal:/     { total = $2 }
      /^MemAvailable:/ { avail = $2 }
      END { if (total > 0) printf "%.0f", ((total - avail) / total) * 100; else printf "0" }
    ' /proc/meminfo 2>/dev/null || echo "0")"
  fi

  # Disk usage for STATE_ROOT. -P forces the POSIX one-line-per-filesystem
  # layout, so a long device name cannot wrap and shift the columns.
  disk_usage="$(df -P "${STATE_ROOT}" 2>/dev/null \
    | awk 'NR==2 {gsub(/%/,"",$5); print $5}' || true)"

  # Anything that is not a plain non-negative integer (empty, "nan", ...)
  # would break the numeric comparisons below, so fall back to 0.
  [[ "${cpu_usage}" =~ ^[0-9]+$ ]] || cpu_usage=0
  [[ "${mem_usage}" =~ ^[0-9]+$ ]] || mem_usage=0
  [[ "${disk_usage}" =~ ^[0-9]+$ ]] || disk_usage=0

  # Log resource status
  log_event "system_resources" "cpu_pct" "${cpu_usage}" "mem_pct" "${mem_usage}" "disk_pct" "${disk_usage}"

  # Warnings
  if [[ "${cpu_usage}" -gt 80 ]]; then
    log_event "resource_warning" "type" "cpu" "value" "${cpu_usage}"
    warn=1
  fi
  if [[ "${mem_usage}" -gt 90 ]]; then
    log_event "resource_warning" "type" "memory" "value" "${mem_usage}"
    warn=1
  fi
  if [[ "${disk_usage}" -gt 90 ]]; then
    log_event "resource_warning" "type" "disk" "value" "${disk_usage}"
    warn=1
  fi

  return "${warn}"
}
|
|
176
|
+
|
|
177
|
+
# Collect scheduler metrics for observability and log them as one
# "scheduler_metrics" event.
#
# Globals: REPO_SLUG (read, optional), HISTORY_ROOT (read)
# Metrics (each a single non-negative integer, 0 when it cannot be measured):
#   active_sessions  - tmux sessions whose listing line contains "agent-"
#   queued_issues    - open issues labelled agent-keep-open, as returned by
#                      one `gh issue list` call (subject to gh's result limit)
#   completed_today  - history *.json files matched by `find -mtime 0` that
#   failed_today       contain the completed / failed status marker
collect_metrics() {
  local active_sessions=0
  local queued_issues=0
  local completed_today=0
  local failed_today=0

  # Count active tmux sessions for this repo. grep -c always prints a count
  # but exits 1 on zero matches; `|| true` (not `|| echo 0`) keeps the
  # captured value a single number.
  if command -v tmux >/dev/null 2>&1; then
    active_sessions="$(tmux ls 2>/dev/null | grep -c "agent-" || true)"
  fi

  # Count queued issues. The JSON array length is taken with gh's built-in
  # --jq filter; counting lines with grep reports 1 for any non-empty result
  # whenever gh prints the array on a single line.
  if command -v gh >/dev/null 2>&1 && [[ -n "${REPO_SLUG:-}" ]]; then
    queued_issues="$(gh issue list --repo "${REPO_SLUG}" --label "agent-keep-open" \
      --state open --json number --jq 'length' 2>/dev/null || true)"
  fi

  # Count completed/failed sessions from history (last 24h). `-exec … {} +`
  # keeps file names with spaces intact; tr strips the padding some wc
  # implementations add.
  if [[ -d "${HISTORY_ROOT:-}" ]]; then
    completed_today="$(find "${HISTORY_ROOT}" -name "*.json" -mtime 0 \
      -exec grep -l -- '"status": "completed"' {} + 2>/dev/null \
      | wc -l | tr -d '[:space:]' || true)"
    failed_today="$(find "${HISTORY_ROOT}" -name "*.json" -mtime 0 \
      -exec grep -l -- '"status": "failed"' {} + 2>/dev/null \
      | wc -l | tr -d '[:space:]' || true)"
  fi

  # Guarantee plain integers so the JSONL log entry stays on one line.
  [[ "${active_sessions}" =~ ^[0-9]+$ ]] || active_sessions=0
  [[ "${queued_issues}" =~ ^[0-9]+$ ]] || queued_issues=0
  [[ "${completed_today}" =~ ^[0-9]+$ ]] || completed_today=0
  [[ "${failed_today}" =~ ^[0-9]+$ ]] || failed_today=0

  # Log metrics
  log_event "scheduler_metrics" \
    "active_sessions" "$active_sessions" \
    "queued_issues" "$queued_issues" \
    "completed_today" "$completed_today" \
    "failed_today" "$failed_today"
}
|
|
207
|
+
|
|
208
|
+
# Error tracking for scheduler observability
|
|
209
|
+
ERROR_LOG="${STATE_ROOT}/scheduler-errors.jsonl"
|
|
210
|
+
error_count=0
|
|
211
|
+
|
|
212
|
+
# Escape a string for use inside a JSON string literal and store it in the
# variable named by $1 (backslash, double quote, newline, CR and tab).
_track_error_escape() {
  local _escaped="$2"
  _escaped=${_escaped//\\/\\\\}
  _escaped=${_escaped//\"/\\\"}
  _escaped=${_escaped//$'\n'/\\n}
  _escaped=${_escaped//$'\r'/\\r}
  _escaped=${_escaped//$'\t'/\\t}
  printf -v "$1" '%s' "${_escaped}"
}

# Record a scheduler error.
#
# Globals:   ERROR_LOG (appended to), error_count (incremented)
# Arguments: $1 - error type
#            $2 - error message
# Outputs:   one JSON line in ERROR_LOG, a "scheduler_error" event via
#            log_event, and a warning on stderr once more than 10 errors
#            have been counted.
track_error() {
  local error_type="$1"
  local error_msg="$2"
  local timestamp
  timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"

  # Plain assignment rather than ((error_count++)): the post-increment
  # evaluates to 0 on the first call, which gives exit status 1 and aborts
  # a script running under `set -e`.
  error_count=$(( ${error_count:-0} + 1 ))

  # Log to JSONL
  local type_json msg_json
  _track_error_escape type_json "${error_type}"
  _track_error_escape msg_json "${error_msg}"
  printf '{"timestamp": "%s", "type": "%s", "message": "%s", "pid": %s}\n' \
    "${timestamp}" "${type_json}" "${msg_json}" "$$" >> "${ERROR_LOG}"

  # Also log as event (raw values are passed through unchanged).
  log_event "scheduler_error" "type" "${error_type}" "message" "${error_msg}"

  # Alert if too many errors
  if [[ "${error_count}" -gt 10 ]]; then
    log_event "error_threshold_exceeded" "count" "$error_count"
    echo "Warning: High error count detected ($error_count errors)" >&2
  fi
}
|
|
232
|
+
|
|
78
233
|
mkdir -p "${AGENT_ROOT}" "${RUNS_ROOT}" "${STATE_ROOT}" "${HISTORY_ROOT}" "${WORKTREE_ROOT}" "${MEMORY_DIR}"
|
|
79
234
|
|
|
235
|
+
cleanup_stale_locks 1800 # Clean locks older than 30 minutes
|
|
236
|
+
collect_metrics
|
|
237
|
+
log_event "heartbeat_start" "repo_slug" "${REPO_SLUG}"
|
|
238
|
+
|
|
80
239
|
if [[ -z "${python_bin}" || ! -x "${python_bin}" ]]; then
|
|
81
240
|
echo "unable to resolve a runnable python interpreter for heartbeat-safe-auto.sh" >&2
|
|
82
241
|
exit 1
|
|
@@ -605,6 +764,7 @@ write_shared_loop_status "running" ""
|
|
|
605
764
|
--heavy-deferred-message "E2E-heavy issues remain queued until the single e2e slot is free."; then
|
|
606
765
|
write_shared_loop_status "idle" "0"
|
|
607
766
|
printf '[%s] shared heartbeat loop end status=0\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
|
767
|
+
log_event "heartbeat_complete" "status" "0"
|
|
608
768
|
else
|
|
609
769
|
loop_status=$?
|
|
610
770
|
write_shared_loop_status "idle" "${loop_status}"
|
|
@@ -612,6 +772,7 @@ else
|
|
|
612
772
|
printf 'HEARTBEAT_LOOP_TIMEOUT=yes\n'
|
|
613
773
|
fi
|
|
614
774
|
printf '[%s] shared heartbeat loop end status=%s\n' "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" "${loop_status}"
|
|
775
|
+
log_event "heartbeat_complete" "status" "${loop_status}"
|
|
615
776
|
exit "${loop_status}"
|
|
616
777
|
fi
|
|
617
778
|
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# kilo-adapter.sh
|
|
3
|
+
# Adapter implementation for Kilo Code
|
|
4
|
+
|
|
5
|
+
set -euo pipefail
|
|
6
|
+
|
|
7
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
8
|
+
source "${SCRIPT_DIR}/adapter-interface.sh"
|
|
9
|
+
source "${SCRIPT_DIR}/adapter-capabilities.sh"
|
|
10
|
+
|
|
11
|
+
ADAPTER_ID="kilo"
|
|
12
|
+
ADAPTER_NAME="Kilo Code"
|
|
13
|
+
ADAPTER_TYPE="cloud-api"
|
|
14
|
+
ADAPTER_VERSION="1.0.0"
|
|
15
|
+
ADAPTER_MODEL="${KILO_MODEL:-anthropic/claude-sonnet-4-20250514}"
|
|
16
|
+
|
|
17
|
+
# Kilo capabilities
|
|
18
|
+
ADAPTER_CAP_CLOUD_API=true
|
|
19
|
+
ADAPTER_CAP_STREAMING=true
|
|
20
|
+
ADAPTER_CAP_JSON_OUTPUT=true
|
|
21
|
+
ADAPTER_CAP_MAX_TIMEOUT=900
|
|
22
|
+
|
|
23
|
+
# Print adapter metadata on stdout as key=value lines, one field per line,
# taken from the ADAPTER_* globals set at the top of this file.
adapter_info() {
  printf '%s\n' \
    "id=${ADAPTER_ID}" \
    "name=${ADAPTER_NAME}" \
    "type=${ADAPTER_TYPE}" \
    "version=${ADAPTER_VERSION}" \
    "model=${ADAPTER_MODEL}"
}
|
|
32
|
+
|
|
33
|
+
# Health check for the Kilo adapter.
#
# Outputs: one status line per finding on stdout (ERROR / WARN / INFO / OK).
# Returns: 1 when the kilo CLI is missing or `kilo --version` fails,
#          0 otherwise.
adapter_health_check() {
  if ! command -v kilo >/dev/null 2>&1; then
    echo "ERROR: kilo CLI not found in PATH"
    return 1
  fi

  # A single `kilo --version` call both proves the CLI can run and yields
  # the version string, instead of starting the CLI twice.
  local version
  if ! version="$(kilo --version 2>/dev/null)"; then
    echo "ERROR: kilo CLI cannot run (check installation)"
    return 1
  fi

  if [[ -z "${version}" ]]; then
    echo "WARN: Could not detect kilo version"
  else
    echo "INFO: Kilo version: ${version}"
  fi

  # Verify model is specified
  if [[ -z "${ADAPTER_MODEL}" ]]; then
    echo "WARN: No model specified for Kilo adapter"
  fi

  echo "OK: Kilo adapter healthy (model: ${ADAPTER_MODEL})"
  return 0
}
|
|
61
|
+
|
|
62
|
+
# Run a task using the kilo CLI.
#
# Arguments:
#   $1 - mode (required for interface parity; not used by this adapter)
#   $2 - session name, used only in progress messages
#   $3 - worktree directory; the function cd's into it, so the caller's
#        working directory changes
#   $4 - prompt file; its contents are passed to kilo as the prompt argument
# Environment:
#   KILO_TIMEOUT_SECONDS (default: 900), ADAPTER_MODEL
# Outputs: progress and validation lines on stdout; when kilo fails, its
#          captured output is written to stderr for diagnosis.
# Returns: 0 on success; 1 if the prompt file is missing or empty, the
#          worktree cannot be entered, or kilo fails or times out.
# NOTE(review): on success the captured kilo output is only validated, never
# printed — confirm that callers do not need it.
adapter_run() {
  local mode="${1:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local session="${2:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local worktree="${3:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local prompt_file="${4:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"

  # Validate prompt file
  if [[ ! -f "${prompt_file}" ]]; then
    echo "ERROR: Prompt file not found: ${prompt_file}"
    return 1
  fi
  if [[ ! -s "${prompt_file}" ]]; then
    echo "ERROR: Prompt file is empty: ${prompt_file}"
    return 1
  fi

  local timeout_seconds="${KILO_TIMEOUT_SECONDS:-900}"

  # Pick whichever time limiter exists: GNU timeout, coreutils' gtimeout
  # (Homebrew on macOS), or a perl alarm. With none available the command
  # runs without a limit.
  local -a limiter=()
  if command -v timeout >/dev/null 2>&1; then
    limiter=(timeout "${timeout_seconds}")
  elif command -v gtimeout >/dev/null 2>&1; then
    limiter=(gtimeout "${timeout_seconds}")
  elif command -v perl >/dev/null 2>&1; then
    limiter=(perl -e 'alarm shift; exec { $ARGV[0] } @ARGV' "${timeout_seconds}")
  fi

  echo "Kilo adapter: Running session ${session} with model ${ADAPTER_MODEL}"

  # Read the prompt before changing directory: the file was validated
  # relative to the current directory, so a relative path must be read from
  # there too. `local` keeps the prompt out of the caller's scope.
  local prompt
  prompt="$(cat "${prompt_file}")" || return 1

  cd "${worktree}" || return 1

  # Run kilo and capture output.
  # ${arr[@]+...} keeps an empty array safe under `set -u` on bash < 4.4.
  local output status=0
  output="$(${limiter[@]+"${limiter[@]}"} kilo --model "${ADAPTER_MODEL}" "${prompt}" 2>&1)" || status=$?
  if [[ "${status}" -ne 0 ]]; then
    if [[ -n "${output}" ]]; then
      printf '%s\n' "${output}" >&2
    fi
    echo "ERROR: Kilo run failed or timed out after ${timeout_seconds}s"
    return 1
  fi

  # Validate JSON stream output (kilo outputs JSON events)
  if ! printf '%s\n' "${output}" \
    | python3 -c "import sys, json; [json.loads(line) for line in sys.stdin if line.strip()]" 2>/dev/null; then
    echo "WARN: Kilo output is not valid JSON stream"
  else
    echo "INFO: Kilo output validated as JSON stream"
  fi

  echo "Kilo adapter: Session ${session} completed"
  return 0
}
|
|
103
|
+
|
|
104
|
+
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
|
105
|
+
adapter_info
|
|
106
|
+
echo "---"
|
|
107
|
+
adapter_health_check
|
|
108
|
+
fi
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ollama-adapter.sh
|
|
3
|
+
# Adapter implementation for Ollama local models
|
|
4
|
+
# Implements: adapter-interface.sh
|
|
5
|
+
|
|
6
|
+
set -euo pipefail
|
|
7
|
+
|
|
8
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
9
|
+
source "${SCRIPT_DIR}/adapter-interface.sh"
|
|
10
|
+
source "${SCRIPT_DIR}/adapter-capabilities.sh"
|
|
11
|
+
|
|
12
|
+
# Ollama adapter metadata
|
|
13
|
+
ADAPTER_ID="ollama"
|
|
14
|
+
ADAPTER_NAME="Ollama Local Models"
|
|
15
|
+
ADAPTER_TYPE="local-model"
|
|
16
|
+
ADAPTER_VERSION="1.0.0"
|
|
17
|
+
ADAPTER_MODEL="${OLLAMA_MODEL:-qwen2.5-coder:7b}"
|
|
18
|
+
ADAPTER_BASE_URL="${OLLAMA_BASE_URL:-http://localhost:11434}"
|
|
19
|
+
|
|
20
|
+
# Ollama capabilities
|
|
21
|
+
ADAPTER_CAP_LOCAL_MODEL=true
|
|
22
|
+
ADAPTER_CAP_STREAMING=true
|
|
23
|
+
ADAPTER_CAP_TOOLS_SUPPORT=true
|
|
24
|
+
ADAPTER_CAP_CONTEXT_WINDOW=32768 # Default, will be detected dynamically
|
|
25
|
+
ADAPTER_CAP_MAX_TIMEOUT=3600
|
|
26
|
+
|
|
27
|
+
# Print adapter metadata on stdout as key=value lines, one field per line,
# taken from the ADAPTER_* globals set at the top of this file.
adapter_info() {
  printf '%s\n' \
    "id=${ADAPTER_ID}" \
    "name=${ADAPTER_NAME}" \
    "type=${ADAPTER_TYPE}" \
    "version=${ADAPTER_VERSION}" \
    "model=${ADAPTER_MODEL}" \
    "base_url=${ADAPTER_BASE_URL}"
}
|
|
38
|
+
|
|
39
|
+
# Health check: verify ollama is running and the model is available.
#
# Globals:  ADAPTER_BASE_URL, ADAPTER_MODEL (read);
#           ADAPTER_CAP_CONTEXT_WINDOW (overwritten when a context length is
#           detected)
# Outputs:  one status line per finding on stdout (ERROR / WARN / INFO / OK),
#           plus the output of `ollama pull` when a pull is attempted.
# Returns:  1 when the API is unreachable or the model cannot be pulled,
#           0 otherwise.
adapter_health_check() {
  # Check if ollama is running
  if ! curl -sf "${ADAPTER_BASE_URL}/api/tags" >/dev/null 2>&1; then
    echo "ERROR: Ollama not reachable at ${ADAPTER_BASE_URL}"
    return 1
  fi

  # Check if model is available (try to pull if not). The listing is captured
  # first: piping straight into `grep -q` lets grep exit early, and under
  # `pipefail` the resulting SIGPIPE on `ollama list` reads as "not found".
  # -F matches the model name literally instead of as a regex.
  local installed
  installed="$(ollama list 2>/dev/null || true)"
  if ! grep -qF -- "${ADAPTER_MODEL}" <<<"${installed}"; then
    echo "WARN: Model ${ADAPTER_MODEL} not found locally. Attempting pull..."
    if ! ollama pull "${ADAPTER_MODEL}" 2>&1; then
      echo "ERROR: Failed to pull model ${ADAPTER_MODEL}"
      return 1
    fi
  fi

  # Detect context window and update capability dynamically.
  # Ollama API returns context_length inside model_info with architecture
  # prefix, e.g., qwen2.context_length, llama.context_length, etc.
  local show_json context_window=""
  show_json="$(curl -sf "${ADAPTER_BASE_URL}/api/show" \
    -d "{\"name\":\"${ADAPTER_MODEL}\"}" 2>/dev/null || true)"
  if command -v jq &>/dev/null; then
    context_window="$(jq -r '.model_info // {} | to_entries[] | select(.key | endswith("context_length")) | .value' \
      <<<"${show_json}" 2>/dev/null | head -n 1 || true)"
  else
    # Fallback without jq. The key prefix may contain digits (qwen2, llama3),
    # so it is matched with a class wider than [a-z].
    context_window="$(grep -Eo '"[A-Za-z0-9_.-]*context_length"[[:space:]]*:[[:space:]]*[0-9]+' \
      <<<"${show_json}" | head -n 1 | grep -Eo '[0-9]+$' || true)"
  fi

  # Accept only a plain integer; jq prints "null" for a null value.
  if [[ "${context_window}" =~ ^[0-9]+$ ]]; then
    ADAPTER_CAP_CONTEXT_WINDOW="${context_window}"
    echo "INFO: Detected context window: ${context_window} tokens"
  else
    echo "WARN: Could not detect context window from Ollama API"
  fi

  echo "OK: Ollama healthy, model ${ADAPTER_MODEL} available"
  return 0
}
|
|
76
|
+
|
|
77
|
+
# Run a task using ollama.
#
# Arguments:
#   $1 - mode (required for interface parity; not used by this adapter)
#   $2 - session name, used only in progress messages
#   $3 - worktree directory (required for interface parity; this adapter
#        does not change into it)
#   $4 - prompt file; its contents are passed to `ollama run` as the prompt
# Environment:
#   OLLAMA_TIMEOUT_SECONDS (default: 900), ADAPTER_MODEL
# Outputs: progress lines plus ollama's combined stdout/stderr on stdout.
# Returns: 0 on success; 1 if the prompt file is missing or empty, or
#          ollama fails or times out.
adapter_run() {
  local mode="${1:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local session="${2:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local worktree="${3:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"
  local prompt_file="${4:?usage: adapter_run MODE SESSION WORKTREE PROMPT_FILE}"

  # Validate prompt file
  if [[ ! -f "${prompt_file}" ]]; then
    echo "ERROR: Prompt file not found: ${prompt_file}"
    return 1
  fi
  if [[ ! -s "${prompt_file}" ]]; then
    echo "ERROR: Prompt file is empty: ${prompt_file}"
    return 1
  fi

  local timeout_seconds="${OLLAMA_TIMEOUT_SECONDS:-900}"

  echo "Ollama adapter: Running session ${session} with model ${ADAPTER_MODEL}"

  # Read the prompt
  local prompt
  prompt="$(cat "${prompt_file}")" || return 1

  # Pick whichever time limiter exists: GNU timeout, coreutils' gtimeout
  # (Homebrew on macOS), or a perl alarm. The timeout reaches perl as an
  # argument rather than being spliced into the perl source. With no limiter
  # the command runs unbounded and the failure message says so.
  local -a limiter=()
  local failure_msg="ERROR: Ollama run failed or timed out after ${timeout_seconds}s"
  if command -v timeout >/dev/null 2>&1; then
    limiter=(timeout "${timeout_seconds}")
  elif command -v gtimeout >/dev/null 2>&1; then
    limiter=(gtimeout "${timeout_seconds}")
  elif command -v perl >/dev/null 2>&1; then
    limiter=(perl -e 'alarm shift; exec { $ARGV[0] } @ARGV' "${timeout_seconds}")
  else
    failure_msg="ERROR: Ollama run failed"
  fi

  # Run ollama with the prompt.
  # ${arr[@]+...} keeps an empty array safe under `set -u` on bash < 4.4.
  if ! ${limiter[@]+"${limiter[@]}"} ollama run "${ADAPTER_MODEL}" "${prompt}" 2>&1; then
    echo "${failure_msg}"
    return 1
  fi

  echo "Ollama adapter: Session ${session} completed"
  return 0
}
|
|
125
|
+
|
|
126
|
+
# Status check (override default).
#
# Arguments:
#   $1 - runs root directory
#   $2 - session name (sub-directory of the runs root)
# Outputs: one word on stdout: NOT_FOUND, the OUTCOME value recorded in
#          result.env, RUNNING, or UNKNOWN.
# Returns: 1 when the run directory does not exist, 0 otherwise.
adapter_status() {
  local runs_root="${1:?usage: adapter_status RUNS_ROOT SESSION}"
  local session="${2:?usage: adapter_status RUNS_ROOT SESSION}"
  local run_dir="${runs_root}/${session}"

  if [[ ! -d "${run_dir}" ]]; then
    echo "NOT_FOUND"
    return 1
  fi

  # Check for result file. It is sourced inside a subshell with OUTCOME
  # cleared first, so a stale OUTCOME in the caller's environment is never
  # reported as this run's result and the file's variables do not leak out.
  if [[ -f "${run_dir}/result.env" ]]; then
    local outcome
    outcome="$(
      unset OUTCOME
      # shellcheck source=/dev/null
      source "${run_dir}/result.env" >/dev/null 2>&1
      printf '%s' "${OUTCOME:-}"
    )" || true
    echo "${outcome:-UNKNOWN}"
    return 0
  fi

  # Check if an ollama process is running for the configured model.
  # NOTE(review): the match is on the model, not on the session, so any
  # concurrent `ollama run` of the same model reports RUNNING — confirm.
  if pgrep -f "ollama run ${ADAPTER_MODEL}" >/dev/null 2>&1; then
    echo "RUNNING"
    return 0
  fi

  echo "UNKNOWN"
  return 0
}
|
|
153
|
+
|
|
154
|
+
# Self-register: validate this adapter implements required functions
|
|
155
|
+
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
|
156
|
+
# Running directly - print info
|
|
157
|
+
adapter_info
|
|
158
|
+
echo "---"
|
|
159
|
+
adapter_health_check
|
|
160
|
+
fi
|