agent-control-plane 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/npm/bin/agent-control-plane.js +39 -2
- package/package.json +6 -3
- package/tools/bin/agent-project-catch-up-merged-prs +1 -0
- package/tools/bin/agent-project-cleanup-session +49 -5
- package/tools/bin/agent-project-heartbeat-loop +119 -1471
- package/tools/bin/agent-project-reconcile-issue-session +66 -105
- package/tools/bin/agent-project-reconcile-pr-session +76 -111
- package/tools/bin/agent-project-run-claude-session +10 -0
- package/tools/bin/agent-project-run-codex-resilient +86 -9
- package/tools/bin/agent-project-run-codex-session +16 -5
- package/tools/bin/agent-project-run-kilo-session +10 -0
- package/tools/bin/agent-project-run-openclaw-session +10 -0
- package/tools/bin/agent-project-run-opencode-session +10 -0
- package/tools/bin/agent-project-worker-status +10 -7
- package/tools/bin/cleanup-worktree.sh +6 -1
- package/tools/bin/flow-config-lib.sh +80 -0
- package/tools/bin/flow-resident-worker-lib.sh +119 -1
- package/tools/bin/flow-shell-lib.sh +24 -0
- package/tools/bin/heartbeat-loop-cache-lib.sh +164 -0
- package/tools/bin/heartbeat-loop-counting-lib.sh +306 -0
- package/tools/bin/heartbeat-loop-pr-strategy-lib.sh +199 -0
- package/tools/bin/heartbeat-loop-scheduling-lib.sh +506 -0
- package/tools/bin/heartbeat-loop-worker-lib.sh +319 -0
- package/tools/bin/heartbeat-recovery-preflight.sh +12 -1
- package/tools/bin/heartbeat-safe-auto.sh +14 -3
- package/tools/bin/project-launchd-bootstrap.sh +11 -8
- package/tools/bin/reconcile-bootstrap-lib.sh +113 -0
- package/tools/bin/resident-issue-controller-lib.sh +448 -0
- package/tools/bin/resident-issue-queue-status.py +35 -0
- package/tools/bin/start-resident-issue-loop.sh +26 -437
- package/tools/dashboard/app.js +7 -0
- package/tools/dashboard/dashboard_snapshot.py +13 -29
- package/SKILL.md +0 -149
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# resident-issue-controller-lib.sh — controller_* functions for the resident
|
|
3
|
+
# issue loop. Sourced by start-resident-issue-loop.sh to keep the main script
|
|
4
|
+
# focused on the top-level loop logic.
|
|
5
|
+
|
|
6
|
+
# Delete the pending-marker file that belongs to an issue.
# Globals:   ISSUE_ID (read) - used when no argument is supplied
# Arguments: $1 - issue id (optional)
# Returns:   0 when no issue id is available, otherwise the status of rm -f
controller_unregister_pending_issue() {
  local target_issue="${1:-${ISSUE_ID:-}}"

  if [[ -z "${target_issue}" ]]; then
    return 0
  fi

  # The marker path is resolved by issue_pending_file (defined elsewhere).
  rm -f "$(issue_pending_file "${target_issue}")"
}
|
|
11
|
+
|
|
12
|
+
# Record this shell's PID in the pending-marker file of the current issue.
# Globals: ISSUE_ID (read)
# Returns: 0 without writing anything when ISSUE_ID is empty or unset
controller_register_pending_issue() {
  if [[ -z "${ISSUE_ID:-}" ]]; then
    return 0
  fi

  printf '%s\n' "$$" >"$(issue_pending_file "${ISSUE_ID}")"
}
|
|
16
|
+
|
|
17
|
+
# Clear the worker/provider execution variables from this shell, re-run the
# config export helpers against CONFIG_YAML, and refresh CODING_WORKER plus
# the ACTIVE_PROVIDER_* snapshot.
# Globals: CONFIG_YAML (read), CODING_WORKER (written),
#          ACP_* / F_LOSNING_* execution variables (unset before the export)
controller_refresh_execution_context() {
  local _acp_ctx_name=""

  # ACP_CODING_WORKER is the only variable cleared without an F_LOSNING_ twin.
  unset ACP_CODING_WORKER
  for _acp_ctx_name in \
    CODEX_PROFILE_SAFE \
    CODEX_PROFILE_BYPASS \
    CLAUDE_MODEL \
    CLAUDE_PERMISSION_MODE \
    CLAUDE_EFFORT \
    CLAUDE_TIMEOUT_SECONDS \
    CLAUDE_MAX_ATTEMPTS \
    CLAUDE_RETRY_BACKOFF_SECONDS \
    OPENCLAW_MODEL \
    OPENCLAW_THINKING \
    OPENCLAW_TIMEOUT_SECONDS \
    OPENCLAW_STALL_SECONDS \
    ACTIVE_PROVIDER_POOL_NAME \
    ACTIVE_PROVIDER_BACKEND \
    ACTIVE_PROVIDER_MODEL \
    ACTIVE_PROVIDER_KEY \
    PROVIDER_POOLS_EXHAUSTED \
    PROVIDER_POOL_SELECTION_REASON \
    PROVIDER_POOL_NEXT_ATTEMPT_EPOCH \
    PROVIDER_POOL_NEXT_ATTEMPT_AT \
    PROVIDER_POOL_LAST_REASON; do
    unset "ACP_${_acp_ctx_name}" "F_LOSNING_${_acp_ctx_name}"
  done

  flow_export_execution_env "${CONFIG_YAML}"
  flow_export_project_env_aliases

  # Default to codex when the export did not choose a worker.
  CODING_WORKER="${ACP_CODING_WORKER:-codex}"
  controller_capture_active_provider_context
}
|
|
46
|
+
|
|
47
|
+
# Choose the resident lane for the current issue and derive its worker key
# and lane metadata file.
# Globals:   CODING_WORKER, MODE, CONFIG_YAML (read);
#            ACTIVE_RESIDENT_LANE_KIND, ACTIVE_RESIDENT_LANE_VALUE,
#            ACTIVE_RESIDENT_WORKER_KEY, ACTIVE_RESIDENT_META_FILE (written)
# Arguments: $1 - "yes" for a scheduled issue (default "no")
#            $2 - schedule interval in seconds (default 0)
controller_refresh_issue_lane_context() {
  local scheduled_flag="${1:-no}"
  local interval_seconds="${2:-0}"

  case "${scheduled_flag}" in
    yes)
      ACTIVE_RESIDENT_LANE_KIND="scheduled"
      ACTIVE_RESIDENT_LANE_VALUE="${interval_seconds}"
      ;;
    *)
      # Anything other than "yes" lands in the shared recurring lane.
      ACTIVE_RESIDENT_LANE_KIND="recurring"
      ACTIVE_RESIDENT_LANE_VALUE="general"
      ;;
  esac

  ACTIVE_RESIDENT_WORKER_KEY="$(
    flow_resident_issue_lane_key \
      "${CODING_WORKER}" "${MODE}" \
      "${ACTIVE_RESIDENT_LANE_KIND}" "${ACTIVE_RESIDENT_LANE_VALUE}"
  )"
  ACTIVE_RESIDENT_META_FILE="$(
    flow_resident_issue_lane_meta_file "${CONFIG_YAML}" "${ACTIVE_RESIDENT_WORKER_KEY}"
  )"
}
|
|
62
|
+
|
|
63
|
+
# Look up a live controller for the active lane key, passing this shell's PID
# to the lookup helper.
# Globals: ACTIVE_RESIDENT_WORKER_KEY, CONFIG_YAML (read)
# Outputs: whatever flow_resident_live_issue_controller_for_key prints
# Returns: 0 when the helper succeeds; 1 when no lane key is set or it fails
controller_live_lane_peer() {
  if [[ -z "${ACTIVE_RESIDENT_WORKER_KEY}" ]]; then
    return 1
  fi

  if ! flow_resident_live_issue_controller_for_key \
    "${CONFIG_YAML}" "${ACTIVE_RESIDENT_WORKER_KEY}" "$$"; then
    return 1
  fi

  return 0
}
|
|
67
|
+
|
|
68
|
+
# Decide whether this controller must yield because another live controller
# already owns the active lane.
# Globals: ISSUE_ID, CONFIG_YAML (read); CONTROLLER_REASON (written)
# Returns: 1 when no live peer is reported; 0 after recording why we yield
controller_yield_to_live_lane_peer() {
  local peer_record=""
  local peer_issue=""
  local peer_state=""

  peer_record="$(controller_live_lane_peer || true)"
  if [[ -z "${peer_record}" ]]; then
    return 1
  fi

  peer_issue="$(awk -F= '/^ISSUE_ID=/{print $2; exit}' <<<"${peer_record}")"
  peer_state="$(awk -F= '/^CONTROLLER_STATE=/{print $2; exit}' <<<"${peer_record}")"

  # A peer with no issue id, or one already on our issue, is a duplicate of us.
  if [[ -z "${peer_issue}" || "${peer_issue}" == "${ISSUE_ID}" ]]; then
    CONTROLLER_REASON="duplicate-live-lane-controller"
    return 0
  fi

  # The peer is busy with a different issue: queue ours (best effort).
  flow_resident_issue_enqueue "${CONFIG_YAML}" "${ISSUE_ID}" "resident-live-lane" >/dev/null || true
  CONTROLLER_REASON="live-lane-controller-${peer_issue}-${peer_state:-running}"
  return 0
}
|
|
88
|
+
|
|
89
|
+
# Snapshot the provider selection from the environment into ACTIVE_PROVIDER_*.
# Each value prefers the ACP_ variable and falls back to its F_LOSNING_ twin.
# Globals: ACP_* / F_LOSNING_* provider variables, CODING_WORKER (read);
#          ACTIVE_PROVIDER_* (written)
controller_capture_active_provider_context() {
  local mapping=""
  local acp_ref=""
  local legacy_ref=""

  # "<ACTIVE_PROVIDER_ suffix>:<environment suffix>" pairs.
  for mapping in \
    "POOL_NAME:ACTIVE_PROVIDER_POOL_NAME" \
    "MODEL:ACTIVE_PROVIDER_MODEL" \
    "KEY:ACTIVE_PROVIDER_KEY" \
    "SELECTION_REASON:PROVIDER_POOL_SELECTION_REASON" \
    "NEXT_ATTEMPT_EPOCH:PROVIDER_POOL_NEXT_ATTEMPT_EPOCH" \
    "NEXT_ATTEMPT_AT:PROVIDER_POOL_NEXT_ATTEMPT_AT" \
    "LAST_REASON:PROVIDER_POOL_LAST_REASON"; do
    acp_ref="ACP_${mapping#*:}"
    legacy_ref="F_LOSNING_${mapping#*:}"
    printf -v "ACTIVE_PROVIDER_${mapping%%:*}" '%s' "${!acp_ref:-${!legacy_ref:-}}"
  done

  # The backend additionally falls back to the configured coding worker.
  ACTIVE_PROVIDER_BACKEND="${ACP_ACTIVE_PROVIDER_BACKEND:-${F_LOSNING_ACTIVE_PROVIDER_BACKEND:-${CODING_WORKER:-}}}"

  # With no explicit model, use the backend-specific model/profile setting.
  if [[ -z "${ACTIVE_PROVIDER_MODEL}" ]]; then
    case "${ACTIVE_PROVIDER_BACKEND}" in
      openclaw) ACTIVE_PROVIDER_MODEL="${ACP_OPENCLAW_MODEL:-${F_LOSNING_OPENCLAW_MODEL:-}}" ;;
      claude) ACTIVE_PROVIDER_MODEL="${ACP_CLAUDE_MODEL:-${F_LOSNING_CLAUDE_MODEL:-}}" ;;
      codex) ACTIVE_PROVIDER_MODEL="${ACP_CODEX_PROFILE_SAFE:-${F_LOSNING_CODEX_PROFILE_SAFE:-}}" ;;
      opencode) ACTIVE_PROVIDER_MODEL="${ACP_OPENCODE_MODEL:-${F_LOSNING_OPENCODE_MODEL:-}}" ;;
      kilo) ACTIVE_PROVIDER_MODEL="${ACP_KILO_MODEL:-${F_LOSNING_KILO_MODEL:-}}" ;;
    esac
  fi

  # Derive a key only when none was supplied and both parts are known.
  if [[ -n "${ACTIVE_PROVIDER_KEY}" || -z "${ACTIVE_PROVIDER_BACKEND}" || -z "${ACTIVE_PROVIDER_MODEL}" ]]; then
    return 0
  fi
  ACTIVE_PROVIDER_KEY="$(flow_sanitize_provider_key "${ACTIVE_PROVIDER_BACKEND}-${ACTIVE_PROVIDER_MODEL}")"
}
|
|
123
|
+
|
|
124
|
+
# Copy the ACTIVE_PROVIDER_* pool name, backend, model and key into the
# matching LAST_RECORDED_PROVIDER_* globals.
controller_set_recorded_provider_from_active() {
  local attr=""
  local active_ref=""

  for attr in POOL_NAME BACKEND MODEL KEY; do
    active_ref="ACTIVE_PROVIDER_${attr}"
    printf -v "LAST_RECORDED_PROVIDER_${attr}" '%s' "${!active_ref}"
  done
}
|
|
130
|
+
|
|
131
|
+
# Remember the active provider as the one most recently launched, and seed
# the "recorded" provider when none has been recorded yet.
# Globals: ACTIVE_PROVIDER_*, LAST_RECORDED_PROVIDER_KEY (read);
#          LAST_LAUNCHED_PROVIDER_* (written)
controller_mark_provider_launched() {
  local attr=""
  local active_ref=""

  for attr in POOL_NAME BACKEND MODEL KEY; do
    active_ref="ACTIVE_PROVIDER_${attr}"
    printf -v "LAST_LAUNCHED_PROVIDER_${attr}" '%s' "${!active_ref}"
  done

  [[ -n "${LAST_RECORDED_PROVIDER_KEY}" ]] || controller_set_recorded_provider_from_active
}
|
|
141
|
+
|
|
142
|
+
# Detect a change of active provider and update the switch/failover
# bookkeeping globals accordingly.
# Globals:   ACTIVE_PROVIDER_*, LAST_RECORDED_PROVIDER_* (read);
#            PROVIDER_SWITCH_COUNT, PROVIDER_FAILOVER_COUNT,
#            LAST_PROVIDER_SWITCH_*, LAST_PROVIDER_FROM_*, LAST_PROVIDER_TO_*,
#            LAST_PROVIDER_FAILOVER_AT (written)
# Arguments: $1 - reason for the selection (default "provider-selection")
controller_track_provider_selection() {
  local switch_reason="${1:-provider-selection}"
  local switched_at=""
  local attr=""
  local from_ref=""
  local to_ref=""

  # Without an active provider key there is nothing to compare.
  [[ -n "${ACTIVE_PROVIDER_KEY}" ]] || return 0

  case "${LAST_RECORDED_PROVIDER_KEY}" in
    "")
      # First provider seen: record it, but do not count it as a switch.
      controller_set_recorded_provider_from_active
      return 0
      ;;
    "${ACTIVE_PROVIDER_KEY}")
      return 0
      ;;
  esac

  switched_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
  PROVIDER_SWITCH_COUNT=$((PROVIDER_SWITCH_COUNT + 1))
  LAST_PROVIDER_SWITCH_AT="${switched_at}"
  LAST_PROVIDER_SWITCH_REASON="${switch_reason}"

  for attr in POOL_NAME BACKEND MODEL KEY; do
    from_ref="LAST_RECORDED_PROVIDER_${attr}"
    to_ref="ACTIVE_PROVIDER_${attr}"
    printf -v "LAST_PROVIDER_FROM_${attr}" '%s' "${!from_ref}"
    printf -v "LAST_PROVIDER_TO_${attr}" '%s' "${!to_ref}"
  done

  if [[ "${switch_reason}" == "provider-failover" ]]; then
    PROVIDER_FAILOVER_COUNT=$((PROVIDER_FAILOVER_COUNT + 1))
    LAST_PROVIDER_FAILOVER_AT="${switched_at}"
  fi

  controller_set_recorded_provider_from_active
}
|
|
177
|
+
|
|
178
|
+
# Point the controller at a new issue: retire the previous issue's pending
# marker and controller file, then reset the per-issue state.
# Globals:   ISSUE_ID, CONTROLLER_FILE (read and rewritten);
#            ISSUE_SESSION_PREFIX, CONFIG_YAML (read);
#            SESSION, RESIDENT_META_FILE, CONTROLLER_LOOP_COUNT,
#            NEXT_WAKE_*, IDLE_WAIT_STARTED_EPOCH, ACTIVE_RESIDENT_* (written)
# Arguments: $1 - id of the issue to adopt (required)
controller_adopt_issue() {
  local incoming_issue="${1:?issue id required}"
  local outgoing_issue="${ISSUE_ID:-}"
  local outgoing_controller_file="${CONTROLLER_FILE:-}"

  # Only clean up when we are actually moving away from a different issue.
  if [[ -n "${outgoing_issue}" && "${outgoing_issue}" != "${incoming_issue}" ]]; then
    controller_unregister_pending_issue "${outgoing_issue}"
    [[ -n "${outgoing_controller_file}" && -f "${outgoing_controller_file}" ]] \
      && rm -f "${outgoing_controller_file}"
  fi

  ISSUE_ID="${incoming_issue}"
  SESSION="${ISSUE_SESSION_PREFIX}${ISSUE_ID}"
  CONTROLLER_FILE="$(flow_resident_issue_controller_file "${CONFIG_YAML}" "${ISSUE_ID}")"
  RESIDENT_META_FILE="$(flow_resident_issue_meta_file "${CONFIG_YAML}" "${ISSUE_ID}")"

  # Loop, wake-up and lane state always start from scratch after adoption.
  CONTROLLER_LOOP_COUNT="0"
  NEXT_WAKE_EPOCH="" NEXT_WAKE_AT=""
  IDLE_WAIT_STARTED_EPOCH=""
  ACTIVE_RESIDENT_WORKER_KEY="" ACTIVE_RESIDENT_META_FILE=""
  ACTIVE_RESIDENT_LANE_KIND="" ACTIVE_RESIDENT_LANE_VALUE=""
}
|
|
203
|
+
|
|
204
|
+
# Tell the heartbeat layer, when it is loaded, that the current issue is
# running. Both heartbeat hooks are optional and every failure is ignored.
# Globals: ISSUE_ID (read)
# Returns: always 0
controller_mark_issue_running() {
  local heavy_flag="no"

  if declare -F heartbeat_issue_is_heavy >/dev/null 2>&1; then
    heavy_flag="$(heartbeat_issue_is_heavy "${ISSUE_ID}" 2>/dev/null || printf 'no\n')"
  fi

  # Nothing to notify when the heartbeat hook is not defined.
  declare -F heartbeat_mark_issue_running >/dev/null 2>&1 || return 0

  heartbeat_mark_issue_running "${ISSUE_ID}" "${heavy_flag}" >/dev/null 2>&1 || true
}
|
|
215
|
+
|
|
216
|
+
# Report a failed launch of the current issue to the heartbeat layer when its
# hook is defined; the hook's output and status are discarded.
# Globals: ISSUE_ID (read)
# Returns: always 0
controller_rollback_issue_launch() {
  declare -F heartbeat_issue_launch_failed >/dev/null 2>&1 || return 0

  heartbeat_issue_launch_failed "${ISSUE_ID}" >/dev/null 2>&1 || true
}
|
|
221
|
+
|
|
222
|
+
# Adopt the next recurring issue: prefer one claimed through
# flow_resident_issue_claim_next and fall back to
# select_next_recurring_issue_id when the claim yields no issue id.
# Globals: CONFIG_YAML, SESSION, ISSUE_ID (read); CONTROLLER_REASON (written);
#          plus everything controller_adopt_issue rewrites
# Returns: 1 when no issue is available; 0 once an issue has been adopted
controller_adopt_next_recurring_issue() {
  local next_issue_id=""
  local claim_out=""
  local claim_file=""

  claim_out="$(flow_resident_issue_claim_next "${CONFIG_YAML}" "${SESSION}" "${ISSUE_ID}" || true)"

  # Stop at the first match so a repeated key cannot produce a multi-line
  # value (the other metadata parsers in this file do the same).
  next_issue_id="$(awk -F= '/^ISSUE_ID=/{print $2; exit}' <<<"${claim_out}")"
  # Strip only the key so a claim path containing "=" is kept intact.
  claim_file="$(awk '/^CLAIM_FILE=/{sub(/^CLAIM_FILE=/, ""); print; exit}' <<<"${claim_out}")"

  if [[ -z "${next_issue_id}" ]]; then
    next_issue_id="$(select_next_recurring_issue_id || true)"
  fi
  [[ -n "${next_issue_id}" ]] || return 1

  controller_adopt_issue "${next_issue_id}"

  # An issue taken from the fallback selector has no claim file to release.
  if [[ -n "${claim_file}" ]]; then
    flow_resident_issue_release_claim "${claim_file}"
  fi

  CONTROLLER_REASON="adopted-next-recurring-issue"
  controller_write_state "adopting-issue" ""
  return 0
}
|
|
241
|
+
|
|
242
|
+
# Idle until another recurring issue can be adopted or the idle timeout
# elapses.
# Globals: IDLE_TIMEOUT_SECONDS, POLL_SECONDS (read);
#          IDLE_WAIT_STARTED_EPOCH, CONTROLLER_REASON (written)
# Returns: 0 once an issue has been adopted; 1 when idle waiting is disabled
#          (timeout empty, non-numeric or 0) or the timeout was reached
controller_wait_for_leased_issue() {
  local idle_timeout="${IDLE_TIMEOUT_SECONDS:-0}"
  local poll_seconds="${POLL_SECONDS:-}"
  local now_epoch=""

  case "${idle_timeout}" in
    ''|*[!0-9]*) idle_timeout="0" ;;
  esac
  # Force base 10: a zero-padded value such as "0900" would otherwise be
  # read as (invalid) octal by the arithmetic comparisons below.
  idle_timeout="$((10#${idle_timeout}))"

  if (( idle_timeout <= 0 )); then
    return 1
  fi

  # An empty or malformed poll interval would make `sleep` fail immediately
  # and turn this loop into a busy loop; fall back to the same 60 seconds
  # that controller_wait_for_provider_capacity uses.
  if ! [[ "${poll_seconds}" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    poll_seconds="60"
  fi

  # Keep an existing start time so the timeout spans repeated calls.
  if [[ -z "${IDLE_WAIT_STARTED_EPOCH}" ]]; then
    IDLE_WAIT_STARTED_EPOCH="$(date +%s)"
  fi

  while true; do
    if controller_adopt_next_recurring_issue; then
      return 0
    fi

    now_epoch="$(date +%s)"
    if (( now_epoch - IDLE_WAIT_STARTED_EPOCH >= idle_timeout )); then
      CONTROLLER_REASON="idle-timeout"
      return 1
    fi

    controller_write_state "idle" ""
    sleep "${poll_seconds}"
  done
}
|
|
273
|
+
|
|
274
|
+
# Persist the controller's state to CONTROLLER_FILE and keep the issue's
# pending marker in step with that state.
# Globals:   CONTROLLER_STATE, CONTROLLER_REASON (written); CONTROLLER_FILE,
#            ISSUE_ID, SESSION, MODE and every variable listed below (read)
# Arguments: $1 - new controller state (required)
#            $2 - reason; empty or missing keeps the current CONTROLLER_REASON
controller_write_state() {
  local new_state="${1:?state required}"
  local state_reason="${2:-${CONTROLLER_REASON}}"
  local _cws_field=""
  local -a _cws_pairs=()

  CONTROLLER_STATE="${new_state}"
  CONTROLLER_REASON="${state_reason}"

  # Entries whose key differs from the variable that supplies the value.
  _cws_pairs=(
    "ISSUE_ID=${ISSUE_ID}"
    "SESSION=${SESSION}"
    "CONTROLLER_PID=$$"
    "CONTROLLER_MODE=${MODE}"
  )

  # Entries written under the name of the global holding the value.
  for _cws_field in \
    CONTROLLER_LOOP_COUNT \
    CONTROLLER_STATE \
    CONTROLLER_REASON \
    ACTIVE_RESIDENT_WORKER_KEY \
    ACTIVE_RESIDENT_LANE_KIND \
    ACTIVE_RESIDENT_LANE_VALUE \
    ACTIVE_PROVIDER_POOL_NAME \
    ACTIVE_PROVIDER_BACKEND \
    ACTIVE_PROVIDER_MODEL \
    ACTIVE_PROVIDER_KEY \
    ACTIVE_PROVIDER_SELECTION_REASON \
    ACTIVE_PROVIDER_NEXT_ATTEMPT_EPOCH \
    ACTIVE_PROVIDER_NEXT_ATTEMPT_AT \
    ACTIVE_PROVIDER_LAST_REASON \
    LAST_LAUNCHED_PROVIDER_POOL_NAME \
    LAST_LAUNCHED_PROVIDER_BACKEND \
    LAST_LAUNCHED_PROVIDER_MODEL \
    LAST_LAUNCHED_PROVIDER_KEY \
    PROVIDER_SWITCH_COUNT \
    PROVIDER_FAILOVER_COUNT \
    LAST_PROVIDER_SWITCH_AT \
    LAST_PROVIDER_SWITCH_REASON \
    LAST_PROVIDER_FROM_POOL_NAME \
    LAST_PROVIDER_FROM_BACKEND \
    LAST_PROVIDER_FROM_MODEL \
    LAST_PROVIDER_FROM_KEY \
    LAST_PROVIDER_TO_POOL_NAME \
    LAST_PROVIDER_TO_BACKEND \
    LAST_PROVIDER_TO_MODEL \
    LAST_PROVIDER_TO_KEY \
    LAST_PROVIDER_FAILOVER_AT \
    PROVIDER_WAIT_COUNT \
    PROVIDER_WAIT_TOTAL_SECONDS \
    PROVIDER_LAST_WAIT_SECONDS \
    PROVIDER_LAST_WAIT_STARTED_AT \
    PROVIDER_LAST_WAIT_COMPLETED_AT \
    NEXT_WAKE_EPOCH \
    NEXT_WAKE_AT; do
    _cws_pairs+=("${_cws_field}=${!_cws_field}")
  done

  _cws_pairs+=("UPDATED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")")

  flow_resident_write_metadata "${CONTROLLER_FILE}" "${_cws_pairs[@]}"

  # "stopped" always clears the marker; any other state keeps it only while
  # the state counts as pending.
  if [[ "${CONTROLLER_STATE}" != "stopped" ]] \
    && flow_resident_issue_controller_counts_as_pending "${CONTROLLER_STATE}"; then
    controller_register_pending_issue
  else
    controller_unregister_pending_issue "${ISSUE_ID}"
  fi
}
|
|
333
|
+
|
|
334
|
+
# Print the first LAST_FAILURE_REASON value found in the lane metadata file,
# with double quotes removed.
# Globals: ACTIVE_RESIDENT_META_FILE (read), RESIDENT_META_FILE (read, fallback)
# Outputs: the reason on stdout; nothing when the key is absent
# Returns: 1 when no metadata file is configured or it does not exist, else 0
controller_last_failure_reason() {
  local meta_path="${ACTIVE_RESIDENT_META_FILE:-${RESIDENT_META_FILE:-}}"

  if [[ -z "${meta_path}" || ! -f "${meta_path}" ]]; then
    return 1
  fi

  awk -F= '/^LAST_FAILURE_REASON=/{value = $2; gsub(/"/, "", value); print value; exit}' \
    "${meta_path}" 2>/dev/null || true
}
|
|
339
|
+
|
|
340
|
+
# Print the provider cooldown state as reported by
# ${FLOW_TOOLS_DIR}/provider-cooldown-state.sh.
# The script runs with the worker/provider execution variables removed from
# its environment, presumably so it evaluates configuration rather than this
# controller's current selection (confirm against the script).
# Globals: FLOW_TOOLS_DIR (read)
# Outputs: the script's stdout, or "READY=yes" when the script is missing,
#          not executable, or prints nothing
# Returns: always 0
controller_provider_state() {
  local provider_state_script="${FLOW_TOOLS_DIR}/provider-cooldown-state.sh"
  local provider_state=""
  local scrub_name=""
  local -a scrub_args=(-u ACP_CODING_WORKER)

  if [[ ! -x "${provider_state_script}" ]]; then
    printf 'READY=yes\n'
    return 0
  fi

  # Same variable set that controller_refresh_execution_context clears,
  # including OPENCLAW_STALL_SECONDS, which the previous hand-written
  # `env -u` list left out.
  for scrub_name in \
    CODEX_PROFILE_SAFE \
    CODEX_PROFILE_BYPASS \
    CLAUDE_MODEL \
    CLAUDE_PERMISSION_MODE \
    CLAUDE_EFFORT \
    CLAUDE_TIMEOUT_SECONDS \
    CLAUDE_MAX_ATTEMPTS \
    CLAUDE_RETRY_BACKOFF_SECONDS \
    OPENCLAW_MODEL \
    OPENCLAW_THINKING \
    OPENCLAW_TIMEOUT_SECONDS \
    OPENCLAW_STALL_SECONDS \
    ACTIVE_PROVIDER_POOL_NAME \
    ACTIVE_PROVIDER_BACKEND \
    ACTIVE_PROVIDER_MODEL \
    ACTIVE_PROVIDER_KEY \
    PROVIDER_POOLS_EXHAUSTED \
    PROVIDER_POOL_SELECTION_REASON \
    PROVIDER_POOL_NEXT_ATTEMPT_EPOCH \
    PROVIDER_POOL_NEXT_ATTEMPT_AT \
    PROVIDER_POOL_LAST_REASON; do
    scrub_args+=(-u "ACP_${scrub_name}" -u "F_LOSNING_${scrub_name}")
  done

  # Best effort: a failing script is treated like one that printed nothing.
  provider_state="$(
    env "${scrub_args[@]}" "${provider_state_script}" get 2>/dev/null || true
  )"
  if [[ -z "${provider_state}" ]]; then
    printf 'READY=yes\n'
    return 0
  fi

  printf '%s\n' "${provider_state}"
}
|
|
381
|
+
|
|
382
|
+
# Format an epoch timestamp as an ISO-8601 UTC string.
# `date -r <seconds>` is the BSD/macOS spelling; GNU date treats -r as a
# reference file and needs `-d @<seconds>`, so try both. Prints nothing (and
# still succeeds) when neither form works.
_controller_epoch_to_utc() {
  local epoch="${1:-}"

  date -u -r "${epoch}" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null \
    || date -u -d "@${epoch}" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null \
    || true
}

# Block until the provider reports READY=yes, sleeping between polls and
# recording wait statistics.
# Globals: POLL_SECONDS (read); PROVIDER_WAITED, PROVIDER_WAIT_COUNT,
#          PROVIDER_WAIT_TOTAL_SECONDS, PROVIDER_LAST_WAIT_SECONDS,
#          PROVIDER_LAST_WAIT_STARTED_AT, PROVIDER_LAST_WAIT_COMPLETED_AT,
#          NEXT_WAKE_EPOCH, NEXT_WAKE_AT, CONTROLLER_REASON (written)
# Returns: 0 once the provider is ready; 1 when it is not ready and reports
#          no usable NEXT_ATTEMPT_EPOCH (missing, non-numeric or 0)
controller_wait_for_provider_capacity() {
  local provider_state=""
  local provider_ready=""
  local provider_next_epoch=""
  local provider_next_at=""
  local now_epoch=""
  local remaining=""
  local sleep_seconds=""
  local wait_started_epoch=""
  local wait_completed_epoch=""

  PROVIDER_WAITED="no"

  while true; do
    provider_state="$(controller_provider_state)"
    provider_ready="$(flow_kv_get "${provider_state}" "READY")"
    if [[ "${provider_ready}" == "yes" ]]; then
      # Only close out the statistics when this call actually waited.
      if [[ -n "${wait_started_epoch}" ]]; then
        wait_completed_epoch="$(date +%s)"
        # Skip the bookkeeping if the clock moved backwards meanwhile.
        if (( wait_completed_epoch >= wait_started_epoch )); then
          PROVIDER_LAST_WAIT_SECONDS=$((wait_completed_epoch - wait_started_epoch))
          PROVIDER_WAIT_TOTAL_SECONDS=$((PROVIDER_WAIT_TOTAL_SECONDS + PROVIDER_LAST_WAIT_SECONDS))
          PROVIDER_LAST_WAIT_COMPLETED_AT="$(_controller_epoch_to_utc "${wait_completed_epoch}")"
        fi
      fi
      NEXT_WAKE_EPOCH=""
      NEXT_WAKE_AT=""
      return 0
    fi

    provider_next_epoch="$(flow_kv_get "${provider_state}" "NEXT_ATTEMPT_EPOCH")"
    provider_next_at="$(flow_kv_get "${provider_state}" "NEXT_ATTEMPT_AT")"
    if ! [[ "${provider_next_epoch}" =~ ^[0-9]+$ ]] || [[ "${provider_next_epoch}" == "0" ]]; then
      return 1
    fi

    # The first not-ready poll starts one wait episode.
    if [[ -z "${wait_started_epoch}" ]]; then
      wait_started_epoch="$(date +%s)"
      PROVIDER_WAIT_COUNT=$((PROVIDER_WAIT_COUNT + 1))
      PROVIDER_LAST_WAIT_STARTED_AT="$(_controller_epoch_to_utc "${wait_started_epoch}")"
    fi

    PROVIDER_WAITED="yes"
    NEXT_WAKE_EPOCH="${provider_next_epoch}"
    NEXT_WAKE_AT="${provider_next_at}"
    CONTROLLER_REASON="provider-cooldown"
    controller_write_state "waiting-provider" ""

    now_epoch="$(date +%s)"
    remaining=$((provider_next_epoch - now_epoch))

    # Sleep one poll interval (60s when POLL_SECONDS is not a positive
    # integer), shortened to the time left until the next attempt.
    sleep_seconds="${POLL_SECONDS}"
    if ! [[ "${sleep_seconds}" =~ ^[1-9][0-9]*$ ]]; then
      sleep_seconds="60"
    fi
    if (( remaining > 0 && remaining < sleep_seconds )); then
      sleep_seconds="${remaining}"
    fi
    if (( sleep_seconds <= 0 )); then
      sleep_seconds="1"
    fi
    sleep "${sleep_seconds}"
  done
}
|
|
445
|
+
|
|
446
|
+
# Record the controller as stopped, keeping the current reason when one is
# set and using "stopped" otherwise.
# Globals: CONTROLLER_REASON (read)
controller_cleanup() {
  local final_reason="${CONTROLLER_REASON:-stopped}"

  controller_write_state "stopped" "${final_reason}"
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ROOT_DIR is the parent of the directory that contains this script; the
# sibling "dashboard" directory is put first on sys.path (once) so the
# issue_queue_state import below can be resolved.
ROOT_DIR = Path(__file__).resolve().parents[1]
DASHBOARD_DIR = ROOT_DIR.joinpath("dashboard")
if not any(entry == str(DASHBOARD_DIR) for entry in sys.path):
    sys.path.insert(0, str(DASHBOARD_DIR))
|
|
15
|
+
|
|
16
|
+
from issue_queue_state import collect_issue_queue
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main() -> int:
    """Print the resident issue queue state as JSON on stdout.

    The state root comes from ``--state-root`` and defaults to the stripped
    ``ACP_STATE_ROOT`` environment variable; argparse reports an error when
    neither yields a value. ``--pretty`` switches to 2-space indentation.

    Returns:
        0 after the JSON document and a trailing newline have been written.
    """
    env_state_root = os.environ.get("ACP_STATE_ROOT", "").strip()

    cli = argparse.ArgumentParser(description="Render resident issue queue state as JSON.")
    cli.add_argument("--state-root", default=env_state_root, help="ACP runtime state root")
    cli.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    options = cli.parse_args()

    if not options.state_root:
        cli.error("--state-root is required")

    state_root = Path(options.state_root).expanduser()
    indent = 2 if options.pretty else None
    rendered = json.dumps(collect_issue_queue(state_root), indent=indent, sort_keys=True)
    sys.stdout.write(rendered + "\n")
    return 0
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|