agent-control-plane 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/npm/bin/agent-control-plane.js +39 -2
  2. package/package.json +6 -3
  3. package/tools/bin/agent-project-catch-up-merged-prs +1 -0
  4. package/tools/bin/agent-project-cleanup-session +49 -5
  5. package/tools/bin/agent-project-heartbeat-loop +119 -1471
  6. package/tools/bin/agent-project-reconcile-issue-session +66 -105
  7. package/tools/bin/agent-project-reconcile-pr-session +76 -111
  8. package/tools/bin/agent-project-run-claude-session +10 -0
  9. package/tools/bin/agent-project-run-codex-resilient +86 -9
  10. package/tools/bin/agent-project-run-codex-session +16 -5
  11. package/tools/bin/agent-project-run-kilo-session +10 -0
  12. package/tools/bin/agent-project-run-openclaw-session +10 -0
  13. package/tools/bin/agent-project-run-opencode-session +10 -0
  14. package/tools/bin/agent-project-worker-status +10 -7
  15. package/tools/bin/cleanup-worktree.sh +6 -1
  16. package/tools/bin/flow-config-lib.sh +80 -0
  17. package/tools/bin/flow-resident-worker-lib.sh +119 -1
  18. package/tools/bin/flow-shell-lib.sh +24 -0
  19. package/tools/bin/heartbeat-loop-cache-lib.sh +164 -0
  20. package/tools/bin/heartbeat-loop-counting-lib.sh +306 -0
  21. package/tools/bin/heartbeat-loop-pr-strategy-lib.sh +199 -0
  22. package/tools/bin/heartbeat-loop-scheduling-lib.sh +506 -0
  23. package/tools/bin/heartbeat-loop-worker-lib.sh +319 -0
  24. package/tools/bin/heartbeat-recovery-preflight.sh +12 -1
  25. package/tools/bin/heartbeat-safe-auto.sh +14 -3
  26. package/tools/bin/project-launchd-bootstrap.sh +11 -8
  27. package/tools/bin/reconcile-bootstrap-lib.sh +113 -0
  28. package/tools/bin/resident-issue-controller-lib.sh +448 -0
  29. package/tools/bin/resident-issue-queue-status.py +35 -0
  30. package/tools/bin/start-resident-issue-loop.sh +26 -437
  31. package/tools/dashboard/app.js +7 -0
  32. package/tools/dashboard/dashboard_snapshot.py +13 -29
  33. package/SKILL.md +0 -149
@@ -283,6 +283,16 @@ EOF
283
283
  done
284
284
  fi
285
285
 
286
+ # Always collect result.env from sandbox to artifact_dir
287
+ collect_copy_snippet+=$(
288
+ cat <<EOF
289
+ if [[ -f ${sandbox_run_dir_q}/result.env ]]; then
290
+ cp ${sandbox_run_dir_q}/result.env ${artifact_dir_q}/result.env
291
+ fi
292
+ EOF
293
+ )
294
+ collect_copy_snippet+=$'\n'
295
+
286
296
  reconcile_snippet=""
287
297
  if [[ -n "$reconcile_command" ]]; then
288
298
  printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
@@ -240,6 +240,16 @@ EOF
240
240
  done
241
241
  fi
242
242
 
243
+ # Always collect result.env from sandbox to artifact_dir
244
+ collect_copy_snippet+=$(
245
+ cat <<EOF
246
+ if [[ -f ${sandbox_run_dir_q}/result.env ]]; then
247
+ cp ${sandbox_run_dir_q}/result.env ${artifact_dir_q}/result.env
248
+ fi
249
+ EOF
250
+ )
251
+ collect_copy_snippet+=$'\n'
252
+
243
253
  reconcile_snippet=""
244
254
  if [[ -n "$reconcile_command" ]]; then
245
255
  printf -v delayed_reconcile_q '%q' "export ACP_EXPECTED_RUN_STARTED_AT=${started_at_q}; export F_LOSNING_EXPECTED_RUN_STARTED_AT=${started_at_q}; while tmux has-session -t ${session_q} 2>/dev/null; do sleep 1; done; sleep 2; $reconcile_command"
@@ -117,13 +117,6 @@ if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
117
117
  fi
118
118
  fi
119
119
 
120
- if [[ "$status" == "UNKNOWN" && -z "$failure_reason" ]]; then
121
- failure_reason="$(failure_reason_from_output || true)"
122
- if [[ -n "$failure_reason" ]]; then
123
- status="FAILED"
124
- fi
125
- fi
126
-
127
120
  if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
128
121
  case "$runner_state" in
129
122
  running|waiting-auth-refresh|switching-account)
@@ -133,6 +126,7 @@ if [[ "$status" == "UNKNOWN" && -n "$runner_state" ]]; then
133
126
  # Check BEFORE stale result.env to avoid false SUCCEEDED when a prior
134
127
  # cycle's result.env happens to exist.
135
128
  status="FAILED"
129
+ failure_reason="$(failure_reason_from_output || true)"
136
130
  if [[ -z "$failure_reason" ]]; then
137
131
  failure_reason="runner-aborted-before-completion"
138
132
  fi
@@ -146,10 +140,19 @@ fi
146
140
  if [[ "$status" == "UNKNOWN" && -f "$result_file" ]]; then
147
141
  # A worker that managed to persist result.env already completed its contract,
148
142
  # even if the tmux session disappeared before the exit marker was flushed.
143
+ # Check BEFORE failure_reason_from_output so that a completed result.env
144
+ # is not overridden by transient failure text in the log.
149
145
  status="SUCCEEDED"
150
146
  result_only_completion="yes"
151
147
  fi
152
148
 
149
+ if [[ "$status" == "UNKNOWN" && -z "$failure_reason" ]]; then
150
+ failure_reason="$(failure_reason_from_output || true)"
151
+ if [[ -n "$failure_reason" ]]; then
152
+ status="FAILED"
153
+ fi
154
+ fi
155
+
153
156
  if [[ "$status" == "UNKNOWN" && -f "$output_file" ]]; then
154
157
  if rg -qi "You've hit your usage limit|You have reached your Codex usage limits|visit https://chatgpt.com/codex/settings/usage|Upgrade to Pro|rate limit exceeded|quota exceeded|usage cap (reached|exceeded)|usage quota (reached|exceeded)" "$output_file"; then
155
158
  status="FAILED"
@@ -37,11 +37,16 @@ if [[ -n "$SESSION" ]]; then
37
37
  ARGS+=(--session "$SESSION")
38
38
  fi
39
39
 
40
+ cleanup_exit=0
40
41
  AGENT_PROJECT_WORKTREE_ROOT="$WORKTREE_ROOT" \
41
42
  F_LOSNING_WORKTREE_ROOT="$WORKTREE_ROOT" \
42
- bash "${FLOW_TOOLS_DIR}/agent-project-cleanup-session" "${ARGS[@]}" >/dev/null
43
+ bash "${FLOW_TOOLS_DIR}/agent-project-cleanup-session" "${ARGS[@]}" >/dev/null || cleanup_exit=$?
43
44
 
44
45
  F_LOSNING_AGENT_REPO_ROOT="$AGENT_REPO_ROOT" \
45
46
  F_LOSNING_RETAINED_REPO_ROOT="$RETAINED_REPO_ROOT" \
46
47
  F_LOSNING_VSCODE_WORKSPACE_FILE="$VSCODE_WORKSPACE_FILE" \
47
48
  "${FLOW_TOOLS_DIR}/sync-vscode-workspace.sh" >/dev/null 2>&1 || true
49
+
50
+ if [[ "$cleanup_exit" -ne 0 ]]; then
51
+ exit "$cleanup_exit"
52
+ fi
@@ -1741,6 +1741,34 @@ flow_provider_pool_pi_timeout_seconds() {
1741
1741
  flow_provider_pool_value "${config_file}" "${pool_name}" "pi.timeout_seconds"
1742
1742
  }
1743
1743
 
1744
# Print the "opencode.model" setting of a provider pool.
# Arguments: $1 - config file, $2 - pool name (both required)
# Outputs:   whatever flow_provider_pool_value prints for that key
flow_provider_pool_opencode_model() {
  local config_file pool_name
  config_file="${1:?config file required}"
  pool_name="${2:?pool name required}"

  flow_provider_pool_value "${config_file}" "${pool_name}" "opencode.model"
}
1750
+
1751
# Print the "opencode.timeout_seconds" setting of a provider pool.
# Arguments: $1 - config file, $2 - pool name (both required)
# Outputs:   whatever flow_provider_pool_value prints for that key
flow_provider_pool_opencode_timeout_seconds() {
  local config_file pool_name
  config_file="${1:?config file required}"
  pool_name="${2:?pool name required}"

  flow_provider_pool_value "${config_file}" "${pool_name}" "opencode.timeout_seconds"
}
1757
+
1758
# Print the "kilo.model" setting of a provider pool.
# Arguments: $1 - config file, $2 - pool name (both required)
# Outputs:   whatever flow_provider_pool_value prints for that key
flow_provider_pool_kilo_model() {
  local config_file pool_name
  config_file="${1:?config file required}"
  pool_name="${2:?pool name required}"

  flow_provider_pool_value "${config_file}" "${pool_name}" "kilo.model"
}
1764
+
1765
# Print the "kilo.timeout_seconds" setting of a provider pool.
# Arguments: $1 - config file, $2 - pool name (both required)
# Outputs:   whatever flow_provider_pool_value prints for that key
flow_provider_pool_kilo_timeout_seconds() {
  local config_file pool_name
  config_file="${1:?config file required}"
  pool_name="${2:?pool name required}"

  flow_provider_pool_value "${config_file}" "${pool_name}" "kilo.timeout_seconds"
}
1771
+
1744
1772
  flow_sanitize_provider_key() {
1745
1773
  local raw_key="${1:?raw key required}"
1746
1774
 
@@ -1771,6 +1799,12 @@ flow_provider_pool_model_identity() {
1771
1799
  pi)
1772
1800
  flow_provider_pool_pi_model "${config_file}" "${pool_name}"
1773
1801
  ;;
1802
+ opencode)
1803
+ flow_provider_pool_opencode_model "${config_file}" "${pool_name}"
1804
+ ;;
1805
+ kilo)
1806
+ flow_provider_pool_kilo_model "${config_file}" "${pool_name}"
1807
+ ;;
1774
1808
  *)
1775
1809
  printf '\n'
1776
1810
  ;;
@@ -1810,6 +1844,10 @@ flow_provider_pool_state_get() {
1810
1844
  local pi_model=""
1811
1845
  local pi_thinking=""
1812
1846
  local pi_timeout_seconds=""
1847
+ local opencode_model=""
1848
+ local opencode_timeout_seconds=""
1849
+ local kilo_model=""
1850
+ local kilo_timeout_seconds=""
1813
1851
 
1814
1852
  backend="$(flow_provider_pool_backend "${config_file}" "${pool_name}")"
1815
1853
  safe_profile="$(flow_provider_pool_safe_profile "${config_file}" "${pool_name}")"
@@ -1829,6 +1867,10 @@ flow_provider_pool_state_get() {
1829
1867
  pi_model="$(flow_provider_pool_pi_model "${config_file}" "${pool_name}")"
1830
1868
  pi_thinking="$(flow_provider_pool_pi_thinking "${config_file}" "${pool_name}")"
1831
1869
  pi_timeout_seconds="$(flow_provider_pool_pi_timeout_seconds "${config_file}" "${pool_name}")"
1870
+ opencode_model="$(flow_provider_pool_opencode_model "${config_file}" "${pool_name}")"
1871
+ opencode_timeout_seconds="$(flow_provider_pool_opencode_timeout_seconds "${config_file}" "${pool_name}")"
1872
+ kilo_model="$(flow_provider_pool_kilo_model "${config_file}" "${pool_name}")"
1873
+ kilo_timeout_seconds="$(flow_provider_pool_kilo_timeout_seconds "${config_file}" "${pool_name}")"
1832
1874
  model="$(flow_provider_pool_model_identity "${config_file}" "${pool_name}")"
1833
1875
 
1834
1876
  case "${backend}" in
@@ -1847,6 +1889,12 @@ flow_provider_pool_state_get() {
1847
1889
  pi)
1848
1890
  [[ -n "${pi_model}" ]] || valid="no"
1849
1891
  ;;
1892
+ opencode)
1893
+ [[ -n "${opencode_model}" && -n "${opencode_timeout_seconds}" ]] || valid="no"
1894
+ ;;
1895
+ kilo)
1896
+ [[ -n "${kilo_model}" && -n "${kilo_timeout_seconds}" ]] || valid="no"
1897
+ ;;
1850
1898
  *)
1851
1899
  valid="no"
1852
1900
  ;;
@@ -1905,6 +1953,10 @@ flow_provider_pool_state_get() {
1905
1953
  printf 'PI_MODEL=%s\n' "${pi_model}"
1906
1954
  printf 'PI_THINKING=%s\n' "${pi_thinking}"
1907
1955
  printf 'PI_TIMEOUT_SECONDS=%s\n' "${pi_timeout_seconds}"
1956
+ printf 'OPENCODE_MODEL=%s\n' "${opencode_model}"
1957
+ printf 'OPENCODE_TIMEOUT_SECONDS=%s\n' "${opencode_timeout_seconds}"
1958
+ printf 'KILO_MODEL=%s\n' "${kilo_model}"
1959
+ printf 'KILO_TIMEOUT_SECONDS=%s\n' "${kilo_timeout_seconds}"
1908
1960
  }
1909
1961
 
1910
1962
  flow_selected_provider_pool_env() {
@@ -2118,6 +2170,10 @@ flow_export_execution_env() {
2118
2170
  local pi_model=""
2119
2171
  local pi_thinking=""
2120
2172
  local pi_timeout=""
2173
+ local opencode_model=""
2174
+ local opencode_timeout=""
2175
+ local kilo_model=""
2176
+ local kilo_timeout=""
2121
2177
 
2122
2178
  repo_id="$(flow_resolve_repo_id "${config_file}")"
2123
2179
  provider_quota_cooldowns="$(flow_resolve_provider_quota_cooldowns "${config_file}")"
@@ -2157,6 +2213,10 @@ flow_export_execution_env() {
2157
2213
  pi_model="$(flow_kv_get "${provider_pool_selection}" "PI_MODEL")"
2158
2214
  pi_thinking="$(flow_kv_get "${provider_pool_selection}" "PI_THINKING")"
2159
2215
  pi_timeout="$(flow_kv_get "${provider_pool_selection}" "PI_TIMEOUT_SECONDS")"
2216
+ opencode_model="$(flow_kv_get "${provider_pool_selection}" "OPENCODE_MODEL")"
2217
+ opencode_timeout="$(flow_kv_get "${provider_pool_selection}" "OPENCODE_TIMEOUT_SECONDS")"
2218
+ kilo_model="$(flow_kv_get "${provider_pool_selection}" "KILO_MODEL")"
2219
+ kilo_timeout="$(flow_kv_get "${provider_pool_selection}" "KILO_TIMEOUT_SECONDS")"
2160
2220
  else
2161
2221
  if [[ -n "${explicit_coding_worker}" ]]; then
2162
2222
  active_provider_selection_reason="env-override"
@@ -2180,6 +2240,10 @@ flow_export_execution_env() {
2180
2240
  pi_model="$(flow_env_or_config "${config_file}" "ACP_PI_MODEL F_LOSNING_PI_MODEL" "execution.pi.model" "")"
2181
2241
  pi_thinking="$(flow_env_or_config "${config_file}" "ACP_PI_THINKING F_LOSNING_PI_THINKING" "execution.pi.thinking" "")"
2182
2242
  pi_timeout="$(flow_env_or_config "${config_file}" "ACP_PI_TIMEOUT_SECONDS F_LOSNING_PI_TIMEOUT_SECONDS" "execution.pi.timeout_seconds" "")"
2243
+ opencode_model="$(flow_env_or_config "${config_file}" "ACP_OPENCODE_MODEL F_LOSNING_OPENCODE_MODEL" "execution.opencode.model" "")"
2244
+ opencode_timeout="$(flow_env_or_config "${config_file}" "ACP_OPENCODE_TIMEOUT_SECONDS F_LOSNING_OPENCODE_TIMEOUT_SECONDS" "execution.opencode.timeout_seconds" "")"
2245
+ kilo_model="$(flow_env_or_config "${config_file}" "ACP_KILO_MODEL F_LOSNING_KILO_MODEL" "execution.kilo.model" "")"
2246
+ kilo_timeout="$(flow_env_or_config "${config_file}" "ACP_KILO_TIMEOUT_SECONDS F_LOSNING_KILO_TIMEOUT_SECONDS" "execution.kilo.timeout_seconds" "")"
2183
2247
  fi
2184
2248
 
2185
2249
  if [[ -n "${coding_worker}" ]]; then
@@ -2287,6 +2351,22 @@ flow_export_execution_env() {
2287
2351
  export F_LOSNING_PI_TIMEOUT_SECONDS="${pi_timeout}"
2288
2352
  export ACP_PI_TIMEOUT_SECONDS="${pi_timeout}"
2289
2353
  fi
2354
+ if [[ -n "${opencode_model}" ]]; then
2355
+ export F_LOSNING_OPENCODE_MODEL="${opencode_model}"
2356
+ export ACP_OPENCODE_MODEL="${opencode_model}"
2357
+ fi
2358
+ if [[ -n "${opencode_timeout}" ]]; then
2359
+ export F_LOSNING_OPENCODE_TIMEOUT_SECONDS="${opencode_timeout}"
2360
+ export ACP_OPENCODE_TIMEOUT_SECONDS="${opencode_timeout}"
2361
+ fi
2362
+ if [[ -n "${kilo_model}" ]]; then
2363
+ export F_LOSNING_KILO_MODEL="${kilo_model}"
2364
+ export ACP_KILO_MODEL="${kilo_model}"
2365
+ fi
2366
+ if [[ -n "${kilo_timeout}" ]]; then
2367
+ export F_LOSNING_KILO_TIMEOUT_SECONDS="${kilo_timeout}"
2368
+ export ACP_KILO_TIMEOUT_SECONDS="${kilo_timeout}"
2369
+ fi
2290
2370
 
2291
2371
  flow_export_github_cli_auth_env "$(flow_resolve_repo_slug "${config_file}")"
2292
2372
  flow_export_project_env_aliases
@@ -285,6 +285,22 @@ flow_resident_issue_queue_file() {
285
285
  printf '%s/issue-%s.env\n' "$(flow_resident_issue_queue_pending_dir "${config_file}")" "${issue_id}"
286
286
  }
287
287
 
288
# Print the claim-file path for an issue claimed by the current shell.
# Arguments: $1 - config file (optional; resolved from the caller's source
#                 file via resolve_flow_config_yaml when empty)
#            $2 - issue id (required)
#            $3 - claimer key (required)
# Outputs:   "<claims dir>/issue-<id>.<claimer key>.<$$>.env"
flow_resident_issue_claim_file() {
  local config_file="${1:-}"
  local issue_id="${2:?issue id required}"
  local claimer_key="${3:?claimer key required}"

  # Fall back to the config that belongs to whoever called us.
  [[ -n "${config_file}" ]] \
    || config_file="$(resolve_flow_config_yaml "${BASH_SOURCE[1]:-${BASH_SOURCE[0]}}")"

  # $$ (PID of the main shell) is the last name component before ".env".
  printf '%s/issue-%s.%s.%s.env\n' "$(flow_resident_issue_queue_claims_dir "${config_file}")" "${issue_id}" "${claimer_key}" "$$"
}
303
+
288
304
  flow_resident_issue_controller_file() {
289
305
  local config_file="${1:-}"
290
306
  local issue_id="${2:?issue id required}"
@@ -342,8 +358,11 @@ flow_resident_issue_enqueue() {
342
358
 
343
359
  tmp_file="${queue_file}.tmp.$$"
344
360
  flow_resident_write_metadata "${tmp_file}" \
361
+ "STATE_FORMAT_VERSION=1" \
362
+ "STATE_KIND=pending" \
345
363
  "ISSUE_ID=${issue_id}" \
346
364
  "QUEUED_BY=${queued_by}" \
365
+ "UPDATED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
347
366
  "QUEUED_AT=$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
348
367
  mv "${tmp_file}" "${queue_file}"
349
368
 
@@ -361,6 +380,9 @@ flow_resident_issue_claim_next() {
361
380
  local issue_id=""
362
381
  local claim_file=""
363
382
  local claim_key=""
383
+ local queued_by=""
384
+ local queued_at=""
385
+ local claimed_at=""
364
386
 
365
387
  if [[ -z "${config_file}" ]]; then
366
388
  config_file="$(resolve_flow_config_yaml "${BASH_SOURCE[1]:-${BASH_SOURCE[0]}}")"
@@ -378,8 +400,22 @@ flow_resident_issue_claim_next() {
378
400
  [[ -n "${issue_id}" ]] || continue
379
401
  [[ "${issue_id}" != "${skip_issue_id}" ]] || continue
380
402
 
381
- claim_file="${claims_dir}/issue-${issue_id}.${claim_key}.$$"
403
+ queued_by="$(flow_resident_metadata_value "${queue_file}" "QUEUED_BY" || true)"
404
+ queued_at="$(flow_resident_metadata_value "${queue_file}" "QUEUED_AT" || true)"
405
+ claimed_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
406
+ claim_file="$(flow_resident_issue_claim_file "${config_file}" "${issue_id}" "${claim_key}")"
382
407
  if mv "${queue_file}" "${claim_file}" 2>/dev/null; then
408
+ flow_resident_write_metadata "${claim_file}" \
409
+ "STATE_FORMAT_VERSION=1" \
410
+ "STATE_KIND=claim" \
411
+ "ISSUE_ID=${issue_id}" \
412
+ "QUEUED_BY=${queued_by}" \
413
+ "QUEUED_AT=${queued_at}" \
414
+ "SESSION=${claimer_key}" \
415
+ "CLAIMED_BY=${claim_key}" \
416
+ "CLAIMED_AT=${claimed_at}" \
417
+ "UPDATED_AT=${claimed_at}" \
418
+ "CLAIM_FILE=${claim_file}"
383
419
  printf 'ISSUE_ID=%s\n' "${issue_id}"
384
420
  printf 'CLAIM_FILE=%s\n' "${claim_file}"
385
421
  return 0
@@ -441,6 +477,87 @@ flow_resident_issue_controller_reap_file() {
441
477
  return 0
442
478
  }
443
479
 
480
# Return stale issue claims to the pending queue.
#
# Scans "<claims dir>/issue-*.env". The last dot-separated token of each file
# name (after stripping ".env") is taken as the claimer PID. A claim is
# considered stale when flow_resident_controller_pid_live does not report that
# PID as a live "start-resident-issue-loop.sh" process. For each stale claim:
#   * if another claim for the same issue has a live owner, leave it alone;
#   * if a pending file for the issue already exists, just drop the claim;
#   * otherwise write a fresh pending record and then drop the claim.
#
# The pending record is written to a temp file and renamed into place (same
# pattern as flow_resident_issue_enqueue), and the claim is removed only after
# that rename succeeds, so a failed write can no longer lose the issue.
#
# Arguments: $1 - config file (optional; resolved from the caller's source
#                 file via resolve_flow_config_yaml when empty)
flow_resident_issue_reap_stale_claims() {
  local config_file="${1:-}"
  local claims_dir=""
  local claim_file=""
  local claim_token=""
  local claim_pid=""
  local issue_id=""
  local queued_by=""
  local queued_at=""
  local claimed_at=""
  local existing_pending_file=""
  local pending_tmp_file=""
  local other_claim=""
  local other_token=""
  local other_pid=""

  if [[ -z "${config_file}" ]]; then
    config_file="$(resolve_flow_config_yaml "${BASH_SOURCE[1]:-${BASH_SOURCE[0]}}")"
  fi

  claims_dir="$(flow_resident_issue_queue_claims_dir "${config_file}")"
  mkdir -p "${claims_dir}"

  for claim_file in "${claims_dir}"/issue-*.env; do
    # Also skips the literal pattern when the glob matches nothing.
    [[ -f "${claim_file}" ]] || continue

    claim_token="${claim_file##*/}"
    claim_token="${claim_token%.env}"
    claim_pid="${claim_token##*.}"
    [[ "${claim_pid}" =~ ^[0-9]+$ ]] || continue

    if flow_resident_controller_pid_live "${claim_pid}" "start-resident-issue-loop.sh"; then
      continue
    fi

    # Prefer the recorded ISSUE_ID; fall back to the id embedded in the name.
    issue_id="$(flow_resident_metadata_value "${claim_file}" "ISSUE_ID" || true)"
    [[ -n "${issue_id}" ]] || issue_id="${claim_token#issue-}"
    issue_id="${issue_id%%.*}"
    [[ -n "${issue_id}" ]] || continue

    # If another live claim exists for the same issue, do not re-queue this one.
    for other_claim in "${claims_dir}/issue-${issue_id}."*; do
      [[ -f "${other_claim}" ]] || continue
      other_token="${other_claim##*/}"
      other_token="${other_token%.env}"
      other_pid="${other_token##*.}"
      [[ "${other_pid}" =~ ^[0-9]+$ ]] || continue
      if [[ "${other_pid}" == "${claim_pid}" ]]; then
        continue
      fi
      if flow_resident_controller_pid_live "${other_pid}" "start-resident-issue-loop.sh"; then
        # An empty issue_id signals "skip" to the check after the loop.
        issue_id=""
        break
      fi
    done
    [[ -n "${issue_id}" ]] || continue

    existing_pending_file="$(flow_resident_issue_queue_file "${config_file}" "${issue_id}")"
    if [[ -f "${existing_pending_file}" ]]; then
      # Already queued again; the stale claim carries nothing we need.
      rm -f "${claim_file}"
      continue
    fi

    queued_by="$(flow_resident_metadata_value "${claim_file}" "QUEUED_BY" || true)"
    queued_at="$(flow_resident_metadata_value "${claim_file}" "QUEUED_AT" || true)"
    claimed_at="$(flow_resident_metadata_value "${claim_file}" "CLAIMED_AT" || true)"

    [[ -n "${queued_by}" ]] || queued_by="heartbeat"
    [[ -n "${queued_at}" ]] || queued_at="${claimed_at:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}"

    # Best effort: if the directory cannot be created the write below fails
    # and the claim is kept for the next pass.
    if [[ "${existing_pending_file}" == */* ]]; then
      mkdir -p "${existing_pending_file%/*}" || true
    fi

    pending_tmp_file="${existing_pending_file}.tmp.$$"
    if flow_resident_write_metadata "${pending_tmp_file}" \
      "STATE_FORMAT_VERSION=1" \
      "STATE_KIND=pending" \
      "ISSUE_ID=${issue_id}" \
      "QUEUED_BY=${queued_by}" \
      "QUEUED_AT=${queued_at}" \
      "UPDATED_AT=${claimed_at:-${queued_at}}" \
      && mv "${pending_tmp_file}" "${existing_pending_file}"; then
      rm -f "${claim_file}"
    else
      # Keep the claim so a later pass can retry; discard the partial file.
      rm -f "${pending_tmp_file}"
    fi
  done
}
560
+
444
561
  flow_resident_issue_reap_stale_state() {
445
562
  local config_file="${1:-}"
446
563
  local resident_root=""
@@ -458,6 +575,7 @@ flow_resident_issue_reap_stale_state() {
458
575
  reaped=$((reaped + 1))
459
576
  fi
460
577
  done
578
+ flow_resident_issue_reap_stale_claims "${config_file}" || true
461
579
 
462
580
  printf '%s\n' "${reaped}"
463
581
  }
@@ -5,6 +5,30 @@ flow_canonical_skill_name() {
5
5
  printf '%s\n' "${AGENT_CONTROL_PLANE_SKILL_NAME:-agent-control-plane}"
6
6
  }
7
7
 
8
# Print the Python interpreter to use.
#
# Search order:
#   1. $PYTHON_BIN, either as a path to an executable regular file or as a
#      command name found on PATH (printed as the resolved path);
#   2. python3 on PATH;
#   3. /opt/homebrew/bin/python3;
#   4. python on PATH.
#
# Outputs:  the chosen interpreter on stdout
# Returns:  0 when an interpreter was found, 1 otherwise
flow_resolve_python_bin() {
  local resolved=""

  if [[ -n "${PYTHON_BIN:-}" ]]; then
    # -x alone is also true for searchable directories, so require a file.
    if [[ -f "${PYTHON_BIN}" && -x "${PYTHON_BIN}" ]]; then
      printf '%s\n' "${PYTHON_BIN}"
      return 0
    fi
    # Honor a bare command name such as "python3.12" by looking it up on PATH.
    if [[ "${PYTHON_BIN}" != */* ]] \
      && resolved="$(type -P -- "${PYTHON_BIN}" 2>/dev/null)" \
      && [[ -n "${resolved}" ]]; then
      printf '%s\n' "${resolved}"
      return 0
    fi
  fi

  if command -v python3 >/dev/null 2>&1; then
    command -v python3
    return 0
  fi

  if [[ -x /opt/homebrew/bin/python3 ]]; then
    printf '%s\n' "/opt/homebrew/bin/python3"
    return 0
  fi

  if command -v python >/dev/null 2>&1; then
    command -v python
    return 0
  fi

  return 1
}
31
+
8
32
  flow_compat_skill_alias() {
9
33
  printf '%s\n' "${AGENT_CONTROL_PLANE_COMPAT_ALIAS:-}"
10
34
  }
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+ # heartbeat-loop-cache-lib.sh — scheduler cache management and attribute caching
3
+
4
# Reset every scheduler cache.
#
# For each cache below, "<name>_cache" is emptied and "<name>_cache_loaded"
# is set to "no". The three on-disk cache directories are removed when their
# variables point at existing directories, and
# heartbeat_invalidate_snapshot_cache is called if that function is defined.
cleanup_scheduler_caches() {
  local cache_name dir_var dir_path

  for cache_name in \
    tmux_sessions \
    all_running_workers \
    running_issue_workers \
    running_pr_workers \
    completed_workers \
    ready_issue_ids \
    open_agent_pr_ids \
    running_issue_ids \
    exclusive_issue_ids \
    exclusive_pr_ids \
    blocked_recovery_issue_ids \
    ordered_ready_issue_ids \
    due_scheduled_issue_ids \
    due_blocked_recovery_issue_ids; do
    printf -v "${cache_name}_cache" '%s' ""
    printf -v "${cache_name}_cache_loaded" '%s' "no"
  done

  for dir_var in issue_attr_cache_dir pr_attr_cache_dir pr_risk_cache_dir; do
    dir_path="${!dir_var:-}"
    if [[ -n "${dir_path}" && -d "${dir_path}" ]]; then
      rm -rf "${dir_path}" || true
    fi
  done

  if declare -F heartbeat_invalidate_snapshot_cache >/dev/null 2>&1; then
    heartbeat_invalidate_snapshot_cache
  fi
}
46
+
47
# Print a file-name-safe prefix for cache directories.
#
# Starts from $issue_prefix, else $pr_prefix, else "agent-control-plane".
# "/" and whitespace become "-", and anything that is not alphanumeric,
# "_", "." or "-" is dropped. An empty result falls back to
# "agent-control-plane".
cache_prefix() {
  local source_prefix="${issue_prefix:-${pr_prefix:-agent-control-plane}}"
  local cleaned=""

  cleaned="$(
    printf '%s' "${source_prefix}" \
      | tr '/[:space:]' '-' \
      | tr -cd '[:alnum:]_.-'
  )"

  printf '%s\n' "${cleaned:-agent-control-plane}"
}
58
+
59
# Make sure $issue_attr_cache_dir names an existing directory, creating a
# fresh "<cache_prefix>-issue-attrs.XXXXXX" temp directory when it does not.
ensure_issue_attr_cache_dir() {
  # Nothing to do when the variable already points at a live directory.
  [[ -n "${issue_attr_cache_dir:-}" && -d "${issue_attr_cache_dir:-}" ]] && return 0

  issue_attr_cache_dir="$(mktemp -d "${TMPDIR:-/tmp}/$(cache_prefix)-issue-attrs.XXXXXX")"
}
64
+
65
# Make sure $pr_attr_cache_dir names an existing directory, creating a
# fresh "<cache_prefix>-pr-attrs.XXXXXX" temp directory when it does not.
ensure_pr_attr_cache_dir() {
  # Nothing to do when the variable already points at a live directory.
  [[ -n "${pr_attr_cache_dir:-}" && -d "${pr_attr_cache_dir:-}" ]] && return 0

  pr_attr_cache_dir="$(mktemp -d "${TMPDIR:-/tmp}/$(cache_prefix)-pr-attrs.XXXXXX")"
}
70
+
71
# Make sure $pr_risk_cache_dir names an existing directory, creating a
# fresh "<cache_prefix>-pr-risk.XXXXXX" temp directory when it does not.
ensure_pr_risk_cache_dir() {
  # Nothing to do when the variable already points at a live directory.
  [[ -n "${pr_risk_cache_dir:-}" && -d "${pr_risk_cache_dir:-}" ]] && return 0

  pr_risk_cache_dir="$(mktemp -d "${TMPDIR:-/tmp}/$(cache_prefix)-pr-risk.XXXXXX")"
}
76
+
77
# Succeed when a runtime PR-risk cache file is still within its TTL.
#
# Arguments: $1 - cache file path (required)
# Globals:   pr_risk_runtime_cache_ttl_seconds (read; treated as 0 if unset)
# Returns:   0 when the file exists and its age in seconds is between 0 and
#            the TTL inclusive; 1 when the file is missing, its mtime cannot
#            be read, or it is older than the TTL (or dated in the future).
pr_risk_runtime_cache_fresh() {
  local cache_file="${1:?cache file required}"
  local modified_at now age
  local ttl_seconds="${pr_risk_runtime_cache_ttl_seconds:-0}"

  [[ -f "$cache_file" ]] || return 1

  # GNU/busybox stat spell "mtime as epoch" -c '%Y'; BSD/macOS stat spell it
  # -f '%m'. Try GNU first: on GNU, -f means "filesystem status" and would
  # print non-numeric text, while BSD stat simply rejects -c.
  modified_at="$(
    stat -c '%Y' "$cache_file" 2>/dev/null \
      || stat -f '%m' "$cache_file" 2>/dev/null \
      || true
  )"
  [[ "$modified_at" =~ ^[0-9]+$ ]] || return 1

  now="$(date +%s)"
  age=$((now - modified_at))
  (( age >= 0 && age <= ttl_seconds ))
}
87
+
88
# Print an issue attribute, computing it at most once per cache directory.
#
# Arguments: $1 - attribute name: heavy, recurring, scheduled,
#                 schedule_interval_seconds or exclusive
#            $2 - issue id
# Outputs:   the attribute value; on a miss it is obtained from the matching
#            heartbeat_issue_* helper and stored in
#            "$issue_attr_cache_dir/<issue id>.<attribute>"
# Returns:   1 (with a message on stderr) for an unsupported attribute
cached_issue_attr() {
  local attr_name="${1:?attr name required}"
  local issue_id="${2:?issue id required}"
  local cache_file attr_value resolver

  ensure_issue_attr_cache_dir
  cache_file="${issue_attr_cache_dir}/${issue_id}.${attr_name}"
  if [[ -f "${cache_file}" ]]; then
    cat "${cache_file}"
    return 0
  fi

  # Map the attribute name onto the helper that computes it.
  case "${attr_name}" in
    heavy | recurring | scheduled | exclusive)
      resolver="heartbeat_issue_is_${attr_name}"
      ;;
    schedule_interval_seconds)
      resolver="heartbeat_issue_schedule_interval_seconds"
      ;;
    *)
      echo "unsupported issue cache attr: ${attr_name}" >&2
      return 1
      ;;
  esac

  attr_value="$("${resolver}" "${issue_id}")"
  printf '%s\n' "${attr_value}" >"${cache_file}"
  printf '%s\n' "${attr_value}"
}
125
+
126
# Print whether a PR is exclusive, asking heartbeat_pr_is_exclusive only on
# the first request and replaying "$pr_attr_cache_dir/<pr>.exclusive" after.
# Arguments: $1 - PR number (required)
cached_pr_is_exclusive() {
  local pr_number="${1:?pr number required}"
  local cache_file attr_value

  ensure_pr_attr_cache_dir
  cache_file="${pr_attr_cache_dir}/${pr_number}.exclusive"

  if [[ ! -f "${cache_file}" ]]; then
    # Miss: compute once, remember the answer, and report it.
    attr_value="$(heartbeat_pr_is_exclusive "${pr_number}")"
    printf '%s\n' "${attr_value}" >"${cache_file}"
    printf '%s\n' "${attr_value}"
    return
  fi

  cat "${cache_file}"
  return 0
}
141
+
142
# Print the risk JSON for a PR, using two cache layers.
#
# Lookup order:
#   1. "$pr_risk_cache_dir/<pr>.json";
#   2. "$pr_risk_runtime_cache_dir/<pr>.json" when
#      pr_risk_runtime_cache_fresh accepts it (copied into layer 1);
#   3. heartbeat_pr_risk_json, whose output is stored in both layers.
# Arguments: $1 - PR number (required)
cached_pr_risk_json() {
  local pr_number="${1:?pr number required}"
  local cache_file runtime_cache_file risk_json

  ensure_pr_risk_cache_dir
  cache_file="${pr_risk_cache_dir}/${pr_number}.json"
  runtime_cache_file="${pr_risk_runtime_cache_dir}/${pr_number}.json"

  if [[ ! -f "${cache_file}" ]]; then
    if ! pr_risk_runtime_cache_fresh "${runtime_cache_file}"; then
      # Both layers missed: compute and populate them.
      risk_json="$(heartbeat_pr_risk_json "${pr_number}")"
      printf '%s\n' "${risk_json}" >"${cache_file}"
      printf '%s\n' "${risk_json}" >"${runtime_cache_file}"
      printf '%s\n' "${risk_json}"
      return
    fi
    # Promote the still-fresh runtime copy into the first layer.
    cp "${runtime_cache_file}" "${cache_file}"
  fi

  cat "${cache_file}"
  return 0
}