aidevops 3.13.73 → 3.13.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VERSION +1 -1
- package/aidevops.sh +1 -1
- package/package.json +1 -1
- package/setup-modules/agent-runtime.sh +3 -2
- package/setup.sh +112 -55
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.13.73
|
|
1
|
+
3.13.75
|
package/aidevops.sh
CHANGED
package/package.json
CHANGED
|
@@ -256,7 +256,8 @@ deploy_agents_to_runtimes() {
|
|
|
256
256
|
# cannot be passed directly to the `timeout` binary. Instead we run it in a
|
|
257
257
|
# background subshell and poll for completion with a configurable wall-clock
|
|
258
258
|
# deadline. When the deadline expires the subshell is killed with SIGTERM then
|
|
259
|
-
# SIGKILL and
|
|
259
|
+
# SIGKILL and this non-critical step is treated as a warning so setup can
|
|
260
|
+
# continue after core deployment has already succeeded.
|
|
260
261
|
#
|
|
261
262
|
# Configurable via AIDEVOPS_DEPLOY_RUNTIMES_TIMEOUT (default 120s).
|
|
262
263
|
_deploy_agents_to_runtimes_bounded() {
|
|
@@ -275,7 +276,7 @@ _deploy_agents_to_runtimes_bounded() {
|
|
|
275
276
|
kill -KILL "$_pid" 2>/dev/null || true
|
|
276
277
|
wait "$_pid" 2>/dev/null || true
|
|
277
278
|
print_warning "Runtime agent deployment exceeded ${timeout_s}s — skipping (non-critical)"
|
|
278
|
-
return
|
|
279
|
+
return 0
|
|
279
280
|
fi
|
|
280
281
|
sleep 1
|
|
281
282
|
done
|
package/setup.sh
CHANGED
|
@@ -12,7 +12,7 @@ shopt -s inherit_errexit 2>/dev/null || true
|
|
|
12
12
|
# AI Assistant Server Access Framework Setup Script
|
|
13
13
|
# Helps developers set up the framework for their infrastructure
|
|
14
14
|
#
|
|
15
|
-
# Version: 3.13.73
|
|
15
|
+
# Version: 3.13.75
|
|
16
16
|
#
|
|
17
17
|
# Quick Install:
|
|
18
18
|
# npm install -g aidevops && aidevops update (recommended)
|
|
@@ -1286,18 +1286,49 @@ _setup_noninteractive_signal_exit() {
|
|
|
1286
1286
|
exit "$exit_code"
|
|
1287
1287
|
}
|
|
1288
1288
|
|
|
1289
|
+
# Compute how many seconds the lock owner PID has been running.
# Uses started_at_epoch (most accurate), falling back to the process's
# elapsed time as reported by ps.
#
# Arguments:
#   $1 - lock directory (may contain a started_at_epoch file)
#   $2 - PID recorded as the lock owner
# Outputs:
#   The age in seconds on stdout (no trailing newline); 0 when unknown.
# Returns:
#   Always 0, so callers can use it inside $(...) without tripping errexit.
_setup_lock_owner_age() {
	local lock_dir="$1"
	local owner_pid="$2"
	local _start_epoch="" _now_epoch="" _age_tmp=""
	# ps "etime" layout: [[dd-]hh:]mm:ss. Kept in a variable so the regex is
	# parsed the same way by every bash version.
	local _etime_re='^(([0-9]+)-)?(([0-9]+):)?([0-9]+):([0-9]+)$'

	if [[ -r "$lock_dir/started_at_epoch" ]]; then
		_start_epoch=$(tr -d '[:space:]' <"$lock_dir/started_at_epoch" 2>/dev/null || true)
		_now_epoch=$(date +%s 2>/dev/null || printf '0')
		if [[ "$_start_epoch" =~ ^[0-9]+$ && "$_now_epoch" =~ ^[0-9]+$ ]]; then
			# Force base 10 so a zero-padded value is not parsed as octal.
			_start_epoch=$((10#$_start_epoch))
			_now_epoch=$((10#$_now_epoch))
			# An epoch of 0 means "timestamp unavailable", not 1970: trusting
			# it would report an enormous age for a perfectly healthy owner.
			if ((_start_epoch > 0 && _now_epoch >= _start_epoch)); then
				printf '%s' "$((_now_epoch - _start_epoch))"
				return 0
			fi
		fi
	fi

	# Fallback 1: ps etimes (seconds elapsed since process start; procps).
	# `|| true` keeps a failing ps from aborting under errexit/pipefail.
	_age_tmp=$(ps -p "$owner_pid" -o etimes= 2>/dev/null | tr -d '[:space:]' || true)
	if [[ "$_age_tmp" =~ ^[0-9]+$ ]]; then
		printf '%s' "$_age_tmp"
		return 0
	fi

	# Fallback 2: ps etime ([[dd-]hh:]mm:ss) for ps implementations that
	# lack the etimes keyword (e.g. macOS).
	_age_tmp=$(ps -p "$owner_pid" -o etime= 2>/dev/null | tr -d '[:space:]' || true)
	if [[ "$_age_tmp" =~ $_etime_re ]]; then
		printf '%s' "$((10#${BASH_REMATCH[2]:-0} * 86400 + 10#${BASH_REMATCH[4]:-0} * 3600 + 10#${BASH_REMATCH[5]} * 60 + 10#${BASH_REMATCH[6]}))"
		return 0
	fi

	printf '0'
	return 0
}
|
|
1313
|
+
|
|
1289
1314
|
_setup_acquire_noninteractive_setup_lock() {
|
|
1290
1315
|
local lock_dir="${AIDEVOPS_SETUP_LOCK_DIR:-$HOME/.aidevops/locks/setup-noninteractive.lock.d}"
|
|
1291
|
-
|
|
1292
|
-
local
|
|
1293
|
-
|
|
1316
|
+
# Max seconds to wait for a live, non-stale owner before timing out.
|
|
1317
|
+
local wait_ceiling="${AIDEVOPS_SETUP_WAIT_TIMEOUT_S:-300}"
|
|
1318
|
+
# Max seconds a live owner may hold the lock before it is treated as
|
|
1319
|
+
# stale and reclaimed (0 disables stale-live reclaim).
|
|
1320
|
+
local stale_ceiling="${AIDEVOPS_SETUP_STALE_TIMEOUT_S:-1800}"
|
|
1321
|
+
local owner_pid="" owner_cmd="" owner_age=0
|
|
1322
|
+
local reclaim_attempts=0 waited=0
|
|
1323
|
+
local _diag_stl="$HOME/.aidevops/logs/setup-stage-timings.log"
|
|
1294
1324
|
mkdir -p "$(dirname "$lock_dir")" 2>/dev/null || true
|
|
1295
|
-
while
|
|
1325
|
+
while true; do
|
|
1296
1326
|
if mkdir "$lock_dir" 2>/dev/null; then
|
|
1297
1327
|
SETUP_NONINTERACTIVE_LOCK_DIR="$lock_dir"
|
|
1298
1328
|
SETUP_NONINTERACTIVE_LOCK_HELD=true
|
|
1299
1329
|
printf '%s\n' "$$" >"$lock_dir/owner.pid" 2>/dev/null || true
|
|
1300
1330
|
printf '%s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >"$lock_dir/started_at" 2>/dev/null || true
|
|
1331
|
+
printf '%s\n' "$(date +%s 2>/dev/null || printf '0')" >"$lock_dir/started_at_epoch" 2>/dev/null || true
|
|
1301
1332
|
printf '%s\n' "$0 $*" >"$lock_dir/command" 2>/dev/null || true
|
|
1302
1333
|
trap '_setup_cleanup_noninteractive_children; _setup_release_noninteractive_setup_lock' EXIT
|
|
1303
1334
|
trap '_setup_noninteractive_signal_exit TERM' TERM
|
|
@@ -1305,11 +1336,12 @@ _setup_acquire_noninteractive_setup_lock() {
|
|
|
1305
1336
|
return 0
|
|
1306
1337
|
fi
|
|
1307
1338
|
|
|
1339
|
+
# Lock exists — inspect owner.
|
|
1308
1340
|
owner_pid=""
|
|
1309
|
-
owner_cmd=""
|
|
1310
1341
|
if [[ -r "$lock_dir/owner.pid" ]]; then
|
|
1311
1342
|
owner_pid=$(tr -d '[:space:]' <"$lock_dir/owner.pid" 2>/dev/null || true)
|
|
1312
1343
|
fi
|
|
1344
|
+
|
|
1313
1345
|
if [[ -z "$owner_pid" ]]; then
|
|
1314
1346
|
local _lock_age="0"
|
|
1315
1347
|
_lock_age=$(_setup_lock_dir_age_seconds "$lock_dir")
|
|
@@ -1319,64 +1351,89 @@ _setup_acquire_noninteractive_setup_lock() {
|
|
|
1319
1351
|
fi
|
|
1320
1352
|
print_warning "Removing stale setup.sh --non-interactive lock with no owner at ${lock_dir} (age ${_lock_age}s)"
|
|
1321
1353
|
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1322
|
-
|
|
1354
|
+
reclaim_attempts=$((reclaim_attempts + 1))
|
|
1355
|
+
if [[ "$reclaim_attempts" -ge 2 ]]; then
|
|
1356
|
+
print_error "Unable to acquire setup.sh --non-interactive lock at ${lock_dir} after ${reclaim_attempts} stale-lock removals"
|
|
1357
|
+
return 75
|
|
1358
|
+
fi
|
|
1323
1359
|
continue
|
|
1324
1360
|
fi
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
return 75
|
|
1332
|
-
fi
|
|
1333
|
-
print_warning "Removing stale setup.sh --non-interactive lock at ${lock_dir}; owner pid ${owner_pid} no longer appears to be setup.sh --non-interactive (age ${_owner_lock_age}s)"
|
|
1334
|
-
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1335
|
-
attempts=$((attempts + 1))
|
|
1336
|
-
continue
|
|
1337
|
-
fi
|
|
1338
|
-
local _owner_started_age=""
|
|
1339
|
-
local _owner_pid_age=""
|
|
1340
|
-
_owner_started_age=$(_setup_lock_started_age_seconds "$lock_dir" 2>/dev/null || true)
|
|
1341
|
-
_owner_pid_age=$(_setup_pid_elapsed_seconds "$owner_pid" 2>/dev/null || true)
|
|
1342
|
-
if [[ "$_owner_started_age" =~ ^[0-9]+$ && "$_owner_pid_age" =~ ^[0-9]+$ && "$_owner_started_age" -gt 300 && "$_owner_started_age" -gt $((_owner_pid_age + 300)) ]]; then
|
|
1343
|
-
print_warning "Removing stale setup.sh --non-interactive lock at ${lock_dir}; lock age ${_owner_started_age}s is older than owner pid ${owner_pid} runtime ${_owner_pid_age}s"
|
|
1344
|
-
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1345
|
-
attempts=$((attempts + 1))
|
|
1346
|
-
continue
|
|
1361
|
+
|
|
1362
|
+
if ! _setup_lock_pid_alive "$owner_pid"; then
|
|
1363
|
+
# Dead owner — reclaim the stale lock.
|
|
1364
|
+
if [[ "$reclaim_attempts" -ge 2 ]]; then
|
|
1365
|
+
print_error "Unable to acquire setup.sh --non-interactive lock at ${lock_dir} after ${reclaim_attempts} stale-lock removals"
|
|
1366
|
+
return 75
|
|
1347
1367
|
fi
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
_diag_secs=$((_diag_now - _diag_started))
|
|
1361
|
-
_diag_elapsed="elapsed ${_diag_secs}s; "
|
|
1362
|
-
fi
|
|
1368
|
+
print_warning "Removing stale setup.sh --non-interactive lock at ${lock_dir}"
|
|
1369
|
+
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1370
|
+
reclaim_attempts=$((reclaim_attempts + 1))
|
|
1371
|
+
continue
|
|
1372
|
+
fi
|
|
1373
|
+
|
|
1374
|
+
if ! _setup_lock_pid_is_noninteractive_setup "$owner_pid"; then
|
|
1375
|
+
local _owner_lock_age="0"
|
|
1376
|
+
_owner_lock_age=$(_setup_lock_dir_age_seconds "$lock_dir")
|
|
1377
|
+
if [[ "$_owner_lock_age" -le 300 ]]; then
|
|
1378
|
+
print_error "Another setup.sh --non-interactive process may be acquiring the deploy lock (pid ${owner_pid}; lock: ${lock_dir}, age ${_owner_lock_age}s). Exiting to avoid overlapping deployments."
|
|
1379
|
+
return 75
|
|
1363
1380
|
fi
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1381
|
+
print_warning "Removing stale setup.sh --non-interactive lock at ${lock_dir}; owner pid ${owner_pid} no longer appears to be setup.sh --non-interactive (age ${_owner_lock_age}s)"
|
|
1382
|
+
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1383
|
+
reclaim_attempts=$((reclaim_attempts + 1))
|
|
1384
|
+
continue
|
|
1385
|
+
fi
|
|
1386
|
+
|
|
1387
|
+
local _owner_started_age=""
|
|
1388
|
+
local _owner_pid_age=""
|
|
1389
|
+
_owner_started_age=$(_setup_lock_started_age_seconds "$lock_dir" 2>/dev/null || true)
|
|
1390
|
+
_owner_pid_age=$(_setup_pid_elapsed_seconds "$owner_pid" 2>/dev/null || true)
|
|
1391
|
+
if [[ "$_owner_started_age" =~ ^[0-9]+$ && "$_owner_pid_age" =~ ^[0-9]+$ && "$_owner_started_age" -gt 300 && "$_owner_started_age" -gt $((_owner_pid_age + 300)) ]]; then
|
|
1392
|
+
print_warning "Removing stale setup.sh --non-interactive lock at ${lock_dir}; lock age ${_owner_started_age}s is older than owner pid ${owner_pid} runtime ${_owner_pid_age}s"
|
|
1393
|
+
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1394
|
+
reclaim_attempts=$((reclaim_attempts + 1))
|
|
1395
|
+
continue
|
|
1396
|
+
fi
|
|
1397
|
+
|
|
1398
|
+
# Owner is alive — compute age and read current setup stage.
|
|
1399
|
+
owner_age=$(_setup_lock_owner_age "$lock_dir" "$owner_pid")
|
|
1400
|
+
owner_cmd=""
|
|
1401
|
+
[[ -r "$lock_dir/command" ]] && owner_cmd=$(tr '\n' ' ' <"$lock_dir/command" 2>/dev/null || true)
|
|
1402
|
+
local _diag_stage=""
|
|
1403
|
+
if [[ -r "$_diag_stl" ]]; then
|
|
1404
|
+
local _diag_cur_stage=""
|
|
1405
|
+
_diag_cur_stage=$(awk -F'\t' '$4=="RUNNING"{s=$2} END{if(s)printf "%s",s}' "$_diag_stl" 2>/dev/null || true)
|
|
1406
|
+
[[ -n "$_diag_cur_stage" ]] && _diag_stage=", stage: ${_diag_cur_stage}"
|
|
1407
|
+
fi
|
|
1408
|
+
|
|
1409
|
+
# Stale-live reclaim: owner alive but running far too long.
|
|
1410
|
+
if [[ "$stale_ceiling" -gt 0 && "$owner_age" -ge "$stale_ceiling" ]]; then
|
|
1411
|
+
if [[ "$reclaim_attempts" -ge 2 ]]; then
|
|
1412
|
+
print_error "Unable to acquire setup.sh --non-interactive lock: owner (pid ${owner_pid}, age ${owner_age}s) exceeds stale ceiling but reclaim limit reached. Diagnose: ${_diag_stl}"
|
|
1413
|
+
return 75
|
|
1368
1414
|
fi
|
|
1369
|
-
|
|
1415
|
+
print_warning "setup.sh --non-interactive lock owner (pid ${owner_pid}) running ${owner_age}s — exceeds stale ceiling ${stale_ceiling}s (AIDEVOPS_SETUP_STALE_TIMEOUT_S)${owner_cmd:+; command: ${owner_cmd}}. Reclaiming lock."
|
|
1416
|
+
rm -rf "$lock_dir" 2>/dev/null || true
|
|
1417
|
+
reclaim_attempts=$((reclaim_attempts + 1))
|
|
1418
|
+
continue
|
|
1419
|
+
fi
|
|
1420
|
+
|
|
1421
|
+
# Live non-stale owner — check wait ceiling before sleeping.
|
|
1422
|
+
if [[ "$waited" -ge "$wait_ceiling" ]]; then
|
|
1423
|
+
print_error "Timed out waiting ${waited}s for setup.sh --non-interactive lock (owner pid ${owner_pid}, age ${owner_age}s${_diag_stage}${owner_cmd:+, command: ${owner_cmd}}). Increase AIDEVOPS_SETUP_WAIT_TIMEOUT_S (current: ${wait_ceiling}s) or kill pid ${owner_pid} to unblock. Diagnose: ${_diag_stl}"
|
|
1370
1424
|
return 75
|
|
1371
1425
|
fi
|
|
1372
1426
|
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1427
|
+
# Emit diagnostics on first block and every 60 s thereafter.
|
|
1428
|
+
if [[ "$waited" -eq 0 ]]; then
|
|
1429
|
+
print_info "Another setup.sh --non-interactive is running (pid ${owner_pid}, age ${owner_age}s${_diag_stage}${owner_cmd:+, command: ${owner_cmd}}). Waiting up to ${wait_ceiling}s (AIDEVOPS_SETUP_WAIT_TIMEOUT_S). Diagnose: ${_diag_stl}"
|
|
1430
|
+
elif [[ $(( waited % 60 )) -eq 0 ]]; then
|
|
1431
|
+
print_info "Still waiting for setup lock (owner pid ${owner_pid}, age ${owner_age}s, waited ${waited}s of ${wait_ceiling}s max)."
|
|
1432
|
+
fi
|
|
1377
1433
|
|
|
1378
|
-
|
|
1379
|
-
|
|
1434
|
+
sleep 10
|
|
1435
|
+
waited=$((waited + 10))
|
|
1436
|
+
done
|
|
1380
1437
|
}
|
|
1381
1438
|
|
|
1382
1439
|
# Non-interactive path: deploy agents and run safe migrations only (no prompts).
|