@keepur/hive 0.8.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pkg/cli.min.js +109 -107
- package/pkg/mcp/google.min.js +18 -18
- package/pkg/server.min.js +435 -423
- package/service/deploy.sh +44 -4
- package/service/deploy.test.sh +89 -1
package/service/deploy.sh
CHANGED
|
@@ -127,10 +127,32 @@ notify() {
|
|
|
127
127
|
> /dev/null
|
|
128
128
|
}
|
|
129
129
|
|
|
130
|
+
# log_size_before <log_file>
# Byte size of the log right now, or 0 if it doesn't exist. Captured
# immediately before `launchctl bootstrap` so health_check can scan only
# bytes written by the new boot and ignore any stale "Hive is running"
# from a prior run.
#
# Arguments: $1 - path to the log file
# Outputs:   a single non-negative integer on stdout
# Returns:   always 0; a size that cannot be read is reported as 0 so the
#            caller's $(...) capture never yields an empty or padded value
log_size_before() {
  local log_file="$1"
  local size=0
  if [[ -f "$log_file" ]]; then
    # A failed read (e.g. unreadable file) degrades to 0 instead of leaving
    # the offset empty or surfacing a failing status to a strict-mode caller.
    size=$(wc -c < "$log_file") || size=0
    # Some wc implementations pad the count with leading blanks.
    size=${size//[[:space:]]/}
    [[ "$size" =~ ^[0-9]+$ ]] || size=0
  fi
  printf '%s\n' "$size"
}
|
|
143
|
+
|
|
144
|
+
# health_check <log_file> [start_offset]
|
|
145
|
+
# KPR-240: anchor the marker scan to the boot we just kicked off.
|
|
146
|
+
# Reads bytes after $start_offset and succeeds iff "Hive is running"
|
|
147
|
+
# appears after "Hive starting up" in that window. Avoids the tail -5
|
|
148
|
+
# race on busy boots (12 agents + scheduler + memory lifecycle can push
|
|
149
|
+
# the marker out of the last 5 lines within 1s) and refuses to match a
|
|
150
|
+
# stale marker from a previous run.
|
|
130
151
|
health_check() {
|
|
131
152
|
local log_file="$1"
|
|
153
|
+
local start_offset="${2:-0}"
|
|
132
154
|
if $DRY_RUN; then
|
|
133
|
-
echo "[DRY RUN] health_check: would check $log_file"
|
|
155
|
+
echo "[DRY RUN] health_check: would check $log_file (offset $start_offset)"
|
|
134
156
|
return 0
|
|
135
157
|
fi
|
|
136
158
|
for attempt in $(seq 1 "$HEALTH_CHECK_RETRIES"); do
|
|
@@ -140,7 +162,7 @@ health_check() {
|
|
|
140
162
|
fi
|
|
141
163
|
for _ in $(seq 1 "$HEALTH_CHECK_WINDOW"); do
|
|
142
164
|
sleep 1
|
|
143
|
-
if
|
|
165
|
+
if _scan_new_boot "$log_file" "$start_offset"; then
|
|
144
166
|
return 0
|
|
145
167
|
fi
|
|
146
168
|
done
|
|
@@ -149,6 +171,20 @@ health_check() {
|
|
|
149
171
|
return 1
|
|
150
172
|
}
|
|
151
173
|
|
|
174
|
+
# _scan_new_boot <log_file> <start_offset>
# True iff the bytes after $start_offset contain "Hive is running"
# preceded by "Hive starting up". tail -c +N is 1-indexed: start at byte N.
#
# Arguments: $1 - path to the log file
#            $2 - byte offset captured before the boot (missing or
#                 non-numeric values are treated as 0)
# Returns:   0 when both markers appear in order inside the window,
#            non-zero otherwise (including a missing or unreadable log)
_scan_new_boot() {
  local log_file="$1"
  local start_offset="${2:-0}"
  local size

  [[ -f "$log_file" ]] || return 1

  # Guard the arithmetic below: anything that is not a plain decimal
  # number falls back to scanning from the start. 10# keeps a value with
  # leading zeros from being parsed as octal.
  [[ "$start_offset" =~ ^[0-9]+$ ]] || start_offset=0
  start_offset=$((10#$start_offset))

  size=$(wc -c < "$log_file" 2>/dev/null) || return 1
  size=${size//[[:space:]]/}
  [[ "$size" =~ ^[0-9]+$ ]] || return 1

  # A log shorter than the snapshot can only have been truncated or
  # replaced after the snapshot was taken, so everything in it is new.
  # Without this, tail would emit nothing and a healthy boot would fail.
  if (( 10#$size < start_offset )); then
    start_offset=0
  fi

  # awk deliberately reads to end of input rather than exiting on the first
  # match: an early exit can SIGPIPE tail on a large log, which turns a
  # successful match into status 141 if the caller runs with pipefail.
  tail -c "+$((start_offset + 1))" "$log_file" 2>/dev/null | awk '
    found { next }
    /"Hive starting up"/ { started = 1; next }
    started && /"Hive is running"/ { found = 1 }
    END { exit (found ? 0 : 1) }
  '
}
|
|
187
|
+
|
|
152
188
|
kill_ports() {
|
|
153
189
|
local ports_str="$1"
|
|
154
190
|
if $DRY_RUN; then
|
|
@@ -373,8 +409,10 @@ if $ROLLBACK; then
|
|
|
373
409
|
notify "Rollback FAILED for \`$id\`: no previous engine (.hive.prev missing)."
|
|
374
410
|
exit 1
|
|
375
411
|
fi
|
|
412
|
+
health_log="$instance_root/$logs_dir/hive.log"
|
|
413
|
+
health_offset=$(log_size_before "$health_log")
|
|
376
414
|
run_cmd launchctl bootstrap "gui/$(id -u)" "$plist_path"
|
|
377
|
-
if health_check "$
|
|
415
|
+
if health_check "$health_log" "$health_offset"; then
|
|
378
416
|
rollback_version=$(jq -r .version < "$instance_root/.hive/package.json" 2>/dev/null || echo "unknown")
|
|
379
417
|
notify "Rollback succeeded for \`$id\` → \`$rollback_version\`."
|
|
380
418
|
echo "Rollback complete."
|
|
@@ -529,11 +567,13 @@ for inst in "${INSTANCES[@]}"; do
|
|
|
529
567
|
echo " Swapping engine..."
|
|
530
568
|
swap_engine "$instance_root"
|
|
531
569
|
|
|
570
|
+
health_log="$instance_root/$logs_dir/hive.log"
|
|
571
|
+
health_offset=$(log_size_before "$health_log")
|
|
532
572
|
echo " Restarting $label..."
|
|
533
573
|
run_cmd launchctl bootstrap "gui/$(id -u)" "$plist_path"
|
|
534
574
|
|
|
535
575
|
echo " Checking health..."
|
|
536
|
-
if ! health_check "$
|
|
576
|
+
if ! health_check "$health_log" "$health_offset"; then
|
|
537
577
|
echo " Health check FAILED for $id — rolling back"
|
|
538
578
|
# New engine bound the port and failed health check — bootout it before swap.
|
|
539
579
|
run_cmd launchctl bootout "gui/$(id -u)/$label" 2>/dev/null || true
|
package/service/deploy.test.sh
CHANGED
|
@@ -65,11 +65,15 @@ echo '{"name":"@keepur/hive","version":"0.2.0-dev"}' > "$BUILD_DIR/package.json"
|
|
|
65
65
|
# function bodies via sed and source them in isolation. The inner `/!p` drops
|
|
66
66
|
# the closing delimiter line so we don't capture the `if $ROLLBACK; then` line
|
|
67
67
|
# (which would trip set -u on undefined ROLLBACK when sourced).
|
|
68
|
-
sed -n '/^# ---
|
|
68
|
+
sed -n '/^# --- Helpers ---/,/^# --- Short-circuit:/{/^# --- Short-circuit:/!p;}' \
|
|
69
69
|
"$SCRIPT_DIR/deploy.sh" > "$TESTROOT/helpers.sh"
|
|
70
70
|
# Helper bodies reference $DRY_RUN (added so --dry-run skips the destructive
|
|
71
71
|
# ops); set it false here so the helpers actually execute under set -u.
|
|
72
72
|
DRY_RUN=false
|
|
73
|
+
# Keep health_check's retry/window/wait small so the new tests don't burn 90s.
|
|
74
|
+
HEALTH_CHECK_RETRIES=1
|
|
75
|
+
HEALTH_CHECK_WINDOW=2
|
|
76
|
+
HEALTH_CHECK_WAIT_BETWEEN=0
|
|
73
77
|
# shellcheck source=/dev/null
|
|
74
78
|
source "$TESTROOT/helpers.sh"
|
|
75
79
|
|
|
@@ -205,4 +209,88 @@ if install_engine_deps "$DEPLOY_DIR" >/dev/null 2>&1; then
|
|
|
205
209
|
exit 1
|
|
206
210
|
fi
|
|
207
211
|
|
|
212
|
+
# --- Health-check tests (KPR-240) ---
|
|
213
|
+
# Anchor the marker scan to a captured byte offset so a busy boot logging
|
|
214
|
+
# many lines after "Hive is running" can't be falsely flagged failed, and
|
|
215
|
+
# so a stale marker from a previous run can't be falsely flagged healthy.
|
|
216
|
+
LOG_DIR=$(mktemp -d -t hive-health-test.XXXXXX)
|
|
217
|
+
trap 'rm -rf "$TESTROOT" "$LOG_DIR"' EXIT
|
|
218
|
+
|
|
219
|
+
# --- Test 11: busy boot logs many lines after marker, still passes ---
|
|
220
|
+
echo "test 11: busy boot passes despite marker scrolling out of tail -5"
|
|
221
|
+
LOG_FILE="$LOG_DIR/hive-busy.log"
|
|
222
|
+
: > "$LOG_FILE"
|
|
223
|
+
OFFSET=$(log_size_before "$LOG_FILE")
|
|
224
|
+
# Simulate the new boot: starting marker, running marker, then a flood of
|
|
225
|
+
# log lines (12 agents + scheduler + memory lifecycle) that pushes the
|
|
226
|
+
# marker far out of `tail -5`.
|
|
227
|
+
{
|
|
228
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"hive","msg":"Hive starting up","instance":"test"}'
|
|
229
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"hive","msg":"Hive is running"}'
|
|
230
|
+
for i in $(seq 1 50); do
|
|
231
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"agent","msg":"agent-'"$i"' ready"}'
|
|
232
|
+
done
|
|
233
|
+
} >> "$LOG_FILE"
|
|
234
|
+
if ! health_check "$LOG_FILE" "$OFFSET" >/dev/null; then
|
|
235
|
+
echo "FAIL: healthy boot with flood after marker should pass"
|
|
236
|
+
exit 1
|
|
237
|
+
fi
|
|
238
|
+
# Sanity: with the legacy tail -5 strategy this would have failed.
|
|
239
|
+
if tail -5 "$LOG_FILE" | grep -q '"Hive is running"'; then
|
|
240
|
+
echo "FAIL: tail -5 unexpectedly still contains the marker — test setup wrong"
|
|
241
|
+
exit 1
|
|
242
|
+
fi
|
|
243
|
+
|
|
244
|
+
# --- Test 12: genuine boot failure (never reaches marker) fails ---
|
|
245
|
+
echo "test 12: genuine boot failure fails health_check"
|
|
246
|
+
LOG_FILE="$LOG_DIR/hive-fail.log"
|
|
247
|
+
: > "$LOG_FILE"
|
|
248
|
+
OFFSET=$(log_size_before "$LOG_FILE")
|
|
249
|
+
{
|
|
250
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"hive","msg":"Hive starting up","instance":"test"}'
|
|
251
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"error","component":"hive","msg":"Mongo unreachable, exiting"}'
|
|
252
|
+
} >> "$LOG_FILE"
|
|
253
|
+
if health_check "$LOG_FILE" "$OFFSET" >/dev/null 2>&1; then
|
|
254
|
+
echo "FAIL: boot that never reached 'Hive is running' should fail"
|
|
255
|
+
exit 1
|
|
256
|
+
fi
|
|
257
|
+
|
|
258
|
+
# --- Test 13: stale "Hive is running" before offset is ignored ---
|
|
259
|
+
echo "test 13: stale marker from previous boot is not matched"
|
|
260
|
+
LOG_FILE="$LOG_DIR/hive-stale.log"
|
|
261
|
+
: > "$LOG_FILE"
|
|
262
|
+
# Previous boot: full happy-path markers land in the file.
|
|
263
|
+
{
|
|
264
|
+
echo '{"ts":"2026-05-24T00:00:00Z","level":"info","component":"hive","msg":"Hive starting up","instance":"test"}'
|
|
265
|
+
echo '{"ts":"2026-05-24T00:00:00Z","level":"info","component":"hive","msg":"Hive is running"}'
|
|
266
|
+
} >> "$LOG_FILE"
|
|
267
|
+
# Capture offset AFTER the prior boot's markers — mimics the deploy.sh
|
|
268
|
+
# call site that snapshots wc -c right before launchctl bootstrap.
|
|
269
|
+
OFFSET=$(log_size_before "$LOG_FILE")
|
|
270
|
+
# New boot crashes before "Hive is running". The stale marker is still
|
|
271
|
+
# physically in the log, but past start_offset there is no marker.
|
|
272
|
+
{
|
|
273
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"hive","msg":"Hive starting up","instance":"test"}'
|
|
274
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"error","component":"hive","msg":"crashed"}'
|
|
275
|
+
} >> "$LOG_FILE"
|
|
276
|
+
if health_check "$LOG_FILE" "$OFFSET" >/dev/null 2>&1; then
|
|
277
|
+
echo "FAIL: stale marker from previous boot should not satisfy health_check"
|
|
278
|
+
exit 1
|
|
279
|
+
fi
|
|
280
|
+
|
|
281
|
+
# --- Test 14: log file absent at start (fresh install), then created ---
|
|
282
|
+
echo "test 14: missing log at offset capture, populated by boot, passes"
|
|
283
|
+
LOG_FILE="$LOG_DIR/hive-fresh.log"
|
|
284
|
+
# No file yet — offset is 0 by contract.
|
|
285
|
+
OFFSET=$(log_size_before "$LOG_FILE")
|
|
286
|
+
[[ "$OFFSET" == "0" ]] || { echo "FAIL: log_size_before should return 0 for missing file (got '$OFFSET')"; exit 1; }
|
|
287
|
+
{
|
|
288
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"hive","msg":"Hive starting up","instance":"test"}'
|
|
289
|
+
echo '{"ts":"2026-05-25T00:00:00Z","level":"info","component":"hive","msg":"Hive is running"}'
|
|
290
|
+
} > "$LOG_FILE"
|
|
291
|
+
if ! health_check "$LOG_FILE" "$OFFSET" >/dev/null; then
|
|
292
|
+
echo "FAIL: fresh-install boot should pass health_check"
|
|
293
|
+
exit 1
|
|
294
|
+
fi
|
|
295
|
+
|
|
208
296
|
echo "all tests passed."
|