@keepur/hive 0.1.10 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -0
- package/install/bootstrap.sh +49 -0
- package/install/migrate-0.2.fixtures/loose-files.txt +24 -0
- package/install/migrate-0.2.sh +822 -0
- package/install/migrate-0.2.test.sh +122 -0
- package/package.json +4 -2
- package/pkg/cli.min.js +138 -136
- package/pkg/server.min.js +157 -156
- package/service/deploy-check.sh +88 -0
- package/service/deploy.sh +476 -0
- package/service/deploy.test.sh +208 -0
- package/service/install.sh +64 -0
- package/service/instances.conf +11 -0
- package/service/rotate-logs.sh +43 -0
|
@@ -0,0 +1,822 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# =============================================================================
|
|
3
|
+
# migrate-0.2.sh — migrate a 0.1.x Hive instance dir into the 0.2.0 layout
|
|
4
|
+
# =============================================================================
|
|
5
|
+
# Usage:
|
|
6
|
+
# bash migrate-0.2.sh <instance_dir>
|
|
7
|
+
# bash migrate-0.2.sh --dry-run <instance_dir>
|
|
8
|
+
#
|
|
9
|
+
# Standalone: does not depend on the 0.2.0 engine being installed yet (it
|
|
10
|
+
# installs it as part of step 7). Run before or after `npm i -g @keepur/hive@0.2.0`.
|
|
11
|
+
#
|
|
12
|
+
# Idempotent: re-running on an already-migrated dir exits 0 with "already on 0.2.0".
|
|
13
|
+
# =============================================================================
|
|
14
|
+
|
|
15
|
+
# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Option defaults; the argument loop further down overwrites them.
DRY_RUN=false
INSTANCE_DIR=""
|
|
19
|
+
|
|
20
|
+
# usage — write the command-line help text to stdout.
# Takes no arguments and reads no globals.
usage() {
  printf '%s\n' \
    'migrate-0.2.sh — migrate a 0.1.x Hive instance into the 0.2.0 layout.' \
    '' \
    'Usage:' \
    'migrate-0.2.sh [--dry-run] <instance_dir>' \
    '' \
    'Options:' \
    '--dry-run Run preflight + classifier only. No filesystem mutations.' \
    '-h, --help Show this help.' \
    '' \
    'Examples:' \
    'bash migrate-0.2.sh --dry-run ~/services/hive/dodi' \
    'bash migrate-0.2.sh ~/services/hive/dodi'
}
|
|
36
|
+
|
|
37
|
+
# --- argument parsing ---
# Accepts --dry-run, -h/--help and exactly one positional <instance_dir>.
# Any other dash-prefixed token is rejected as an unknown flag (previously a
# single-dash typo such as `-n` was silently taken as the instance dir).
for arg in "$@"; do
  case "$arg" in
    --dry-run) DRY_RUN=true ;;
    -h|--help) usage; exit 0 ;;
    -*) echo "ERROR: unknown flag: $arg" >&2; usage >&2; exit 2 ;;
    *)
      if [[ -n "$INSTANCE_DIR" ]]; then
        echo "ERROR: only one positional arg (instance_dir) accepted, got a second: $arg" >&2
        exit 2
      fi
      INSTANCE_DIR="$arg"
      ;;
  esac
done

if [[ -z "$INSTANCE_DIR" ]]; then
  echo "ERROR: <instance_dir> required" >&2
  usage >&2
  exit 2
fi

if [[ ! -d "$INSTANCE_DIR" ]]; then
  echo "ERROR: instance_dir is not a directory: $INSTANCE_DIR" >&2
  exit 1
fi

# Resolve to an absolute, symlink-free path.
#  - CDPATH is blanked so `cd` cannot print a directory name into the capture.
#  - `pwd -P` yields the physical path: live-plist discovery compares
#    `realpath` output against "$INSTANCE_DIR/service/", and realpath always
#    returns physical paths, so a logical path containing a symlinked
#    component would never match.
INSTANCE_DIR="$(CDPATH='' cd -- "$INSTANCE_DIR" && pwd -P)"
INSTANCE_ID="$(basename "$INSTANCE_DIR")"
|
|
61
|
+
|
|
62
|
+
# --- notify helper ---
|
|
63
|
+
# _notify_env_value <KEY> <env_file>
# Prints the value of the last `KEY=...` line in <env_file> with every double
# quote removed; prints an empty line when the key is absent or the file is
# unreadable. Never fails, so callers stay safe under `set -e -o pipefail`.
_notify_env_value() {
  awk -v key="$1" '
    index($0, key "=") == 1 { val = substr($0, length(key) + 2) }
    END { gsub(/"/, "", val); print val }
  ' "$2" 2>/dev/null || true
}

# notify <message>
# Echoes "NOTIFY: <message>" and, outside dry-run, posts the message to Slack
# using SLACK_BOT_TOKEN and DEVOPS_CHANNEL_ID read from $INSTANCE_DIR/.env.
#
# Globals:  DRY_RUN (read), INSTANCE_DIR (read)
# Returns:  always 0 — Slack delivery is best-effort and must not abort the
#           migration or the rollback that called it.
notify() {
  local message="$1"
  echo "NOTIFY: $message"
  if $DRY_RUN; then return 0; fi

  local env_file="$INSTANCE_DIR/.env"
  if [[ ! -f "$env_file" ]]; then
    echo " (no .env, Slack skipped)"
    return 0
  fi

  # The values are parsed out of .env rather than sourcing it. A missing key
  # yields an empty string here instead of a failed pipeline, so the
  # "Slack skipped" branch below stays reachable under strict mode.
  local token channel
  token=$(_notify_env_value SLACK_BOT_TOKEN "$env_file")
  channel=$(_notify_env_value DEVOPS_CHANNEL_ID "$env_file")
  if [[ -z "$token" || -z "$channel" ]]; then
    echo " (SLACK_BOT_TOKEN or DEVOPS_CHANNEL_ID missing in .env, Slack skipped)"
    return 0
  fi

  local payload
  if ! payload=$(jq -n --arg channel "$channel" --arg text "$message" \
    '{channel: $channel, text: $text}'); then
    echo " (could not build Slack payload, continuing)"
    return 0
  fi
  curl -s -X POST https://slack.com/api/chat.postMessage \
    -H "Authorization: Bearer $token" \
    -H "Content-Type: application/json" \
    -d "$payload" > /dev/null || echo " (Slack POST failed, continuing)"
  return 0
}
|
|
91
|
+
|
|
92
|
+
# --- auto-rollback (defined early so every wrapped step can reach it) ---
|
|
93
|
+
# Discovers labels via ~/Library/LaunchAgents symlink resolution (not from
|
|
94
|
+
# hardcoded INSTANCE_ID-derived strings, because dodi's label is legacy
|
|
95
|
+
# com.hive.agent, not com.hive.dodi.agent).
|
|
96
|
+
# auto_rollback — replace $INSTANCE_DIR with the pre-migration snapshot
# ($INSTANCE_DIR.pre-0.2-bak) and restart the LaunchAgents rooted in it.
#
# Globals:  INSTANCE_DIR (read), HOME (read)
# Returns:  0 once the snapshot has been moved back into place;
#           1 if a guard trips or the tree could not be removed/restored.
#
# rollback_or_die calls this from an `if`, where `set -e` is suspended for the
# whole function body. Every destructive command is therefore checked
# explicitly; otherwise a failed rm/mv would still be reported as a
# successful rollback.
auto_rollback() {
  echo "==> AUTO-ROLLBACK"

  # Defensive guard — rm -rf on an empty or missing INSTANCE_DIR would be
  # catastrophic, so refuse before touching anything.
  if [[ -z "$INSTANCE_DIR" || ! -d "$INSTANCE_DIR" ]]; then
    echo "ERROR: auto_rollback refusing to run — INSTANCE_DIR unset or missing: '$INSTANCE_DIR'" >&2
    return 1
  fi
  local backup="$INSTANCE_DIR.pre-0.2-bak"
  if [[ ! -d "$backup" ]]; then
    echo "ERROR: auto_rollback cannot restore — $INSTANCE_DIR.pre-0.2-bak missing." >&2
    return 1
  fi

  # Labels come from the names of the ~/Library/LaunchAgents symlinks that
  # resolve into this instance's service/ directory.
  local -a LABELS=()
  local link abs label uid
  for link in "$HOME/Library/LaunchAgents"/com.hive.*.plist; do
    [[ -L "$link" ]] || continue
    abs=$(realpath "$link" 2>/dev/null || true)
    if [[ -n "$abs" && "$abs" == "$INSTANCE_DIR/service/"* ]]; then
      LABELS+=("$(basename "$link" .plist)")
    fi
  done
  uid=$(id -u)

  # Stop the agents first; a label that is not loaded is not an error.
  if (( ${#LABELS[@]} > 0 )); then
    for label in "${LABELS[@]}"; do
      launchctl bootout "gui/$uid/$label" 2>/dev/null || true
    done
  fi

  if ! rm -rf -- "${INSTANCE_DIR:?}"; then
    echo "ERROR: auto_rollback could not remove $INSTANCE_DIR; snapshot left at $backup." >&2
    return 1
  fi
  if ! mv -- "$backup" "$INSTANCE_DIR"; then
    echo "ERROR: auto_rollback could not move $backup back to $INSTANCE_DIR." >&2
    return 1
  fi

  # Files are restored at this point; a failed restart is reported but does
  # not turn the rollback itself into a failure.
  if (( ${#LABELS[@]} > 0 )); then
    for label in "${LABELS[@]}"; do
      if ! launchctl bootstrap "gui/$uid" "$HOME/Library/LaunchAgents/$label.plist"; then
        echo "WARNING: could not bootstrap $label after rollback — start it manually." >&2
      fi
    done
  fi

  notify "Migration to 0.2.0 FAILED and was rolled back for $INSTANCE_DIR. Instance(s) back on 0.1.x: ${LABELS[*]:-<none>}." || true
  return 0
}
|
|
139
|
+
|
|
140
|
+
# rollback_or_die — standard post-failure cleanup. Tries auto_rollback; if the
|
|
141
|
+
# rollback itself fails (snapshot missing, INSTANCE_DIR unset), escalates with
|
|
142
|
+
# a distinct exit code (2) so incident response can distinguish "migration
|
|
143
|
+
# failed but instance restored" (exit 1) from "migration failed AND rollback
|
|
144
|
+
# failed — instance is in partial state" (exit 2). Always exits.
|
|
145
|
+
# rollback_or_die — post-failure cleanup; never returns.
#   exit 1: auto_rollback succeeded (migration failed, instance restored).
#   exit 2: auto_rollback itself failed (instance needs manual recovery).
# The Slack notification on the exit-2 path is best-effort: its status is
# discarded so that, under `set -e`, a failing notify cannot replace the
# exit code 2 that incident response relies on.
rollback_or_die() {
  if auto_rollback; then
    exit 1
  fi
  {
    echo "CRITICAL: auto_rollback failed — $INSTANCE_DIR is in a partial migration state."
    echo " Snapshot may be available at: $INSTANCE_DIR.pre-0.2-bak"
    echo " Manual recovery required before retrying."
  } >&2
  notify "MIGRATION CRITICAL: auto-rollback failed for $INSTANCE_DIR. Manual recovery required." || true
  exit 2
}
|
|
155
|
+
|
|
156
|
+
# --- preflight ---
|
|
157
|
+
# preflight — validate the instance and the host before anything is changed.
#
# Globals:  INSTANCE_DIR, DRY_RUN, HOME (read); LIVE_PLISTS (written, global
#           array of plist basenames under service/ that a LaunchAgents
#           symlink points at).
# Exits:    0 if the instance is already migrated; 1 on any failed check.
#
# Order matters: every check that can fail without side effects (layout,
# leftover snapshot, required CLIs, disk space) runs BEFORE the operator is
# asked to stop running services, so a missing tool can no longer leave the
# instance stopped. In dry-run mode running services are only reported,
# never stopped.
preflight() {
  echo "==> Preflight: $INSTANCE_DIR"

  if [[ ! -f "$INSTANCE_DIR/hive.yaml" ]]; then
    echo "ERROR: $INSTANCE_DIR does not look like a Hive instance (no hive.yaml)." >&2
    exit 1
  fi

  # Already migrated?
  if [[ -f "$INSTANCE_DIR/.hive/pkg/server.min.js" ]]; then
    echo " → .hive/pkg/server.min.js already populated; already on 0.2.0."
    exit 0
  fi

  # Confirm it's a 0.1.x instance. Three valid shapes:
  #   1. Repo-clone layout:     dist/index.js present
  #   2. Instance-git state:    .hive/git/ present
  #   3. Global-install layout: a plist in service/ references the global npm path
  local is_01x=false plist
  if [[ -f "$INSTANCE_DIR/dist/index.js" || -d "$INSTANCE_DIR/.hive/git" ]]; then
    is_01x=true
  else
    for plist in "$INSTANCE_DIR/service/"*.plist; do
      [[ -f "$plist" ]] || continue
      if grep -q 'node_modules/@keepur/hive/pkg/server.min.js' "$plist" 2>/dev/null; then
        is_01x=true
        break
      fi
    done
  fi
  if ! $is_01x; then
    echo "ERROR: $INSTANCE_DIR doesn't look like 0.1.x (no dist/, .hive/git/, or global-install plist)." >&2
    echo " Manual inspection required — refusing to proceed." >&2
    exit 1
  fi

  # Existing .pre-0.2-bak means a prior migration attempt started and didn't finish.
  if [[ -e "$INSTANCE_DIR.pre-0.2-bak" ]]; then
    echo "ERROR: $INSTANCE_DIR.pre-0.2-bak already exists." >&2
    echo " Rename or delete it before retrying, after confirming the instance is healthy." >&2
    exit 1
  fi

  # Required CLIs, checked before their first use below.
  local missing="" cmd
  for cmd in yq jq rsync realpath readlink launchctl; do
    command -v "$cmd" >/dev/null 2>&1 || missing="$missing $cmd"
  done
  if [[ -n "$missing" ]]; then
    echo "ERROR: missing required CLI(s):$missing" >&2
    echo " Install via: brew install${missing}" >&2
    exit 1
  fi

  # Disk space — require 2x the instance dir size free before snapshotting.
  # `df -P` keeps each filesystem on one line so the awk field is stable.
  local size_kb needed_kb free_kb
  size_kb=$(du -sk "$INSTANCE_DIR" | awk '{print $1}')
  needed_kb=$((size_kb * 2))
  free_kb=$(df -Pk "$INSTANCE_DIR" | awk 'NR==2 {print $4}')
  if [[ "$free_kb" -lt "$needed_kb" ]]; then
    echo "ERROR: need at least $((needed_kb / 1024))MB free; only $((free_kb / 1024))MB available." >&2
    exit 1
  fi

  # Discover live plists: LaunchAgents symlinks that resolve into service/.
  declare -g -a LIVE_PLISTS=()
  local link abs
  for link in "$HOME/Library/LaunchAgents"/com.hive.*.plist; do
    [[ -L "$link" ]] || continue
    abs=$(realpath "$link" 2>/dev/null || true)
    if [[ -n "$abs" && "$abs" == "$INSTANCE_DIR/service/"* ]]; then
      LIVE_PLISTS+=("$(basename "$abs")")
    fi
  done
  echo " Live plists rooted here: ${LIVE_PLISTS[*]:-<none>}"

  # Which of those are loaded right now?
  local -a running=()
  local label uid
  uid=$(id -u)
  if (( ${#LIVE_PLISTS[@]} > 0 )); then
    for plist in "${LIVE_PLISTS[@]}"; do
      label="${plist%.plist}"
      if launchctl print "gui/$uid/$label" &>/dev/null; then
        running+=("$label")
      fi
    done
  fi
  if (( ${#running[@]} > 0 )); then
    echo "WARNING: the following LaunchAgents are currently loaded:"
    echo "  ${running[*]}"
    if $DRY_RUN; then
      # A dry run must not change anything, and that includes stopping services.
      echo " [DRY RUN] leaving them running; a real migration will ask to stop them."
    else
      echo ""
      local reply=""
      # Without a controlling terminal the read fails; treat that as "no".
      read -r -p "Stop them now via launchctl bootout? [y/N] " reply </dev/tty || reply=""
      if [[ "$reply" =~ ^[Yy]$ ]]; then
        for label in "${running[@]}"; do
          launchctl bootout "gui/$uid/$label" 2>/dev/null || true
        done
      else
        echo "ERROR: refusing to migrate while service is running. Stop it and retry." >&2
        exit 1
      fi
    fi
  fi

  # Lingering playwright-mcp children whose cwd is inside this instance.
  # Skipped in dry-run (nothing is mutated). Only processes under
  # INSTANCE_DIR count, so unrelated host processes are ignored.
  if ! $DRY_RUN; then
    local instance_pids="" pid cwd still_running
    while IFS= read -r pid; do
      # lsof fails when the PID has already exited; treat that as "no cwd"
      # rather than letting pipefail + errexit abort preflight.
      cwd=$(lsof -a -d cwd -p "$pid" -Fn 2>/dev/null | awk '/^n/{print substr($0,2)}') || true
      # Match the directory itself or anything below it, not sibling
      # directories that merely share the prefix.
      if [[ "$cwd" == "$INSTANCE_DIR" || "$cwd" == "$INSTANCE_DIR"/* ]]; then
        instance_pids="$instance_pids $pid"
      fi
    done < <(pgrep -f playwright-mcp 2>/dev/null || true)

    if [[ -n "$instance_pids" ]]; then
      echo "WARNING: playwright-mcp child processes from this instance still running (PIDs:$instance_pids)."
      echo " Waiting up to 5 seconds for them to exit..."
      for _ in 1 2 3 4 5; do
        sleep 1
        still_running=""
        # PIDs are plain integers, so word-splitting the list is intended.
        for pid in $instance_pids; do
          if kill -0 "$pid" 2>/dev/null; then
            still_running="$still_running $pid"
          fi
        done
        instance_pids="$still_running"
        if [[ -z "$instance_pids" ]]; then
          break
        fi
      done
      if [[ -n "$instance_pids" ]]; then
        echo "ERROR: playwright-mcp from this instance still running (PIDs:$instance_pids). Kill and retry:" >&2
        echo " kill$instance_pids" >&2
        exit 1
      fi
    fi
  fi

  echo " ✓ preflight"
}
|
|
306
|
+
|
|
307
|
+
# Run the preflight checks first: preflight exits the script with status 1 on
# any failed check, or with status 0 when the instance is already migrated.
preflight
|
|
308
|
+
|
|
309
|
+
# =============================================================================
|
|
310
|
+
# Step 2 — Snapshot for rollback
|
|
311
|
+
# =============================================================================
|
|
312
|
+
# step_snapshot — copy the whole instance tree to $INSTANCE_DIR.pre-0.2-bak,
# the source auto_rollback later restores from.
#
# Globals:  INSTANCE_DIR, DRY_RUN (read)
# Returns:  0 on success or in dry-run; 1 if the destination already exists
#           or the copy fails. A failed copy is removed again so that a
#           partial snapshot can neither block the next attempt nor be
#           mistaken for a complete rollback source.
step_snapshot() {
  local backup="$INSTANCE_DIR.pre-0.2-bak"
  echo "==> Step 2: snapshot → $backup"
  if $DRY_RUN; then
    echo " [DRY RUN] would cp -a $INSTANCE_DIR $backup"
    return 0
  fi

  # cp -a into an existing directory would nest the copy inside it.
  if [[ -e "$backup" ]]; then
    echo "ERROR: $backup already exists; refusing to snapshot over it." >&2
    return 1
  fi

  if ! cp -a "$INSTANCE_DIR" "$backup"; then
    echo "ERROR: snapshot failed; removing partial $backup" >&2
    rm -rf -- "$backup"
    return 1
  fi
  echo " ✓ snapshot complete ($(du -sh "$backup" | awk '{print $1}'))"
}
|
|
321
|
+
|
|
322
|
+
# =============================================================================
|
|
323
|
+
# Step 3 — Create new skeleton namespaces
|
|
324
|
+
# =============================================================================
|
|
325
|
+
# step_skeleton — make sure the 0.2 namespace directories exist under
# $INSTANCE_DIR. In dry-run mode each mkdir is only echoed.
#
# Globals:  INSTANCE_DIR, DRY_RUN (read)
step_skeleton() {
  echo "==> Step 3: create namespace dirs"
  local sub target
  for sub in agents workflow data skills plugins .hive .hive-state; do
    target="$INSTANCE_DIR/$sub"
    case "$DRY_RUN" in
      true) echo " [DRY RUN] mkdir -p $target" ;;
      *) mkdir -p "$target" ;;
    esac
  done
  echo " ✓ skeleton created"
}
|
|
345
|
+
|
|
346
|
+
# Step 2: take the rollback snapshot before any directory is created or moved.
step_snapshot
|
|
347
|
+
# Step 3: create the 0.2 namespace directories (echo-only under --dry-run).
step_skeleton
|
|
348
|
+
|
|
349
|
+
# =============================================================================
|
|
350
|
+
# Step 4 — Relocate .hive/git/ + snapshots → .hive-state/
|
|
351
|
+
# =============================================================================
|
|
352
|
+
# Spec: destination names must match Phase 2's hiveStateDir/instanceGitDir in
|
|
353
|
+
# src/paths.ts. Cross-reference before shipping.
|
|
354
|
+
# step_relocate_state — move .hive/git and the snapshot/notice marker files
# from .hive/ into .hive-state/.
#
# Globals:  INSTANCE_DIR, DRY_RUN (read)
# Returns:  0 on success or in dry-run; 1 as soon as a move fails.
#
# The caller tests this function with `if !`, which suspends `set -e` inside
# it, so failures have to be reported through explicit return codes.
step_relocate_state() {
  echo "==> Step 4: relocate .hive/git/ → .hive-state/"
  local from="$INSTANCE_DIR/.hive" to="$INSTANCE_DIR/.hive-state"
  local f

  if $DRY_RUN; then
    if [[ -d "$from/git" ]]; then
      echo " [DRY RUN] mv .hive/git .hive-state/git"
    fi
    for f in installed-snapshot.json previous-snapshot.json upgrade-notice-emitted; do
      if [[ -e "$from/$f" ]]; then
        echo " [DRY RUN] mv .hive/$f .hive-state/$f"
      fi
    done
    return 0
  fi

  # Make sure the destination exists even if this step runs on its own.
  if ! mkdir -p "$to"; then
    echo "ERROR: cannot create $to" >&2
    return 1
  fi

  if [[ -d "$from/git" ]]; then
    mv "$from/git" "$to/git" || return 1
  fi
  for f in installed-snapshot.json previous-snapshot.json upgrade-notice-emitted; do
    if [[ -e "$from/$f" ]]; then
      mv "$from/$f" "$to/$f" || return 1
    fi
  done
  echo " ✓ state relocated"
}
|
|
375
|
+
|
|
376
|
+
# =============================================================================
|
|
377
|
+
# Step 5 — Wipe engine files at instance root + prune service/
|
|
378
|
+
# =============================================================================
|
|
379
|
+
# The rm list is enumerated explicitly — NO wildcard globs that could catch
|
|
380
|
+
# hive.yaml or .env. service/ is preserved; only engine-shipped scripts inside
|
|
381
|
+
# it are removed. Live plists (discovered in preflight via LIVE_PLISTS) are
|
|
382
|
+
# preserved.
|
|
383
|
+
# step_wipe_engine — delete the 0.1.x engine files at the instance root and
# the engine-shipped files inside service/, keeping every plist listed in
# LIVE_PLISTS.
#
# Globals:  INSTANCE_DIR, DRY_RUN, LIVE_PLISTS (read)
# Returns:  0 on success or in dry-run; 1 as soon as a removal fails (the
#           caller tests this function, so `set -e` does not apply inside).
#
# The removal list is explicit on purpose: no glob may ever reach hive.yaml
# or .env.
step_wipe_engine() {
  echo "==> Step 5: wipe old engine files"
  local -a targets=(
    dist node_modules src build setup seeds templates tests scripts docs
    install plugins/claude-code
    package.json package-lock.json tsconfig.json tsconfig.plugins.json
    eslint.config.js vitest.config.ts
    AGENTS.md README.md CLAUDE.md
    .github
  )
  local rel path
  for rel in "${targets[@]}"; do
    path="$INSTANCE_DIR/$rel"
    [[ -e "$path" ]] || continue
    if $DRY_RUN; then
      echo " [DRY RUN] rm -rf $path"
    elif ! rm -rf -- "$path"; then
      echo "ERROR: could not remove $path" >&2
      return 1
    fi
  done

  # Inside service/: delete engine-shipped files, preserve live plists.
  if [[ -d "$INSTANCE_DIR/service" ]]; then
    local f name live is_live
    for f in "$INSTANCE_DIR/service"/*; do
      [[ -e "$f" ]] || continue
      # `rm -f` cannot remove a directory, so sub-directories were never
      # deleted here; skip them explicitly now that failures are checked.
      if [[ -d "$f" && ! -L "$f" ]]; then
        continue
      fi
      name=${f##*/}
      is_live=false
      # The count guard avoids expanding an empty array, which older bash
      # versions reject under `set -u`.
      if (( ${#LIVE_PLISTS[@]} > 0 )); then
        for live in "${LIVE_PLISTS[@]}"; do
          if [[ "$name" == "$live" ]]; then
            is_live=true
            break
          fi
        done
      fi
      if $is_live; then
        continue
      fi
      if $DRY_RUN; then
        echo " [DRY RUN] rm -f service/$name"
      elif ! rm -f -- "$f"; then
        echo "ERROR: could not remove $f" >&2
        return 1
      fi
    done
  fi

  echo " ✓ engine files wiped (live plists preserved: ${LIVE_PLISTS[*]:-<none>})"
}
|
|
446
|
+
|
|
447
|
+
# Steps 4 and 5 rewrite the instance tree (mv/rm) after the snapshot exists,
# so a failed step can be undone. Rollback is keyed off each step function's
# return status. Because the function is called in a tested position,
# `set -e` is suspended inside it, and only an explicit or final non-zero
# return is seen here.
step_relocate_state || rollback_or_die

step_wipe_engine || rollback_or_die
|
|
458
|
+
|
|
459
|
+
# =============================================================================
|
|
460
|
+
# Step 6 — Classify and relocate loose agent files
|
|
461
|
+
# =============================================================================
|
|
462
|
+
# Ordering is load-bearing: agent-prefix patterns fire before content-type
|
|
463
|
+
# patterns. See spec §Step 6 for the canonical table.
|
|
464
|
+
#
|
|
465
|
+
# classify_file <basename> → emits destination dir (relative to $INSTANCE_DIR)
|
|
466
|
+
# classify_file <basename>
# Prints the destination directory (relative to $INSTANCE_DIR) for one loose
# file found at the instance root. Always returns 0.
#
# Matching order is load-bearing:
#   1. Agent prefix "<agent>-", matched case-insensitively as the comment on
#      the original table promised. Previously only all-lowercase and
#      all-uppercase spellings matched, so "Milo-notes.md" fell through to
#      the content-type rules.
#   2. Content-type patterns, case-sensitive, first match wins.
#   3. Catch-all: data/archive-pre-0.2/unsorted.
classify_file() {
  local name="$1"
  local lowered="${name,,}"

  # Agent prefix — the agent directory is the lowercased text before the
  # first "-".
  case "$lowered" in
    milo-*|river-*|jessica-*|wyatt-*|rae-*|chloe-*|colt-*|sige-*)
      echo "agents/${lowered%%-*}/reports/archive-pre-0.2"
      return 0
      ;;
  esac

  case "$name" in
    # Social scrapes (no single agent owner)
    fb-*.md|linkedin-*.md|x-*.md|x-snapshot-*)
      echo "data/archive-pre-0.2/social-scrapes" ;;

    # Standups — Milo's domain
    *-standup-*.md|*-standup-*.json|sales-*-standup-*)
      echo "agents/milo/reports/archive-pre-0.2" ;;

    # Permit data — River's
    PERMIT-*|high-tier-permits*.csv|*-permits.csv)
      echo "agents/river/reports/archive-pre-0.2" ;;

    # Pipeline data — Milo
    HUBSPOT-*|STALE-DEALS-*|stale-deals.csv|*-sales-pipeline-*)
      echo "agents/milo/reports/archive-pre-0.2" ;;

    # Ambiguous — safer to park
    LEAD-SEGMENTATION-*|QUERY-RESULTS-*)
      echo "data/archive-pre-0.2/unsorted" ;;

    # Ad-hoc scripts at root — kept, not deleted. "*.ts" already covers the
    # analyze-/check-/create-/... variants the original listed separately.
    *.ts)
      echo "data/archive-pre-0.2/scripts" ;;

    # Per-artifact readmes — keep alongside data
    README-*.md)
      echo "data/archive-pre-0.2" ;;

    # Catch-all
    *)
      echo "data/archive-pre-0.2/unsorted" ;;
  esac
}
|
|
510
|
+
|
|
511
|
+
# =============================================================================
|
|
512
|
+
# Step 6 (continued) — iterate loose files at instance root
|
|
513
|
+
# =============================================================================
|
|
514
|
+
# step_classify_loose_files — classify every entry at the instance root that
# is not on the preserve list, print the FILE → DESTINATION table, and
# (outside dry-run) move each entry to its destination. .playwright-mcp/ is
# deleted instead of moved.
#
# Globals:  INSTANCE_DIR, DRY_RUN (read)
# Returns:  0 on success or in dry-run; 1 as soon as a mkdir/mv/rm fails.
#
# Names and destinations are kept in two parallel arrays rather than one
# "name|dest" string, so filenames containing "|" survive intact.
#
# NOTE(review): in dry-run, step 5 only echoes its removals, so engine files
# it would have deleted are still present here and show up in the table.
step_classify_loose_files() {
  echo "==> Step 6: classify loose agent files"

  # Entries at the instance root that are never treated as loose files.
  local -a preserve_names=(
    ".env" ".env-personal"
    "hive.yaml" "hive-personal.yaml" "beekeeper.yaml"
    ".hive-generated.json"
    ".hive" ".hive-state" ".hive.prev" ".hive.broken"
    "agents" "workflow" "data" "skills" "plugins"
    "logs" "logs-beekeeper" "logs-personal"
    "service"
    ".git" # removed in Step 9; leave alone here
    ".DS_Store"
  )

  # Snapshot the directory listing while dotglob/nullglob are on, then switch
  # them off again right away so no later return path can leave them set.
  local -a entries=()
  shopt -s dotglob nullglob
  entries=("$INSTANCE_DIR"/*)
  shopt -u dotglob nullglob

  local -a move_names=() move_dests=()
  local entry name preserve skip size dest i

  if (( ${#entries[@]} > 0 )); then
    for entry in "${entries[@]}"; do
      name=${entry##*/}
      skip=false
      for preserve in "${preserve_names[@]}"; do
        if [[ "$name" == "$preserve" ]]; then
          skip=true
          break
        fi
      done
      if $skip; then
        continue
      fi

      # Special case: .playwright-mcp/ is deleted outright.
      if [[ "$name" == ".playwright-mcp" ]]; then
        size=$(du -sh "$entry" 2>/dev/null | awk '{print $1}') || true
        if $DRY_RUN; then
          echo " [DRY RUN] rm -rf .playwright-mcp ($size of console logs)"
        else
          echo " Removing .playwright-mcp ($size)..."
          rm -rf -- "$entry" || return 1
        fi
        continue
      fi

      dest=$(classify_file "$name")
      move_names+=("$name")
      move_dests+=("$dest")
    done
  fi

  # Emit the classification table.
  local total=${#move_names[@]}
  if (( total == 0 )); then
    echo " (no loose files to classify)"
  else
    printf " %-60s → %s\n" "FILE" "DESTINATION"
    for (( i = 0; i < total; i++ )); do
      printf " %-60s → %s\n" "${move_names[i]}" "${move_dests[i]}"
    done
  fi

  # Execute the moves (unless dry-run). The index loop never expands an empty
  # array, which older bash versions reject under `set -u`.
  if $DRY_RUN; then
    return 0
  fi
  for (( i = 0; i < total; i++ )); do
    mkdir -p "$INSTANCE_DIR/${move_dests[i]}" || return 1
    mv -- "$INSTANCE_DIR/${move_names[i]}" "$INSTANCE_DIR/${move_dests[i]}/" || return 1
  done
  echo " ✓ $total file(s) relocated"
}
|
|
583
|
+
|
|
584
|
+
# Step 6. This call is not wrapped in rollback_or_die: under `set -e` a
# failure inside it aborts the script without restoring the snapshot.
step_classify_loose_files
|
|
585
|
+
|
|
586
|
+
# A dry run stops here, before any of the later steps (7 onward) is reached.
if [[ "$DRY_RUN" == true ]]; then
  printf '%s\n' \
    "" \
    "==> Dry-run complete. No filesystem mutations performed." \
    " Review the classification table above. If destinations look right," \
    " re-run without --dry-run to migrate for real."
  exit 0
fi
|
|
593
|
+
|
|
594
|
+
# =============================================================================
|
|
595
|
+
# Step 7 — Populate .hive/ with the 0.2.0 engine
|
|
596
|
+
# =============================================================================
|
|
597
|
+
# step_populate_engine — make sure a 0.2.x @keepur/hive CLI is installed
# globally, then copy its package entries into $INSTANCE_DIR/.hive/.
#
# Globals:  INSTANCE_DIR (read)
# Returns:  0 on success; 1 if the npm install, the CLI lookup, a copy, or
#           the final sanity check fails.
#
# The caller tests this function with `if !`, which suspends `set -e` inside
# it. npm, realpath, mkdir and rsync are therefore checked explicitly;
# otherwise a failed copy would go unnoticed whenever the sanity check below
# still happened to pass.
step_populate_engine() {
  echo "==> Step 7: populate .hive/ with @keepur/hive@0.2.0"

  # Install globally unless some 0.2.x is already present (never downgrade a
  # newer 0.2 patch release back to 0.2.0).
  local current=""
  if command -v hive >/dev/null 2>&1; then
    current=$(hive --version 2>/dev/null | awk '{print $NF}' | sed 's/^v//') || true
  fi
  if [[ "$current" != 0.2.* ]]; then
    echo " Installing @keepur/hive@0.2.0 globally (current: ${current:-<none>})..."
    if ! npm i -g "@keepur/hive@0.2.0"; then
      echo "ERROR: npm i -g @keepur/hive@0.2.0 failed." >&2
      return 1
    fi
    # Drop bash's cached command locations so the fresh binary is found.
    hash -r 2>/dev/null || true
  else
    echo " Global CLI already on $current (≥0.2.0 — keeping it)."
  fi

  local cli_bin
  cli_bin=$(command -v hive) || cli_bin=""
  if [[ -z "$cli_bin" ]]; then
    echo "ERROR: hive CLI not found on PATH after npm install." >&2
    return 1
  fi

  # Package root = parent of the directory that holds the resolved CLI file.
  local cli_real cli_root
  if ! cli_real=$(realpath "$cli_bin"); then
    echo "ERROR: cannot resolve hive CLI path: $cli_bin" >&2
    return 1
  fi
  cli_root=$(dirname "$cli_real")/..

  # PACKAGE_ENTRIES — the original comment requires this list to stay in sync
  # with src/setup/populate-engine.ts and deploy.sh fetch_engine.
  local -a entries=(pkg seeds templates scripts/honeypot install service package.json)
  local entry src dst
  for entry in "${entries[@]}"; do
    src="$cli_root/$entry"
    if [[ ! -e "$src" ]]; then
      echo "ERROR: expected engine entry '$entry' missing from $cli_root." >&2
      echo " Verify @keepur/hive@0.2.0 installed correctly: npm ls -g @keepur/hive" >&2
      return 1
    fi
    dst="$INSTANCE_DIR/.hive/$entry"
    mkdir -p "$(dirname "$dst")" || return 1
    if [[ -d "$src" ]]; then
      # Trailing slashes: copy the directory's contents, not the directory.
      mkdir -p "$dst" || return 1
      rsync -a "$src/" "$dst/" || return 1
    else
      rsync -a "$src" "$dst" || return 1
    fi
  done

  # Sanity check
  if [[ ! -f "$INSTANCE_DIR/.hive/pkg/server.min.js" ]]; then
    echo "ERROR: .hive/pkg/server.min.js missing after populate" >&2
    return 1
  fi
  echo " ✓ .hive/ populated (version: $(jq -r .version "$INSTANCE_DIR/.hive/package.json"))"
}
|
|
649
|
+
|
|
650
|
+
# =============================================================================
|
|
651
|
+
# Step 7b — Install engine runtime deps (mirror install_engine_deps in deploy.sh)
|
|
652
|
+
# =============================================================================
|
|
653
|
+
# The npm-packed .hive/ bundle lists 14 runtime externals in package.json that
|
|
654
|
+
# live OUTSIDE pkg/server.min.js (native modules, large SDKs, asset loaders).
|
|
655
|
+
# Without node_modules/, the engine crashes at startup. Mirrors
|
|
656
|
+
# service/deploy.sh:211 install_engine_deps() exactly.
|
|
657
|
+
# step_install_engine_deps — run `npm install --omit=dev` inside
# $INSTANCE_DIR/.hive to fetch the engine's runtime dependencies.
#
# Globals:  INSTANCE_DIR (read)
# Returns:  0 on success; 1 if .hive/package.json is missing or npm fails.
#
# The caller tests this function with `if !`, which suspends `set -e` inside
# it. The npm status is therefore checked explicitly; previously a failed
# install still printed the success line and returned 0.
step_install_engine_deps() {
  echo "==> Step 7b: install engine runtime deps"
  local engine_dir="$INSTANCE_DIR/.hive"
  if [[ ! -f "$engine_dir/package.json" ]]; then
    echo "ERROR: install_engine_deps needs $INSTANCE_DIR/.hive/package.json" >&2
    return 1
  fi
  # Subshell keeps the cd from leaking into the rest of the script.
  if ! (cd "$engine_dir" && npm install --omit=dev --no-audit --no-fund --no-progress); then
    echo "ERROR: npm install failed in $engine_dir" >&2
    return 1
  fi
  echo " ✓ engine deps installed"
}
|
|
666
|
+
|
|
667
|
+
# Steps 7 and 7b trigger a rollback on failure (npm registry timeout, missing
# package entry, npm install blowup). Without it, a half-populated .hive/
# could contain pkg/server.min.js, and preflight's "already on 0.2.0" check
# would then exit 0 on the next run even though the engine is broken.
# Rollback fires only when the step function returns non-zero; `set -e` is
# suspended inside a function called in this tested position.
step_populate_engine || rollback_or_die

step_install_engine_deps || rollback_or_die
|
|
679
|
+
|
|
680
|
+
# =============================================================================
|
|
681
|
+
# Step 8 — Rewrite hive.yaml paths
|
|
682
|
+
# =============================================================================
|
|
683
|
+
# Only surgery: codeTask.pluginDirs, which pointed at the shared
|
|
684
|
+
# ~/services/hive/plugins/claude-code/... pre-migration. Post-migration each
|
|
685
|
+
# instance has its own .hive/plugins/claude-code/.
|
|
686
|
+
# step_rewrite_yaml — rewrite every codeTask.pluginDirs entry in hive.yaml
# that starts with "~/services/hive/plugins/" so that it points at
# "~/services/hive/<INSTANCE_ID>/.hive/plugins/" instead.
#
# Globals:  INSTANCE_DIR, INSTANCE_ID (read)
# Returns:  0 on success; 1 if yq fails.
#
# The caller tests this function with `if !`, which suspends `set -e` inside
# it. The yq status is therefore checked explicitly; previously a yq failure
# still printed the success line and returned 0.
step_rewrite_yaml() {
  echo "==> Step 8: rewrite hive.yaml codeTask.pluginDirs"

  # sub() is applied per array element; the surrounding [...] collects the
  # results back into an array (load-bearing, per the original comment).
  # INSTANCE_ID is spliced into the yq program text between the quoted parts.
  local program
  program='.codeTask.pluginDirs = [
    .codeTask.pluginDirs[]
    | sub("^~/services/hive/plugins/"; "~/services/hive/'"$INSTANCE_ID"'/.hive/plugins/")
  ]'

  if ! yq -i "$program" "$INSTANCE_DIR/hive.yaml"; then
    echo "ERROR: yq failed to rewrite $INSTANCE_DIR/hive.yaml" >&2
    return 1
  fi

  echo " ✓ hive.yaml rewritten"
}
|
|
698
|
+
|
|
699
|
+
# =============================================================================
|
|
700
|
+
# Step 9 — Remove instance-root .git/
|
|
701
|
+
# =============================================================================
|
|
702
|
+
step_remove_git() {
  # Deletes the .git directory at the instance root when one exists.
  # Globals:  INSTANCE_DIR (read)
  # Outputs:  progress on stdout, failure reason on stderr
  # Returns:  0 when .git was removed or was not a directory to begin with,
  #           1 when the removal itself failed
  echo "==> Step 9: remove instance-root .git/"

  if [[ ! -d "$INSTANCE_DIR/.git" ]]; then
    echo "  (no .git to remove)"
    return 0
  fi

  # ${INSTANCE_DIR:?} aborts instead of letting an empty value turn this into
  # `rm -rf /.git`. The status is checked by hand because the caller runs this
  # step as `if ! step_remove_git`, a context in which bash ignores `set -e`
  # inside the function; an unchecked rm failure would otherwise be reported
  # as success.
  if ! rm -rf -- "${INSTANCE_DIR:?}/.git"; then
    echo "ERROR: could not remove $INSTANCE_DIR/.git" >&2
    return 1
  fi

  echo "  ✓ .git removed"
}
|
|
711
|
+
|
|
712
|
+
# Steps 8 and 9 get the same rollback guard as steps 10-11. If yq failed
# mid-migration with no rollback, the instance would be stuck half-migrated
# (new .hive/ populated, yaml still pointing at legacy paths), and the retry
# guard at the top of the script would exit 0 on "already migrated" rather
# than finishing the yaml rewrite.
step_rewrite_yaml || rollback_or_die

step_remove_git || rollback_or_die
|
|
724
|
+
|
|
725
|
+
# =============================================================================
|
|
726
|
+
# Step 10 — Regenerate live plist(s) + bootstrap
|
|
727
|
+
# =============================================================================
|
|
728
|
+
step_regenerate_plists() {
  # Retires legacy LaunchAgent labels that belong to this instance, then runs
  # `hive start --daemon` once for every config file at the instance root.
  # Globals:  INSTANCE_DIR (read), HOME (read)
  # Outputs:  progress on stdout, warnings and errors on stderr
  # Returns:  0 when at least one config was started and none failed;
  #           1 when a start failed, a legacy plist could not be deleted, or
  #           no config file was found
  echo "==> Step 10: regenerate LaunchAgent plists"

  # Declared up front so the loop variables stay out of the script's globals.
  local legacy link abs yaml cfg

  # 10a. Retire legacy labels that don't match com.hive.<id>.agent.
  # Only dodi has this case (live label: com.hive.agent). Keepur's label is
  # already com.hive.keepur.agent. If we add more exceptions, list them here.
  local legacy_labels=(com.hive.agent)
  for legacy in "${legacy_labels[@]}"; do
    link="$HOME/Library/LaunchAgents/$legacy.plist"
    [[ -L "$link" ]] || continue

    # Resolution stays best-effort (an unresolvable link is skipped), but the
    # skip is now reported instead of passing silently.
    if ! abs=$(realpath "$link" 2>/dev/null); then
      echo "  WARN: could not resolve $link; leaving legacy label $legacy in place" >&2
      continue
    fi

    # Only touch the label when its plist lives under this instance's service/.
    if [[ "$abs" == "$INSTANCE_DIR/service/"* ]]; then
      echo "  Retiring legacy label: $legacy"
      # A bootout failure is deliberately ignored (presumably the label may
      # simply not be loaded — confirm).
      launchctl bootout "gui/$(id -u)/$legacy" 2>/dev/null || true
      # Checked by hand: this step runs as `if ! step_regenerate_plists`, and
      # bash ignores `set -e` inside a function used as an `if` condition.
      if ! rm -f -- "$link" "$INSTANCE_DIR/service/$legacy.plist"; then
        echo "ERROR: could not remove legacy plist for $legacy." >&2
        return 1
      fi
    fi
  done

  # 10b. Regenerate one plist per config file.
  # hive.yaml (primary) + any hive-<suffix>.yaml (e.g., hive-personal.yaml).
  # `hive start --daemon` reads HIVE_CONFIG, derives the label from that config's
  # instance.id, writes service/<label>.plist, creates the LaunchAgents
  # symlink, and launchctl-loads it (see src/cli/daemon.ts:84 startDaemon).
  local regenerated=0
  for yaml in "$INSTANCE_DIR"/hive.yaml "$INSTANCE_DIR"/hive-*.yaml; do
    # Also skips the literal pattern left behind when the glob matches nothing.
    [[ -f "$yaml" ]] || continue
    cfg=$(basename "$yaml")
    echo "  hive start --daemon (HIVE_CONFIG=$cfg)"
    if ! HIVE_HOME="$INSTANCE_DIR" HIVE_CONFIG="$cfg" hive start --daemon; then
      echo "ERROR: hive start --daemon failed for $cfg." >&2
      return 1
    fi
    regenerated=$((regenerated + 1))
  done

  if [[ $regenerated -eq 0 ]]; then
    echo "ERROR: no hive.yaml or hive-*.yaml found at $INSTANCE_DIR — nothing to regenerate." >&2
    return 1
  fi
  echo "  ✓ $regenerated plist(s) regenerated"

  # Note: com.hive.deploy-check.plist and com.hive.rotate-logs.plist (if present
  # in service/) are utility jobs whose ProgramArguments reference scripts
  # under service/, not the engine bundle. They need no regeneration and were
  # preserved by Step 5's LIVE_PLISTS symlink-detection filter.
}
|
|
776
|
+
|
|
777
|
+
# =============================================================================
|
|
778
|
+
# Step 11 — Health check
|
|
779
|
+
# =============================================================================
|
|
780
|
+
# Reads the hive.log for "Hive is running". 30s timeout matches deploy.sh.
|
|
781
|
+
step_health_check() {
  # Polls the instance's hive.log once per second, up to 30 times, until one of
  # its last 20 lines contains the text "Hive is running" (double quotes
  # included).
  # Globals:  INSTANCE_DIR (read)
  # Outputs:  progress and the final verdict on stdout
  # Returns:  0 as soon as the marker is seen, 1 after 30 unsuccessful polls
  #
  # NOTE(review): a marker written by the pre-migration process and still
  # within the last 20 lines would also satisfy this check — confirm the log
  # cannot carry one over.
  echo "==> Step 11: health check"

  # Logs default to $INSTANCE_DIR/logs; hive.yaml's logging.dir overrides that
  # when yq is available and the key is set. A yq failure keeps the default.
  local logs_dir="logs"
  local yaml_logs=""
  if command -v yq >/dev/null 2>&1; then
    yaml_logs=$(yq -r '.logging.dir // ""' "$INSTANCE_DIR/hive.yaml" 2>/dev/null) || yaml_logs=""
    if [[ -n "$yaml_logs" ]]; then
      logs_dir="$yaml_logs"
    fi
  fi

  # An absolute logging.dir is used as-is; only a relative one hangs off the
  # instance dir. Prefixing an absolute path would point at a file that can
  # never exist and guarantee a timeout.
  local log_file
  if [[ "$logs_dir" == /* ]]; then
    log_file="$logs_dir/hive.log"
  else
    log_file="$INSTANCE_DIR/$logs_dir/hive.log"
  fi

  local attempt recent
  for ((attempt = 1; attempt <= 30; attempt++)); do
    sleep 1
    # Capture the tail before matching instead of piping it into `grep -q`:
    # the script runs with `set -o pipefail`, and grep exiting on its first
    # match can kill tail with SIGPIPE, which would make a successful match
    # look like a failed pipeline. A missing log file just yields no text.
    recent=$(tail -n 20 "$log_file" 2>/dev/null) || recent=""
    if grep -q '"Hive is running"' <<<"$recent"; then
      echo "  ✓ healthy"
      return 0
    fi
  done

  echo "  ✗ health check TIMEOUT (30s)"
  return 1
}
|
|
804
|
+
|
|
805
|
+
# Step 12 — the auto-rollback helpers are defined at the top of the script
# (near notify) so that every wrapped call site can reach them;
# rollback_or_die() is the shared escalation path once a step has failed.

step_regenerate_plists || rollback_or_die

step_health_check || rollback_or_die

notify "Migration succeeded: $INSTANCE_DIR → 0.2.0."

# Closing banner: where the snapshot lives and how to discard it later.
printf '%s\n' \
  "" \
  "==> Migration complete." \
  "    Snapshot preserved at: $INSTANCE_DIR.pre-0.2-bak" \
  "    Remove it once you've confirmed the instance is stable (24h+ recommended):" \
  "      rm -rf $INSTANCE_DIR.pre-0.2-bak"
|