@stackframe/stack-cli 2.8.86 → 2.8.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/emulator/cloud-init/emulator/user-data +200 -16
- package/dist/emulator/common.sh +139 -0
- package/dist/emulator/run-emulator.sh +704 -60
- package/dist/index.js +848 -284
- package/dist/index.js.map +1 -1
- package/package.json +7 -5
|
@@ -12,6 +12,22 @@ VM_RAM="${EMULATOR_RAM:-4096}"
|
|
|
12
12
|
VM_CPUS="${EMULATOR_CPUS:-4}"
|
|
13
13
|
PORT_PREFIX="${PORT_PREFIX:-${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}}"
|
|
14
14
|
READY_TIMEOUT="${EMULATOR_READY_TIMEOUT:-240}"
|
|
15
|
+
# Shorter timeout when resuming from a snapshot: services are already running,
|
|
16
|
+
# we only need to wait for rotate-secrets + Node restart (~3-10s).
|
|
17
|
+
SNAPSHOT_READY_TIMEOUT="${EMULATOR_SNAPSHOT_READY_TIMEOUT:-45}"
|
|
18
|
+
# Set to 1 to force a cold boot and ignore any shipped savevm file.
|
|
19
|
+
EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}"
|
|
20
|
+
# Skip the post-resume secret rotation. Keeps the baked placeholder secrets
|
|
21
|
+
# in place — acceptable for tests and CI that don't reach the emulator over
|
|
22
|
+
# a shared network. Shaves ~2-3s off `emulator start`.
|
|
23
|
+
EMULATOR_NO_ROTATION="${EMULATOR_NO_ROTATION:-0}"
|
|
24
|
+
# Internal: set to 1 by cmd_capture to build QEMU with the snapshot-compatible
|
|
25
|
+
# device layout (phantom ISOs, no virtfs, pcie-root-port, pinned 4096MB/4CPU)
|
|
26
|
+
# without the `-incoming defer` that resume mode adds. The captured snapshot
|
|
27
|
+
# must be byte-compatible with what the resume path will later feed to QEMU.
|
|
28
|
+
EMULATOR_CAPTURING_SNAPSHOT="${EMULATOR_CAPTURING_SNAPSHOT:-0}"
|
|
29
|
+
# Force re-capture even if a .savevm.zst is already present.
|
|
30
|
+
EMULATOR_FORCE_CAPTURE="${EMULATOR_FORCE_CAPTURE:-0}"
|
|
15
31
|
|
|
16
32
|
# Fixed host-side ports for the QEMU emulator (267xx range).
|
|
17
33
|
# Only user-facing services are exposed; internal deps stay inside the VM.
|
|
@@ -19,6 +35,7 @@ EMULATOR_DASHBOARD_PORT="${EMULATOR_DASHBOARD_PORT:-26700}"
|
|
|
19
35
|
EMULATOR_BACKEND_PORT="${EMULATOR_BACKEND_PORT:-26701}"
|
|
20
36
|
EMULATOR_MINIO_PORT="${EMULATOR_MINIO_PORT:-26702}"
|
|
21
37
|
EMULATOR_INBUCKET_PORT="${EMULATOR_INBUCKET_PORT:-26703}"
|
|
38
|
+
EMULATOR_MOCK_OAUTH_PORT="${EMULATOR_MOCK_OAUTH_PORT:-26704}"
|
|
22
39
|
|
|
23
40
|
RED='\033[0;31m'
|
|
24
41
|
GREEN='\033[0;32m'
|
|
@@ -62,10 +79,67 @@ image_path() {
|
|
|
62
79
|
echo "$IMAGE_DIR/stack-emulator-$ARCH.qcow2"
|
|
63
80
|
}
|
|
64
81
|
|
|
82
|
+
# Print the location of the shipped, zstd-compressed snapshot for the
# current architecture (built from $IMAGE_DIR and $ARCH).
savevm_path() {
  printf '%s\n' "${IMAGE_DIR}/stack-emulator-${ARCH}.savevm.zst"
}
|
|
85
|
+
|
|
86
|
+
# Print the location of the decompressed mapped-ram cache. The cache is
# produced from the .zst on the first resume and reused afterwards: the
# mapped-ram format needs a seekable file, so the snapshot cannot be streamed
# through zstd while multifd is in use.
savevm_raw_path() {
  printf '%s\n' "${IMAGE_DIR}/stack-emulator-${ARCH}.savevm.raw"
}
|
|
92
|
+
|
|
65
93
|
# Print the location of the per-VM runtime configuration ISO inside $VM_DIR.
runtime_iso_path() {
  printf '%s\n' "${VM_DIR}/runtime-config.iso"
}
|
|
68
96
|
|
|
97
|
+
# Succeeds when a resume from the shipped snapshot should be attempted:
# snapshots are not disabled, we are not in capture mode, and the .zst file
# exists with a non-zero size.
snapshot_available() {
  if [ "$EMULATOR_NO_SNAPSHOT" = "1" ] || [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ]; then
    return 1
  fi
  [ -s "$(savevm_path)" ]
}
|
|
100
|
+
|
|
101
|
+
# Succeeds when QEMU has to be started with the snapshot-compatible device
# layout: either a snapshot is about to be resumed, or a new one is being
# captured. Resume adds `-incoming defer`, capture does not; the remaining
# layout (phantom ISOs, no virtfs, pcie-root-port, pinned RAM/SMP) is shared.
snapshot_layout() {
  if snapshot_available; then
    return 0
  fi
  [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ]
}
|
|
108
|
+
|
|
109
|
+
# Make sure the decompressed mapped-ram cache (.raw) is at least as new as the
# shipped snapshot (.zst), re-decompressing when it is missing, empty or older.
#
# Freshness is decided by comparing modification times; a file that cannot be
# stat'ed counts as timestamp 0.
#
# Returns 0 when the cache is usable, 1 when zstd is unavailable or the
# decompression / final rename fails (the partial temp file is removed).
ensure_savevm_raw() {
  local zst raw
  zst="$(savevm_path)"
  raw="$(savevm_raw_path)"

  # BSD stat (macOS) and GNU stat use different flags for "mtime as epoch".
  local zst_ts raw_ts
  case "$HOST_OS" in
    darwin)
      zst_ts="$(stat -f '%m' "$zst" 2>/dev/null || echo 0)"
      raw_ts="$(stat -f '%m' "$raw" 2>/dev/null || echo 0)"
      ;;
    *)
      zst_ts="$(stat -c '%Y' "$zst" 2>/dev/null || echo 0)"
      raw_ts="$(stat -c '%Y' "$raw" 2>/dev/null || echo 0)"
      ;;
  esac

  if [ -s "$raw" ] && [ "$raw_ts" -ge "$zst_ts" ]; then
    return 0
  fi

  if ! command -v zstd >/dev/null 2>&1; then
    err "zstd is required to decompress $zst but was not found in PATH"
    return 1
  fi

  log "Decompressing snapshot cache (one-time; ~2-3GB sparse)..."
  local tmp="${raw}.tmp"
  rm -f "$tmp"
  # Write through -o instead of redirecting stdout: zstd only produces sparse
  # output by default when it writes to a file itself, and sparse mode is off
  # for stdout. --sparse is passed explicitly so the intent is visible.
  if ! zstd -d -q -f --sparse -o "$tmp" "$zst"; then
    err "Failed to decompress $zst"
    rm -f "$tmp"
    return 1
  fi
  # Rename last so a half-written file is never visible under the final name.
  if ! mv -f "$tmp" "$raw"; then
    err "Failed to move decompressed snapshot into place at $raw"
    rm -f "$tmp"
    return 1
  fi
}
|
|
142
|
+
|
|
69
143
|
# Returns a fast fingerprint (size:mtime) of the base QEMU image.
|
|
70
144
|
# Used to detect whether the image has changed since the overlay was created.
|
|
71
145
|
base_image_fingerprint() {
|
|
@@ -77,10 +151,62 @@ base_image_fingerprint() {
|
|
|
77
151
|
esac
|
|
78
152
|
}
|
|
79
153
|
|
|
80
|
-
|
|
154
|
+
# Build the fingerprint that marks an overlay as stale. It joins the
# fingerprint of the base qcow2 ($1) with that of the savevm file ($2), or the
# literal "no-savevm" when that file does not exist, so the overlay is rebuilt
# when either input changes. The overlay disk has to match the disk state the
# snapshot was taken against for an -incoming resume to be consistent.
runtime_fingerprint() {
  local base_img="$1"
  local savevm_file="$2"
  local base_part
  local savevm_part="no-savevm"

  base_part="$(base_image_fingerprint "$base_img")"
  if [ -f "$savevm_file" ]; then
    savevm_part="$(base_image_fingerprint "$savevm_file")"
  fi

  printf '%s|%s\n' "$base_part" "$savevm_part"
}
|
|
170
|
+
|
|
171
|
+
# Make sure a runtime-config ISO exists for the VM about to start.
#
# An existing, non-empty ISO is trusted in two situations:
#   * STACK_EMULATOR_CLI_WROTE_ISO=1 - stack-cli wrote the ISO natively
#     (packages/stack-cli/src/lib/iso.ts) right before spawning this script.
#     Regenerating would fall through to make_iso_from_dir and require
#     hdiutil/mkisofs/genisoimage, the host dependency the CLI path is
#     designed to remove.
#   * EMULATOR_CAPTURING_SNAPSHOT=1 - cmd_capture already wrote a specialized
#     ISO with an empty STACK_EMULATOR_VM_DIR_HOST. That is required because
#     virtfs is detached for snapshot compatibility; otherwise
#     run-stack-container would try to publish internal-pck to /host/... and
#     restart-loop stack.service.
#
# In every other case (direct shell invocation) the ISO is regenerated
# unconditionally: PORT_PREFIX / EMULATOR_*_PORT may have changed since the
# last run and a cached ISO would silently override them.
ensure_runtime_config_iso() {
  local iso
  iso="$(runtime_iso_path)"

  if [ -s "$iso" ]; then
    if [ "${STACK_EMULATOR_CLI_WROTE_ISO:-}" = "1" ] || [ "${EMULATOR_CAPTURING_SNAPSHOT:-}" = "1" ]; then
      return 0
    fi
  fi

  write_runtime_config_iso "$VM_DIR"
}
|
|
194
|
+
|
|
195
|
+
# Write a STACKCFG runtime-config.iso containing runtime.env + base.env.
|
|
196
|
+
# The VM_DIR_HOST arg is the path to publish internal-pck / stack.log to on
|
|
197
|
+
# /host; pass empty string to suppress publication (used by capture mode
|
|
198
|
+
# where /host isn't mounted — virtfs is detached for snapshot compatibility,
|
|
199
|
+
# so any host-side write would fail and restart-loop stack.service).
|
|
200
|
+
write_runtime_config_iso() {
|
|
201
|
+
local vm_dir_host="$1"
|
|
202
|
+
local base_env="$SCRIPT_DIR/../.env.development"
|
|
203
|
+
if [ ! -f "$base_env" ]; then
|
|
204
|
+
err "Cannot generate runtime config ISO: $base_env is missing."
|
|
205
|
+
err "Run 'pnpm run emulator:generate-env' first, or invoke via 'stack emulator start'."
|
|
206
|
+
exit 1
|
|
207
|
+
fi
|
|
208
|
+
|
|
81
209
|
local cfg_dir="$VM_DIR/runtime-config"
|
|
82
|
-
local cfg_iso
|
|
83
|
-
cfg_iso="$(runtime_iso_path)"
|
|
84
210
|
rm -rf "$cfg_dir"
|
|
85
211
|
mkdir -p "$cfg_dir"
|
|
86
212
|
{
|
|
@@ -89,10 +215,11 @@ prepare_runtime_config_iso() {
|
|
|
89
215
|
printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT"
|
|
90
216
|
printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT"
|
|
91
217
|
printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT"
|
|
92
|
-
printf "
|
|
218
|
+
printf "STACK_EMULATOR_MOCK_OAUTH_HOST_PORT=%s\n" "$EMULATOR_MOCK_OAUTH_PORT"
|
|
219
|
+
printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$vm_dir_host"
|
|
93
220
|
} > "$cfg_dir/runtime.env"
|
|
94
|
-
cp "$
|
|
95
|
-
make_iso_from_dir "$
|
|
221
|
+
cp "$base_env" "$cfg_dir/base.env"
|
|
222
|
+
make_iso_from_dir "$(runtime_iso_path)" "STACKCFG" "$cfg_dir"
|
|
96
223
|
}
|
|
97
224
|
|
|
98
225
|
service_is_up() {
|
|
@@ -145,7 +272,7 @@ wait_for_condition() {
|
|
|
145
272
|
log "${label} ready in ${elapsed}s"
|
|
146
273
|
return 0
|
|
147
274
|
fi
|
|
148
|
-
sleep
|
|
275
|
+
sleep 0.2
|
|
149
276
|
elapsed=$((SECONDS - started))
|
|
150
277
|
printf "\r [%3ds] %s..." "$elapsed" "$label"
|
|
151
278
|
done
|
|
@@ -154,8 +281,9 @@ wait_for_condition() {
|
|
|
154
281
|
}
|
|
155
282
|
|
|
156
283
|
build_qemu_cmd() {
|
|
157
|
-
local base_img
|
|
284
|
+
local base_img savevm_file
|
|
158
285
|
base_img="$(image_path)"
|
|
286
|
+
savevm_file="$(savevm_path)"
|
|
159
287
|
|
|
160
288
|
if [ ! -f "$base_img" ]; then
|
|
161
289
|
err "Missing QEMU image: $base_img"
|
|
@@ -166,18 +294,36 @@ build_qemu_cmd() {
|
|
|
166
294
|
mkdir -p "$VM_DIR"
|
|
167
295
|
local fingerprint_file="$VM_DIR/base-image.fingerprint"
|
|
168
296
|
local current_fp
|
|
169
|
-
current_fp="$(
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
297
|
+
current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")"
|
|
298
|
+
|
|
299
|
+
if snapshot_layout; then
|
|
300
|
+
# The savevm RAM state was captured against the base image's exact disk
|
|
301
|
+
# state. An overlay with writes from a previous session diverges from
|
|
302
|
+
# that point, so -incoming would resume RAM against inconsistent disk.
|
|
303
|
+
# Always start from a fresh overlay in the snapshot path; per-session
|
|
304
|
+
# state is not preserved. Users who want persistence can opt out with
|
|
305
|
+
# EMULATOR_NO_SNAPSHOT=1. Capture mode also needs a clean overlay so the
|
|
306
|
+
# snapshot we write is taken against the base's known disk state.
|
|
307
|
+
if [ -f "$VM_DIR/disk.qcow2" ]; then
|
|
175
308
|
rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file"
|
|
176
309
|
fi
|
|
177
|
-
fi
|
|
178
|
-
if [ ! -f "$VM_DIR/disk.qcow2" ]; then
|
|
179
310
|
qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null
|
|
180
|
-
|
|
311
|
+
printf '%s' "$current_fp" > "$fingerprint_file"
|
|
312
|
+
else
|
|
313
|
+
# If the overlay was created against a different base or savevm, it will
|
|
314
|
+
# diverge from the snapshot's disk state — force a rebuild.
|
|
315
|
+
if [ -f "$VM_DIR/disk.qcow2" ]; then
|
|
316
|
+
if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then
|
|
317
|
+
log "Reusing existing overlay disk (changes persist)"
|
|
318
|
+
else
|
|
319
|
+
warn "Base image or snapshot has changed — recreating overlay."
|
|
320
|
+
rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file"
|
|
321
|
+
fi
|
|
322
|
+
fi
|
|
323
|
+
if [ ! -f "$VM_DIR/disk.qcow2" ]; then
|
|
324
|
+
qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null
|
|
325
|
+
printf '%s' "$current_fp" > "$fingerprint_file"
|
|
326
|
+
fi
|
|
181
327
|
fi
|
|
182
328
|
|
|
183
329
|
local qemu_bin machine cpu firmware_args=()
|
|
@@ -207,34 +353,134 @@ build_qemu_cmd() {
|
|
|
207
353
|
netdev+=",hostfwd=tcp:127.0.0.1:${EMULATOR_BACKEND_PORT}-:${PORT_PREFIX}02"
|
|
208
354
|
netdev+=",hostfwd=tcp:127.0.0.1:${EMULATOR_MINIO_PORT}-:9090"
|
|
209
355
|
netdev+=",hostfwd=tcp:127.0.0.1:${EMULATOR_INBUCKET_PORT}-:9001"
|
|
210
|
-
# Mock OAuth server:
|
|
211
|
-
# (
|
|
212
|
-
#
|
|
213
|
-
#
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
356
|
+
# Mock OAuth server: the VM-internal mock binds to $EMULATOR_MOCK_OAUTH_PORT
|
|
357
|
+
# (overrides the pnpm-dev default of ${PORT_PREFIX}14 via STACK_OAUTH_MOCK_PORT
|
|
358
|
+
# threaded through runtime-config.iso). Host and guest use the same port so
|
|
359
|
+
# the OIDC issuer URL `http://localhost:${EMULATOR_MOCK_OAUTH_PORT}` resolves
|
|
360
|
+
# identically from the browser and from the backend inside the VM.
|
|
361
|
+
netdev+=",hostfwd=tcp:127.0.0.1:${EMULATOR_MOCK_OAUTH_PORT}-:${EMULATOR_MOCK_OAUTH_PORT}"
|
|
362
|
+
|
|
363
|
+
# In snapshot-resume mode the QEMU command-line MUST match the device set
|
|
364
|
+
# used at snapshot capture time, otherwise migration replay fails (broken
|
|
365
|
+
# pipe / device tree mismatch). At capture time the build attaches:
|
|
366
|
+
# disk(if=virtio) + seed.iso + bundle.iso + runtime.iso (all if=virtio)
|
|
367
|
+
# netdev + virtio-net-pci + monitor + QGA virtio-serial
|
|
368
|
+
# SMP=4, RAM=4096 (pinned in build-image.sh snapshot mode)
|
|
369
|
+
# We mirror that exactly. The seed/bundle ISOs were used by cloud-init at
|
|
370
|
+
# build and are not needed at runtime, but their virtio-blk slots must
|
|
371
|
+
# exist so the migration replay matches device IDs. Runtime-only devices
|
|
372
|
+
# (virtfs, balloon) live at higher slots — extra at destination is fine.
|
|
373
|
+
local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM"
|
|
374
|
+
if snapshot_layout; then
|
|
375
|
+
if snapshot_available; then
|
|
376
|
+
log "Snapshot found at $savevm_file — fast-resume enabled."
|
|
377
|
+
# -incoming defer: QEMU starts, waits for a QMP migrate-incoming command.
|
|
378
|
+
# We use that to set mapped-ram + multifd capabilities before loading,
|
|
379
|
+
# which enables parallel RAM restore (~2-3x faster than streamed decode).
|
|
380
|
+
snapshot_args+=(-incoming defer)
|
|
381
|
+
else
|
|
382
|
+
log "Capture mode: booting with snapshot-compatible layout (no -incoming)."
|
|
383
|
+
fi
|
|
384
|
+
snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}"
|
|
385
|
+
# RAM size is baked into the snapshot; migration replay requires an
|
|
386
|
+
# identical -m value. Pin to the build-time RAM (4096) and ignore
|
|
387
|
+
# EMULATOR_RAM — override via EMULATOR_SNAPSHOT_RAM if a different
|
|
388
|
+
# snapshot was produced.
|
|
389
|
+
snapshot_ram="${EMULATOR_SNAPSHOT_RAM:-4096}"
|
|
390
|
+
if [ "$snapshot_smp" != "$VM_CPUS" ]; then
|
|
391
|
+
log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)."
|
|
392
|
+
fi
|
|
393
|
+
if [ "$snapshot_ram" != "$VM_RAM" ]; then
|
|
394
|
+
log "Pinning RAM to ${snapshot_ram}MB for snapshot resume (ignoring EMULATOR_RAM=${VM_RAM})."
|
|
395
|
+
fi
|
|
396
|
+
|
|
397
|
+
# Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present
|
|
398
|
+
# at snapshot time. Their content doesn't matter (cloud-init has already
|
|
399
|
+
# run); only the virtio-blk slot count must match.
|
|
400
|
+
local seed_phantom="$VM_DIR/seed.phantom"
|
|
401
|
+
local bundle_phantom="$VM_DIR/bundle.phantom"
|
|
402
|
+
if [ ! -s "$seed_phantom" ]; then
|
|
403
|
+
dd if=/dev/zero of="$seed_phantom" bs=1M count=1 status=none
|
|
404
|
+
fi
|
|
405
|
+
if [ ! -s "$bundle_phantom" ]; then
|
|
406
|
+
dd if=/dev/zero of="$bundle_phantom" bs=1M count=1 status=none
|
|
407
|
+
fi
|
|
408
|
+
runtime_only_args+=(
|
|
409
|
+
-drive "file=$seed_phantom,format=raw,if=virtio,readonly=on"
|
|
410
|
+
-drive "file=$bundle_phantom,format=raw,if=virtio,readonly=on"
|
|
411
|
+
)
|
|
412
|
+
else
|
|
413
|
+
# Cold-boot: include virtio-balloon and virtfs as before.
|
|
414
|
+
runtime_only_args+=(
|
|
415
|
+
-device virtio-balloon-pci
|
|
416
|
+
-virtfs "local,path=/,mount_tag=hostfs,security_model=none"
|
|
417
|
+
)
|
|
418
|
+
fi
|
|
419
|
+
|
|
420
|
+
if snapshot_layout; then
|
|
421
|
+
QEMU_CMD=(
|
|
422
|
+
"$qemu_bin"
|
|
423
|
+
-machine "$machine"
|
|
424
|
+
-accel "$ACCEL"
|
|
425
|
+
-cpu "$cpu"
|
|
426
|
+
"${firmware_args[@]}"
|
|
427
|
+
-boot order=c
|
|
428
|
+
-m "$snapshot_ram"
|
|
429
|
+
-smp "$snapshot_smp"
|
|
430
|
+
-drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
|
|
431
|
+
"${runtime_only_args[@]}"
|
|
432
|
+
-drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
|
|
433
|
+
-netdev "$netdev"
|
|
434
|
+
-device virtio-net-pci,netdev=net0
|
|
435
|
+
-chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
|
|
436
|
+
-mon "chardev=monitor,mode=control"
|
|
437
|
+
-chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0"
|
|
438
|
+
-device virtio-serial
|
|
439
|
+
-device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
|
|
440
|
+
# Empty PCIe root port reserved for runtime hot-plug of virtio-9p.
|
|
441
|
+
# MUST be the last explicit -device entry — slot order has to mirror
|
|
442
|
+
# build-image.sh exactly or migration replay stalls in inmigrate.
|
|
443
|
+
-device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1"
|
|
444
|
+
# Pre-create the host-side fsdev backend so the post-resume QMP
|
|
445
|
+
# device_add can attach to it by id. -fsdev is host-only state — not
|
|
446
|
+
# part of the migrated device tree — so it's safe to add here even
|
|
447
|
+
# though the snapshot was captured without it. Going through -fsdev
|
|
448
|
+
# avoids the HMP fsdev_add command, whose error path is invisible
|
|
449
|
+
# via human-monitor-command (errors come back as a return string,
|
|
450
|
+
# not a QMP error).
|
|
451
|
+
-fsdev "local,id=hostfs,path=/,security_model=none"
|
|
452
|
+
${snapshot_args[@]+"${snapshot_args[@]}"}
|
|
453
|
+
-serial "file:$VM_DIR/serial.log"
|
|
454
|
+
-display none
|
|
455
|
+
-daemonize
|
|
456
|
+
-pidfile "$VM_DIR/qemu.pid"
|
|
457
|
+
)
|
|
458
|
+
else
|
|
459
|
+
QEMU_CMD=(
|
|
460
|
+
"$qemu_bin"
|
|
461
|
+
-machine "$machine"
|
|
462
|
+
-accel "$ACCEL"
|
|
463
|
+
-cpu "$cpu"
|
|
464
|
+
"${firmware_args[@]}"
|
|
465
|
+
-boot order=c
|
|
466
|
+
-m "$VM_RAM"
|
|
467
|
+
-smp "$snapshot_smp"
|
|
468
|
+
-drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
|
|
469
|
+
-drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
|
|
470
|
+
-netdev "$netdev"
|
|
471
|
+
-device virtio-net-pci,netdev=net0
|
|
472
|
+
"${runtime_only_args[@]}"
|
|
473
|
+
-chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
|
|
474
|
+
-mon "chardev=monitor,mode=control"
|
|
475
|
+
-chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0"
|
|
476
|
+
-device virtio-serial
|
|
477
|
+
-device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
|
|
478
|
+
-serial "file:$VM_DIR/serial.log"
|
|
479
|
+
-display none
|
|
480
|
+
-daemonize
|
|
481
|
+
-pidfile "$VM_DIR/qemu.pid"
|
|
482
|
+
)
|
|
483
|
+
fi
|
|
238
484
|
|
|
239
485
|
}
|
|
240
486
|
|
|
@@ -256,7 +502,7 @@ tail_vm_logs() {
|
|
|
256
502
|
}
|
|
257
503
|
|
|
258
504
|
ensure_ports_free() {
|
|
259
|
-
local ports=("$EMULATOR_DASHBOARD_PORT" "$EMULATOR_BACKEND_PORT" "$EMULATOR_MINIO_PORT" "$EMULATOR_INBUCKET_PORT" "$
|
|
505
|
+
local ports=("$EMULATOR_DASHBOARD_PORT" "$EMULATOR_BACKEND_PORT" "$EMULATOR_MINIO_PORT" "$EMULATOR_INBUCKET_PORT" "$EMULATOR_MOCK_OAUTH_PORT")
|
|
260
506
|
local port
|
|
261
507
|
for port in "${ports[@]}"; do
|
|
262
508
|
if lsof -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1; then
|
|
@@ -269,11 +515,225 @@ ensure_ports_free() {
|
|
|
269
515
|
# Prepare $VM_DIR and launch QEMU: create the directory, truncate the serial
# log, make sure the runtime-config ISO is present, assemble QEMU_CMD via
# build_qemu_cmd and execute it.
start_vm() {
  local run_dir="$VM_DIR"

  mkdir -p "$run_dir"
  # Start every boot with an empty serial log.
  : > "${run_dir}/serial.log"

  ensure_runtime_config_iso
  build_qemu_cmd
  "${QEMU_CMD[@]}"
}
|
|
276
522
|
|
|
523
|
+
# Send QMP commands read from stdin (one JSON object per line) to the monitor
# socket and print whatever QEMU answers. qmp_capabilities is always sent
# first. The writer side sleeps 0.3s after the last command so socat does not
# close the connection before QEMU has replied.
#
# Returns 1 when the monitor socket does not exist; otherwise the status of
# the socat pipeline.
qmp_send() {
  local sock="$VM_DIR/monitor.sock"
  [ -S "$sock" ] || return 1

  local commands
  commands="$(cat)"
  {
    printf '%s\n' '{"execute":"qmp_capabilities"}' "$commands"
    sleep 0.3
  } | socat -t5 - "UNIX-CONNECT:$sock" 2>/dev/null
}
|
|
539
|
+
|
|
540
|
+
# Drive a QEMU started with `-incoming defer` to a running state.
#
# Steps:
#   1. enable the mapped-ram + multifd capabilities and set multifd-channels,
#   2. issue migrate-incoming for the decompressed cache file ($1),
#   3. poll query-status until the VM is running, sending `cont` when it
#      reports a stopped-but-loaded state.
#
# Arguments: $1 - path of the decompressed mapped-ram file.
# Returns:   0 once the VM is running (or `cont` was accepted),
#            1 when QMP is unreachable, a command is rejected, or no runnable
#              state is reached within 60s.
qmp_incoming_and_cont() {
  local raw_file="$1"

  # Set caps + parameters before migrate-incoming, same as source.
  local setup_resp
  setup_resp=$( {
    printf '%s\n' '{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}'
    printf '%s\n' '{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}'
  } | qmp_send || true)
  # No output at all means the QMP conversation never happened (socket
  # missing, socat unavailable, ...). Fail right away instead of polling a
  # monitor that cannot answer until the deadline expires.
  if [ -z "$setup_resp" ]; then
    err "QMP caps setup failed: no response from monitor socket"
    return 1
  fi
  if printf '%s' "$setup_resp" | grep -q '"error"'; then
    err "QMP caps setup failed: $setup_resp"
    return 1
  fi

  # Kick off the incoming migration from the mapped-ram file. Backslashes and
  # double quotes in the path are escaped so the JSON stays well-formed.
  local uri_path inc_cmd inc_resp
  uri_path=$(printf '%s' "$raw_file" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')
  inc_cmd=$(printf '{"execute":"migrate-incoming","arguments":{"uri":"file:%s"}}' "$uri_path")
  inc_resp=$(printf '%s\n' "$inc_cmd" | qmp_send || true)
  if [ -z "$inc_resp" ]; then
    err "QMP migrate-incoming failed: no response from monitor socket"
    return 1
  fi
  if printf '%s' "$inc_resp" | grep -q '"error"'; then
    err "QMP migrate-incoming failed: $inc_resp"
    return 1
  fi

  # Poll until status reaches a runnable state, then cont.
  local deadline=$((SECONDS + 60))
  local out status cont_resp
  while [ "$SECONDS" -lt "$deadline" ]; do
    out=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true)
    status=$(printf '%s' "$out" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/' || true)
    case "$status" in
      running)
        return 0
        ;;
      paused|postmigrate|prelaunch)
        # Only report success when QEMU actually accepted the cont; otherwise
        # keep polling until the deadline.
        cont_resp=$(printf '%s\n' '{"execute":"cont"}' | qmp_send || true)
        if [ -n "$cont_resp" ] && ! printf '%s' "$cont_resp" | grep -q '"error"'; then
          return 0
        fi
        log "QMP cont was not accepted (status: $status); retrying"
        ;;
      inmigrate|"")
        # Still loading, or the monitor did not answer this round.
        ;;
      *)
        log "unexpected QMP status: $status"
        ;;
    esac
    sleep 0.2
  done
  return 1
}
|
|
590
|
+
|
|
591
|
+
# Placeholder PCK baked into the snapshot. Kept in sync with the value in
|
|
592
|
+
# docker/local-emulator/qemu/cloud-init/emulator/user-data.
|
|
593
|
+
SNAPSHOT_PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff"
|
|
594
|
+
|
|
595
|
+
# Write the internal PCK ($1) to $VM_DIR/internal-pck, the host path the CLI
# reads (see readInternalPck() in packages/stack-cli/src/commands/emulator.ts).
# In cold-boot mode the guest publishes this via virtfs/9p, but snapshot mode
# drops virtfs, so the host has to write it itself.
#
# The value is written to a temp file in the same directory and renamed into
# place, so a process polling the path never observes an empty or partial
# file. The temp file is created under umask 077, so the result is owner-only
# even when a previous file with looser permissions existed.
#
# Returns 0 on success, 1 when the file could not be written.
write_internal_pck_for_cli() {
  local pck="$1"
  local dest="$VM_DIR/internal-pck"
  local tmp="${dest}.tmp.$$"

  if (
    umask 077
    rm -f "$tmp"
    printf '%s' "$pck" > "$tmp" && mv -f "$tmp" "$dest"
  ); then
    return 0
  fi
  rm -f "$tmp"
  return 1
}
|
|
603
|
+
|
|
604
|
+
# Send one request (read from stdin) to qemu-guest-agent over its
# virtserialport socket and print the reply. QGA speaks the same JSON protocol
# as QMP but over a separate channel.
#
# socat closes the connection on stdin EOF before QGA can reply, so the writer
# side stays open for 0.5s after sending the request; the margin absorbs
# scheduling jitter on a busy host.
#
# Returns 1 when the QGA socket does not exist; otherwise the status of the
# socat pipeline.
qga_send() {
  local sock="$VM_DIR/qga.sock"
  [ -S "$sock" ] || return 1

  local request
  request="$(cat)"
  {
    printf '%s\n' "$request"
    sleep 0.5
  } | socat -t10 - "UNIX-CONNECT:$sock" 2>/dev/null
}
|
|
619
|
+
|
|
620
|
+
# Wait up to 30s for the guest agent to answer a guest-sync request with the
# id we sent. Returns 0 as soon as the echo is seen, 1 on timeout.
qga_wait_ready() {
  local sync_req='{"execute":"guest-sync","arguments":{"id":424242}}'
  local give_up_at=$((SECONDS + 30))
  local reply

  until [ "$SECONDS" -ge "$give_up_at" ]; do
    # A failed send just means "not ready yet"; try again.
    reply=$(printf '%s\n' "$sync_req" | qga_send || true)
    if printf '%s' "$reply" | grep -q '"return":[[:space:]]*424242'; then
      return 0
    fi
    sleep 0.2
  done

  return 1
}
|
|
632
|
+
|
|
633
|
+
# Hot-plug a virtio-9p device backed by host `/` after a snapshot resume.
# The snapshot was captured WITHOUT virtfs (QEMU disallows migration while
# 9p is mounted in the guest), so the resumed VM has no host filesystem
# available until one is added here. The fsdev backend was pre-created by
# the -fsdev option in build_qemu_cmd; only the device_add half is needed.
#
# Returns 0 when QEMU answered without an error, 1 when QMP could not be
# reached (empty response) or device_add was rejected.
qmp_hotplug_9p() {
  local resp
  resp=$(printf '%s\n' \
    '{"execute":"device_add","arguments":{"driver":"virtio-9p-pci","id":"hostfs-dev","fsdev":"hostfs","mount_tag":"hostfs","bus":"hostfs-port"}}' \
    | qmp_send || true)
  # An empty response means the command never reached QEMU; that must not be
  # reported as a successful hot-plug.
  if [ -z "$resp" ]; then
    err "QMP device_add virtio-9p-pci failed: no response from monitor socket"
    return 1
  fi
  if printf '%s' "$resp" | grep -q '"error"'; then
    err "QMP device_add virtio-9p-pci failed: $resp"
    return 1
  fi
  return 0
}
|
|
649
|
+
|
|
650
|
+
# Run /usr/local/bin/mount-host-fs --post-resume in the guest via guest-exec
# and wait (up to 20s) for it to finish. The script mounts the freshly
# hot-plugged 9p device on /host, which is a shared mount point, so the new
# mount propagates into the running stack container's
# `-v /host:/host:rshared` bind mount without a container restart.
#
# Returns 0 only when the process exited normally with exit code 0.
# Returns 1 when guest-exec yields no pid, the process exits non-zero or
# without an exit code (e.g. killed by a signal), or the wait times out.
qga_mount_host_fs() {
  local cmd resp pid status_resp exited exitcode
  cmd='{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/mount-host-fs","arg":["--post-resume"],"capture-output":true}}'
  resp=$(printf '%s\n' "$cmd" | qga_send || true)
  # grep -E keeps the patterns portable (BSD grep has no \| in basic regexes)
  # and [0-9]+ refuses a "pid" key with no digits. `|| true` keeps a
  # non-matching grep from aborting callers that run with errexit/pipefail.
  pid=$(printf '%s' "$resp" | grep -oE '"pid"[[:space:]]*:[[:space:]]*[0-9]+' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/' || true)
  if [ -z "$pid" ]; then
    err "guest-exec mount-host-fs did not return a pid; response: $resp"
    return 1
  fi

  local deadline=$((SECONDS + 20))
  while [ "$SECONDS" -lt "$deadline" ]; do
    status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true)
    exited=$(printf '%s' "$status_resp" | grep -oE '"exited"[[:space:]]*:[[:space:]]*(true|false)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/' || true)
    if [ "$exited" = "true" ]; then
      exitcode=$(printf '%s' "$status_resp" | grep -oE '"exitcode"[[:space:]]*:[[:space:]]*-?[0-9]+' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/' || true)
      # guest-exec-status only carries "exitcode" for a normally terminated
      # process, so a missing value must not be treated as success.
      if [ "$exitcode" = "0" ]; then
        log "host fs mounted in guest"
        return 0
      fi
      err "mount-host-fs exited with code ${exitcode:-unknown}; response: $status_resp"
      return 1
    fi
    sleep 0.2
  done
  err "mount-host-fs did not complete within 20s"
  return 1
}
|
|
682
|
+
|
|
683
|
+
# Generate fresh secrets on the host and have the guest apply them by running
# /usr/local/bin/trigger-fast-rotate through guest-exec.
#
# The secrets are passed as a base64-encoded env file via guest-exec's
# input-data, which keeps them off the filesystem and avoids needing virtfs.
# guest-exec returns a pid; guest-exec-status is then polled (up to 60s)
# until the process exits.
#
# Side effect: the fresh publishable client key is written to the host path
# the CLI reads (via write_internal_pck_for_cli) BEFORE the guest-exec, so a
# --config-file flow polling from another process can pick it up the moment
# rotation completes.
#
# Returns 0 only when the process exited normally with exit code 0.
# Returns 1 when guest-exec yields no pid, the process exits non-zero or
# without an exit code (e.g. killed by a signal), or the wait times out.
qga_trigger_fast_rotate() {
  local fresh_pck fresh_ssk fresh_sak fresh_cron payload secrets_b64 resp pid
  fresh_pck="$(openssl rand -hex 32)"
  fresh_ssk="$(openssl rand -hex 32)"
  fresh_sak="$(openssl rand -hex 32)"
  fresh_cron="$(openssl rand -hex 32)"
  payload=$(
    printf 'STACK_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$fresh_pck"
    printf 'STACK_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$fresh_ssk"
    printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$fresh_sak"
    printf 'CRON_SECRET=%s\n' "$fresh_cron"
  )
  # Publish the fresh PCK to the host path the CLI reads before the guest-exec.
  write_internal_pck_for_cli "$fresh_pck"
  # base64 may wrap its output; strip newlines so the value fits in one JSON
  # string.
  secrets_b64=$(printf '%s' "$payload" | base64 | tr -d '\n')
  local cmd
  cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64")
  resp=$(printf '%s\n' "$cmd" | qga_send || true)
  # grep -E keeps the patterns portable (BSD grep has no \| in basic regexes)
  # and [0-9]+ refuses a "pid" key with no digits. `|| true` keeps a
  # non-matching grep from aborting callers that run with errexit/pipefail.
  pid=$(printf '%s' "$resp" | grep -oE '"pid"[[:space:]]*:[[:space:]]*[0-9]+' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/' || true)
  if [ -z "$pid" ]; then
    err "guest-exec did not return a pid; response: $resp"
    return 1
  fi

  # Rotation (sed + UPDATE + supervisorctl restart + node startup) fits well
  # inside this window.
  local deadline=$((SECONDS + 60))
  local status_resp exited exitcode
  while [ "$SECONDS" -lt "$deadline" ]; do
    status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true)
    exited=$(printf '%s' "$status_resp" | grep -oE '"exited"[[:space:]]*:[[:space:]]*(true|false)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/' || true)
    if [ "$exited" = "true" ]; then
      exitcode=$(printf '%s' "$status_resp" | grep -oE '"exitcode"[[:space:]]*:[[:space:]]*-?[0-9]+' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/' || true)
      # guest-exec-status only carries "exitcode" for a normally terminated
      # process, so a missing value must not be treated as success.
      if [ "$exitcode" = "0" ]; then
        log "rotate-secrets completed."
        return 0
      fi
      err "rotate-secrets exited with code ${exitcode:-unknown}"
      err "response: $status_resp"
      return 1
    fi
    sleep 0.2
  done
  err "rotate-secrets did not complete within 60s"
  return 1
}
|
|
736
|
+
|
|
277
737
|
stop_vm() {
|
|
278
738
|
if [ ! -f "$VM_DIR/qemu.pid" ]; then
|
|
279
739
|
return 0
|
|
@@ -292,9 +752,10 @@ stop_vm() {
|
|
|
292
752
|
kill -9 "$pid" 2>/dev/null || true
|
|
293
753
|
fi
|
|
294
754
|
fi
|
|
295
|
-
rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log"
|
|
296
|
-
|
|
297
|
-
|
|
755
|
+
rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log"
|
|
756
|
+
# runtime-config.iso is left in place; ensure_runtime_config_iso regenerates
|
|
757
|
+
# it on the next start. `cmd_reset` wipes $RUN_DIR entirely when a full reset
|
|
758
|
+
# is wanted.
|
|
298
759
|
}
|
|
299
760
|
|
|
300
761
|
cmd_start() {
|
|
@@ -303,20 +764,94 @@ cmd_start() {
|
|
|
303
764
|
|
|
304
765
|
info "Starting QEMU local emulator"
|
|
305
766
|
info "Arch: $ARCH | Accel: $ACCEL"
|
|
306
|
-
info "Ports: Dashboard=$EMULATOR_DASHBOARD_PORT Backend=$EMULATOR_BACKEND_PORT MinIO=$EMULATOR_MINIO_PORT Inbucket=$EMULATOR_INBUCKET_PORT"
|
|
767
|
+
info "Ports: Dashboard=$EMULATOR_DASHBOARD_PORT Backend=$EMULATOR_BACKEND_PORT MinIO=$EMULATOR_MINIO_PORT Inbucket=$EMULATOR_INBUCKET_PORT MockOAuth=$EMULATOR_MOCK_OAUTH_PORT"
|
|
768
|
+
|
|
769
|
+
local using_snapshot=0
|
|
770
|
+
if snapshot_available; then
|
|
771
|
+
if ! ensure_savevm_raw; then
|
|
772
|
+
warn "Snapshot decompression failed — falling back to cold boot."
|
|
773
|
+
snapshot_fallback_to_cold_boot
|
|
774
|
+
return
|
|
775
|
+
fi
|
|
776
|
+
using_snapshot=1
|
|
777
|
+
fi
|
|
307
778
|
|
|
308
779
|
start_vm
|
|
309
780
|
|
|
310
781
|
info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs"
|
|
311
782
|
|
|
312
|
-
if
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
783
|
+
if [ "$using_snapshot" = "1" ]; then
|
|
784
|
+
log "Resuming from snapshot (mapped-ram + multifd)..."
|
|
785
|
+
if ! qmp_incoming_and_cont "$(savevm_raw_path)"; then
|
|
786
|
+
warn "Snapshot resume did not reach a runnable state — falling back to cold boot."
|
|
787
|
+
snapshot_fallback_to_cold_boot
|
|
788
|
+
return
|
|
789
|
+
fi
|
|
316
790
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
791
|
+
log "VM resumed; waiting for guest agent..."
|
|
792
|
+
if ! qga_wait_ready; then
|
|
793
|
+
warn "Guest agent did not respond — falling back to cold boot."
|
|
794
|
+
snapshot_fallback_to_cold_boot
|
|
795
|
+
return
|
|
796
|
+
fi
|
|
797
|
+
|
|
798
|
+
# Hot-plug the host filesystem. The snapshot was captured without
|
|
799
|
+
# virtfs, so the running container has an empty /host bind mount until
|
|
800
|
+
# we add the 9p device and mount it in the guest. Required for routes
|
|
801
|
+
# like /local-emulator/project that read user-supplied paths via /host.
|
|
802
|
+
log "Hot-plugging host filesystem..."
|
|
803
|
+
if ! qmp_hotplug_9p; then
|
|
804
|
+
warn "Failed to hot-plug 9p device — falling back to cold boot."
|
|
805
|
+
snapshot_fallback_to_cold_boot
|
|
806
|
+
return
|
|
807
|
+
fi
|
|
808
|
+
if ! qga_mount_host_fs; then
|
|
809
|
+
warn "Failed to mount host fs in guest — falling back to cold boot."
|
|
810
|
+
snapshot_fallback_to_cold_boot
|
|
811
|
+
return
|
|
812
|
+
fi
|
|
813
|
+
|
|
814
|
+
if [ "$EMULATOR_NO_ROTATION" = "1" ]; then
|
|
815
|
+
warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance."
|
|
816
|
+
# The placeholder PCK is live in the running image; publish it to the
|
|
817
|
+
# host path so --config-file flows still work.
|
|
818
|
+
write_internal_pck_for_cli "$SNAPSHOT_PLACEHOLDER_PCK"
|
|
819
|
+
if ! wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then
|
|
820
|
+
warn "Services did not respond after resume — falling back to cold boot."
|
|
821
|
+
tail_vm_logs
|
|
822
|
+
snapshot_fallback_to_cold_boot
|
|
823
|
+
return
|
|
824
|
+
fi
|
|
825
|
+
else
|
|
826
|
+
log "Generating fresh secrets + triggering rotation..."
|
|
827
|
+
if ! qga_trigger_fast_rotate; then
|
|
828
|
+
warn "Failed to trigger rotate-secrets — falling back to cold boot."
|
|
829
|
+
snapshot_fallback_to_cold_boot
|
|
830
|
+
return
|
|
831
|
+
fi
|
|
832
|
+
|
|
833
|
+
# Wait for the *new* backend (post-supervisor-restart) to actually be
|
|
834
|
+
# listening. all_ready may briefly return true against the OLD Node
|
|
835
|
+
# processes between when supervisor sends SIGTERM and when the children
|
|
836
|
+
# die; sleep a beat so we measure the real readiness.
|
|
837
|
+
sleep 1
|
|
838
|
+
if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then
|
|
839
|
+
warn "Services did not recover after rotation — falling back to cold boot."
|
|
840
|
+
tail_vm_logs
|
|
841
|
+
snapshot_fallback_to_cold_boot
|
|
842
|
+
return
|
|
843
|
+
fi
|
|
844
|
+
fi
|
|
845
|
+
else
|
|
846
|
+
if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then
|
|
847
|
+
tail_vm_logs
|
|
848
|
+
exit 1
|
|
849
|
+
fi
|
|
850
|
+
|
|
851
|
+
if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then
|
|
852
|
+
tail_vm_logs
|
|
853
|
+
exit 1
|
|
854
|
+
fi
|
|
320
855
|
fi
|
|
321
856
|
|
|
322
857
|
log "All services are green."
|
|
@@ -324,6 +859,20 @@ cmd_start() {
|
|
|
324
859
|
info "Backend: http://localhost:${EMULATOR_BACKEND_PORT}"
|
|
325
860
|
}
|
|
326
861
|
|
|
862
|
+
# Recovery path used whenever a snapshot resume cannot be completed (for
# example a stale snapshot, or one captured on a different host arch or QEMU
# version). Announces the retry, stops the VM, discards the per-run disk
# state, then re-enters cmd_start with snapshots disabled so the user still
# ends up with a working emulator via a cold boot.
#
# Globals written: EMULATOR_NO_SNAPSHOT (set to 1).
# Returns: whatever the nested cmd_start invocation returns.
snapshot_fallback_to_cold_boot() {
  warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..."
  stop_vm

  # Drop the overlay and its fingerprint so build_qemu_cmd re-creates a fresh
  # overlay, along with the phantom ISO placeholders. runtime-config.iso is
  # not touched here: ensure_runtime_config_iso regenerates it when cmd_start
  # runs again below.
  local -a stale_state=(
    "$VM_DIR/disk.qcow2"
    "$VM_DIR/base-image.fingerprint"
    "$VM_DIR/seed.phantom"
    "$VM_DIR/bundle.phantom"
  )
  rm -f "${stale_state[@]}"

  EMULATOR_NO_SNAPSHOT=1
  cmd_start
}
|
|
875
|
+
|
|
327
876
|
cmd_stop() {
|
|
328
877
|
stop_vm
|
|
329
878
|
log "QEMU emulator stopped."
|
|
@@ -335,6 +884,100 @@ cmd_reset() {
|
|
|
335
884
|
log "Emulator state reset. Next start will be a fresh boot."
|
|
336
885
|
}
|
|
337
886
|
|
|
887
|
+
# Cold-boot the VM with the snapshot-compatible device layout, wait for all
# services to be healthy, then capture a snapshot via QMP migrate and compress
# it to .savevm.zst. Called by `stack emulator pull` so first-run users get a
# fast-resume snapshot that's guaranteed compatible with their host's QEMU
# version + accelerator (which CI-built snapshots can't guarantee across
# KVM/HVF/TCG).
#
# Globals read:    EMULATOR_FORCE_CAPTURE, RUN_DIR, VM_DIR, READY_TIMEOUT
# Globals written: EMULATOR_CAPTURING_SNAPSHOT (set to 1 before start_vm)
# Returns: 0 when a snapshot already exists (and no forced re-capture was
#          requested) or when the capture completes.
# Exits:   1 when the base image is missing, the emulator is already running,
#          services never become healthy, the QMP capture fails, the captured
#          file is empty, or zstd compression fails. Temporary capture files
#          are removed on every failure path after they are created.
cmd_capture() {
  if [ ! -f "$(image_path)" ]; then
    err "Missing qcow2: $(image_path). Run 'stack emulator pull' first."
    exit 1
  fi
  # Default the flag so an unset variable cannot abort the script under
  # `set -u`; any value other than "1" means "do not force".
  if [ -s "$(savevm_path)" ] && [ "${EMULATOR_FORCE_CAPTURE:-0}" != "1" ]; then
    log "Snapshot already present at $(savevm_path); skipping capture."
    log "Pass EMULATOR_FORCE_CAPTURE=1 to rebuild it."
    return 0
  fi
  if is_running; then
    err "Emulator is already running; stop it first (stack emulator stop)."
    exit 1
  fi

  # Start with a clean slate if we're force-recapturing; stale raw/zst would
  # otherwise make snapshot_available() return true and flip QEMU into
  # -incoming defer mode.
  rm -f "$(savevm_path)" "$(savevm_raw_path)"

  ensure_ports_free
  mkdir -p "$RUN_DIR" "$VM_DIR"
  # Regenerate runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty —
  # virtfs is detached in capture mode, so run-stack-container's
  # `install internal-pck → /host/$VM_DIR_HOST/...` would fail and restart-loop
  # stack.service. Mirrors build-image.sh's CI runtime.env shape.
  rm -f "$(runtime_iso_path)"
  write_runtime_config_iso ""

  info "Cold-booting VM to capture local snapshot (one-time, ~1-3 min)..."
  EMULATOR_CAPTURING_SNAPSHOT=1
  start_vm
  info "VM: 4096MB / 4 CPUs (pinned for snapshot compatibility)"

  # Cold boot with snapshot-compatible layout drops virtfs, so stack.service
  # starts without /host mounted — fine for capture; hostfs is hot-plugged on
  # resume via qmp_hotplug_9p.
  if ! wait_for_condition "all services" "$READY_TIMEOUT" all_ready; then
    tail_vm_logs
    stop_vm
    err "Services did not come up; capture aborted."
    exit 1
  fi

  # Everything is written to *.capture.tmp first and only renamed into place
  # once complete, so a half-written file is never picked up as a snapshot.
  local raw tmp_raw zst tmp_zst
  raw="$(savevm_raw_path)"
  tmp_raw="${raw}.capture.tmp"
  zst="$(savevm_path)"
  tmp_zst="${zst}.capture.tmp"
  rm -f "$tmp_raw" "$tmp_zst"

  log "Capturing VM state via QMP (mapped-ram + multifd)..."
  if ! capture_vm_state "$VM_DIR/monitor.sock" "$tmp_raw"; then
    err "QMP capture failed."
    # Stop QEMU before deleting so nothing is still writing to the file; a
    # partial RAM dump can be several GB and must not be left behind.
    stop_vm
    rm -f "$tmp_raw" "$tmp_zst"
    exit 1
  fi

  # capture_vm_state sent QMP quit; wait for QEMU to exit, then clean sockets.
  local waited=0
  while [ "$waited" -lt 30 ] && is_running; do
    sleep 1
    waited=$((waited + 1))
  done
  if is_running; then
    warn "QEMU did not exit after QMP quit; forcing."
    stop_vm
  fi
  rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock"

  if [ ! -s "$tmp_raw" ]; then
    err "Captured raw file is empty: $tmp_raw"
    rm -f "$tmp_raw" "$tmp_zst"
    exit 1
  fi

  log "Compressing snapshot with zstd..."
  # Check the result explicitly: without this, a failed or interrupted zstd
  # run could publish a truncated archive (or, under errexit, abort silently
  # and leave both temp files on disk).
  if ! zstd -1 -T0 -f -o "$tmp_zst" "$tmp_raw"; then
    err "zstd compression failed."
    rm -f "$tmp_raw" "$tmp_zst"
    exit 1
  fi
  mv "$tmp_zst" "$zst"
  # Keep the uncompressed file too — resume reads it directly via mapped-ram,
  # and ensure_savevm_raw skips re-decompression when the raw's mtime >= zst's.
  mv "$tmp_raw" "$raw"
  touch -r "$zst" "$raw"

  local size
  size="$(du -h "$zst" | cut -f1)"
  log "Snapshot captured: $zst (${size})"
}
|
|
980
|
+
|
|
338
981
|
STATUS_FAILED=0
|
|
339
982
|
|
|
340
983
|
print_service_status() {
|
|
@@ -382,12 +1025,12 @@ ACTION="start"
|
|
|
382
1025
|
|
|
383
1026
|
while [[ $# -gt 0 ]]; do
|
|
384
1027
|
case "$1" in
|
|
385
|
-
start|stop|reset|status|bench)
|
|
1028
|
+
start|stop|reset|status|bench|capture)
|
|
386
1029
|
ACTION="$1"
|
|
387
1030
|
shift
|
|
388
1031
|
;;
|
|
389
1032
|
*)
|
|
390
|
-
echo "Usage: $0 [start|stop|reset|status|bench]"
|
|
1033
|
+
echo "Usage: $0 [start|stop|reset|status|bench|capture]"
|
|
391
1034
|
exit 1
|
|
392
1035
|
;;
|
|
393
1036
|
esac
|
|
@@ -399,4 +1042,5 @@ case "$ACTION" in
|
|
|
399
1042
|
reset) cmd_reset ;;
|
|
400
1043
|
status) cmd_status ;;
|
|
401
1044
|
bench) cmd_bench ;;
|
|
1045
|
+
capture) cmd_capture ;;
|
|
402
1046
|
esac
|