fireclaw 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/vm-ctl CHANGED
@@ -11,6 +11,7 @@ usage() {
11
11
  Usage: $CMD_NAME <command> [instance]
12
12
 
13
13
  Commands:
14
+ doctor
14
15
  list
15
16
  status [id]
16
17
  start <id>
@@ -24,9 +25,10 @@ EOF
24
25
  }
25
26
 
26
27
  ssh_run() {
28
+ local id="$1"; shift
27
29
  local ip="$1"; shift
28
30
  local key="${SSH_KEY_PATH:-$HOME/.ssh/vmdemo_vm}"
29
- ssh -i "$key" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null "ubuntu@$ip" "$@"
31
+ ssh -i "$key" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="$(ssh_known_hosts_file "$id")" "ubuntu@$ip" "$@"
30
32
  }
31
33
 
32
34
  _color() {
@@ -40,6 +42,15 @@ _color() {
40
42
  esac
41
43
  }
42
44
 
45
+ # printf pads by byte count, so colorize after padding to keep columns aligned.
46
+ _color_cell() {
47
+ local width="$1"
48
+ local val="$2"
49
+ local pad=$(( width - ${#val} ))
50
+ (( pad > 0 )) || pad=0
51
+ printf '%b%*s' "$(_color "$val")" "$pad" ""
52
+ }
53
+
43
54
  _print_status_table() {
44
55
  local -a ids=() ips=() ports=() vms=() proxies=() healths=()
45
56
  local id ip port vm proxy health
@@ -58,46 +69,63 @@ _print_status_table() {
58
69
  "--------" "----------" "-----" "------" "-------" "------"
59
70
 
60
71
  for i in "${!ids[@]}"; do
61
- printf "%-14s %-14s %-7s %-10b %-10b %-8b\n" \
72
+ printf "%-14s %-14s %-7s %s %s %s\n" \
62
73
  "${ids[$i]}" "${ips[$i]}" "${ports[$i]}" \
63
- "$(_color "${vms[$i]}")" "$(_color "${proxies[$i]}")" "$(_color "${healths[$i]}")"
74
+ "$(_color_cell 10 "${vms[$i]}")" "$(_color_cell 10 "${proxies[$i]}")" "$(_color_cell 8 "${healths[$i]}")"
64
75
  done
65
76
  }
66
77
 
67
78
  cmd_list() {
79
+ require_root
80
+ local nullglob_was_set=0
81
+ shopt -q nullglob && nullglob_was_set=1
68
82
  shopt -s nullglob
69
83
  local rows=()
70
84
  for d in "$STATE_ROOT"/.vm-*/; do
71
- local id
85
+ local id row
72
86
  id="$(basename "$d" | sed 's/^\.vm-//')"
73
87
  if [[ ! "$id" =~ ^[a-z0-9_-]+$ ]]; then
74
88
  warn "Skipping invalid instance state directory: $d"
75
89
  continue
76
90
  fi
77
- load_instance_env "$id"
78
- local ssh_key="${SSH_KEY_PATH:-$HOME/.ssh/vmdemo_vm}"
79
- local health="down"
80
- local host_health="down"
81
- local guest_health="down"
82
- curl -fsS "http://127.0.0.1:$HOST_PORT/health" >/dev/null 2>&1 && host_health="up"
83
- if check_guest_health "$id" "$VM_IP" "$ssh_key"; then
84
- guest_health="up"
85
- fi
86
- if [[ "$host_health" == "up" || "$guest_health" == "up" ]]; then
87
- health="up"
91
+ # Subshell so one corrupt .env degrades to an error row instead of
92
+ # killing the whole fleet view, and loaded values cannot leak across
93
+ # instances.
94
+ if row="$(
95
+ load_instance_env "$id" >/dev/null 2>&1 || exit 1
96
+ ssh_key="${SSH_KEY_PATH:-$HOME/.ssh/vmdemo_vm}"
97
+ health="down"
98
+ host_health="down"
99
+ guest_health="down"
100
+ curl -fsS "http://127.0.0.1:$HOST_PORT/health" >/dev/null 2>&1 && host_health="up"
101
+ if check_guest_health "$id" "$VM_IP" "$ssh_key"; then
102
+ guest_health="up"
103
+ fi
104
+ if [[ "$host_health" == "up" || "$guest_health" == "up" ]]; then
105
+ health="up"
106
+ fi
107
+ vm_state="$(systemctl is-active "$(vm_service "$id")" 2>/dev/null || true)"
108
+ proxy_state="$(systemctl is-active "$(proxy_service "$id")" 2>/dev/null || true)"
109
+ printf '%s|%s|%s|%s|%s|%s' "$id" "$VM_IP" "$HOST_PORT" "${vm_state:-inactive}" "${proxy_state:-inactive}" "$health"
110
+ )"; then
111
+ rows+=("$row")
112
+ else
113
+ warn "Unreadable instance state for '$id' (inspect: $d.env)"
114
+ rows+=("${id}|?|?|error|error|down")
88
115
  fi
89
- local vm_state proxy_state
90
- vm_state="$(systemctl is-active "$(vm_service "$id")" 2>/dev/null)" || vm_state="inactive"
91
- proxy_state="$(systemctl is-active "$(proxy_service "$id")" 2>/dev/null)" || proxy_state="inactive"
92
- rows+=("${id}|${VM_IP}|${HOST_PORT}|${vm_state}|${proxy_state}|${health}")
93
116
  done
94
- shopt -u nullglob
95
- printf '%s\n' "${rows[@]}" | _print_status_table
117
+ (( nullglob_was_set )) || shopt -u nullglob
118
+ if (( ${#rows[@]} == 0 )); then
119
+ _print_status_table < /dev/null
120
+ else
121
+ printf '%s\n' "${rows[@]}" | _print_status_table
122
+ fi
96
123
  }
97
124
 
98
125
  cmd_status_one() {
99
126
  local id="$1"
100
127
  validate_instance_id "$id"
128
+ require_root
101
129
  load_instance_env "$id"
102
130
  local ssh_key="${SSH_KEY_PATH:-$HOME/.ssh/vmdemo_vm}"
103
131
  local vm_state proxy_state health host_health guest_health guest
@@ -108,8 +136,8 @@ cmd_status_one() {
108
136
  guest_health="down"
109
137
  curl -fsS "http://127.0.0.1:$HOST_PORT/health" >/dev/null 2>&1 && host_health="up"
110
138
  guest="unknown"
111
- if wait_for_ssh "$VM_IP" "$ssh_key" 1; then
112
- guest="$(ssh_run "$VM_IP" "systemctl is-active openclaw-$id.service" 2>/dev/null)" || guest="unknown"
139
+ if ssh_reachable "$VM_IP" "$ssh_key" "$id"; then
140
+ guest="$(ssh_run "$id" "$VM_IP" "systemctl is-active openclaw-$id.service" 2>/dev/null)" || guest="unknown"
113
141
  if check_guest_health "$id" "$VM_IP" "$ssh_key"; then
114
142
  guest_health="up"
115
143
  fi
@@ -145,11 +173,16 @@ cmd_start() {
145
173
  load_instance_env "$id"
146
174
 
147
175
  systemctl enable --now "$(vm_service "$id")"
148
- wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 180 || die "VM started but SSH unreachable"
176
+ wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 180 "$id" || die "VM started but SSH unreachable"
149
177
 
150
- ssh_run "$VM_IP" "sudo systemctl enable --now openclaw-$id.service" || warn "Guest service start failed"
178
+ ssh_run "$id" "$VM_IP" "sudo systemctl enable --now openclaw-$id.service" || warn "Guest service start failed"
151
179
  systemctl enable --now "$(proxy_service "$id")"
152
180
 
181
+ if ! wait_for_instance_health "$id" "$VM_IP" "$HOST_PORT" "$SSH_KEY_PATH" 30; then
182
+ cmd_status_one "$id"
183
+ die "Health checks did not pass for $id after start"
184
+ fi
185
+
153
186
  cmd_status_one "$id"
154
187
  }
155
188
 
@@ -160,10 +193,15 @@ cmd_stop() {
160
193
  load_instance_env "$id"
161
194
 
162
195
  systemctl stop "$(proxy_service "$id")" 2>/dev/null || true
163
- if wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 1; then
164
- ssh_run "$VM_IP" "sudo systemctl stop openclaw-$id.service" || true
196
+ if ssh_reachable "$VM_IP" "$SSH_KEY_PATH" "$id"; then
197
+ ssh_run "$id" "$VM_IP" "sudo systemctl stop openclaw-$id.service" || true
198
+ else
199
+ warn "VM SSH unavailable; skipping guest service stop"
165
200
  fi
166
- systemctl stop "$(vm_service "$id")"
201
+ systemctl stop "$(vm_service "$id")" || warn "Failed to stop $(vm_service "$id")"
202
+ # Without disabling, a stopped instance silently resurrects on host reboot.
203
+ systemctl disable "$(proxy_service "$id")" 2>/dev/null || true
204
+ systemctl disable "$(vm_service "$id")" 2>/dev/null || true
167
205
  cmd_status_one "$id"
168
206
  }
169
207
 
@@ -176,43 +214,59 @@ cmd_logs() {
176
214
  local id="$1"
177
215
  local mode="${2:-guest}"
178
216
  validate_instance_id "$id"
217
+ require_root
179
218
  load_instance_env "$id"
219
+ [[ "$mode" == "guest" || "$mode" == "host" ]] || die "Usage: $CMD_NAME logs <id> [guest|host]"
180
220
 
181
221
  if [[ "$mode" == "host" ]]; then
182
222
  journalctl -u "$(vm_service "$id")" -u "$(proxy_service "$id")" -f
183
223
  return
184
224
  fi
185
225
 
186
- wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 30 || die "VM SSH unavailable"
187
- ssh_run "$VM_IP" "sudo journalctl -u openclaw-$id.service -f"
226
+ wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 30 "$id" || die "VM SSH unavailable"
227
+ ssh_run "$id" "$VM_IP" "sudo journalctl -u openclaw-$id.service -f"
188
228
  }
189
229
 
190
230
  cmd_shell() {
191
231
  local id="$1"
192
232
  shift || true
193
233
  validate_instance_id "$id"
234
+ require_root
194
235
  load_instance_env "$id"
195
- wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 30 || die "VM SSH unavailable"
236
+ wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 30 "$id" || die "VM SSH unavailable"
196
237
 
197
238
  if [[ $# -gt 0 ]]; then
198
- ssh_run "$VM_IP" "$*"
239
+ ssh_run "$id" "$VM_IP" "$*"
199
240
  else
200
- ssh -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null "ubuntu@$VM_IP"
241
+ ssh -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="$(ssh_known_hosts_file "$id")" "ubuntu@$VM_IP"
201
242
  fi
202
243
  }
203
244
 
204
245
  cmd_token() {
205
246
  local id="$1"
206
247
  validate_instance_id "$id"
248
+ require_root
207
249
  cat "$(instance_token "$id")"
208
250
  }
209
251
 
210
252
  cmd_destroy() {
253
+ [[ $# -eq 1 || ( $# -eq 2 && "$2" == "--force" ) ]] || die "Usage: $CMD_NAME destroy <id> [--force]"
211
254
  local id="$1"
212
255
  local force="${2:-}"
213
256
  validate_instance_id "$id"
214
257
  require_root
215
- load_instance_env "$id"
258
+
259
+ local env_ok="true"
260
+ if ! (load_instance_env "$id") >/dev/null 2>&1; then
261
+ env_ok="false"
262
+ if [[ "$force" != "--force" ]]; then
263
+ die "Cannot read state for '$id'; use --force to remove its units and directories anyway"
264
+ fi
265
+ warn "State for '$id' is unreadable; best-effort cleanup of units and directories"
266
+ fi
267
+ if [[ "$env_ok" == "true" ]]; then
268
+ load_instance_env "$id"
269
+ fi
216
270
 
217
271
  if [[ "$force" != "--force" ]]; then
218
272
  read -r -p "Destroy '$id' and remove VM assets? [y/N] " confirm
@@ -227,27 +281,118 @@ cmd_destroy() {
227
281
  rm -f "/etc/systemd/system/$(proxy_service "$id")"
228
282
  rm -f "/etc/systemd/system/$(vm_service "$id")"
229
283
  systemctl daemon-reload
284
+ systemctl reset-failed "$(proxy_service "$id")" "$(vm_service "$id")" 2>/dev/null || true
285
+
286
+ if [[ "$env_ok" == "true" ]]; then
287
+ if [[ -n "${VM_TAP:-}" ]]; then
288
+ ip link set "$VM_TAP" down 2>/dev/null || true
289
+ ip link del "$VM_TAP" 2>/dev/null || true
290
+ fi
291
+ if [[ -n "${API_SOCK:-}" ]]; then
292
+ rm -f "$API_SOCK"
293
+ fi
294
+ fi
230
295
 
231
296
  rm -rf "$(instance_dir "$id")" "$(fc_instance_dir "$id")"
232
297
 
233
298
  echo "Destroyed: $id"
234
299
  }
235
300
 
301
+ cmd_doctor() {
302
+ local failures=0
303
+
304
+ _check() {
305
+ local label="$1"
306
+ local ok="$2"
307
+ local detail="${3:-}"
308
+ local green=$'\033[32m' red=$'\033[31m' yellow=$'\033[33m' reset=$'\033[0m'
309
+ if [[ "$ok" == "pass" ]]; then
310
+ printf '%s✓%s %s%s\n' "$green" "$reset" "$label" "${detail:+ ($detail)}"
311
+ elif [[ "$ok" == "skip" ]]; then
312
+ printf '%s-%s %s%s\n' "$yellow" "$reset" "$label" "${detail:+ ($detail)}"
313
+ else
314
+ printf '%s✗%s %s%s\n' "$red" "$reset" "$label" "${detail:+ ($detail)}"
315
+ failures=$((failures + 1))
316
+ fi
317
+ }
318
+
319
+ local c
320
+ for c in firecracker systemctl ip bridge iptables openssl jq cloud-localds ssh scp socat curl qemu-img install flock; do
321
+ if command -v "$c" >/dev/null 2>&1; then
322
+ _check "command: $c" pass
323
+ else
324
+ _check "command: $c" fail "not found on PATH"
325
+ fi
326
+ done
327
+
328
+ if [[ -e /dev/kvm ]]; then
329
+ if [[ -r /dev/kvm && -w /dev/kvm ]]; then
330
+ _check "/dev/kvm" pass
331
+ else
332
+ _check "/dev/kvm" fail "exists but not accessible by $(id -un)"
333
+ fi
334
+ else
335
+ _check "/dev/kvm" fail "missing (KVM required)"
336
+ fi
337
+
338
+ local img
339
+ for img in "${BASE_KERNEL:-${BASE_IMAGES_DIR:-/srv/firecracker/base/images}/vmlinux}" "${BASE_ROOTFS:-${BASE_IMAGES_DIR:-/srv/firecracker/base/images}/rootfs.ext4}"; do
340
+ if [[ -f "$img" ]]; then
341
+ _check "base image: $img" pass
342
+ else
343
+ _check "base image: $img" fail "missing"
344
+ fi
345
+ done
346
+
347
+ if ip link show "$BRIDGE_NAME" >/dev/null 2>&1; then
348
+ _check "bridge: $BRIDGE_NAME" pass "$(ip -4 -o addr show dev "$BRIDGE_NAME" | awk '{print $4}' | head -1)"
349
+ else
350
+ _check "bridge: $BRIDGE_NAME" skip "absent (setup creates it)"
351
+ fi
352
+
353
+ if [[ $EUID -eq 0 ]]; then
354
+ if iptables -t nat -C POSTROUTING -s "$SUBNET_CIDR" ! -o "$BRIDGE_NAME" -j MASQUERADE >/dev/null 2>&1; then
355
+ _check "NAT rule for $SUBNET_CIDR" pass
356
+ else
357
+ _check "NAT rule for $SUBNET_CIDR" skip "absent (setup adds it)"
358
+ fi
359
+ if [[ -d "$STATE_ROOT" && -w "$STATE_ROOT" ]]; then
360
+ _check "state root: $STATE_ROOT" pass
361
+ else
362
+ _check "state root: $STATE_ROOT" skip "absent (setup creates it)"
363
+ fi
364
+ else
365
+ _check "NAT rule / state root" skip "requires root"
366
+ fi
367
+
368
+ local mem_avail_mib disk_avail
369
+ mem_avail_mib="$(awk '/^MemAvailable:/ {print int($2/1024)}' /proc/meminfo 2>/dev/null || echo "?")"
370
+ disk_avail="$(df -h --output=avail "$FC_ROOT" 2>/dev/null | tail -1 | tr -d ' ' || echo "?")"
371
+ _check "capacity" pass "MemAvailable: ${mem_avail_mib} MiB, free on $FC_ROOT: ${disk_avail:-?}"
372
+
373
+ echo
374
+ if (( failures > 0 )); then
375
+ die "$failures check(s) failed"
376
+ fi
377
+ echo "All checks passed"
378
+ }
379
+
236
380
  [[ $# -ge 1 ]] || { usage; exit 1; }
237
381
 
238
382
  cmd="$1"
239
383
  shift || true
240
384
 
241
385
  case "$cmd" in
386
+ doctor) [[ $# -eq 0 ]] || die "Usage: $CMD_NAME doctor"; cmd_doctor ;;
242
387
  list) cmd_list ;;
243
- status) cmd_status "$@" ;;
388
+ status) [[ $# -le 1 ]] || die "Usage: $CMD_NAME status [id]"; cmd_status "$@" ;;
244
389
  start) [[ $# -eq 1 ]] || die "Usage: $CMD_NAME start <id>"; cmd_start "$1" ;;
245
390
  stop) [[ $# -eq 1 ]] || die "Usage: $CMD_NAME stop <id>"; cmd_stop "$1" ;;
246
391
  restart) [[ $# -eq 1 ]] || die "Usage: $CMD_NAME restart <id>"; cmd_restart "$1" ;;
247
- logs) [[ $# -ge 1 ]] || die "Usage: $CMD_NAME logs <id> [guest|host]"; cmd_logs "$@" ;;
392
+ logs) [[ $# -ge 1 && $# -le 2 ]] || die "Usage: $CMD_NAME logs <id> [guest|host]"; cmd_logs "$@" ;;
248
393
  shell) [[ $# -ge 1 ]] || die "Usage: $CMD_NAME shell <id> [command...]"; id="$1"; shift; cmd_shell "$id" "$@" ;;
249
394
  token) [[ $# -eq 1 ]] || die "Usage: $CMD_NAME token <id>"; cmd_token "$1" ;;
250
- destroy) [[ $# -ge 1 ]] || die "Usage: $CMD_NAME destroy <id> [--force]"; cmd_destroy "$@" ;;
395
+ destroy) cmd_destroy "$@" ;;
251
396
  -h|--help|help) usage ;;
252
397
  *) die "Unknown command: $cmd" ;;
253
398
  esac
package/bin/vm-provision CHANGED
@@ -4,51 +4,185 @@ set -euo pipefail
4
4
  SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "$0")")" && pwd)"
5
5
  source "$SCRIPT_DIR/vm-common.sh"
6
6
 
7
+ CMD_NAME="${VM_PROVISION_CMD_NAME:-fireclaw provision}"
8
+
7
9
  usage() {
8
10
  cat <<EOF
9
- Usage: fireclaw provision <instance>
11
+ Usage: $CMD_NAME <instance> [options]
12
+
13
+ Each option updates the saved instance config, then guest provisioning reruns
14
+ with the result. With no options, provisioning reruns with the saved config.
15
+
16
+ Options:
17
+ --telegram-token <token>
18
+ --no-telegram (clear the saved token; disables Telegram in the guest)
19
+ --telegram-users <csv>
20
+ --model <id>
21
+ --skills <csv>
22
+ --openclaw-image <image>
23
+ --anthropic-api-key <key>
24
+ --openai-api-key <key>
25
+ --minimax-api-key <key>
26
+ --skip-browser-install
27
+ --browser-install (re-enable browser install)
28
+ -h|--help
10
29
  EOF
11
30
  }
12
31
 
13
- [[ $# -eq 1 ]] || { usage; exit 1; }
32
+ csv_values() {
33
+ printf '%s' "$1" | tr ',' '\n' | sed 's/^[[:space:]]*//; s/[[:space:]]*$//; /^$/d'
34
+ }
35
+
36
+ validate_no_newline() {
37
+ local name="$1"
38
+ local value="$2"
39
+ [[ "$value" != *$'\n'* && "$value" != *$'\r'* ]] || die "$name must not contain newlines"
40
+ }
41
+
42
+ require_option_value() {
43
+ local opt="$1"
44
+ local value="${2-}"
45
+ [[ -n "$value" && "$value" != --* ]] || die "Missing value for $opt"
46
+ }
47
+
48
+ set_kv() {
49
+ local file="$1"
50
+ local key="$2"
51
+ local value="$3"
52
+ local tmp
53
+ validate_no_newline "key" "$key"
54
+ validate_no_newline "$key" "$value"
55
+ tmp="$(mktemp)"
56
+ awk -F= -v key="$key" '$1 != key { print }' "$file" > "$tmp"
57
+ printf "%s=%s\n" "$key" "$value" >> "$tmp"
58
+ install -m 600 "$tmp" "$file"
59
+ rm -f "$tmp"
60
+ }
61
+
62
+ saved_value() {
63
+ local file="$1"
64
+ local key="$2"
65
+ grep "^$key=" "$file" | tail -n 1 | cut -d= -f2- || true
66
+ }
67
+
68
+ if [[ "${1:-}" == "-h" || "${1:-}" == "--help" || "${1:-}" == "help" ]]; then
69
+ usage
70
+ exit 0
71
+ fi
72
+
73
+ [[ $# -ge 1 ]] || { usage; exit 1; }
14
74
 
15
75
  INSTANCE="$1"
76
+ shift
77
+
78
+ declare -A OVERRIDES=()
79
+
80
+ while [[ $# -gt 0 ]]; do
81
+ case "$1" in
82
+ --telegram-token) require_option_value "$1" "${2-}"; OVERRIDES[TELEGRAM_TOKEN]="$2"; shift 2 ;;
83
+ --no-telegram) OVERRIDES[TELEGRAM_TOKEN]=""; shift ;;
84
+ --telegram-users) require_option_value "$1" "${2-}"; OVERRIDES[TELEGRAM_USERS]="$2"; shift 2 ;;
85
+ --model) require_option_value "$1" "${2-}"; OVERRIDES[MODEL]="$2"; shift 2 ;;
86
+ --skills) require_option_value "$1" "${2-}"; OVERRIDES[SKILLS]="$2"; shift 2 ;;
87
+ --openclaw-image) require_option_value "$1" "${2-}"; OVERRIDES[OPENCLAW_IMAGE]="$2"; shift 2 ;;
88
+ --anthropic-api-key) require_option_value "$1" "${2-}"; OVERRIDES[ANTHROPIC_API_KEY]="$2"; shift 2 ;;
89
+ --openai-api-key) require_option_value "$1" "${2-}"; OVERRIDES[OPENAI_API_KEY]="$2"; shift 2 ;;
90
+ --minimax-api-key) require_option_value "$1" "${2-}"; OVERRIDES[MINIMAX_API_KEY]="$2"; shift 2 ;;
91
+ --skip-browser-install) OVERRIDES[SKIP_BROWSER_INSTALL]="true"; shift ;;
92
+ --browser-install) OVERRIDES[SKIP_BROWSER_INSTALL]="false"; shift ;;
93
+ *)
94
+ die "Unknown option: $1"
95
+ ;;
96
+ esac
97
+ done
98
+
16
99
  validate_instance_id "$INSTANCE"
17
100
  require_root
18
101
 
102
+ # load_instance_env resets the key variables from saved state, so remember
103
+ # what the caller passed in the environment before it runs.
104
+ ENV_ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
105
+ ENV_OPENAI_API_KEY="${OPENAI_API_KEY:-}"
106
+ ENV_MINIMAX_API_KEY="${MINIMAX_API_KEY:-}"
107
+
19
108
  load_instance_env "$INSTANCE"
20
109
 
110
+ ENV_FILE="$(instance_env "$INSTANCE")"
21
111
  PROVISION_VARS="$(instance_dir "$INSTANCE")/provision.vars"
22
112
  [[ -f "$PROVISION_VARS" ]] || die "Missing provision vars: $PROVISION_VARS"
23
113
  [[ -f "$REPO_ROOT/scripts/provision-guest.sh" ]] || die "Missing: $REPO_ROOT/scripts/provision-guest.sh"
24
114
 
25
- if ! wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 180; then
115
+ if [[ -n "${OVERRIDES[TELEGRAM_USERS]:-}" ]]; then
116
+ [[ -n "$(csv_values "${OVERRIDES[TELEGRAM_USERS]}")" ]] || die "--telegram-users must include at least one allowed Telegram user ID"
117
+ fi
118
+
119
+ effective() {
120
+ local key="$1"
121
+ local fallback="$2"
122
+ if [[ -v "OVERRIDES[$key]" ]]; then
123
+ printf '%s' "${OVERRIDES[$key]}"
124
+ else
125
+ printf '%s' "$fallback"
126
+ fi
127
+ }
128
+
129
+ # Keys passed via the environment fill in for empty saved values, matching
130
+ # the behavior the provider-key error message advertises.
131
+ if [[ -z "$(effective ANTHROPIC_API_KEY "${ANTHROPIC_API_KEY:-}")" && -n "$ENV_ANTHROPIC_API_KEY" ]]; then
132
+ OVERRIDES[ANTHROPIC_API_KEY]="$ENV_ANTHROPIC_API_KEY"
133
+ fi
134
+ if [[ -z "$(effective OPENAI_API_KEY "${OPENAI_API_KEY:-}")" && -n "$ENV_OPENAI_API_KEY" ]]; then
135
+ OVERRIDES[OPENAI_API_KEY]="$ENV_OPENAI_API_KEY"
136
+ fi
137
+ if [[ -z "$(effective MINIMAX_API_KEY "${MINIMAX_API_KEY:-}")" && -n "$ENV_MINIMAX_API_KEY" ]]; then
138
+ OVERRIDES[MINIMAX_API_KEY]="$ENV_MINIMAX_API_KEY"
139
+ fi
140
+
141
+ # Validate the post-override view BEFORE persisting anything, so a rejected
142
+ # run leaves the saved config exactly as it was.
143
+ EFFECTIVE_TELEGRAM_TOKEN="$(effective TELEGRAM_TOKEN "$(saved_value "$PROVISION_VARS" TELEGRAM_TOKEN)")"
144
+ EFFECTIVE_TELEGRAM_USERS="$(effective TELEGRAM_USERS "${TELEGRAM_USERS:-}")"
145
+ if [[ -n "$EFFECTIVE_TELEGRAM_TOKEN" ]]; then
146
+ if [[ -z "$(csv_values "$EFFECTIVE_TELEGRAM_USERS")" ]]; then
147
+ die "Telegram is enabled but TELEGRAM_USERS is empty; rerun with: sudo $CMD_NAME $INSTANCE --telegram-users <csv>"
148
+ fi
149
+ fi
150
+
151
+ EFFECTIVE_MODEL="$(effective MODEL "${MODEL:-}")"
152
+ ANTHROPIC_API_KEY="$(effective ANTHROPIC_API_KEY "${ANTHROPIC_API_KEY:-}")"
153
+ OPENAI_API_KEY="$(effective OPENAI_API_KEY "${OPENAI_API_KEY:-}")"
154
+ MINIMAX_API_KEY="$(effective MINIMAX_API_KEY "${MINIMAX_API_KEY:-}")"
155
+ require_model_provider_key "$EFFECTIVE_MODEL"
156
+
157
+ # TELEGRAM_TOKEN lives only in provision.vars; every other key is mirrored
158
+ # into the .env state file that load_instance_env reads.
159
+ for key in "${!OVERRIDES[@]}"; do
160
+ set_kv "$PROVISION_VARS" "$key" "${OVERRIDES[$key]}"
161
+ if [[ "$key" != "TELEGRAM_TOKEN" ]]; then
162
+ set_kv "$ENV_FILE" "$key" "${OVERRIDES[$key]}"
163
+ fi
164
+ done
165
+
166
+ if ! wait_for_ssh "$VM_IP" "$SSH_KEY_PATH" 180 "$INSTANCE"; then
26
167
  die "VM SSH unreachable. Start it first: fireclaw start $INSTANCE"
27
168
  fi
28
169
 
29
- scp -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null \
170
+ scp -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="$(ssh_known_hosts_file "$INSTANCE")" \
30
171
  "$REPO_ROOT/scripts/provision-guest.sh" "ubuntu@$VM_IP:/tmp/provision-guest.sh"
31
- scp -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null \
172
+ scp -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="$(ssh_known_hosts_file "$INSTANCE")" \
32
173
  "$PROVISION_VARS" "ubuntu@$VM_IP:/tmp/provision.vars"
33
174
 
34
- ssh -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null \
175
+ ssh -i "$SSH_KEY_PATH" -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile="$(ssh_known_hosts_file "$INSTANCE")" \
35
176
  "ubuntu@$VM_IP" "sudo bash /tmp/provision-guest.sh /tmp/provision.vars"
36
177
 
37
- systemctl enable --now "$(proxy_service "$INSTANCE")" >/dev/null 2>&1 || true
178
+ systemctl enable --now "$(proxy_service "$INSTANCE")"
38
179
 
39
- health_ok="false"
40
- for _ in {1..30}; do
41
- if curl -fsS "http://127.0.0.1:$HOST_PORT/health" >/dev/null 2>&1; then
42
- health_ok="true"
43
- break
44
- fi
45
- sleep 2
46
- done
180
+ if ! wait_for_instance_health "$INSTANCE" "$VM_IP" "$HOST_PORT" "$SSH_KEY_PATH" 30; then
181
+ die "Health checks did not pass for $INSTANCE after provisioning"
182
+ fi
47
183
 
48
184
  echo "✓ VM provisioning complete"
49
185
  echo " Instance: $INSTANCE"
50
186
  echo " VM IP: $VM_IP"
51
187
  echo " Port: $HOST_PORT"
52
- if [[ "$health_ok" != "true" ]]; then
53
- echo " Health: pending (service may still be warming up)"
54
- fi
188
+ echo " Health: up (guest + proxy)"