kento-core 1.6.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kento/create.py ADDED
@@ -0,0 +1,1205 @@
1
+ """Create an instance backed by an OCI image."""
2
+
3
+ import logging
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ from pathlib import Path
8
+
9
+ from kento import (LXC_BASE, VM_BASE, _scan_namespace, next_instance_name,
10
+ require_root, sanitize_image_name, upper_base, validate_name)
11
+ from kento.cloudinit import detect_cloudinit, write_seed
12
+ from kento.defaults import (LXC_TTY, LXC_MOUNT_AUTO, LXC_MOUNT_AUTO_NESTING,
13
+ LXC_ARG_DENYLIST, PVE_ARG_DENYLIST,
14
+ QEMU_ARG_DENYLIST)
15
+ from kento.errors import (InstanceExistsError, ModeError, StateError,
16
+ SubprocessError, ValidationError)
17
+ from kento.hook import write_hook
18
+ from kento.inject import write_inject
19
+ from kento.layers import resolve_layers
20
+ from kento.locking import kento_lock
21
+
22
+ logger = logging.getLogger("kento")
23
+
24
+
25
def _apparmor_active() -> bool:
    """Report whether the kernel has AppArmor enabled as an active LSM.

    The answer comes from the sysfs flag
    ``/sys/module/apparmor/parameters/enabled``; only the value ``Y`` counts
    as active. The function is deliberately tiny so tests can monkeypatch it
    instead of depending on the LSM state of the machine running them.

    Returns:
        True when the flag reads ``Y``. False when it reads anything else or
        cannot be read at all (OSError, e.g. the module is not loaded) — in
        that case AppArmor is not in play and `generated` is a harmless no-op.
    """
    flag_file = Path("/sys/module/apparmor/parameters/enabled")
    try:
        raw = flag_file.read_text()
    except OSError:
        return False
    return raw.strip() == "Y"
37
+
38
+
39
def _apparmor_parser_present() -> bool:
    """Report whether ``apparmor_parser`` can be found on PATH.

    That binary is what loads the `generated` AppArmor profile.
    """
    parser_path = shutil.which("apparmor_parser")
    return parser_path is not None
42
+
43
+
44
def _perlayer_idmap_supported() -> bool:
    """True iff this kernel supports per-layer idmap for overlayfs.

    Tests the mainline (5.19+) path: idmap a lower directory via util-linux
    2.40+ ``X-mount.idmap``, then mount an overlayfs on top of the idmapped
    lower with ``userxattr,index=off,metacopy=off``. Both mounts succeeding
    means the per-layer idmap path is available (overlay-over-idmapped-lowers,
    approach 3a). This is the path kento uses for ``--unprivileged``.

    Returns False on ANY error or unsupported configuration and NEVER raises.
    Returns False if not root (mount requires CAP_SYS_ADMIN).

    This is a functional (not version-sniffing) probe — it flips to True
    automatically once both capabilities are present with no kento change.

    Cleanup runs in ``finally``: every mount that succeeded is unmounted in
    reverse order and the temp directory is removed. Each cleanup step is
    isolated, so a failure in one (e.g. ``umount`` raising) never prevents the
    remaining steps from running; cleanup errors themselves are ignored.
    """
    import tempfile
    tmp = None
    # Mount points that were successfully mounted, in mount order.
    mounted = []
    try:
        if os.geteuid() != 0:
            return False
        tmp = tempfile.mkdtemp(prefix="kento-idmap-probe-")
        base = Path(tmp)
        lower = base / "lower"
        lower_id = base / "lower_id"
        upper = base / "upper"
        work = base / "work"
        merged = base / "merged"
        for d in (lower, lower_id, upper, work, merged):
            d.mkdir()
        (lower / "probe").write_text("x")

        # Step 1: idmap the lower directory (per-layer bind with X-mount.idmap).
        # util-linux >= 2.40 supports this option. On-disk uid 0 → presented
        # as uid 100000 to the overlay (same mapping as lxc.idmap u 0 100000 65536).
        r = subprocess.run(
            ["mount", "--bind", "-o",
             "X-mount.idmap=u:0:100000:1 g:0:100000:1",
             str(lower), str(lower_id)],
            capture_output=True, text=True,
        )
        if r.returncode != 0:
            return False
        mounted.append(lower_id)

        # Step 2: overlay over the idmapped lower (mainline kernel >= 5.19).
        # userxattr avoids trusted.overlay.* xattr issues with podman layers;
        # index=off and metacopy=off are required for this overlay configuration.
        r = subprocess.run(
            ["mount", "-t", "overlay", "overlay", "-o",
             f"lowerdir={lower_id},upperdir={upper},workdir={work},"
             "userxattr,index=off,metacopy=off",
             str(merged)],
            capture_output=True, text=True,
        )
        if r.returncode != 0:
            return False
        mounted.append(merged)
        return True
    except Exception:  # noqa: BLE001 — probe must never raise
        return False
    finally:
        # Unmount newest-first (overlay before the bind it sits on). Each
        # step has its own guard so one failure cannot skip the others.
        for mountpoint in reversed(mounted):
            try:
                subprocess.run(["umount", str(mountpoint)],
                               capture_output=True, text=True)
            except Exception:  # noqa: BLE001 — best-effort cleanup
                pass
        if tmp is not None:
            try:
                shutil.rmtree(tmp, ignore_errors=True)
            except Exception:  # noqa: BLE001 — best-effort cleanup
                pass
121
+
122
+
123
# PVE's unprivileged container default idmap range. A fresh unprivileged
# container maps container uid/gid 0 onto host 100000 for 65536 ids unless the
# admin configures a custom range. kento doesn't expose custom ranges yet, so
# this default is the normal case — but _pve_idmap_range() honours a custom
# `lxc.idmap` uid line for container id 0 if one is present (e.g. via
# --pve-arg) so the state file always records the EFFECTIVE range.
_PVE_DEFAULT_IDMAP_BASE = 100000
_PVE_DEFAULT_IDMAP_COUNT = 65536


def _pve_idmap_range(pve_conf_text: str) -> tuple[int, int]:
    """Return (BASE, COUNT) for the unprivileged idmap, following PVE.

    If ``pve_conf_text`` carries a custom uid mapping for container id 0 —
    written either LXC-style (``lxc.idmap = u 0 <B> <C>``) or in PVE's own
    ``key: value`` form (``lxc.idmap: u 0 <B> <C>``) — use its BASE/COUNT;
    otherwise fall back to PVE's unprivileged default (100000/65536). Only
    the uid (``u``) mapping for container id 0 is considered — the range
    kento's hook idmaps every lowerdir with. The first matching line wins.

    The state file written from this lets the hook resolve the range
    independently of WHEN PVE populates lxc.idmap into the runtime config.

    Args:
        pve_conf_text: Full text of the PVE container config.

    Returns:
        ``(base, count)`` as integers.
    """
    import re
    # Accept both separators: '=' (native LXC syntax) and ':' (the separator
    # PVE uses for every key in its .conf files, raw lxc.* keys included).
    pat = re.compile(r"^\s*lxc\.idmap\s*[:=]\s*u\s+0\s+(\d+)\s+(\d+)\s*$")
    for line in pve_conf_text.splitlines():
        m = pat.match(line)
        if m:
            return int(m.group(1)), int(m.group(2))
    return _PVE_DEFAULT_IDMAP_BASE, _PVE_DEFAULT_IDMAP_COUNT
151
+
152
+
153
def _run_start_or_rollback(cmd: list[str], *, name: str, scope: str) -> None:
    """Run a start command on behalf of create()'s try block.

    Any failure is reported as a RuntimeError rather than a
    CalledProcessError, so create()'s surrounding try/except can catch it and
    run the rollback undos (which include the matching stop). run_or_die is
    intentionally avoided: its SubprocessError would also trigger rollback
    but would duplicate the message already printed on the
    `Error during create:` line; a plain RuntimeError keeps one clear message.

    Args:
        cmd: The argv to execute.
        name: Instance name, used in the error text.
        scope: CLI scope word used in the suggested recovery commands.

    Raises:
        RuntimeError: the executable is missing, or it exited non-zero.
    """
    recovery_hint = (
        f"Instance created; run 'kento {scope} start {name}' to retry or "
        f"'kento {scope} destroy {name}' to remove."
    )
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError as missing:
        raise RuntimeError(
            f"failed to start {name}: '{missing.filename}' not found on PATH. "
            + recovery_hint
        ) from missing
    if proc.returncode == 0:
        return
    # Prefer the command's own stderr; fall back to the bare exit status.
    detail = (proc.stderr or "").strip() or f"(exit {proc.returncode})"
    raise RuntimeError(f"failed to start {name}: {detail}. " + recovery_hint)
179
+
180
+
181
def _validate_qemu_args(qemu_args: list[str]) -> None:
    """Refuse --qemu-arg values that clash with QEMU flags kento manages.

    An argument is rejected when it contains any substring listed in
    QEMU_ARG_DENYLIST. Pass-through is meant to be an escape hatch, so the
    denylist is deliberately short; a hit aborts the create with an
    actionable error naming the first offending argument and the first
    denylist entry it matched.

    Raises:
        ValidationError: an argument contains a reserved substring.
    """
    for candidate in qemu_args:
        clash = next(
            (reserved for reserved in QEMU_ARG_DENYLIST if reserved in candidate),
            None,
        )
        if clash is None:
            continue
        raise ValidationError(
            f"kento manages {clash!r} directly — --qemu-arg {candidate!r} "
            "would collide with kento's own QEMU argv. Drop the flag or "
            "file an issue if you need it overridable."
        )
197
+
198
+
199
def _validate_pve_args(pve_args: list[str]) -> None:
    """Refuse --pve-arg values that duplicate PVE config keys kento manages.

    An argument is rejected when it contains any substring listed in
    PVE_ARG_DENYLIST (same escape-hatch reasoning as for --qemu-arg). The
    error names the first offending argument and the first denylist entry it
    matched.

    Raises:
        ValidationError: an argument contains a reserved substring.
    """
    for candidate in pve_args:
        clash = next(
            (reserved for reserved in PVE_ARG_DENYLIST if reserved in candidate),
            None,
        )
        if clash is None:
            continue
        raise ValidationError(
            f"kento manages {clash!r} directly — --pve-arg {candidate!r} "
            "would collide with kento's own PVE config. Drop the flag or "
            "file an issue if you need it overridable."
        )
213
+
214
+
215
def _validate_lxc_args(lxc_args: list[str]) -> None:
    """Refuse --lxc-arg values that duplicate plain-LXC keys kento manages.

    An argument is rejected when it contains any substring listed in
    LXC_ARG_DENYLIST. As with --qemu-arg / --pve-arg this is an escape hatch:
    the denylist names only the structural keys generate_config() emits (plus
    the cgroup lines `kento set` manages); everything else is user-authored
    and passed through verbatim.

    Raises:
        ValidationError: an argument contains a reserved substring.
    """
    for candidate in lxc_args:
        clash = next(
            (reserved for reserved in LXC_ARG_DENYLIST if reserved in candidate),
            None,
        )
        if clash is None:
            continue
        raise ValidationError(
            f"kento manages {clash!r} directly — --lxc-arg {candidate!r} "
            "would collide with kento's own plain-LXC config. Drop the flag "
            "or file an issue if you need it overridable."
        )
232
+
233
+
234
def _validate_env(env: list[str]) -> None:
    """Reject --env entries that aren't clean KEY=VALUE pairs.

    Each entry is written verbatim into three places: the cloud-init
    user-data ``content: |`` block scalar (cloudinit.py), /etc/environment,
    and ``lxc.environment = <e>``. An embedded line break (or other control
    char) would terminate the YAML block scalar early and silently drop
    later directives (ssh keys etc.), and corrupt the other targets too. The
    help text promises KEY=VALUE, so enforce it before any state is written:
    the key must be a valid shell-ish identifier, there must be an ``=``, and
    no control characters may appear anywhere.

    "Control character" here means the C0 range (including newline/tab/CR),
    DEL, the C1 range (U+0080-U+009F, which contains NEL U+0085), and the
    Unicode line/paragraph separators U+2028/U+2029. The last three matter
    because YAML 1.1 parsers such as PyYAML treat them as line breaks just
    like ``\\n``.

    Args:
        env: The raw ``--env`` strings.

    Raises:
        ValidationError: on the first entry that contains a control
            character, lacks ``=``, or has an invalid key.
    """
    import re
    key_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

    def _is_forbidden(ch: str) -> bool:
        code = ord(ch)
        # C0 controls, DEL + C1 controls, then the Unicode line separators.
        return code < 0x20 or 0x7f <= code <= 0x9f or code in (0x2028, 0x2029)

    for e in env:
        if any(_is_forbidden(c) for c in e):
            raise ValidationError(
                f"--env value contains a control character: {e!r}. "
                "Each --env must be a single-line KEY=VALUE pair."
            )
        if "=" not in e:
            raise ValidationError(
                f"--env value is not KEY=VALUE (missing '='): {e!r}."
            )
        key = e.split("=", 1)[0]
        if not key_re.fullmatch(key):
            raise ValidationError(
                f"--env key {key!r} is invalid in {e!r}; keys must "
                "match [A-Za-z_][A-Za-z0-9_]*."
            )
264
+
265
+
266
def _run_cleanup(undos: list[tuple[str, object]]) -> None:
    """Execute rollback steps newest-first, draining ``undos`` as it goes.

    Every entry is a ``(label, callable)`` pair. The callable is invoked with
    no arguments and whatever it returns is discarded. This is best-effort:
    an exception from one step is logged as a warning and the remaining steps
    still run, so a single cleanup failure masks neither the other cleanups
    nor the original error. The list is empty when the function returns.
    """
    while undos:
        step_label, step = undos.pop()
        try:
            step()
        except Exception as step_err:  # noqa: BLE001 — best-effort cleanup
            logger.warning("rollback step %r failed: %s", step_label, step_err)
279
+
280
+
281
def generate_config(name: str, lxc_dir: Path, *, bridge: str | None = None,
                    net_type: str | None = None,
                    nesting: bool = False,
                    ip: str | None = None, gateway: str | None = None,
                    env: list[str] | None = None,
                    port: str | None = None,
                    memory: int | None = None,
                    cores: int | None = None,
                    unprivileged: bool = False,
                    mode: str = "lxc") -> str:
    """Render the native LXC config text for instance ``name``.

    Args:
        name: Instance name; becomes ``lxc.uts.name``.
        lxc_dir: Instance directory. ``rootfs``, ``kento-hook`` and
            ``kento-lxc-args`` are all resolved relative to it.
        bridge: Bridge to attach a veth NIC to, if any.
        net_type: ``"host"`` shares the host network; any other value emits a
            veth NIC only when ``bridge`` is set (which also covers legacy
            callers that pass ``bridge`` without ``net_type``).
        nesting: Use the nesting variant of ``lxc.mount.auto`` and add the
            nesting include.
        ip: Static IPv4 address for the veth NIC.
        gateway: IPv4 gateway for the veth NIC.
        env: ``KEY=VALUE`` strings, one ``lxc.environment`` line each.
        port: When not None, the hook is also registered as ``start-host``.
        memory: Memory limit in MiB (written to ``memory.max`` in bytes).
        cores: CPU limit in whole cores (written as a ``cpu.max`` quota over
            a 100000 period).
        unprivileged: Emit the 100000/65536 uid+gid idmap (plain-LXC only).
        mode: Only ``"lxc"`` gets the common.conf include and the AppArmor
            lines.

    Returns:
        The config file content, newline-terminated.

    Raises:
        ValidationError: KENTO_APPARMOR_PROFILE is set to something other
            than ``generated`` / ``unconfined``.
        StateError: the ``generated`` profile is requested, AppArmor is
            active, and ``apparmor_parser`` is not installed.
    """
    hook_path = lxc_dir / "kento-hook"
    plain_lxc = mode == "lxc"

    out = [
        f"lxc.uts.name = {name}",
        f"lxc.rootfs.path = dir:{lxc_dir}/rootfs",
        "",
        "lxc.hook.version = 1",
    ]
    hook_events = ["pre-start", "post-stop"]
    if port is not None:
        hook_events.append("start-host")
    out.extend(f"lxc.hook.{event} = {hook_path}" for event in hook_events)

    # Network section. "host" shares the host network; otherwise a veth NIC
    # is emitted whenever a bridge is given (explicit net_type == "bridge" as
    # well as the backward-compatible "bridge passed without net_type" case).
    # With no bridge and no "host" there are no network lines at all.
    if net_type == "host":
        out += ["", "lxc.net.0.type = none"]  # shares host network
    elif bridge:
        out += [
            "",
            "lxc.net.0.type = veth",
            f"lxc.net.0.link = {bridge}",
            "lxc.net.0.flags = up",
        ]
        if ip:
            out.append(f"lxc.net.0.ipv4.address = {ip}")
        if gateway:
            out.append(f"lxc.net.0.ipv4.gateway = {gateway}")

    mount_auto = LXC_MOUNT_AUTO_NESTING if nesting else LXC_MOUNT_AUTO
    out += [
        "",
        f"lxc.mount.auto = {mount_auto}",
        f"lxc.tty.max = {LXC_TTY}",
    ]

    # Plain-LXC on modern OCI images (systemd 256+) needs AppArmor
    # profile=generated. The stock lxc-container-default-with-nesting profile
    # blocks the credentials tmpfs mount used by ImportCredential= directives,
    # making systemd-journald, systemd-networkd and systemd-tmpfiles-setup
    # fail with status=243/CREDENTIALS. profile=generated is a built-in LXC
    # feature (not PVE-specific): LXC builds a per-container profile that
    # enforces the host/container boundary but labels in-container processes
    # :unconfined, so PAM helpers (unix_chkpwd) and other setuid binaries
    # still load glibc RELRO correctly. PVE-LXC takes the same approach via
    # pct's config.
    #
    # Escape hatch: the KENTO_APPARMOR_PROFILE env var overrides the default.
    # Set it to "unconfined" when running kento inside an outer LXC (nested
    # scenario) — the apparmor_parser calls needed to load `generated` are
    # blocked there, and `unconfined` is safe because the outer profile still
    # enforces the host/container boundary. Only "generated" and "unconfined"
    # are accepted.
    #
    # Ordering matters: common.conf is included BEFORE nesting.conf and the
    # apparmor.profile line comes AFTER both includes (otherwise nesting.conf
    # would override it).
    if plain_lxc:
        out.append("lxc.include = /usr/share/lxc/config/common.conf")
    if nesting:
        # NOTE(review): SOURCE's indentation was lost; the fuse/tun entries
        # are assumed to belong to the nesting branch — confirm.
        out.extend([
            "lxc.include = /usr/share/lxc/config/nesting.conf",
            "lxc.mount.entry = /dev/fuse dev/fuse none bind,create=file,optional 0 0",
            "lxc.mount.entry = /dev/net/tun dev/net/tun none bind,create=file,optional 0 0",
        ])
    if plain_lxc:
        profile = os.environ.get("KENTO_APPARMOR_PROFILE", "generated")
        if profile not in ("generated", "unconfined"):
            raise ValidationError(
                "KENTO_APPARMOR_PROFILE must be 'generated' or "
                f"'unconfined', got {profile!r}"
            )
        # Fail-closed pre-flight: `generated` is loaded by apparmor_parser at
        # lxc-start time on a host whose kernel has AppArmor active as an LSM.
        # Without the parser the container HARD-FAILS at start ("Cannot use
        # generated profile: apparmor_parser not available") — it does not
        # degrade. Catch that now, at config-generation time, with an
        # actionable message instead of writing a doomed config. Only
        # `generated` needs the parser; explicit `unconfined` is fine.
        needs_parser = profile == "generated"
        if needs_parser and _apparmor_active() and not _apparmor_parser_present():
            raise StateError(
                "AppArmor is active in this kernel but 'apparmor_parser' is not\n"
                "installed, so LXC's default 'generated' profile cannot be loaded and the\n"
                "instance would fail to start. Fix one of:\n"
                "  - install the 'apparmor' package (provides apparmor_parser), or\n"
                "  - set KENTO_APPARMOR_PROFILE=unconfined (namespaces/cgroups still\n"
                "    enforce the host/container boundary; in-kernel MAC confinement off)."
            )
        out += [
            f"lxc.apparmor.profile = {profile}",
            "lxc.apparmor.allow_nesting = 1",
            "lxc.apparmor.allow_incomplete = 1",
        ]
        # Unprivileged plain-LXC: map container UID/GID 0 onto an
        # unprivileged host range (100000:65536). The rootfs idmap itself is
        # done per-layer by kento's hook (kento-hook): for each lowerdir it
        # creates an idmapped bind mount
        # (X-mount.idmap=u:0:100000:65536 g:0:100000:65536) and overlays over
        # the idmapped lowers with userxattr,index=off,metacopy=off. That is
        # the mainline kernel >= 5.19 per-layer idmap path
        # (overlay-over-idmapped-lowers), NOT the non-mainline merged-overlay
        # idmap (lxc.rootfs.options=idmap=container, which needs an unmerged
        # kernel patch). create() gates this behind
        # _perlayer_idmap_supported().
        # NOTE(review): nesting under the plain-LXC branch is reconstructed
        # from the comment's wrap width — confirm.
        if unprivileged:
            out.extend(f"lxc.idmap = {kind} 0 100000 65536" for kind in ("u", "g"))
    if env:
        out.extend(f"lxc.environment = {entry}" for entry in env)
    if memory is not None:
        out.append(f"lxc.cgroup2.memory.max = {memory * 1048576}")
    if cores is not None:
        out.append(f"lxc.cgroup2.cpu.max = {cores * 100000} 100000")

    # Pass-through lines (E1b): each non-empty line in kento-lxc-args is
    # appended verbatim AFTER kento's own lines. LXC's config parser is
    # last-value-wins, so appending lets the user override non-structural
    # defaults. The LXC_ARG_DENYLIST (checked in create.py / set_cmd.py)
    # already rejected the structural collisions.
    from kento.pve import _read_passthrough_lines
    out.extend(_read_passthrough_lines(lxc_dir / "kento-lxc-args"))

    return "\n".join(out) + "\n"
418
+
419
+
420
+ def _inject_network_config(state_dir: Path, ip: str,
421
+ gateway: str | None = None,
422
+ dns: str | None = None,
423
+ searchdomain: str | None = None,
424
+ mode: str = "lxc") -> None:
425
+ """Write 05-kento-static.network into the overlayfs upper layer.
426
+
427
+ The 05- prefix sorts before any image-baked drop-in (e.g. a generic
428
+ Kind=veth Unmanaged=yes unit). In pve-lxc the guest eth0 presents
429
+ Kind=veth, so such a unit would otherwise match and win; the 05- prefix
430
+ makes kento's per-instance config authoritative.
431
+ """
432
+ # VM modes use predictable naming (e.g. enp0s2), so match by type.
433
+ # LXC/PVE modes always have eth0 (configured by LXC veth).
434
+ match_line = "Type=ether" if mode in ("vm", "pve-vm") else "Name=eth0"
435
+ lines = [
436
+ "[Match]",
437
+ match_line,
438
+ "",
439
+ "[Network]",
440
+ f"Address={ip}",
441
+ ]
442
+ if gateway:
443
+ lines.append(f"Gateway={gateway}")
444
+ if dns:
445
+ lines.append(f"DNS={dns}")
446
+ if searchdomain:
447
+ lines.append(f"Domains={searchdomain}")
448
+ lines.append("")
449
+
450
+ net_dir = state_dir / "upper" / "etc" / "systemd" / "network"
451
+ net_dir.mkdir(parents=True, exist_ok=True)
452
+ (net_dir / "05-kento-static.network").write_text("\n".join(lines))
453
+
454
+
455
def _inject_hostname(state_dir: Path, hostname: str) -> None:
    """Place ``etc/hostname`` in the overlayfs upper layer.

    ``<state_dir>/upper/etc`` is created if needed; the file holds
    ``hostname`` followed by a single newline.
    """
    upper_etc = state_dir.joinpath("upper", "etc")
    upper_etc.mkdir(parents=True, exist_ok=True)
    upper_etc.joinpath("hostname").write_text(f"{hostname}\n")
460
+
461
+
462
def _inject_timezone(state_dir: Path, timezone: str) -> None:
    """Place the timezone settings in the overlayfs upper layer.

    Two files are written under ``<state_dir>/upper/etc`` (created if
    needed): ``localtime`` becomes a symlink to
    ``/usr/share/zoneinfo/<timezone>`` (any existing entry is removed first),
    and ``timezone`` holds the zone name followed by a newline.
    """
    upper_etc = state_dir.joinpath("upper", "etc")
    upper_etc.mkdir(parents=True, exist_ok=True)

    link = upper_etc / "localtime"
    # symlink_to refuses to overwrite, so clear any previous link first.
    link.unlink(missing_ok=True)
    link.symlink_to(f"/usr/share/zoneinfo/{timezone}")

    upper_etc.joinpath("timezone").write_text(f"{timezone}\n")
470
+
471
+
472
def _inject_env(state_dir: Path, env_list: list[str]) -> None:
    """Place ``etc/environment`` in the overlayfs upper layer.

    ``<state_dir>/upper/etc`` is created if needed. The entries are written
    one per line, in order, and the file always ends with a newline.
    """
    upper_etc = state_dir.joinpath("upper", "etc")
    upper_etc.mkdir(parents=True, exist_ok=True)
    content = "\n".join(env_list) + "\n"
    upper_etc.joinpath("environment").write_text(content)
477
+
478
+
479
def _generate_ssh_host_keys(dest_dir: Path) -> None:
    """Generate SSH host key pairs (rsa, ecdsa, ed25519) in dest_dir.

    ``dest_dir`` is created if needed. Each key is written to
    ``ssh_host_<type>_key`` (ssh-keygen adds the matching ``.pub``) with an
    empty passphrase; the RSA key is 4096 bits.

    ssh-keygen runs with stdin attached to /dev/null. If a key file already
    exists it asks "Overwrite (y/n)?" on stdout — which is captured here and
    therefore invisible — and would otherwise block on the caller's terminal.
    With no stdin the prompt fails immediately and surfaces as an error.

    Raises:
        SubprocessError: ``ssh-keygen`` is not installed, or it exited
            non-zero for one of the key types.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    for key_type, extra_args in [("rsa", ["-b", "4096"]), ("ecdsa", []), ("ed25519", [])]:
        key_path = dest_dir / f"ssh_host_{key_type}_key"
        cmd = ["ssh-keygen", "-t", key_type] + extra_args + ["-f", str(key_path), "-N", ""]
        try:
            subprocess.run(cmd, check=True, capture_output=True,
                           stdin=subprocess.DEVNULL)
        except FileNotFoundError as e:
            raise SubprocessError(
                "ssh-keygen not found. Install openssh-client to use --ssh-host-keys.",
                cmd=cmd,
            ) from e
        except subprocess.CalledProcessError as e:
            # Prefer stderr; fall back to stdout, which is where ssh-keygen
            # prints its "already exists / Overwrite" prompt.
            detail = (e.stderr or e.stdout or b"").decode("utf-8", "replace").strip()
            raise SubprocessError(
                f"ssh-keygen failed for {key_type} host key: {detail}",
                cmd=cmd,
                returncode=e.returncode,
            ) from e
499
+
500
+
501
def _copy_ssh_host_keys(src_dir: Path, dest_dir: Path) -> None:
    """Copy every ``ssh_host_*`` regular file from src_dir into dest_dir.

    ``dest_dir`` is created if needed. Entries are processed in sorted order;
    anything whose name lacks the ``ssh_host_`` prefix, or that is not a
    file, is skipped. Copies keep metadata (``shutil.copy2``).
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    for entry in sorted(src_dir.iterdir()):
        if not entry.name.startswith("ssh_host_"):
            continue
        if not entry.is_file():
            continue
        shutil.copy2(entry, dest_dir / entry.name)
507
+
508
+
509
+ def create(image: str, *, name: str | None = None, bridge: str | None = None,
510
+ nesting: bool = False,
511
+ start: bool = False, mode: str,
512
+ pve: bool | None = None,
513
+ vmid: int = 0, memory: int | None = None, cores: int | None = None,
514
+ port: str | None = None,
515
+ ip: str | None = None, gateway: str | None = None,
516
+ dns: str | None = None, searchdomain: str | None = None,
517
+ timezone: str | None = None,
518
+ env: list[str] | None = None,
519
+ ssh_keys: list[str] | None = None,
520
+ ssh_key_user: str = "root",
521
+ ssh_host_keys: bool = False,
522
+ ssh_host_key_dir: str | None = None,
523
+ mac: str | None = None,
524
+ config_mode: str = "auto",
525
+ qemu_args: list[str] | None = None,
526
+ pve_args: list[str] | None = None,
527
+ lxc_args: list[str] | None = None,
528
+ net_type: str | None = None,
529
+ unprivileged: bool = False,
530
+ force: bool = False) -> None:
531
+ require_root()
532
+
533
+ # Validate pass-through denylists before any state mutation. Failures
534
+ # here are pure user-input errors; fail fast with a clear pointer at
535
+ # the offending value.
536
+ if qemu_args:
537
+ _validate_qemu_args(qemu_args)
538
+ if pve_args:
539
+ _validate_pve_args(pve_args)
540
+ if lxc_args:
541
+ _validate_lxc_args(lxc_args)
542
+ # Validate --env shape BEFORE any seed/config/env-file is written. A bad
543
+ # entry (embedded newline, missing '=', bad key) would otherwise corrupt
544
+ # the cloud-init YAML block scalar / /etc/environment / lxc.environment.
545
+ if env:
546
+ _validate_env(env)
547
+
548
+ # Validate and read SSH key files early (before any filesystem changes)
549
+ ssh_key_contents: str | None = None
550
+ if ssh_keys:
551
+ parts = []
552
+ for key_path in ssh_keys:
553
+ p = Path(key_path)
554
+ if not p.is_file():
555
+ raise ValidationError(f"SSH key file not found: {key_path}")
556
+ parts.append(p.read_text())
557
+ ssh_key_contents = "\n".join(parts)
558
+ if not ssh_key_contents.endswith("\n"):
559
+ ssh_key_contents += "\n"
560
+
561
+ # Validate --ssh-host-key-dir early
562
+ if ssh_host_key_dir is not None:
563
+ src = Path(ssh_host_key_dir)
564
+ if not src.is_dir():
565
+ raise ValidationError(
566
+ f"SSH host key directory not found: {ssh_host_key_dir}"
567
+ )
568
+ has_key = any(f.name.startswith("ssh_host_") and f.name.endswith("_key")
569
+ and f.is_file() for f in src.iterdir())
570
+ if not has_key:
571
+ raise ValidationError(
572
+ f"no ssh_host_*_key files found in {ssh_host_key_dir}"
573
+ )
574
+
575
+ # Resolve PVE promotion
576
+ from kento.pve import is_pve
577
+ if pve is True:
578
+ if not is_pve():
579
+ raise ModeError("--pve specified but this is not a PVE host")
580
+ if mode == "vm":
581
+ mode = "pve-vm"
582
+ else:
583
+ mode = "pve"
584
+ elif pve is False:
585
+ pass
586
+ else:
587
+ if is_pve():
588
+ if mode == "vm":
589
+ mode = "pve-vm"
590
+ else:
591
+ mode = "pve"
592
+
593
+ # --lxc-arg targets plain-LXC's native config ONLY. On a PVE host the
594
+ # LXC config IS the PVE .conf (which carries raw lxc.* lines via
595
+ # --pve-arg), and VM modes have no native LXC config at all. Reject here
596
+ # — after PVE promotion so `mode` is the resolved one — rather than
597
+ # silently writing kento-lxc-args that nothing would ever consume.
598
+ if lxc_args:
599
+ if mode == "pve":
600
+ raise ModeError(
601
+ "--lxc-arg is not supported on a PVE host. On PVE "
602
+ "the LXC config is the PVE config; use --pve-arg, which "
603
+ "carries raw lxc.* lines."
604
+ )
605
+ if mode in ("vm", "pve-vm"):
606
+ raise ModeError(
607
+ "--lxc-arg is not applicable to VM modes (no native "
608
+ "LXC config)."
609
+ )
610
+
611
+ # --unprivileged: validate mode + run the fail-closed per-layer idmap probe
612
+ # BEFORE any filesystem mutation (mirrors the apparmor pre-flight's
613
+ # fail-closed placement). `mode` is the PVE-resolved mode here.
614
+ # Supported: lxc (plain) and pve (pve-lxc). Not supported: vm/pve-vm.
615
+ if unprivileged:
616
+ if mode in ("vm", "pve-vm"):
617
+ raise ModeError(
618
+ "--unprivileged applies to LXC modes only (VMs have their "
619
+ "own isolation)."
620
+ )
621
+ # For both lxc and pve (pve-lxc): probe the per-layer idmap capability
622
+ # (kernel >= 5.19 overlay-over-idmapped-lowers + util-linux >= 2.40
623
+ # X-mount.idmap). The hook idmaps each lowerdir; no merged-overlay
624
+ # idmap (lxc.rootfs.options=idmap=container) is required or emitted.
625
+ if not _perlayer_idmap_supported():
626
+ raise StateError(
627
+ "--unprivileged requires per-layer idmap support:\n"
628
+ " - kernel >= 5.19 (overlay-over-idmapped-lowers, mainline)\n"
629
+ " - util-linux >= 2.40 (X-mount.idmap mount option)\n"
630
+ "This kernel or util-linux version does not meet the requirement.\n"
631
+ "Create without --unprivileged to use the default (privileged) mode."
632
+ )
633
+
634
+ # Pre-validate PVE snippets storage (before any filesystem writes).
635
+ # pve-vm always needs it; pve-lxc needs it when port/memory/cores set
636
+ # (PVE strips lxc.hook.start-host, so we use hookscript: instead).
637
+ _snippets_info = None
638
+ if mode == "pve-vm":
639
+ from kento.vm_hook import find_snippets_dir
640
+ _snippets_info = find_snippets_dir()
641
+ elif mode == "pve" and (port is not None or memory is not None
642
+ or cores is not None):
643
+ from kento.vm_hook import find_snippets_dir
644
+ _snippets_info = find_snippets_dir()
645
+
646
+ # Resolve network configuration
647
+ from kento import resolve_network
648
+ network = resolve_network(net_type, bridge, mode, port)
649
+ bridge = network["bridge"]
650
+ port = network["port"]
651
+
652
+ # Plain VM mode has no tap/bridge wiring in start_vm (QEMU would need a
653
+ # tap device; only -netdev user is implemented). Reject bridge networking
654
+ # up front so the VM doesn't boot with zero NICs and no warning.
655
+ if mode == "vm" and network["type"] == "bridge":
656
+ raise ModeError(
657
+ "plain VM mode does not support bridge networking.\n"
658
+ " Use --network usermode (default) for outbound access and\n"
659
+ " port forwarding via --port, or run on a PVE host for\n"
660
+ " bridged VMs."
661
+ )
662
+
663
+ # Determine base directory for this mode
664
+ base_dir = VM_BASE if mode in ("vm", "pve-vm") else LXC_BASE
665
+
666
+ # Validate mode-specific flags (pure validation — no state mutation, so
667
+ # no need to hold the lock around these).
668
+ if vmid and mode not in ("pve", "pve-vm"):
669
+ raise ModeError(f"--vmid cannot be used with {mode.upper()} mode")
670
+ if port is not None and mode in ("lxc", "pve"):
671
+ if network["type"] != "bridge":
672
+ raise ValidationError(
673
+ "--port requires bridge networking for LXC/PVE mode"
674
+ )
675
+ if port is not None and mode in ("vm", "pve-vm"):
676
+ if network["type"] == "bridge":
677
+ raise ValidationError(
678
+ "--port cannot be used with bridge networking in VM mode"
679
+ )
680
+ if gateway and not ip:
681
+ raise ValidationError("--gateway requires --ip")
682
+ # F10: --ip / --gateway only make sense with bridge networking. Silent
683
+ # acceptance with usermode/host/none produced broken configs: usermode
684
+ # gets a conflicting DHCP lease from QEMU's built-in 10.0.2.x while
685
+ # systemd-networkd fights for the static address; none/host have no
686
+ # interface for the address to bind to.
687
+ if network["type"] in ("none", "host", "usermode"):
688
+ if ip:
689
+ raise ValidationError(
690
+ f"--ip requires bridge networking; got --network {network['type']}.\n"
691
+ " Use --network bridge (or bridge=<name>) for a static IP, "
692
+ "or remove --ip."
693
+ )
694
+ if gateway:
695
+ raise ValidationError(
696
+ f"--gateway requires bridge networking; got --network {network['type']}."
697
+ )
698
+
699
+ # Resolve layers (validates image exists). Done BEFORE the lock and before
700
+ # any directory is created: image resolution depends only on ``image``, not
701
+ # on name/vmid/container_dir, so a missing image fails here with ZERO
702
+ # filesystem side effects (no orphan instance dir left behind — F2). Staying
703
+ # outside the lock also avoids serializing image pulls across concurrent
704
+ # creates. resolve_layers either returns a non-empty string or raises
705
+ # ImageNotFoundError on a missing image, so no defensive empty-string check
706
+ # is needed here.
707
+ layers = resolve_layers(image)
708
+
709
+ # detect_cloudinit() does filesystem I/O over every layer. ``layers`` is
710
+ # resolved once and never reassigned, so probe once and reuse the boolean
711
+ # at all three decision sites below (cloudinit-mode precondition, the
712
+ # root-ssh advisory, and the effective config-mode selection).
713
+ has_cloudinit = detect_cloudinit(layers)
714
+
715
+ # F7 + F11: hold the cross-process kento lock across the entire
716
+ # allocate-and-commit sequence. Two concurrent `kento create` processes
717
+ # would otherwise race on next_instance_name / next_vmid / container_dir
718
+ # exists check, potentially ending up with the same name, VMID, or
719
+ # stomping each other's directory. Lock covers from just before name
720
+ # resolution through the container_dir mkdir; slower work (resolve_layers,
721
+ # image pulls, config writes) happens after release so we don't serialize
722
+ # the hot path. Port allocation further down has its own narrower lock.
723
+ with kento_lock():
724
+ # Resolve container name
725
+ if name is None:
726
+ base_name = sanitize_image_name(image)
727
+ other_dir = LXC_BASE if base_dir == VM_BASE else VM_BASE
728
+ name = next_instance_name(base_name, base_dir, other_dir=other_dir)
729
+ # Defend against pathological image refs that sanitize into something
730
+ # unsafe (e.g. leading-dot or embedded slash after transformation).
731
+ # The CLI validates explicit --name; this covers the auto-generated
732
+ # path so downstream hook templates / path joins never see a bad name.
733
+ validate_name(name, what="auto-generated name")
734
+ else:
735
+ # Scan both namespaces: for PVE-LXC/PVE-VM, container_id is the VMID
736
+ # while `name` lives in kento-name, so a bare (base_dir / name).exists()
737
+ # misses same-name duplicates across VMIDs. When --force is set, only
738
+ # scan the current namespace — the user has opted in to duplicate names
739
+ # across namespaces (bare shortcuts like `kento start foo` then require
740
+ # explicit `kento lxc start foo` / `kento vm start foo`).
741
+ if force:
742
+ conflict = _scan_namespace(name, base_dir) is not None
743
+ else:
744
+ conflict = (_scan_namespace(name, LXC_BASE) is not None
745
+ or _scan_namespace(name, VM_BASE) is not None)
746
+ if conflict:
747
+ raise InstanceExistsError(f"instance name already taken: {name}")
748
+
749
+ # Resolve container_id for directory paths
750
+ if mode == "pve":
751
+ from kento.pve import next_vmid, validate_vmid, generate_pve_config, write_pve_config
752
+ if vmid:
753
+ validate_vmid(vmid)
754
+ else:
755
+ vmid = next_vmid()
756
+ container_id = str(vmid)
757
+ logger.info("Mode: pve (VMID %s)", vmid)
758
+ elif mode == "pve-vm":
759
+ from kento.pve import next_vmid, validate_vmid, generate_qm_config, write_qm_config
760
+ from kento.vm_hook import write_vm_hook, write_snippets_wrapper
761
+ if vmid:
762
+ validate_vmid(vmid)
763
+ else:
764
+ vmid = next_vmid()
765
+ container_id = name # VM_BASE uses name, not VMID
766
+ logger.info("Mode: pve-vm (VMID %s)", vmid)
767
+ elif mode == "vm":
768
+ container_id = name
769
+ logger.info("Mode: vm")
770
+ else:
771
+ container_id = name
772
+ logger.info("Mode: lxc")
773
+
774
+ container_dir = base_dir / container_id
775
+
776
+ if container_dir.exists():
777
+ raise InstanceExistsError(f"instance already exists: {container_id}")
778
+
779
+ # Create container_dir inside the lock so a concurrent create() sees
780
+ # it on its own .exists() check above. Slower post-setup happens
781
+ # outside the lock; the try/undos below registers the rmtree cleanup
782
+ # as the very first undo so any later failure rolls back this mkdir.
783
+ (container_dir / "rootfs").mkdir(parents=True)
784
+
785
+ # F14: explicit --config-mode cloudinit without cloud-init in the image
786
+ # is a user error — the seed we'd write would never be consumed and the
787
+ # guest would boot unconfigured. Reject up front rather than warning and
788
+ # silently producing a broken instance. ``auto`` falls back to injection.
789
+ if config_mode == "cloudinit" and not has_cloudinit:
790
+ shutil.rmtree(container_dir, ignore_errors=True)
791
+ raise ValidationError(
792
+ f"--config-mode cloudinit requires cloud-init in the "
793
+ f"image, but none was detected in {image}.\n"
794
+ " Drop --config-mode to auto-detect, or use "
795
+ "--config-mode injection."
796
+ )
797
+
798
+ # Advisory (non-fatal): cloud images (Debian/Ubuntu cloud) lock root SSH
799
+ # login and expect a distro login user (e.g. ``debian``). Injecting keys
800
+ # for root on such an image is a footgun, so warn — but do NOT change
801
+ # behavior or exit. Applies regardless of config_mode: the root-login
802
+ # restriction affects both injection and cloudinit seeding.
803
+ if (ssh_key_contents is not None and ssh_key_user == "root"
804
+ and has_cloudinit):
805
+ logger.warning(
806
+ "injecting SSH keys for 'root' on a cloud-init image. Cloud images\n"
807
+ " usually disable root SSH login; if you can't connect, "
808
+ "recreate with\n"
809
+ " --ssh-key-user <user> (e.g. 'debian' for Debian cloud "
810
+ "images)."
811
+ )
812
+
813
+ # Accumulator of rollback actions for every side-effecting step past
814
+ # this point. On exception, each undo runs in LIFO order — see F4 in
815
+ # the edge-case audit for the original motivation. container-dir goes
816
+ # first so it's the last thing unwound (after image-hold / state-dir).
817
+ undos: list[tuple[str, object]] = [
818
+ ("container-dir",
819
+ lambda: shutil.rmtree(container_dir, ignore_errors=True)),
820
+ ]
821
+
822
+ try:
823
+ from kento.layers import create_image_hold, remove_image_hold
824
+ create_image_hold(image, name)
825
+ undos.append(("image-hold", lambda: remove_image_hold(name)))
826
+
827
+ # Compute state_dir — upper/work may be outside container_dir for sudo users
828
+ state_dir = upper_base(container_id, base_dir if mode in ("vm", "pve-vm") else None)
829
+
830
+ state_dir_existed_outside = state_dir != container_dir and state_dir.exists()
831
+ state_dir.mkdir(parents=True, exist_ok=True)
832
+ if state_dir != container_dir and not state_dir_existed_outside:
833
+ # Only schedule removal of a state_dir we just created (don't
834
+ # nuke a pre-existing directory that happened to share the path).
835
+ undos.append(("state-dir",
836
+ lambda: shutil.rmtree(state_dir, ignore_errors=True)))
837
+ (state_dir / "upper").mkdir(exist_ok=True)
838
+ (state_dir / "work").mkdir(exist_ok=True)
839
+
840
+ # Write image reference, layer paths, state dir, mode, and name
841
+ (container_dir / "kento-image").write_text(image + "\n")
842
+ (container_dir / "kento-layers").write_text(layers + "\n")
843
+ (container_dir / "kento-state").write_text(str(state_dir) + "\n")
844
+ (container_dir / "kento-mode").write_text(mode + "\n")
845
+ (container_dir / "kento-name").write_text(name + "\n")
846
+
847
+ # Persist the resolved network identity so a future `kento set`
848
+ # net-rewrite can faithfully re-emit network config without
849
+ # re-deriving the type/bridge. Common to all four modes — every
850
+ # instance has a resolved type (bridge/host/usermode/none). The
851
+ # bridge name only exists for bridge modes, so write kento-bridge
852
+ # only when present (consumers tolerate absence). Preserved verbatim
853
+ # across scrub (reset.py never deletes unknown kento-* files).
854
+ (container_dir / "kento-net-type").write_text(network["type"] + "\n")
855
+ if network["bridge"] is not None:
856
+ (container_dir / "kento-bridge").write_text(network["bridge"] + "\n")
857
+
858
+ # Persist pass-through flags (v1.2.0 Phase B). Consumed by:
859
+ # - vm.py start_vm() : appends kento-qemu-args to QEMU argv (B2)
860
+ # - pve.py write_*_config() : appends kento-pve-args lines (B3)
861
+ # - info.py --verbose : surfaces both (B4)
862
+ # Only create the file if flags were passed — consumers tolerate
863
+ # absence. Preserved verbatim across scrub.
864
+ if qemu_args:
865
+ (container_dir / "kento-qemu-args").write_text(
866
+ "\n".join(qemu_args) + "\n")
867
+ if pve_args:
868
+ (container_dir / "kento-pve-args").write_text(
869
+ "\n".join(pve_args) + "\n")
870
+ # --lxc-arg (E1b): raw lines into plain-LXC's native config. Written
871
+ # BEFORE generate_config() so it can read the file back and append
872
+ # the block verbatim after kento's own lines. Scope-guarded above to
873
+ # plain lxc only; preserved verbatim across scrub.
874
+ if lxc_args:
875
+ (container_dir / "kento-lxc-args").write_text(
876
+ "\n".join(lxc_args) + "\n")
877
+
878
+ # Write static IP config if requested
879
+ if ip or dns or searchdomain:
880
+ net_parts = []
881
+ if ip:
882
+ net_parts.append(f"ip={ip}")
883
+ if gateway:
884
+ net_parts.append(f"gateway={gateway}")
885
+ if dns:
886
+ net_parts.append(f"dns={dns}")
887
+ if searchdomain:
888
+ net_parts.append(f"searchdomain={searchdomain}")
889
+ (container_dir / "kento-net").write_text("\n".join(net_parts) + "\n")
890
+ if ip:
891
+ _inject_network_config(state_dir, ip, gateway, dns, searchdomain,
892
+ mode=mode)
893
+ elif dns or searchdomain:
894
+ resolved_dir = state_dir / "upper" / "etc" / "systemd" / "resolved.conf.d"
895
+ resolved_dir.mkdir(parents=True, exist_ok=True)
896
+ lines = ["[Resolve]"]
897
+ if dns:
898
+ lines.append(f"DNS={dns}")
899
+ if searchdomain:
900
+ lines.append(f"Domains={searchdomain}")
901
+ lines.append("")
902
+ (resolved_dir / "90-kento.conf").write_text("\n".join(lines))
903
+
904
+ # Write hostname into guest
905
+ _inject_hostname(state_dir, name)
906
+
907
+ # Write timezone config if requested
908
+ if timezone:
909
+ (container_dir / "kento-tz").write_text(timezone + "\n")
910
+ _inject_timezone(state_dir, timezone)
911
+
912
+ # Write environment variables if requested
913
+ if env:
914
+ (container_dir / "kento-env").write_text("\n".join(env) + "\n")
915
+ _inject_env(state_dir, env)
916
+
917
+ # Write SSH authorized_keys metadata if requested. Hook copies this into
918
+ # the guest's ~/.ssh/authorized_keys on every start (target user controlled
919
+ # by kento-ssh-user, defaulting to root).
920
+ if ssh_key_contents is not None:
921
+ (container_dir / "kento-authorized-keys").write_text(ssh_key_contents)
922
+ if ssh_key_user != "root":
923
+ (container_dir / "kento-ssh-user").write_text(ssh_key_user + "\n")
924
+
925
+ # Generate or copy SSH host keys
926
+ if ssh_host_keys:
927
+ _generate_ssh_host_keys(container_dir / "ssh-host-keys")
928
+ elif ssh_host_key_dir is not None:
929
+ _copy_ssh_host_keys(Path(ssh_host_key_dir), container_dir / "ssh-host-keys")
930
+
931
+ # Determine config mode (injection vs cloud-init). The
932
+ # --config-mode=cloudinit without detected cloud-init case is
933
+ # rejected earlier (F14); here we only choose between valid modes.
934
+ if config_mode == "auto":
935
+ if has_cloudinit:
936
+ effective_config_mode = "cloudinit"
937
+ else:
938
+ effective_config_mode = "injection"
939
+ else:
940
+ effective_config_mode = config_mode
941
+
942
+ # Write config mode metadata
943
+ (container_dir / "kento-config-mode").write_text(effective_config_mode + "\n")
944
+
945
+ # Generate cloud-init seed if in cloudinit mode
946
+ if effective_config_mode == "cloudinit":
947
+ host_key_dir = container_dir / "ssh-host-keys"
948
+ write_seed(
949
+ container_dir, name=name,
950
+ ip=ip, gateway=gateway, dns=dns, searchdomain=searchdomain,
951
+ timezone=timezone, env=env,
952
+ ssh_keys=ssh_key_contents, ssh_key_user=ssh_key_user,
953
+ ssh_host_key_dir=host_key_dir if host_key_dir.is_dir() else None,
954
+ )
955
+
956
+ if mode in ("vm", "pve-vm"):
957
+ # Resolve MAC address for VM modes: user override wins, otherwise
958
+ # auto-generate a stable deterministic MAC from the container name
959
+ # (plain VM) or VMID (PVE-VM). Writing the result to kento-mac means
960
+ # scrub/recreate keep the same MAC (external DHCP reservations work).
961
+ from kento.vm import generate_mac
962
+ if mac is None:
963
+ if mode == "pve-vm":
964
+ mac_value = generate_mac(str(vmid))
965
+ else:
966
+ mac_value = generate_mac(name)
967
+ else:
968
+ mac_value = mac
969
+ (container_dir / "kento-mac").write_text(mac_value + "\n")
970
+
971
+ # Resolve memory/cores: CLI > config file > hardcoded defaults
972
+ from kento.defaults import get_vm_defaults
973
+ vm_defaults = get_vm_defaults()
974
+ effective_memory = memory if memory is not None else vm_defaults["memory"]
975
+ effective_cores = cores if cores is not None else vm_defaults["cores"]
976
+ (container_dir / "kento-memory").write_text(str(effective_memory) + "\n")
977
+ (container_dir / "kento-cores").write_text(str(effective_cores) + "\n")
978
+ (container_dir / "kento-nesting").write_text(
979
+ "1\n" if nesting else "0\n")
980
+
981
+ # Write port mapping (usermode networking only).
982
+ # Hold kento_lock around allocate_port so two concurrent creates
983
+ # can't both pick the same host port (the allocator reads all
984
+ # existing kento-port files and bind-tests, but without a lock
985
+ # the scan and the write are a classic TOCTOU race).
986
+ if network["type"] == "usermode":
987
+ from kento.vm import allocate_port
988
+ if port is None:
989
+ with kento_lock():
990
+ host_port = allocate_port()
991
+ (container_dir / "kento-port").write_text(
992
+ f"{host_port}:22\n")
993
+ guest_port = 22
994
+ elif port == "auto":
995
+ with kento_lock():
996
+ host_port = allocate_port()
997
+ (container_dir / "kento-port").write_text(
998
+ f"{host_port}:22\n")
999
+ guest_port = 22
1000
+ else:
1001
+ host_port, guest_port = port.split(":")
1002
+ host_port, guest_port = int(host_port), int(guest_port)
1003
+ (container_dir / "kento-port").write_text(
1004
+ f"{host_port}:{guest_port}\n")
1005
+
1006
+ if mode == "pve-vm":
1007
+ # Generate VM hookscript + inject.sh (hookscript invokes inject.sh
1008
+ # in its pre-start phase after overlayfs mount, before virtiofsd).
1009
+ write_vm_hook(container_dir, layers, name, state_dir)
1010
+ write_inject(container_dir)
1011
+
1012
+ # Write snippets wrapper and get PVE reference
1013
+ hookscript_ref = write_snippets_wrapper(
1014
+ vmid, container_dir / "kento-hook",
1015
+ snippets_dir=_snippets_info[0],
1016
+ storage_name=_snippets_info[1],
1017
+ )
1018
+ from kento.vm_hook import delete_snippets_wrapper
1019
+ undos.append(("vm-snippets-wrapper",
1020
+ lambda v=vmid: delete_snippets_wrapper(v)))
1021
+
1022
+ # Write VMID reference
1023
+ (container_dir / "kento-vmid").write_text(str(vmid) + "\n")
1024
+
1025
+ # Generate and write QM config
1026
+ qm_conf = write_qm_config(
1027
+ vmid,
1028
+ generate_qm_config(
1029
+ name, vmid, container_dir,
1030
+ hookscript_ref=hookscript_ref,
1031
+ memory=effective_memory,
1032
+ cores=effective_cores,
1033
+ machine=vm_defaults["machine"],
1034
+ kvm=vm_defaults["kvm"],
1035
+ bridge=bridge,
1036
+ net_type=network.get("type"),
1037
+ mac=mac_value,
1038
+ ),
1039
+ )
1040
+ from kento.pve import delete_qm_config
1041
+ undos.append(("qm-config",
1042
+ lambda v=vmid: delete_qm_config(v)))
1043
+
1044
+ logger.info("\nVM created: %s", name)
1045
+ logger.info(" Image: %s", image)
1046
+ logger.info(" VMID: %s", vmid)
1047
+ if network["type"] == "usermode":
1048
+ logger.info(" Port: %s:%s", host_port, guest_port)
1049
+ elif network["type"] == "bridge":
1050
+ logger.info(" Bridge: %s", bridge)
1051
+ logger.info(" Config: %s", qm_conf)
1052
+ logger.info(" Nesting: %s", 'allowed' if nesting else 'disabled')
1053
+ logger.info(" Dir: %s", container_dir)
1054
+ else:
1055
+ # Plain VM mode (no PVE)
1056
+ write_inject(container_dir)
1057
+ logger.info("\nContainer created: %s", name)
1058
+ logger.info(" Image: %s", image)
1059
+ if network["type"] == "usermode":
1060
+ logger.info(" Port: %s:%s", host_port, guest_port)
1061
+ logger.info(" Nesting: %s", 'allowed' if nesting else 'disabled')
1062
+ logger.info(" Dir: %s", container_dir)
1063
+ else:
1064
+ # Port forwarding for LXC/PVE modes. Hold kento_lock across
1065
+ # allocate + write so concurrent creates don't collide on the
1066
+ # same free port (same race as the VM-mode branch above).
1067
+ if port is not None:
1068
+ from kento.vm import allocate_port
1069
+ if port == "auto":
1070
+ with kento_lock():
1071
+ host_port = allocate_port()
1072
+ (container_dir / "kento-port").write_text(
1073
+ f"{host_port}:22\n")
1074
+ guest_port = 22
1075
+ else:
1076
+ host_port, guest_port = port.split(":")
1077
+ host_port, guest_port = int(host_port), int(guest_port)
1078
+ (container_dir / "kento-port").write_text(
1079
+ f"{host_port}:{guest_port}\n")
1080
+
1081
+ # Persist memory/cores so the start-host hook can propagate the limit
1082
+ # into the inner ns cgroup on PVE-LXC (outer cgroup gets the ceiling
1083
+ # from PVE's `memory:`/`cpulimit:`, but processes live in ns/ and
1084
+ # read "max" without this).
1085
+ if memory is not None:
1086
+ (container_dir / "kento-memory").write_text(str(memory) + "\n")
1087
+ if cores is not None:
1088
+ (container_dir / "kento-cores").write_text(str(cores) + "\n")
1089
+ (container_dir / "kento-nesting").write_text(
1090
+ "1\n" if nesting else "0\n")
1091
+ if unprivileged:
1092
+ (container_dir / "kento-unprivileged").write_text("1\n")
1093
+
1094
+ # Generate hook (LXC/PVE only) + inject.sh (shared with VM/PVE-VM modes)
1095
+ write_hook(container_dir, layers, name, state_dir)
1096
+ write_inject(container_dir)
1097
+
1098
+ # Generate config
1099
+ if mode == "pve":
1100
+ hookscript_ref = None
1101
+ if _snippets_info is not None:
1102
+ from kento.lxc_hook import (write_lxc_snippets_wrapper,
1103
+ delete_lxc_snippets_wrapper)
1104
+ hookscript_ref = write_lxc_snippets_wrapper(
1105
+ vmid, container_dir / "kento-hook",
1106
+ snippets_dir=_snippets_info[0],
1107
+ storage_name=_snippets_info[1],
1108
+ )
1109
+ undos.append(("lxc-snippets-wrapper",
1110
+ lambda v=vmid: delete_lxc_snippets_wrapper(v)))
1111
+ pve_conf_text = generate_pve_config(
1112
+ name, vmid, container_dir, bridge=bridge,
1113
+ net_type=network.get("type"),
1114
+ nesting=nesting, ip=ip,
1115
+ gateway=gateway, nameserver=dns,
1116
+ searchdomain=searchdomain,
1117
+ timezone=timezone, env=env,
1118
+ port=port,
1119
+ memory=memory, cores=cores,
1120
+ hookscript_ref=hookscript_ref,
1121
+ unprivileged=unprivileged)
1122
+ # Record the effective idmap range for the unprivileged hook.
1123
+ # At PVE pre-start time the hook idmaps each lowerdir, but PVE
1124
+ # may not have populated lxc.idmap into the runtime config yet
1125
+ # (that timing is PVE-internal). Persisting the range here makes
1126
+ # the hook independent of that ordering. Follow PVE: honour a
1127
+ # custom `lxc.idmap = u 0 B C` line if present, else PVE's
1128
+ # unprivileged default (100000 65536). See _pve_idmap_range().
1129
+ if unprivileged:
1130
+ base, count = _pve_idmap_range(pve_conf_text)
1131
+ (container_dir / "kento-idmap-range").write_text(
1132
+ f"{base} {count}\n")
1133
+ pve_conf = write_pve_config(vmid, pve_conf_text)
1134
+ from kento.pve import delete_pve_config
1135
+ undos.append(("pve-config",
1136
+ lambda v=vmid: delete_pve_config(v)))
1137
+ config_path = str(pve_conf)
1138
+ else:
1139
+ (container_dir / "config").write_text(
1140
+ generate_config(name, container_dir, bridge=bridge,
1141
+ net_type=network.get("type"),
1142
+ nesting=nesting,
1143
+ ip=ip, gateway=gateway, env=env,
1144
+ port=port,
1145
+ memory=memory, cores=cores,
1146
+ unprivileged=unprivileged,
1147
+ mode=mode)
1148
+ )
1149
+ config_path = f"{container_dir}/config"
1150
+
1151
+ logger.info("\nContainer created: %s", name)
1152
+ logger.info(" Image: %s", image)
1153
+ logger.info(" Bridge: %s", bridge)
1154
+ if mode == "pve":
1155
+ logger.info(" VMID: %s", vmid)
1156
+ if port is not None:
1157
+ logger.info(" Port: %s:%s", host_port, guest_port)
1158
+ logger.info(" Nesting: %s", 'allowed' if nesting else 'disabled')
1159
+ logger.info(" Config: %s", config_path)
1160
+
1161
+ if start:
1162
+ logger.info("\nStarting...")
1163
+ # Register the stop undo BEFORE issuing start: if the start call
1164
+ # succeeds partially (container registers, then crashes), we still
1165
+ # want to attempt the stop on rollback.
1166
+ if mode == "pve-vm":
1167
+ undos.append(("qm-stop",
1168
+ lambda v=vmid: subprocess.run(
1169
+ ["qm", "stop", str(v)],
1170
+ capture_output=True, check=False)))
1171
+ _run_start_or_rollback(
1172
+ ["qm", "start", str(vmid)], name=name, scope="vm",
1173
+ )
1174
+ elif mode == "vm":
1175
+ from kento.vm import start_vm, stop_vm
1176
+ undos.append(("vm-stop",
1177
+ lambda d=container_dir:
1178
+ stop_vm(d, force=True)))
1179
+ start_vm(container_dir, name)
1180
+ elif mode == "pve":
1181
+ undos.append(("pct-stop",
1182
+ lambda v=vmid: subprocess.run(
1183
+ ["pct", "stop", str(v)],
1184
+ capture_output=True, check=False)))
1185
+ _run_start_or_rollback(
1186
+ ["pct", "start", str(vmid)], name=name, scope="lxc",
1187
+ )
1188
+ else:
1189
+ undos.append(("lxc-stop",
1190
+ lambda n=name: subprocess.run(
1191
+ ["lxc-stop", "-n", n],
1192
+ capture_output=True, check=False)))
1193
+ _run_start_or_rollback(
1194
+ ["lxc-start", "-n", name], name=name, scope="lxc",
1195
+ )
1196
+ logger.info(" Status: running")
1197
+ else:
1198
+ logger.info(" Status: stopped (use 'kento start %s' to boot)", name)
1199
+ except BaseException as exc:
1200
+ # Rollback every side-effect we successfully made before re-raising.
1201
+ # Use BaseException so KentoError/KeyboardInterrupt also trigger cleanup.
1202
+ logger.info("\nError during create: %s", exc)
1203
+ logger.info("Rolling back partial state...")
1204
+ _run_cleanup(undos)
1205
+ raise