@specific.dev/spectest 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +38 -0
- package/src/browser.ts +824 -0
- package/src/components/index.ts +32 -0
- package/src/components/k3s.ts +1324 -0
- package/src/components/postgres.ts +281 -0
- package/src/components/replayFake.ts +515 -0
- package/src/daemon.ts +3910 -0
- package/src/index.ts +1601 -0
- package/src/ingress.ts +288 -0
- package/src/inspect.ts +604 -0
- package/src/record-secrets.ts +41 -0
- package/src/recorder.ts +659 -0
- package/src/resolver.ts +351 -0
- package/src/terminal.ts +740 -0
- package/src/vendor/rrweb-plugin-console-record.umd.js +520 -0
- package/src/vendor/rrweb-record.min.js +5 -0
|
@@ -0,0 +1,1324 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
2
|
+
import { spawn as nodeSpawn } from "node:child_process";
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
5
|
+
import { readFile, unlink } from "node:fs/promises";
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
AppsV1Api,
|
|
9
|
+
CoreV1Api,
|
|
10
|
+
KubeConfig,
|
|
11
|
+
KubernetesObjectApi,
|
|
12
|
+
ResponseContext,
|
|
13
|
+
ServerConfiguration,
|
|
14
|
+
createConfiguration,
|
|
15
|
+
loadAllYaml,
|
|
16
|
+
type KubernetesObject,
|
|
17
|
+
type RequestContext,
|
|
18
|
+
} from "@kubernetes/client-node";
|
|
19
|
+
import { Observable } from "@kubernetes/client-node/dist/gen/rxjsStub.js";
|
|
20
|
+
|
|
21
|
+
import type { ServiceDefinition } from "../index.js";
|
|
22
|
+
import { dnsName, provides, SELF_SERVICE_TOKEN } from "../index.js";
|
|
23
|
+
import { readRaw, readTag, wrap } from "../inspect.js";
|
|
24
|
+
import type { Wrapped } from "../inspect.js";
|
|
25
|
+
import { recorderAnnotate, recorderRemove } from "../recorder.js";
|
|
26
|
+
|
|
27
|
+
export interface K3sOptions {
  /** Tag of the official `rancher/k3s` image to run. Defaults to `"v1.30.6-k3s1"`. */
  version?: string;
  /**
   * Additional arguments appended to the `k3s server` command line — for
   * example `--tls-san=...`, further `--disable=<addon>` flags, or custom
   * CIDRs.
   */
  extraArgs?: string[];
  /**
   * How long, in seconds, the readiness probe may take. A warm image is
   * ready within a few seconds, but the first cold start of an env (image
   * pull plus cluster bootstrap) can need 30–60s. Defaults to `120`.
   */
  readyTimeoutSecs?: number;
  /**
   * Whether to run an in-cluster OCI registry (CNCF `distribution` /
   * `registry:2`) that the cluster's own containerd trusts. It is the
   * hermetic substitute for a cloud registry (ECR/GCR/GHCR): a peer service
   * builds an image, pushes it over plain HTTP, references
   * `<cluster-key>.internal:5000/...` from a Deployment, and the kubelet
   * pulls it straight back. That makes a real
   * `build → push → deploy → pull` pipeline testable without an external
   * registry and without pre-baking images.
   *
   * **Push/pull address: `<cluster-key>.internal:5000`**, where
   * `<cluster-key>` is the key this service has in the `services` map (a
   * cluster declared at `services.cluster` is reachable at
   * `cluster.internal:5000`). This is the cluster service's own
   * unconditional `.internal` alias: it is peer-reachable and is never
   * clobbered by any `hostnames` you set, so it resolves the same way from
   * peer containers (push) and from the cluster's containerd (pull). Wire
   * it into your platform, e.g.
   * `env: { REGISTRY_URL: "cluster.internal:5000" }`. The registry speaks
   * plain HTTP, so the push client must be configured for an insecure
   * registry.
   *
   * Enabled by default. Pass `false` for clusters that only ever run public
   * images; doing so skips the extra pod.
   */
  registry?: boolean;
  /**
   * Domains routed into this cluster's ingress through **wildcard DNS**.
   * For every entry such as `"example.com"`, spectest-resolver answers any
   * `*.example.com` query with the cluster container's IP, and Traefik then
   * dispatches by Host header to the matching Ingress. A test can therefore
   * `kubectl apply` an Ingress for any host under the domain and reach it
   * right away, without pre-declaring each hostname in `hostnames`.
   *
   * ```ts
   * services: { k8s: k3s({ ingressDomains: ["example.com"] }) }
   * // a test then applies an Ingress for foo.example.com and fetches it.
   * ```
   *
   * One-off hosts that are not under a declared domain can still be
   * registered dynamically with `ctx.dnsName(host, { service: "k8s" })`.
   *
   * **TLS.** Declaring `ingressDomains` additionally makes those domains
   * reachable over **HTTPS**: Traefik gets a `:443` entrypoint and serves a
   * default certificate minted from the in-VM root CA, with a `*.<domain>`
   * SAN per declared domain. The test framework already trusts that CA
   * (Node `fetch`, `ctx.browser()`, Python, the system store), so
   * `ctx.fetch("https://foo.example.com")` completes a clean handshake with
   * no per-Ingress `spec.tls` and no `--insecure`. The certificate covers
   * only hosts **under** a declared domain; static `hostnames` outside of
   * one, and one-off `ctx.dnsName` hosts, stay HTTP-only.
   */
  ingressDomains?: string[];
}
|
|
93
|
+
|
|
94
|
+
/** TCP port on which the in-cluster registry listens; traffic is plain HTTP. */
const K3S_REGISTRY_PORT = 5_000;
|
|
96
|
+
|
|
97
|
+
/**
 * Type-level view of a `@kubernetes/client-node` API class after
 * `withTagging` has wrapped it: every method that returns a `Promise<R>`
 * is retyped to return `Promise<Wrapped<R>>` ({@link Wrapped}), i.e. its
 * resolved value comes back **inspect-wrapped**.
 *
 * Argument types are left exactly as the library declares them. Because
 * only the result type changes, `expect(pod.status.phase)` links to the
 * API call without a cast, and callers `.unwrap()` a value before treating
 * it as raw data. Members that are not promise-returning methods keep their
 * original type. This is the Kubernetes counterpart of
 * {@link RecordingSqlClient} on the postgres side.
 */
type Tagged<T> = {
  [Member in keyof T]: T[Member] extends (
    ...params: infer Params
  ) => Promise<infer Resolved>
    ? (...params: Params) => Promise<Wrapped<Resolved>>
    : T[Member];
};
|
|
112
|
+
|
|
113
|
+
/**
 * Ready-made `@kubernetes/client-node` API clients that all use the same
 * recording HTTP transport. Each method call shows up on the test event
 * log as an HTTP event next to ordinary `fetch` calls, and the value it
 * resolves to is inspect-wrapped (see {@link Tagged}) so that assertions on
 * it remain linked to the call.
 */
export interface K3sClient {
  /** Core (`v1`) API client. */
  core: Tagged<CoreV1Api>;
  /** `apps/v1` API client. */
  apps: Tagged<AppsV1Api>;
  /**
   * Generic object API: `create()`, `read()`, `patch()` and `delete()` for
   * any Kubernetes resource, custom resources included.
   */
  objects: Tagged<KubernetesObjectApi>;
}
|
|
126
|
+
|
|
127
|
+
/** What a `k3s(...)` service makes available under `ctx.svc.<name>`. */
export interface K3sHelpers {
  /**
   * A fully loaded `KubeConfig` whose cluster server URL has been rewritten
   * to `https://<service-name>.internal:6443` — the DNS name automatically
   * assigned to this service on `spectest-net`.
   *
   * TLS verification is disabled: Bun's fetch does not honor the CA option
   * of an `https.Agent` (the client certificate from the kubeconfig is
   * still sent for authentication). As a consequence the server
   * certificate's SAN list does not have to contain the `.internal` name.
   */
  kubeconfig: KubeConfig;
  /** API clients built ahead of time. */
  client: K3sClient;
  /**
   * Applies a YAML manifest, which may contain several documents. Every
   * parsed document is created through `KubernetesObjectApi.create`.
   *
   * Resolves to the API server's response objects in the same order as the
   * input documents. Each element is inspect-wrapped while the array that
   * holds them is a plain array, so `expect(created[0]!.metadata.uid)`
   * links back to the create call that produced it.
   */
  apply: (manifest: string) => Promise<Wrapped<KubernetesObject>[]>;
}
|
|
149
|
+
|
|
150
|
+
/** Captured outcome of a child process that has finished running. */
interface DockerExecResult {
  /** Everything the process wrote to stdout, decoded as UTF-8. */
  stdout: string;
  /**
   * Everything the process wrote to stderr, decoded as UTF-8. When the
   * process was killed because it exceeded its timeout, a one-line note
   * saying so is appended.
   */
  stderr: string;
  /** Exit code, or `-1` when the process ended without one (killed by a signal). */
  code: number;
}

/**
 * Spawn `cmd` with `args`, collect its stdout/stderr, and resolve once the
 * process has closed. stdin is not connected.
 *
 * A non-zero exit is NOT a rejection — inspect `code` on the result. The
 * promise rejects only when the process cannot be spawned at all (the
 * child's `error` event).
 *
 * @param cmd       Executable to run.
 * @param args      Arguments passed to the executable (no shell involved).
 * @param timeoutMs Kill the process with `SIGKILL` after this many
 *                  milliseconds. Defaults to 30 seconds.
 * @returns The captured output and exit code. After a timeout kill the
 *          result has `code: -1` and `stderr` ends with a note naming the
 *          command and the timeout, so callers that surface `stderr` in an
 *          error message do not print an empty diagnostic.
 */
function runProcess(
  cmd: string,
  args: string[],
  timeoutMs = 30_000,
): Promise<DockerExecResult> {
  return new Promise((resolve, reject) => {
    const child = nodeSpawn(cmd, args, { stdio: ["ignore", "pipe", "pipe"] });
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];
    child.stdout!.on("data", (chunk: Buffer) => stdoutChunks.push(chunk));
    child.stderr!.on("data", (chunk: Buffer) => stderrChunks.push(chunk));

    // Remember that the kill came from us so the result can say so.
    let timedOut = false;
    const killTimer = setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
    }, timeoutMs);

    child.on("error", (e) => {
      clearTimeout(killTimer);
      reject(e);
    });
    child.on("close", (code) => {
      clearTimeout(killTimer);
      let stderr = Buffer.concat(stderrChunks).toString("utf8");
      // `code === null` means the process died from a signal; only then is
      // the timeout the actual cause (it may have exited on its own in the
      // same tick the timer fired).
      if (timedOut && code === null) {
        const separator = stderr.length > 0 && !stderr.endsWith("\n") ? "\n" : "";
        stderr += `${separator}${cmd} timed out after ${timeoutMs}ms and was killed (SIGKILL)`;
      }
      resolve({
        stdout: Buffer.concat(stdoutChunks).toString("utf8"),
        stderr,
        code: code ?? -1,
      });
    });
  });
}
|
|
182
|
+
|
|
183
|
+
/**
 * Run the `docker` CLI with the given arguments. Thin wrapper that
 * forwards to `runProcess` with `"docker"` as the command.
 *
 * @param args      Arguments for the `docker` command.
 * @param timeoutMs Timeout forwarded to `runProcess`; defaults to 30 seconds.
 * @returns Whatever `runProcess` resolves to for the invocation.
 */
function runDocker(
  args: string[],
  timeoutMs = 30_000,
): Promise<DockerExecResult> {
  const dockerCommand = "docker";
  return runProcess(dockerCommand, args, timeoutMs);
}
|
|
189
|
+
|
|
190
|
+
// Location of the in-VM root CA, which is generated once into the base
// snapshot (see control-plane `base.rs`). Every place the test framework
// runs trusts it — Node (`NODE_EXTRA_CA_CERTS`), Chromium (NSS DB), Python,
// the system store — so a leaf certificate signed by it lets `ctx.fetch` and
// `ctx.browser()` complete a clean HTTPS handshake. The k3s `setup` hook
// executes inside the daemon's Bun process (root in the VM), which is why it
// can read the CA key and mint certificates itself.
//
// The paths are deliberately declared again here instead of being imported
// from the daemon: the SDK is shipped to end users and must not depend on
// daemon internals. Both can be overridden through environment variables.
const CA_PATH = process.env.SPECTEST_CA_PATH ?? "/etc/spectest/ca.crt";
const CA_KEY_PATH = process.env.SPECTEST_CA_KEY_PATH ?? "/etc/spectest/ca.key";

/** Reports whether both the CA certificate file and the CA key file exist. */
function caPresent(): boolean {
  return [CA_PATH, CA_KEY_PATH].every((file) => existsSync(file));
}
|
|
205
|
+
|
|
206
|
+
/**
 * Mint a leaf certificate from the in-VM root CA covering `hostnames`
 * (used here as the SANs of the cluster's wildcard ingress domains).
 * Self-contained openssl shell-out — deliberately not shared with the
 * daemon's own cert minting to keep the distributed SDK decoupled from
 * daemon code.
 *
 * openssl writes the key and certificate to two uniquely named files under
 * `/tmp`; both are read back and then deleted. Deletion runs in a `finally`
 * that also covers the openssl invocation itself, so a failed or partially
 * completed run does not leave a private key behind.
 *
 * @param hostnames DNS names to place in the certificate's subjectAltName.
 *                  Must contain at least one entry.
 * @returns The certificate and its private key as PEM strings.
 * @throws Error when `hostnames` is empty, when openssl exits non-zero (the
 *         message carries its stderr/stdout), or when the generated files
 *         cannot be read. Spawn failures from `runProcess` propagate as-is.
 */
async function issueIngressCert(
  hostnames: string[],
): Promise<{ cert: string; key: string }> {
  // An empty SAN list would only surface as an opaque openssl parse error.
  if (hostnames.length === 0) {
    throw new Error(
      "k3s ingress cert minting failed: no hostnames supplied for the certificate SANs",
    );
  }

  const id = `spectest-k3s-ingress-${randomUUID().slice(0, 8)}`;
  const keyPath = `/tmp/${id}.key`;
  const crtPath = `/tmp/${id}.crt`;
  const sans = hostnames.map((h) => `DNS:${h}`).join(",");

  try {
    const r = await runProcess(
      "openssl",
      [
        "req",
        "-newkey",
        "rsa:2048",
        "-nodes",
        "-keyout",
        keyPath,
        "-out",
        crtPath,
        "-x509",
        "-CA",
        CA_PATH,
        "-CAkey",
        CA_KEY_PATH,
        "-days",
        "3650",
        "-subj",
        "/CN=spectest-k3s-ingress",
        "-addext",
        `subjectAltName=${sans}`,
        "-addext",
        "basicConstraints=CA:FALSE",
        "-addext",
        "extendedKeyUsage=serverAuth",
        "-addext",
        "keyUsage=digitalSignature,keyEncipherment",
      ],
      30_000,
    );
    if (r.code !== 0) {
      throw new Error(
        `k3s ingress cert minting failed (openssl rc=${r.code}): ${
          r.stderr.trim() || r.stdout.trim()
        }`,
      );
    }
    const [cert, key] = await Promise.all([
      readFile(crtPath, "utf8"),
      readFile(keyPath, "utf8"),
    ]);
    return { cert, key };
  } finally {
    // Best-effort cleanup: either file may not exist if openssl failed early.
    await Promise.all([
      unlink(keyPath).catch(() => {}),
      unlink(crtPath).catch(() => {}),
    ]);
  }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Per-invocation holder for the inspector `sourceSeq` of the most recent
 * HTTP call made during one API-method invocation. `doFetch` fills it in
 * after each request; the `withTagging` proxy reads it when the method's
 * promise resolves, so the parsed object handed back to the caller carries
 * the back-reference.
 */
interface CallSlot {
  seq?: number;
}

/**
 * Async-scoped storage for the active {@link CallSlot}. AsyncLocalStorage
 * is the right scope for this: each call to an Api method runs with its own
 * slot, so concurrent calls do not race on a shared value.
 */
const callContext = new AsyncLocalStorage<CallSlot>();
|
|
282
|
+
|
|
283
|
+
/**
 * HTTP transport for `@kubernetes/client-node` that sends every request
 * through `globalThis.fetch` (via `doFetch`). There are two reasons for
 * going through fetch:
 *
 * 1. The daemon's per-test `installFetchWrapper` already records each
 *    `globalThis.fetch` call as an HTTP event, so k8s API calls are
 *    recorded for free with no library-specific wiring.
 * 2. Bun's fetch expects Bun-shaped TLS options (`tls: { ... }`) for mTLS.
 *    The `agent` parameter of node-fetch, which the library's default
 *    transport relies on, is silently ignored under Bun.
 *
 * The library's auth flow is still honored:
 * `KubeConfig.applySecurityAuthentication` sets an Agent on the request,
 * and the cert/key are lifted off that agent and passed along through the
 * Bun-shaped option.
 */
class FetchHttpLibrary {
  /** Starts the request and exposes the pending response as an `Observable`. */
  send(request: RequestContext): Observable<ResponseContext> {
    return new Observable(doFetch(request));
  }
}
|
|
300
|
+
|
|
301
|
+
/**
 * Parsed Kubernetes semantics of a single API request, derived purely
 * from the HTTP method + request path. Fed to `recorderAnnotate` to
 * reclassify the generic `http` event the fetch wrapper recorded into a
 * Kubernetes-specific `kube` event.
 */
interface KubeRequestMeta {
  /** Kubernetes verb (`get`, `list`, `watch`, `create`, …). */
  verb: string;
  /** API group; the empty string for the core group. */
  group?: string;
  /** API version segment, e.g. `v1`. */
  apiVersion?: string;
  /** Plural resource segment, e.g. `pods`. */
  resource?: string;
  /** Subresource segment following the name, e.g. `status` or `log`. */
  subresource?: string;
  /** Object name, when the path addresses a single object. */
  name?: string;
  /** Namespace, when the path is namespaced. */
  namespace?: string;
}

/**
 * Map a Kubernetes API request to its `(verb, group/version, resource,
 * namespace, name, subresource)` from the URL + HTTP method alone.
 *
 * Path grammar (the two API roots):
 *   - core group:   `/api/<version>/...`
 *   - named group:  `/apis/<group>/<version>/...`
 * after which the remainder is either a cluster-scoped resource
 * (`nodes`, `namespaces`, …) or `namespaces/<ns>/<resource>...`. The
 * trailing `<resource>[/<name>[/<subresource>]]` shape plus the method
 * (and `?watch=`) yields the verb.
 *
 * The Namespace object's own subresources —
 * `/api/<version>/namespaces/<name>/status` and `.../finalize` — have the
 * same three-segment shape as a namespaced collection, so they are
 * recognised explicitly and reported as `resource: "namespaces"` with the
 * matching `subresource`, not as a resource called `status`/`finalize`.
 *
 * @param method HTTP method of the request (any letter case).
 * @param rawUrl Absolute URL, or a path with optional query string.
 * @returns The parsed metadata, or `null` for non-resource paths —
 *          discovery (`/api`, `/apis`, `/apis/<group>/<version>`),
 *          `/version`, `/healthz`, `/openapi/...` — so those stay rendered
 *          as plain `http`.
 */
function describeKubeRequest(
  method: string,
  rawUrl: string,
): KubeRequestMeta | null {
  let path: string;
  let query: URLSearchParams;
  try {
    const u = new URL(rawUrl);
    path = u.pathname;
    query = u.searchParams;
  } catch {
    // Not an absolute URL: split path and query by hand.
    const q = rawUrl.indexOf("?");
    path = q === -1 ? rawUrl : rawUrl.slice(0, q);
    query = new URLSearchParams(q === -1 ? "" : rawUrl.slice(q + 1));
  }

  const segs = path.split("/").filter((s) => s.length > 0);
  if (segs.length === 0) return null;

  let group: string | undefined;
  let apiVersion: string | undefined;
  let rest: string[];
  if (segs[0] === "api") {
    group = "";
    apiVersion = segs[1];
    rest = segs.slice(2);
  } else if (segs[0] === "apis") {
    group = segs[1];
    apiVersion = segs[2];
    rest = segs.slice(3);
  } else {
    return null; // /version, /healthz, /openapi, …
  }
  if (!apiVersion) return null; // discovery root (/api, /apis/<group>)

  // `namespaces/<name>/status` and `namespaces/<name>/finalize` address the
  // Namespace object itself. Restricted to the core group, where Namespace
  // lives and no namespaced resource carries either of those names.
  const isNamespaceSubresource =
    group === "" &&
    rest[0] === "namespaces" &&
    rest.length === 3 &&
    (rest[2] === "status" || rest[2] === "finalize");

  // `namespaces/<ns>/<resource>...` is namespaced; everything else
  // (including `namespaces` and `namespaces/<name>` themselves, and
  // cluster-scoped resources like `nodes`) is taken as-is.
  let namespace: string | undefined;
  let resourcePath = rest;
  if (rest[0] === "namespaces" && rest.length >= 3 && !isNamespaceSubresource) {
    namespace = rest[1];
    resourcePath = rest.slice(2);
  }
  if (resourcePath.length === 0) return null; // APIResourceList discovery

  const resource = resourcePath[0];
  const name = resourcePath.length >= 2 ? resourcePath[1] : undefined;
  const subresource = resourcePath.length >= 3 ? resourcePath[2] : undefined;

  const watchParam = query.get("watch");
  const watch = watchParam === "true" || watchParam === "1";
  const hasName = name !== undefined;
  let verb: string;
  switch (method.toUpperCase()) {
    case "GET":
    case "HEAD":
      verb = hasName ? "get" : watch ? "watch" : "list";
      break;
    case "POST":
      verb = "create";
      break;
    case "PUT":
      verb = "update";
      break;
    case "PATCH":
      verb = "patch";
      break;
    case "DELETE":
      verb = hasName ? "delete" : "deletecollection";
      break;
    default:
      // Unrecognised method: report it verbatim, lower-cased.
      verb = method.toLowerCase();
  }

  return { verb, group, apiVersion, resource, subresource, name, namespace };
}
|
|
410
|
+
|
|
411
|
+
/**
 * Tells whether a URL targets a Kubernetes API *discovery* endpoint — one
 * of the version/group/resource enumeration paths (`/api`,
 * `/api/<version>`, `/apis`, `/apis/<group>`, `/apis/<group>/<version>`)
 * that the dynamic client requests in order to resolve a kind to its
 * resource path.
 *
 * Such requests carry no resource operation (`describeKubeRequest` returns
 * null for them) and `doFetch` retracts their events from the timeline.
 * Non-resource paths that are NOT discovery (`/healthz`, `/version`,
 * `/openapi`, …) are intentionally not matched, so they stay as `http`.
 *
 * @param rawUrl Absolute URL, or a path with optional query string.
 * @returns `true` only for the discovery paths listed above.
 */
function isKubeDiscoveryPath(rawUrl: string): boolean {
  let pathname: string;
  try {
    pathname = new URL(rawUrl).pathname;
  } catch {
    // Not an absolute URL: drop the query string by hand.
    const queryStart = rawUrl.indexOf("?");
    pathname = queryStart === -1 ? rawUrl : rawUrl.slice(0, queryStart);
  }

  const parts = pathname.split("/").filter((part) => part !== "");
  // `/api` + `/api/<version>`; `/apis` + `/apis/<group>` +
  // `/apis/<group>/<version>`. Anything longer carries a resource segment
  // and is handled as `kube`. An empty path falls into `default`.
  switch (parts[0]) {
    case "api":
      return parts.length <= 2;
    case "apis":
      return parts.length <= 3;
    default:
      return false;
  }
}
|
|
437
|
+
|
|
438
|
+
/**
 * Execute one `@kubernetes/client-node` request through `fetch` and adapt
 * the result back into the library's `ResponseContext`.
 *
 * Besides the transfer itself this (1) forwards the client cert/key from
 * the request's agent to Bun's `tls` option, (2) stores the inspector
 * `sourceSeq` of the response in the current `callContext` slot, and
 * (3) reclassifies or retracts the recorded event depending on what kind of
 * Kubernetes path was requested.
 *
 * @param request The library's description of the outgoing request.
 * @returns A `ResponseContext` backed by the fully buffered response body.
 */
async function doFetch(request: RequestContext): Promise<ResponseContext> {
  const targetUrl = request.getUrl();
  const httpMethod = String(request.getHttpMethod());
  const payload = request.getBody();

  // Copy the headers into a plain string→string record.
  const outgoingHeaders: Record<string, string> = {};
  const headerSource = request.getHeaders();
  for (const headerName of Object.keys(headerSource)) {
    outgoingHeaders[headerName] = String(headerSource[headerName]);
  }

  // The library's auth flow attaches the client cert/key to an https.Agent
  // on the request. Lift them off so they can be given to Bun's fetch via
  // its `tls` option.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const httpsAgent = request.getAgent() as any;
  const agentSettings = httpsAgent?.options ?? {};
  const bunTls: Record<string, unknown> = { rejectUnauthorized: false };
  if (agentSettings.cert) {
    bunTls.cert = agentSettings.cert;
  }
  if (agentSettings.key) {
    bunTls.key = agentSettings.key;
  }

  const fetchInit = {
    method: httpMethod,
    headers: outgoingHeaders,
    body: payload as BodyInit | undefined,
    signal: request.getSignal(),
    // Bun-specific TLS shape; under Node this option is ignored.
    tls: bunTls,
  } as RequestInit;
  const wrapped = await fetch(targetUrl, fetchInit);

  // Read the inspector tag that the daemon's fetch wrapper installed on the
  // Response BEFORE unwrapping it. The seq travels back through
  // AsyncLocalStorage so the API method's eventual return value can pick it
  // up again; without that, `core.listNode() → expect(result)` would record
  // assertions that have no back-reference.
  const tag = readTag(wrapped);
  const activeSlot = callContext.getStore();
  if (activeSlot && tag) {
    activeSlot.seq = tag.sourceSeq;
  }

  // Turn the `http` event the fetch wrapper just recorded into a
  // Kubernetes-specific `kube` event (verb/resource/namespace/name) so the
  // timeline reads `list pods · default` instead of the raw API URL. A tag
  // exists only while recording is active for this call, which makes this a
  // no-op outside instrumented test runs.
  if (tag && tag.sourceSeq !== undefined) {
    const kubeMeta = describeKubeRequest(httpMethod, targetUrl);
    if (kubeMeta) {
      recorderAnnotate(tag.sourceSeq, { kind: "kube", ...kubeMeta });
    } else if (isKubeDiscoveryPath(targetUrl)) {
      // The dynamic client (`objects`, KubernetesObjectApi) cannot know a
      // kind's resource path up front, so ahead of the real request it GETs
      // the group's resource list (`/apis/<group>/<version>` →
      // APIResourceList) to map e.g. Ingress → `ingresses`/namespaced, and
      // then caches it (apiVersionResourceCache). That discovery GET is
      // library plumbing the test author never wrote, and since the cache
      // is per-daemon-process it appears non-deterministically across forks
      // (the first access pays for it; `dependsOn` children that inherit
      // the warm cache do not). Retract it instead of leaving a bare `http`
      // row — the real list/read that follows is recorded and reclassified
      // as usual.
      recorderRemove(tag.sourceSeq);
    }
    // Any other non-resource path (/healthz, /version, /openapi, …) is left
    // alone and stays rendered as plain `http`.
  }

  // The daemon's fetch wrapper exposes `Response.status` and similar
  // primitives as carrier objects (so that test assertions fold under the
  // originating HTTP event). kubernetes/client-node calls
  // `httpStatusCode.toString()`, which on a carrier would yield
  // "[object Object]" and send the status-code dispatch into
  // "Unknown API Status Code!". Hence: work on the raw Response.
  const response =
    (wrapped as { unwrap?: () => Response }).unwrap?.() ?? wrapped;

  const incomingHeaders: Record<string, string> = {};
  response.headers.forEach((headerValue, headerName) => {
    incomingHeaders[headerName] = headerValue;
  });

  // Buffer the body once; both accessors below serve from that buffer.
  const bodyBytes = Buffer.from(await response.arrayBuffer());
  return new ResponseContext(response.status, incomingHeaders, {
    text: async () => bodyBytes.toString("utf8"),
    binary: async () => bodyBytes,
  });
}
|
|
521
|
+
|
|
522
|
+
/**
 * Recursively strip the inspector's carrier/proxy wrappers from a
 * value. Needed for arguments flowing into the kubernetes/client-node
 * API methods — if a wrapped pod's `metadata.name` (a primitive-carrier
 * object) reaches a URL template, the lib stringifies it to
 * `"[object Object]"` and the request 404s.
 *
 * Arrays are mapped element by element; other objects are rebuilt as plain
 * objects from their own enumerable properties. `Date` instances and binary
 * values (`ArrayBuffer`, typed arrays, `Buffer`) are returned untouched:
 * they have no own enumerable state worth copying, so rebuilding them would
 * turn a timestamp such as `metadata.creationTimestamp` into `{}` and break
 * an object that was read from the API and is passed back into another
 * call.
 *
 * NOTE(review): relies on `readRaw` returning the input itself for values
 * that are not wrapped — confirm against `inspect.ts`.
 *
 * @param value Any value, wrapped or not, at any nesting depth.
 * @returns A copy of the structure with every wrapper removed.
 */
function deepUnwrap(value: unknown): unknown {
  if (value === null || value === undefined) return value;

  // Peel the wrapper (if any) first, then process what was inside it.
  const raw = readRaw(value);
  if (raw !== value) return deepUnwrap(raw);

  if (typeof value !== "object") return value;
  if (Array.isArray(value)) return value.map((item) => deepUnwrap(item));

  // Their content is not reachable through Object.entries(); copying
  // property-by-property would silently produce an empty or mangled object.
  if (
    value instanceof Date ||
    value instanceof ArrayBuffer ||
    ArrayBuffer.isView(value)
  ) {
    return value;
  }

  const out: Record<string, unknown> = {};
  for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
    out[k] = deepUnwrap(v);
  }
  return out;
}
|
|
541
|
+
|
|
542
|
+
/**
 * Proxies a `@kubernetes/client-node` Api instance so that every method
 * call executes inside its own AsyncLocalStorage slot. `doFetch` records
 * the HTTP event's `sourceSeq` in that slot; once the library has parsed
 * the response, the seq is re-attached to the object that is returned.
 * Assertions such as `expect(result.items[0].status…)` then fold under that
 * HTTP event in the test event log, just as `expect(res.status)` does for
 * ordinary `fetch` calls.
 *
 * Arguments are deep-unwrapped before they reach the library, which allows
 * values taken from an earlier API response (still carrying inspector
 * wrappers) to be fed directly into another call.
 *
 * Properties that are not functions are returned as they are, untagged.
 *
 * @param api The Api instance to wrap.
 * @returns A proxy over `api` whose method results are inspect-wrapped.
 */
function withTagging<T extends object>(api: T): Tagged<T> {
  const handler: ProxyHandler<T> = {
    get(target, prop, receiver) {
      const member = Reflect.get(target, prop, receiver);
      if (typeof member !== "function") {
        return member;
      }
      // The original method stays bound to `target`, which keeps the
      // library's internal `this.configuration` lookups working.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const boundMethod = (member as any).bind(target);

      return (...callArgs: unknown[]): unknown => {
        const rawArgs = callArgs.map(deepUnwrap);
        const slot: CallSlot = {};
        const outcome = callContext.run(slot, () => boundMethod(...rawArgs));

        // The result is wrapped in every case. `slot.seq` stays undefined
        // when nothing was recorded (setup/eval, no active recorder), yet
        // the declared type is a wrapped one, so the runtime value has to
        // be wrapped as well — merely without a provenance link. This keeps
        // `.unwrap()` usable in every context.
        const isThenable =
          outcome && typeof (outcome as Promise<unknown>).then === "function";
        return isThenable
          ? (outcome as Promise<unknown>).then((resolved) =>
              wrap(resolved, slot.seq),
            )
          : wrap(outcome, slot.seq);
      };
    },
  };
  return new Proxy(api, handler) as unknown as Tagged<T>;
}
|
|
584
|
+
|
|
585
|
+
/**
 * Image reference of the Traefik build installed into the cluster. The
 * first cluster boot pulls it through the host `zot` mirror
 * (`docker.io` → cache) that `registries.yaml` configures; after that it
 * is part of the warm-template snapshot, so warm starts never pull.
 */
const TRAEFIK_IMAGE = "rancher/mirrored-library-traefik:3.3.2";
|
|
592
|
+
|
|
593
|
+
/**
 * In-cluster resource names used for the CA-signed default ingress
 * certificate; both resources are created in `setupK3sCluster` when TLS is
 * enabled.
 */

/** Secret that stores the leaf certificate together with its key. */
const TRAEFIK_TLS_SECRET = "traefik-default-tls";

/**
 * ConfigMap that stores the Traefik file-provider snippet pointing the
 * `default` TLS store at the certificate.
 */
const TRAEFIK_DYNAMIC_CONFIGMAP = "traefik-dynamic";
|
|
601
|
+
|
|
602
|
+
/**
 * Traefik file-provider dynamic config: make the in-VM-CA leaf the
 * `default` store certificate, so every router on the `websecure`
 * entrypoint (which we force to TLS) serves it with no per-Ingress
 * `spec.tls` needed.
 *
 * The document is YAML, so nesting is significant:
 * `tls → stores → default → defaultCertificate → { certFile, keyFile }`.
 * It is assembled from explicit lines so the indentation cannot be lost to
 * reformatting. The file paths are the mount points of the certificate
 * inside the Traefik container.
 */
const TRAEFIK_DYNAMIC_TLS = [
  "tls:",
  "  stores:",
  "    default:",
  "      defaultCertificate:",
  "        certFile: /certs/tls.crt",
  "        keyFile: /certs/tls.key",
  "", // trailing newline
].join("\n");
|
|
615
|
+
|
|
616
|
+
/**
 * Traefik manifest applied during setup(). `hostNetwork: true` puts
 * Traefik in the k3s container's netns, so it binds the container's :80
 * (and, with `tls`, :443) directly — no CNI portmap involved (that path
 * still trips on the kernel's missing xt_comment match).
 *
 * When `tls` is set we add a `websecure` :443 entrypoint with TLS forced
 * on (served from the `default` store, i.e. the in-VM-CA leaf mounted
 * from the `traefik-default-tls` Secret via the file provider). HTTPS
 * then works for any routed host under the cluster's `ingressDomains`
 * with zero per-Ingress config; the :80 `web` entrypoint is unchanged.
 *
 * @param tls - Whether to add the `websecure` entrypoint, the file
 *   provider, and the Secret / ConfigMap volume mounts.
 * @returns A multi-document YAML string: ServiceAccount, ClusterRole,
 *   ClusterRoleBinding, IngressClass and Deployment, in that order.
 */
function buildTraefikManifest(tls: boolean): string {
  // Each fragment below is pre-indented for the position it is spliced
  // into inside the Deployment's container spec; YAML nesting depends on it.
  const args = [
    "            - --entrypoints.web.address=:80",
    ...(tls
      ? [
          "            - --entrypoints.websecure.address=:443",
          "            - --entrypoints.websecure.http.tls=true",
        ]
      : []),
    "            - --providers.kubernetesingress=true",
    "            - --providers.kubernetesingress.ingressclass=traefik",
    ...(tls
      ? [
          "            - --providers.file.directory=/dynamic",
          "            - --providers.file.watch=true",
        ]
      : []),
    "            - --log.level=INFO",
  ].join("\n");
  const ports = [
    "            - name: web",
    "              containerPort: 80",
    ...(tls
      ? ["            - name: websecure", "              containerPort: 443"]
      : []),
  ].join("\n");
  // Container-level mounts (sibling of `ports:`); empty without TLS.
  const volumeMounts = tls
    ? `
          volumeMounts:
            - name: default-cert
              mountPath: /certs
              readOnly: true
            - name: dynamic
              mountPath: /dynamic
              readOnly: true`
    : "";
  // Pod-level volumes (sibling of `containers:`); empty without TLS.
  const volumes = tls
    ? `
      volumes:
        - name: default-cert
          secret:
            secretName: ${TRAEFIK_TLS_SECRET}
        - name: dynamic
          configMap:
            name: ${TRAEFIK_DYNAMIC_CONFIGMAP}`
    : "";
  return `apiVersion: v1
kind: ServiceAccount
metadata:
  name: traefik
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: traefik
rules:
  - apiGroups: [""]
    resources: ["services", "endpoints", "secrets", "nodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["discovery.k8s.io"]
    resources: ["endpointslices"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["networking.k8s.io"]
    resources: ["ingresses", "ingressclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["networking.k8s.io"]
    resources: ["ingresses/status"]
    verbs: ["update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: traefik
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: traefik
subjects:
  - kind: ServiceAccount
    name: traefik
    namespace: kube-system
---
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  name: traefik
  annotations:
    ingressclass.kubernetes.io/is-default-class: "true"
spec:
  controller: traefik.io/ingress-controller
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: traefik
  namespace: kube-system
  labels:
    app: traefik
spec:
  replicas: 1
  selector:
    matchLabels:
      app: traefik
  template:
    metadata:
      labels:
        app: traefik
    spec:
      serviceAccountName: traefik
      hostNetwork: true
      dnsPolicy: Default
      tolerations:
        - operator: Exists
      containers:
        - name: traefik
          image: ${TRAEFIK_IMAGE}
          imagePullPolicy: IfNotPresent
          args:
${args}
          ports:
${ports}${volumeMounts}${volumes}
`;
}
|
|
752
|
+
|
|
753
|
+
/**
 * Host-side `zot` pull-through cache layout (local Firecracker provider
 * only). One zot instance per upstream registry, all bound to the
 * `spectest-br0` gateway `10.42.0.1` on the ports below — **kept in sync
 * with `scripts/install-zot.sh`**. We mirror the cluster's containerd
 * through these so every image pull reuses the shared host cache instead
 * of hitting the public registry, and we list the canonical upstream as
 * a fallback endpoint so a missing/cold mirror only ever slows a pull,
 * never breaks it.
 *
 * Each entry: `registry` is the mirror key written to `registries.yaml`,
 * `port` is the gateway port of that registry's zot instance, and
 * `upstream` is the fallback endpoint URL.
 */
const ZOT_MIRRORS: ReadonlyArray<
  Readonly<{ registry: string; port: number; upstream: string }>
> = [
  { registry: "docker.io", port: 5000, upstream: "https://registry-1.docker.io" },
  { registry: "ghcr.io", port: 5001, upstream: "https://ghcr.io" },
  { registry: "quay.io", port: 5002, upstream: "https://quay.io" },
  { registry: "registry.k8s.io", port: 5003, upstream: "https://registry.k8s.io" },
  { registry: "public.ecr.aws", port: 5004, upstream: "https://public.ecr.aws" },
  { registry: "gcr.io", port: 5005, upstream: "https://gcr.io" },
  { registry: "mcr.microsoft.com", port: 5006, upstream: "https://mcr.microsoft.com" },
];
|
|
772
|
+
|
|
773
|
+
/**
 * Discover the host-side image cache gateway by reading the same
 * `registry-mirrors` entry the in-VM dockerd already uses (baked into
 * the local provider's golden `/etc/docker/daemon.json`). Returns the
 * gateway host (`"10.42.0.1"`) when present, or `null` when there's no
 * host cache — e.g. on Freestyle, where the cluster then pulls every
 * image direct. Runs inside the daemon (VM) at `index.ts` load time, so
 * the result is stable per host and never poisons the warm-template
 * cache.
 *
 * Best-effort by design: a missing file, invalid JSON, an unexpected
 * document shape, or an unparsable mirror URL all yield `null` rather
 * than an exception.
 *
 * @param configPath - Docker daemon config to inspect. Defaults to
 *   `/etc/docker/daemon.json`; overridable so the parser can be exercised
 *   against a fixture.
 * @returns The hostname of the first `registry-mirrors` URL, or `null`.
 */
function detectHostMirrorGateway(
  configPath = "/etc/docker/daemon.json",
): string | null {
  try {
    // Treat the parsed document as untrusted and narrow it step by step
    // instead of asserting a shape onto it.
    const cfg: unknown = JSON.parse(readFileSync(configPath, "utf8"));
    if (typeof cfg !== "object" || cfg === null) return null;
    const mirrors = (cfg as Record<string, unknown>)["registry-mirrors"];
    const first: unknown = Array.isArray(mirrors) ? mirrors[0] : undefined;
    if (typeof first !== "string" || first === "") return null;
    // `new URL` throws on a malformed mirror; the catch maps that to null.
    return new URL(first).hostname || null;
  } catch {
    return null;
  }
}
|
|
794
|
+
|
|
795
|
+
/**
 * Build `/etc/rancher/k3s/registries.yaml`. k3s reads this **once, at
 * startup**, to configure its embedded containerd — which is why it has
 * to be seeded via `files` (a pre-start bind mount) rather than a
 * `setup` hook. Two jobs:
 *   1. Mirror the cluster's image pulls through the host `zot` cache
 *      (local provider only; omitted when there's no host cache).
 *   2. Trust the in-cluster registry, addressed as `<key>.internal:5000`
 *      (the `{{SPECTEST_SERVICE}}` token is expanded to the cluster's
 *      service key when the file is written). Image *references* use
 *      that peer-reachable name, but containerd pulls via the loopback
 *      endpoint `http://127.0.0.1:5000` — the hostNetwork registry pod
 *      shares the node's netns, so this needs no in-container DNS and
 *      can't be broken by a clobbered `hostnames`.
 *
 * @param registryEnabled - Whether the in-cluster registry is deployed
 *   and therefore needs a mirror entry plus a `configs` entry.
 * @returns The YAML document (newline-terminated), or `null` when
 *   there's nothing to configure (no host cache and `registry`
 *   disabled), in which case no file is injected.
 */
function buildRegistriesYaml(registryEnabled: boolean): string | null {
  const gateway = detectHostMirrorGateway();
  if (!gateway && !registryEnabled) return null;

  const lines: string[] = ["mirrors:"];
  if (gateway) {
    // One mirror per upstream: host cache first, canonical upstream as
    // the fallback endpoint.
    for (const { registry, port, upstream } of ZOT_MIRRORS) {
      lines.push(
        `  "${registry}":`,
        `    endpoint:`,
        `      - "http://${gateway}:${port}"`,
        `      - "${upstream}"`,
      );
    }
  }
  if (registryEnabled) {
    const host = `{{SPECTEST_SERVICE}}.internal:${K3S_REGISTRY_PORT}`;
    lines.push(
      `  "${host}":`,
      `    endpoint:`,
      `      - "http://127.0.0.1:${K3S_REGISTRY_PORT}"`,
      // `configs:` is a top-level key, hence column 0: it closes the
      // `mirrors:` mapping above.
      "configs:",
      // The endpoint is plain HTTP; the config (keyed by endpoint host)
      // makes that explicit and disables any TLS attempt against it.
      `  "127.0.0.1:${K3S_REGISTRY_PORT}":`,
      `    tls:`,
      `      insecure_skip_verify: true`,
    );
  }
  return lines.join("\n") + "\n";
}
|
|
843
|
+
|
|
844
|
+
/**
 * In-cluster OCI registry (CNCF `distribution`). `hostNetwork: true`
 * binds the cluster container's `:5000` directly — the same trick
 * Traefik uses — so peer services reach it at `<cluster-key>.internal:5000`
 * (the cluster service's own alias) and the node's own containerd reaches
 * it at `127.0.0.1:5000`. Storage is an `emptyDir`, so pushed images live
 * in the cluster and are captured by snapshot / isolated per test fork
 * like all other in-VM state.
 *
 * The listen address and container port both come from
 * `K3S_REGISTRY_PORT`; blob deletion is enabled via
 * `REGISTRY_STORAGE_DELETE_ENABLED`.
 */
const REGISTRY_MANIFEST = `apiVersion: apps/v1
kind: Deployment
metadata:
  name: spectest-registry
  namespace: kube-system
  labels:
    app: spectest-registry
spec:
  replicas: 1
  selector:
    matchLabels:
      app: spectest-registry
  template:
    metadata:
      labels:
        app: spectest-registry
    spec:
      hostNetwork: true
      dnsPolicy: Default
      tolerations:
        - operator: Exists
      containers:
        - name: registry
          image: registry:2
          imagePullPolicy: IfNotPresent
          env:
            - name: REGISTRY_HTTP_ADDR
              value: ":${K3S_REGISTRY_PORT}"
            - name: REGISTRY_STORAGE_DELETE_ENABLED
              value: "true"
          ports:
            - name: registry
              containerPort: ${K3S_REGISTRY_PORT}
          volumeMounts:
            - name: data
              mountPath: /var/lib/registry
      volumes:
        - name: data
          emptyDir: {}
`;
|
|
893
|
+
|
|
894
|
+
/**
 * Wait for a kube-system Deployment to reach its desired ready-replica
 * count, polling every 250 ms up to `timeoutMs`.
 *
 * @param clusterName - Cluster service name; used only in the error text.
 * @param helpers - Helpers whose `client.apps` is used for the reads.
 * @param deployment - Deployment name, looked up in `kube-system`.
 * @param timeoutMs - Overall budget in milliseconds.
 * @throws Error with the last-seen status (plus kube-system pod
 *   diagnostics) when the Deployment is not Ready before the deadline.
 */
async function waitForDeployment(
  clusterName: string,
  helpers: K3sHelpers,
  deployment: string,
  timeoutMs: number,
): Promise<void> {
  // 250ms: the rollout waits in setup sit on the cold start's critical
  // path, and a 1s poll wasted up to ~2s of it.
  const pollIntervalMs = 250;
  const deadline = Date.now() + timeoutMs;
  let lastErr: string | undefined;
  while (Date.now() < deadline) {
    try {
      // `.unwrap()` recovers the plain object — the client wraps its result in
      // every context now (provenance-free here, since this internal poll runs
      // during setup with no active recorder). We're reading for control flow,
      // not asserting, so go straight to raw.
      const dep = (
        await helpers.client.apps.readNamespacedDeployment({
          name: deployment,
          namespace: "kube-system",
        })
      ).unwrap();
      const ready = dep.status?.readyReplicas ?? 0;
      const want = dep.spec?.replicas ?? 1;
      // `want > 0` keeps a Deployment scaled to zero from counting as Ready.
      if (ready >= want && want > 0) return;
      lastErr = `${deployment} Deployment exists but only ${ready}/${want} replicas Ready`;
    } catch (err) {
      // Read failures are expected while the object is still being
      // created; remember the reason and keep polling.
      const msg = (err as Error)?.message ?? String(err);
      lastErr = /not found|404/i.test(msg)
        ? `${deployment} Deployment does not exist yet`
        : msg;
    }
    // Never sleep past the deadline, so the timeout is honoured closely.
    const remainingMs = Math.max(0, deadline - Date.now());
    await new Promise((r) => setTimeout(r, Math.min(pollIntervalMs, remainingMs)));
  }
  const diag = await collectTraefikDiagnostics(helpers);
  throw new Error(
    `k3s(${clusterName}): ${deployment} did not reach Ready within ${
      timeoutMs / 1000
    }s. ${lastErr ?? ""}\n${diag}`,
  );
}
|
|
940
|
+
|
|
941
|
+
/**
 * Post-Ready setup. Apply the Traefik manifest (hostNetwork) and, when
 * enabled, the in-cluster registry; wait for each Deployment to come
 * Ready. Captured by the warm-template snapshot, so warm starts pay none
 * of this cost.
 *
 * When the cluster declares `ingressDomains` (and the in-VM CA is
 * present), TLS is enabled: we mint a CA-signed leaf covering `*.<domain>`
 * for each domain, stash it in the `traefik-default-tls` Secret + a
 * file-provider ConfigMap, and bring Traefik up with a `websecure` :443
 * entrypoint serving it as the default cert. Those domains are then
 * reachable over HTTPS with a cert the test framework already trusts.
 *
 * @param name - Cluster service name, used in error messages.
 * @param helpers - Kubernetes client + `apply` helpers for this cluster.
 * @param opts - `registry`: deploy the in-cluster registry;
 *   `ingressDomains`: wildcard domains the default cert must cover.
 * @throws Whatever the API calls reject with, or the timeout error from
 *   `waitForDeployment` if a rollout does not settle within 120 s.
 */
async function setupK3sCluster(
  name: string,
  helpers: K3sHelpers,
  opts: { registry: boolean; ingressDomains: string[] },
): Promise<void> {
  const rolloutTimeoutMs = 120_000;
  const tlsEnabled = opts.ingressDomains.length > 0 && caPresent();
  if (tlsEnabled) {
    const { cert, key } = await issueIngressCert(
      opts.ingressDomains.map((d) => `*.${d}`),
    );
    // Apply the cert Secret + dynamic-config ConfigMap before the
    // Deployment that mounts them. `stringData` lets us hand over plain
    // PEM; the API server base64-encodes it.
    await helpers.client.core.createNamespacedSecret({
      namespace: "kube-system",
      body: {
        metadata: { name: TRAEFIK_TLS_SECRET, namespace: "kube-system" },
        type: "kubernetes.io/tls",
        stringData: { "tls.crt": cert, "tls.key": key },
      },
    });
    await helpers.client.core.createNamespacedConfigMap({
      namespace: "kube-system",
      body: {
        metadata: {
          name: TRAEFIK_DYNAMIC_CONFIGMAP,
          namespace: "kube-system",
        },
        data: { "tls.yaml": TRAEFIK_DYNAMIC_TLS },
      },
    });
  }
  await helpers.apply(buildTraefikManifest(tlsEnabled));
  if (opts.registry) await helpers.apply(REGISTRY_MANIFEST);
  // Both rollouts proceed independently inside the cluster — wait on them
  // concurrently (they used to serialize, wasting up to a rollout's tail).
  const deployments = opts.registry
    ? ["traefik", "spectest-registry"]
    : ["traefik"];
  await Promise.all(
    deployments.map((deployment) =>
      waitForDeployment(name, helpers, deployment, rolloutTimeoutMs),
    ),
  );
}
|
|
996
|
+
|
|
997
|
+
/**
 * Snapshot of kube-system state, dumped when a Deployment wait times out
 * (Traefik or the in-cluster registry — the name is historical). With
 * the static install, the failure surface is just "did our Deployment
 * schedule and become Ready?" — pod listing covers that, and recent
 * Warning events add the kubelet's view of failed pulls.
 *
 * Best-effort: each section catches its own failure and records it as a
 * parenthesised line, so this function does not throw on API errors.
 *
 * @param helpers - Helpers whose `client.core` is used for the listings.
 * @returns A newline-joined, human-readable report.
 */
async function collectTraefikDiagnostics(helpers: K3sHelpers): Promise<string> {
  const lines: string[] = [];
  try {
    const pods = await helpers.client.core.listNamespacedPod({
      namespace: "kube-system",
    });
    lines.push(`kube-system pods (${pods.items.length}):`);
    for (const p of pods.items) {
      const phase = p.status?.phase ?? "?";
      const cs = p.status?.containerStatuses ?? [];
      const reasons = cs
        .map((c) => c.state?.waiting?.reason ?? c.state?.terminated?.reason ?? "")
        .filter((s) => s)
        .join(",");
      lines.push(
        `  ${p.metadata?.name ?? "?"}: phase=${phase}${reasons ? ` reasons=${reasons}` : ""}`,
      );
      // The waiting `message` carries containerd's actual error — e.g. the
      // failing endpoint, an upstream `429 Too Many Requests`, or a
      // `connection refused`. The `reason` alone (`ErrImagePull`) hides all
      // of that, which is exactly what we need when a pull won't settle.
      for (const c of cs) {
        const msg =
          c.state?.waiting?.message ?? c.state?.terminated?.message ?? "";
        if (msg) lines.push(`    ${c.name}: ${msg.replace(/\s+/g, " ").trim()}`);
      }
    }
  } catch (err) {
    lines.push(`(listing pods failed: ${(err as Error)?.message ?? String(err)})`);
  }
  // Recent Warning events surface pull failures the kubelet emits before a
  // container status even settles (FailedPull / Failed / BackOff), with the
  // raw containerd message attached. Best-effort: never let diagnostics throw.
  try {
    const events = await helpers.client.core.listNamespacedEvent({
      namespace: "kube-system",
    });
    const warnings = (events.items ?? [])
      .filter((e) => e.type === "Warning")
      .map((e) => ({
        obj: e.involvedObject?.name ?? "?",
        reason: e.reason ?? "?",
        message: (e.message ?? "").replace(/\s+/g, " ").trim(),
      }))
      .filter((e) => e.message);
    if (warnings.length) {
      lines.push(`kube-system Warning events (${warnings.length}):`);
      // Keep the tail of the list (at most 12 entries).
      // NOTE(review): this assumes the API returns the newest events last;
      // list ordering is not established here — confirm, or sort by
      // timestamp before slicing.
      for (const w of warnings.slice(-12)) {
        lines.push(`  ${w.obj} [${w.reason}] ${w.message}`);
      }
    }
  } catch (err) {
    lines.push(`(listing events failed: ${(err as Error)?.message ?? String(err)})`);
  }
  return lines.join("\n");
}
|
|
1059
|
+
|
|
1060
|
+
/**
 * Default k3s docker image tag.
 *
 * The cluster's system images (the `rancher/k3s` image itself, plus
 * coredns / local-path-provisioner / pause and the Traefik we deploy)
 * are pulled on first boot through the host `zot` pull-through cache —
 * `registries.yaml` (seeded via `files`) mirrors `docker.io` and
 * `registry.k8s.io` at it. The first-ever cluster boot on a cold-cache
 * host pays the upstream pull once; thereafter zot serves the blobs
 * host-wide and the warm-template snapshot captures the booted cluster,
 * so neither cold-cache nor warm starts re-pull. Any `opts.version`
 * works — there's no base-snapshot release to keep in sync with.
 *
 * **Why v1.32.x:** kube-proxy's `nftables` proxy mode is GA in k8s 1.32
 * (beta in 1.31, alpha-gated in 1.30). The component runs kube-proxy in
 * this mode to sidestep Freestyle's missing `xt_comment` netfilter
 * extension; dropping below 1.31 reintroduces the broken iptables path.
 *
 * NOTE(review): the upstream Kubernetes docs appear to list the nftables
 * mode as beta (on by default) in 1.31–1.32 and stable from 1.33, so the
 * "GA in 1.32" wording may be off by one release — confirm. The
 * "don't drop below 1.31" conclusion is unaffected either way.
 */
const DEFAULT_K3S_VERSION = "v1.32.1-k3s1";

/**
 * A ready-to-use single-node Kubernetes cluster (k3s). Drop into
 * `environment.services`:
 *
 * ```ts
 * services: { k8s: k3s() }
 * ```
 *
 * Tests get `@kubernetes/client-node` API objects pre-wired to this
 * cluster at `ctx.svc.<key>.client` — `core`, `apps`, and a generic
 * `objects` (`KubernetesObjectApi`). Every API call is recorded on the
 * test event log alongside `fetch` calls. There's also `apply(yaml)`
 * sugar for piping a multi-document manifest in.
 *
 * **Ingress.** We deploy Traefik ourselves in `hostNetwork` mode
 * during `setup()`. Traefik binds the cluster container's :80 directly
 * (no ServiceLB / klipper-lb needed), watches Ingress objects via the
 * API, and routes incoming requests to pod Endpoints. Any `hostnames`
 * declared on this service in env.ts therefore route through Traefik:
 * a peer doing `fetch("http://app.example.com")` resolves the host to
 * the k3s container's IP (via spectest-resolver), lands on Traefik,
 * and gets dispatched to the matching Ingress rule's backend pods.
 *
 * **Workarounds for Freestyle's kernel** (Linux 6.1.0-x-freestyle).
 * The stock kernel is missing the `xt_comment` netfilter match
 * extension. Two consequences, each handled below:
 *
 *   1. *kube-proxy* in default iptables mode generates rules with
 *      `-m comment --comment "..."`, which the kernel rejects —
 *      breaking pod→ClusterIP routing and every pod that talks to the
 *      in-cluster API (helm-install Jobs, CoreDNS, …). Fixed by
 *      `--kube-proxy-arg=proxy-mode=nftables`: kube-proxy emits
 *      native nftables rules where comments are a first-class
 *      construct, no xt_comment dependency. See the note on
 *      `DEFAULT_K3S_VERSION` for why the 1.32 line is pinned.
 *
 *   2. *CNI portmap plugin* (used by klipper-lb's hostPort to expose
 *      LoadBalancer ports on the host) still uses iptables-nft and
 *      hits the same xt_comment failure — there's no equivalent
 *      flag to switch it to native nftables. Workaround: disable the
 *      bundled traefik + ServiceLB and run Traefik with
 *      `hostNetwork: true` ourselves. hostNetwork pods don't go
 *      through portmap at all (they share the node's netns directly),
 *      so the broken plugin is never invoked.
 *
 * Flannel uses the `host-gw` backend because Freestyle's stock kernel
 * lacks the VXLAN module — fine for single-node clusters.
 *
 * @param opts - Optional overrides. The fields read here are `version`
 *   (image tag, default `DEFAULT_K3S_VERSION`), `extraArgs` (appended to
 *   the `k3s server` command line), `registry` (in-cluster registry; on
 *   unless explicitly `false`), `ingressDomains` (wildcard ingress
 *   domains, default none) and `readyTimeoutSecs` (default 120).
 * @returns The service definition; when `ingressDomains` is non-empty it
 *   is wrapped with `provides()` so each `*.<domain>` resolves to this
 *   service.
 */
export function k3s(opts: K3sOptions = {}) {
  const version = opts.version ?? DEFAULT_K3S_VERSION;
  const extra = opts.extraArgs ?? [];
  const registryEnabled = opts.registry !== false;
  // Wildcard ingress domains. Drives both the `provides(... dnsName)`
  // wiring below and (when non-empty) the CA-signed TLS default cert that
  // setupK3sCluster mints so these domains are reachable over HTTPS.
  const ingressDomains = opts.ingressDomains ?? [];
  // `/etc/rancher/k3s/registries.yaml` (host-cache mirrors + trust for
  // the in-cluster registry). Seeded via `files` because k3s reads it
  // only at startup, before any setup hook could run.
  const registriesYaml = buildRegistriesYaml(registryEnabled);
  const serverArgs = [
    "k3s",
    "server",
    // CoreDNS / pod DNS upstream. Without this, k3s sees only the
    // loopback 127.0.0.11 (Docker's embedded DNS) in the container's
    // /etc/resolv.conf, decides no usable nameserver exists, and writes a
    // fallback `nameserver 8.8.8.8` that CoreDNS then forwards to — so
    // pods reach the public internet but NOT peer services on
    // spectest-net (`<svc>.internal`, fakes, service-TLS hosts all
    // NXDOMAIN). We instead point k3s at the container's default gateway
    // — the spectest-net bridge gateway, where spectest-resolver binds a
    // second listener for exactly this. The file is written by the
    // command wrapper below because the gateway IP is only known at
    // container start.
    "--resolv-conf=/run/spectest-resolv.conf",
    // metrics-server isn't useful in a test cluster.
    "--disable=metrics-server",
    // traefik + servicelb disabled: their klipper-lb DaemonSet uses
    // CNI portmap to bind host port 80, which still needs xt_comment
    // (the iptables compat path that the kernel can't satisfy).
    // We install Traefik with hostNetwork in setup() — same effect,
    // no portmap involved. local-storage stays enabled: it's a
    // controller pod that doesn't bind host ports.
    "--disable=traefik",
    "--disable=servicelb",
    // Pod CIDR MUST avoid 10.42.0.0/16: that's the spectest-br0 host
    // bridge subnet, whose gateway 10.42.0.1 fronts the host image caches
    // (zot :5000-5007, buildkitd :1234). k3s's *default* pod CIDR is also
    // 10.42.0.0/16 — with --flannel-backend=host-gw, flannel programs that
    // route into the node's own routing table and gives cni0 the subnet's
    // .1 (10.42.0.1). That shadows the route to the host gateway, so once
    // CNI comes up the node can no longer reach 10.42.0.1 and every
    // subsequent registry pull dies with "connect: connection refused"
    // (e.g. the in-cluster registry's `registry:2`, applied after the
    // cluster is up — the airgap-bundled system images pull *before* CNI
    // and so sneak through). Move pods to 10.44/service to 10.45.
    "--cluster-cidr=10.44.0.0/16",
    "--service-cidr=10.45.0.0/16",
    "--cluster-dns=10.45.0.10",
    "--flannel-backend=host-gw",
    "--write-kubeconfig-mode=644",
    // kube-proxy in nftables mode: native nftables rules, no
    // xt_comment dependency. Pod→ClusterIP routing works, so
    // CoreDNS / helm-install / anything-talking-to-the-API works.
    "--kube-proxy-arg=proxy-mode=nftables",
    // Caller-supplied flags go last. They are space-joined into a shell
    // command line below without quoting, so each entry must already be
    // shell-safe.
    ...extra,
  ].join(" ");
  // The service `command` runs under `/bin/sh -c` (see runContainer in
  // daemon.ts), so derive the bridge gateway from the container's default
  // route at start time, write it as the k3s resolv-conf, then exec k3s
  // (exec so it stays the container's main process and signals / the
  // readyCheck behave exactly as before). `/run` is a tmpfs on this
  // service, so the file is writable and never persisted into a snapshot.
  const cmd =
    "GW=\"$(ip route 2>/dev/null | awk '/^default/{print $3; exit}')\"; " +
    'if [ -n "$GW" ]; then ' +
    "printf 'nameserver %s\\noptions ndots:0\\n' \"$GW\" > /run/spectest-resolv.conf; " +
    "else echo 'spectest: no default gateway found; k3s pod DNS for peer services will not resolve' >&2; " +
    ": > /run/spectest-resolv.conf; fi; " +
    `exec ${serverArgs}`;
  // Plain /readyz probe. On a warm zot cache the cluster's images are
  // already local, so the first boot completes in seconds; the
  // first-ever boot on a cold-cache host pulls through the mirror and
  // can take a couple of minutes (covered by readyTimeoutSecs).
  const readyCmd = "kubectl get --raw=/readyz >/dev/null 2>&1";
  const def = {
    image: { type: "registry", reference: `rancher/k3s:${version}` },
    command: cmd,
    privileged: true,
    tmpfs: ["/run", "/var/run"],
    cgroupns: "host",
    // 80/443 are advisory — peer services and host code reach them via
    // the k3s container's IP. Traefik runs with `hostNetwork: true`
    // (ServiceLB is disabled above), so it binds them directly inside
    // the container's netns. 5000 is the in-cluster registry (likewise a
    // hostNetwork pod bound to the container netns), reached by peers at
    // the cluster's own `<key>.internal:5000` alias.
    ports: registryEnabled ? [80, 443, 6443, K3S_REGISTRY_PORT] : [80, 443, 6443],
    // NOTE: do NOT mount /var/lib/rancher/k3s/agent/containerd as a cache
    // volume. It was tried (to spare a recreated cluster re-pulling its
    // system images on delta restores) and a fresh k3s server against the
    // previous container's containerd store — killed un-cleanly by the
    // teardown's `docker rm -f` — wedged the apiserver minutes in
    // (rollouts never settled, pod listing started failing). The zot
    // mirror already makes those re-pulls cheap; the residual win wasn't
    // worth the recovery semantics of a crash-state store under a fresh
    // cluster db.
    ...(registriesYaml
      ? {
          files: [
            { path: "/etc/rancher/k3s/registries.yaml", content: registriesYaml },
          ],
        }
      : {}),
    readyCheck: {
      type: "exec" as const,
      command: readyCmd,
      timeoutSecs: opts.readyTimeoutSecs ?? 120,
    },
    setup: async ({ name, helpers }: { name: string; helpers: K3sHelpers }) => {
      await setupK3sCluster(name, helpers, {
        registry: registryEnabled,
        ingressDomains,
      });
    },
    helpers: async ({ name }: { name: string }): Promise<K3sHelpers> => {
      // Read the cluster's kubeconfig and address the API server by its
      // auto-assigned `<name>.internal` hostname on spectest-net. TLS
      // verification is off (see the K3sHelpers docstring), so the
      // server's cert SAN list doesn't need to include the .internal
      // name.
      const kcRead = await runDocker([
        "exec",
        name,
        "cat",
        "/etc/rancher/k3s/k3s.yaml",
      ]);
      if (kcRead.code !== 0) {
        throw new Error(
          `k3s(${name}): failed to read kubeconfig from container: ${kcRead.stderr.trim()}`,
        );
      }

      const kubeconfig = new KubeConfig();
      kubeconfig.loadFromString(kcRead.stdout);

      const server = `https://${name}.internal:6443`;
      // Update kc.clusters so any code that reads kubeconfig sees the
      // right server URL, but the actual request server comes from the
      // Configuration we build below. `Cluster.server` is typed `readonly`
      // by @kubernetes/client-node, but the loaded object is a plain mutable
      // record — write through a mutable view rather than rebuild the config.
      for (const cluster of kubeconfig.clusters) {
        (cluster as { -readonly [K in keyof typeof cluster]: typeof cluster[K] }).server =
          server;
      }

      const httpApi = new FetchHttpLibrary();
      const baseServer = new ServerConfiguration(server, {});
      // The two `any` casts hand our KubeConfig / fetch-based HTTP library
      // to the generated client's configuration. Each suppression sits
      // directly above the line it applies to (`disable-next-line` covers
      // exactly one line).
      const config = createConfiguration({
        baseServer,
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        authMethods: { default: kubeconfig as any },
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        httpApi: httpApi as any,
      });

      const core = withTagging(new CoreV1Api(config));
      const apps = withTagging(new AppsV1Api(config));
      const objects = withTagging(new KubernetesObjectApi(config));

      // Create every document of a multi-document manifest, in order.
      // Uses `create`, so applying an object that already exists rejects.
      const apply = async (
        manifest: string,
      ): Promise<Wrapped<KubernetesObject>[]> => {
        const docs = loadAllYaml(manifest) as KubernetesObject[];
        const out: Wrapped<KubernetesObject>[] = [];
        for (const doc of docs) {
          // Skip empty documents (e.g. a trailing `---`) and non-objects.
          if (!doc || typeof doc !== "object" || !("kind" in doc)) continue;
          // `objects.create` is wrapped by `withTagging`, so each returned
          // object already carries the back-reference to its create call.
          const created = await objects.create(doc);
          out.push(created as unknown as Wrapped<KubernetesObject>);
        }
        return out;
      };

      return {
        kubeconfig,
        client: { core, apps, objects },
        apply,
      };
    },
  } satisfies ServiceDefinition<K3sHelpers>;

  // Wildcard ingress domains → a dnsName(`*.<domain>`, { service: self })
  // each, attached via provides(). SELF_SERVICE_TOKEN resolves to this
  // service's key at load time (the component can't know it here). The
  // resolver then points every host under the domain at the cluster.
  if (ingressDomains.length === 0) return def;
  return provides(
    def,
    ingressDomains.map((domain) =>
      dnsName(`*.${domain}`, { service: SELF_SERVICE_TOKEN }),
    ),
  );
}
|