@checkstack/script-packages-backend 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +273 -0
  2. package/drizzle/0000_flashy_squadron_supreme.sql +63 -0
  3. package/drizzle/0001_flawless_drax.sql +15 -0
  4. package/drizzle/meta/0000_snapshot.json +395 -0
  5. package/drizzle/meta/0001_snapshot.json +491 -0
  6. package/drizzle/meta/_journal.json +20 -0
  7. package/drizzle.config.ts +7 -0
  8. package/package.json +32 -0
  9. package/src/atomic-symlink.test.ts +47 -0
  10. package/src/atomic-symlink.ts +66 -0
  11. package/src/blob-gc-runner.test.ts +120 -0
  12. package/src/blob-gc-runner.ts +139 -0
  13. package/src/blob-gc.test.ts +182 -0
  14. package/src/blob-gc.ts +161 -0
  15. package/src/blob-hash.test.ts +70 -0
  16. package/src/blob-hash.ts +56 -0
  17. package/src/blob-store-registry.test.ts +78 -0
  18. package/src/blob-store-registry.ts +75 -0
  19. package/src/blob-store.ts +51 -0
  20. package/src/cache-archive.test.ts +164 -0
  21. package/src/cache-archive.ts +192 -0
  22. package/src/cache-layout.ts +64 -0
  23. package/src/data-dir.test.ts +41 -0
  24. package/src/data-dir.ts +42 -0
  25. package/src/e2e-install-reconcile.test.ts +121 -0
  26. package/src/hooks.ts +20 -0
  27. package/src/index.ts +594 -0
  28. package/src/install-controller.test.ts +257 -0
  29. package/src/install-controller.ts +144 -0
  30. package/src/install-service.test.ts +104 -0
  31. package/src/install-service.ts +116 -0
  32. package/src/install-state-store.ts +131 -0
  33. package/src/lockfile.test.ts +60 -0
  34. package/src/lockfile.ts +0 -0
  35. package/src/npmrc.test.ts +48 -0
  36. package/src/npmrc.ts +42 -0
  37. package/src/package-types.test.ts +293 -0
  38. package/src/package-types.ts +408 -0
  39. package/src/parse-bun-lock.test.ts +62 -0
  40. package/src/parse-bun-lock.ts +59 -0
  41. package/src/reconcile-diff.test.ts +41 -0
  42. package/src/reconcile-diff.ts +26 -0
  43. package/src/reconcile-fs.ts +199 -0
  44. package/src/reconciler.test.ts +289 -0
  45. package/src/reconciler.ts +81 -0
  46. package/src/registry-client.test.ts +314 -0
  47. package/src/registry-client.ts +0 -0
  48. package/src/registry-request-config.ts +63 -0
  49. package/src/registry-token.test.ts +124 -0
  50. package/src/registry-token.ts +104 -0
  51. package/src/resolution-root.test.ts +82 -0
  52. package/src/resolution-root.ts +127 -0
  53. package/src/resolver.test.ts +133 -0
  54. package/src/resolver.ts +132 -0
  55. package/src/router.ts +273 -0
  56. package/src/schema.ts +166 -0
  57. package/src/size-cap.test.ts +32 -0
  58. package/src/size-cap.ts +40 -0
  59. package/src/storage-migration.test.ts +318 -0
  60. package/src/storage-migration.ts +213 -0
  61. package/src/stores.ts +533 -0
  62. package/src/tree-gc.test.ts +184 -0
  63. package/src/tree-gc.ts +160 -0
  64. package/src/tree-retirement.ts +81 -0
  65. package/src/type-acquisition-route.ts +178 -0
  66. package/tsconfig.json +23 -0
@@ -0,0 +1,56 @@
1
+ import { createHash } from "node:crypto";
2
+ import type { ManifestEntry } from "@checkstack/script-packages-common";
3
+
4
+ /**
5
+ * Content hashing + verification for distributed blobs.
6
+ *
7
+ * A package's distributable blob is our gzip-tar of its Bun cache entry, NOT
8
+ * the upstream npm tarball — so the SRI `integrity` key (which hashes the
9
+ * npm tarball) does NOT cover the transported bytes. To detect corruption or
10
+ * tampering in transit (shared blob store on core, HTTP/WS on satellites) we
11
+ * additionally carry `blobSha256` (sha-256 of the blob) on each manifest
12
+ * entry and verify it before extracting.
13
+ */
14
+
15
/** Raw blob payload in either of the two shapes a blob source may hand to the hasher. */
export type BlobBytes = Uint8Array | ArrayBuffer;

/**
 * Coerce blob bytes to a `Uint8Array` before they are consumed.
 *
 * Some sources already return a `Uint8Array` (the central resolver, the
 * Postgres codec); others return a bare `ArrayBuffer`
 * (`Bun.S3File.arrayBuffer()`, any `Response.arrayBuffer()` transport).
 * Node's `crypto.Hash.update()` refuses a bare `ArrayBuffer`, so the buffer
 * case is wrapped in a view here.
 *
 * @param bytes - Blob payload in either accepted shape.
 * @returns The input itself when it is already a `Uint8Array`; otherwise a
 *   new `Uint8Array` view over the given buffer (no bytes are copied, so the
 *   content hash is unaffected).
 */
export function toUint8Array(bytes: BlobBytes): Uint8Array {
  if (bytes instanceof Uint8Array) {
    return bytes;
  }
  return new Uint8Array(bytes);
}
32
+
33
/**
 * Compute the SHA-256 digest of a blob.
 *
 * @param bytes - Blob payload; normalized through `toUint8Array` first so a
 *   bare `ArrayBuffer` is accepted too.
 * @returns The digest as a hex string.
 */
export function blobSha256(bytes: BlobBytes): string {
  const hasher = createHash("sha256");
  hasher.update(toUint8Array(bytes));
  return hasher.digest("hex");
}
37
+
38
/**
 * Check a blob's bytes against the `blobSha256` recorded on its manifest entry.
 *
 * An entry with no recorded hash (it predates the field) is accepted without
 * hashing: there is nothing to compare against. The bytes are only hashed
 * when a recorded value exists.
 *
 * @param entry - Manifest entry (only its `blobSha256` is read).
 * @param bytes - The blob bytes about to be materialized.
 * @returns `{ ok: true }` on a match or when no hash is recorded; otherwise
 *   `{ ok: false, expected, actual }` so the caller can report the mismatch
 *   and refuse to extract the blob.
 */
export function verifyBlobSha256({
  entry,
  bytes,
}: {
  entry: Pick<ManifestEntry, "blobSha256">;
  bytes: BlobBytes;
}): { ok: true } | { ok: false; expected: string; actual: string } {
  const expected = entry.blobSha256;
  if (!expected) {
    return { ok: true };
  }
  const actual = blobSha256(bytes);
  if (actual !== expected) {
    return { ok: false, expected, actual };
  }
  return { ok: true };
}
@@ -0,0 +1,78 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import type { BlobStore } from "./blob-store";
3
+ import { createBlobStoreRegistry } from "./blob-store-registry";
4
+
5
/**
 * In-memory `BlobStore` test double.
 *
 * @param id - Backend id the store reports.
 * @param seed - Optional `integrity -> text` pairs; each text is UTF-8
 *   encoded and pre-loaded into the store.
 */
function memStore(id: string, seed: Record<string, string> = {}): BlobStore {
  const blobs = new Map<string, Uint8Array>();
  for (const [integrity, text] of Object.entries(seed)) {
    blobs.set(integrity, new TextEncoder().encode(text));
  }

  return {
    id,
    put: async ({ integrity, bytes }) => {
      blobs.set(integrity, bytes);
    },
    get: async ({ integrity }) => blobs.get(integrity),
    has: async ({ integrity }) => blobs.has(integrity),
    delete: async ({ integrity }) => {
      blobs.delete(integrity);
    },
    list: async () => Array.from(blobs.keys()),
  };
}
28
+
29
describe("BlobStoreRegistry", () => {
  // Decode served bytes back to text so they can be compared with the seeds.
  const asText = (bytes: Uint8Array | undefined): string =>
    new TextDecoder().decode(bytes);

  test("registers and resolves stores by id", () => {
    const registry = createBlobStoreRegistry();
    for (const id of ["postgres", "s3"]) {
      registry.register(memStore(id));
    }

    expect(registry.ids().sort()).toEqual(["postgres", "s3"]);
    expect(registry.has("s3")).toBe(true);
    expect(registry.get("postgres").id).toBe("postgres");
  });

  test("throws a helpful error for an unregistered backend", () => {
    const registry = createBlobStoreRegistry();
    registry.register(memStore("postgres"));

    // The message must name the missing backend state AND list what exists.
    const lookup = () => registry.get("s3");
    expect(lookup).toThrow(/not registered.*postgres/i);
  });

  test("reads from the active backend when present", async () => {
    const registry = createBlobStoreRegistry();
    registry.register(memStore("postgres", { "sha-1": "from-pg" }));
    registry.register(memStore("s3", { "sha-1": "from-s3" }));

    const served = await registry.readWithFallback({
      activeBackendId: "s3",
      integrity: "sha-1",
    });

    expect(served?.servedBy).toBe("s3");
    expect(asText(served?.bytes)).toBe("from-s3");
  });

  test("falls back to another backend when the active one lacks the blob", async () => {
    const registry = createBlobStoreRegistry();
    registry.register(memStore("postgres", { "sha-2": "only-in-pg" }));
    registry.register(memStore("s3"));

    const served = await registry.readWithFallback({
      activeBackendId: "s3",
      integrity: "sha-2",
    });

    expect(served?.servedBy).toBe("postgres");
    expect(asText(served?.bytes)).toBe("only-in-pg");
  });

  test("returns undefined when no backend has the blob", async () => {
    const registry = createBlobStoreRegistry();
    registry.register(memStore("postgres"));

    const served = await registry.readWithFallback({
      activeBackendId: "postgres",
      integrity: "missing",
    });

    expect(served).toBeUndefined();
  });
});
@@ -0,0 +1,75 @@
1
+ import type { BlobStore } from "./blob-store";
2
+
3
/**
 * Registry of {@link BlobStore} backends, keyed by each store's `id` (one
 * store per store plugin).
 *
 * Besides plain lookup it provides a fallback read for the window in which a
 * storage migration is in flight: if the active backend does not hold a blob
 * yet, the other registered backends are consulted so script execution keeps
 * working mid-migration.
 */
export interface BlobStoreRegistry {
  /** Add `store` to the registry under `store.id`. */
  register(store: BlobStore): void;

  /** Ids of all registered stores (feeds the admin backend selector). */
  ids(): string[];

  /** Whether a store is registered under `id`. */
  has(id: string): boolean;

  /**
   * Look up the store registered under `id`.
   *
   * @throws If no store is registered under `id`.
   */
  get(id: string): BlobStore;

  /**
   * Read the blob for `integrity`, preferring the backend named by
   * `activeBackendId` and falling back to the remaining registered backends
   * when the active one lacks it.
   *
   * @returns The bytes together with the id of the backend that served them,
   *   or `undefined` when no registered backend holds the blob.
   */
  readWithFallback(input: {
    activeBackendId: string;
    integrity: string;
  }): Promise<undefined | { servedBy: string; bytes: Uint8Array }>;
}
27
+
28
/**
 * Build an empty, in-memory {@link BlobStoreRegistry}.
 *
 * Stores live in a `Map` keyed by `store.id`, so registering a second store
 * with an id that is already present replaces the earlier one, and `ids()` /
 * the fallback scan follow the map's insertion order.
 */
export function createBlobStoreRegistry(): BlobStoreRegistry {
  const byId = new Map<string, BlobStore>();

  // Ask a single store for the blob; tag a hit with the id that served it.
  const probe = async (id: string, store: BlobStore, integrity: string) => {
    const bytes = await store.get({ integrity });
    return bytes === undefined ? undefined : { bytes, servedBy: id };
  };

  const registry: BlobStoreRegistry = {
    register: (store) => {
      byId.set(store.id, store);
    },

    ids: () => Array.from(byId.keys()),

    has: (id) => byId.has(id),

    get: (id) => {
      const found = byId.get(id);
      if (found) {
        return found;
      }
      // Name what IS registered so a misconfigured backend id is easy to spot.
      const available = Array.from(byId.keys()).join(", ") || "(none)";
      throw new Error(
        `Blob store backend "${id}" is not registered. Available: ${available}`,
      );
    },

    readWithFallback: async ({ integrity, activeBackendId }) => {
      // Preferred path: the active backend, when it is registered at all.
      const primary = byId.get(activeBackendId);
      if (primary) {
        const hit = await probe(activeBackendId, primary, integrity);
        if (hit) {
          return hit;
        }
      }

      // Migration in flight / partial state: try every other backend in turn.
      for (const [id, store] of byId) {
        if (id !== activeBackendId) {
          const hit = await probe(id, store, integrity);
          if (hit) {
            return hit;
          }
        }
      }

      return undefined;
    },
  };

  return registry;
}
@@ -0,0 +1,51 @@
1
+ import { createExtensionPoint } from "@checkstack/backend-api";
2
+ import type { PluginMetadata } from "@checkstack/common";
3
+
4
/**
 * Content-addressed persistence backend for script-package artifacts.
 *
 * Every blob is keyed by its integrity hash (the npm tarball / Bun-cache
 * entry per `name@version`). The integrity is therefore the identity that
 * stays stable across backends; only where the bytes physically live
 * differs from one backend to the next.
 *
 * Two implementations ship as plugins: `script-packages-store-postgres`
 * (the default: Postgres large-objects, no extra infrastructure) and
 * `script-packages-store-s3` (preferred when configured). A further backend
 * is just another plugin implementing this interface; no schema change is
 * needed.
 *
 * Contract for implementers: be content-addressed and idempotent. A `put`
 * for an integrity that is already present must be a no-op, or an overwrite
 * with identical bytes.
 */
export interface BlobStore {
  /** Stable backend id; this is the value recorded in `script_package_blob.backend`. */
  readonly id: string;

  /** Persist the (already-compressed) `bytes` under `integrity`. Idempotent. */
  put(input: { integrity: string; bytes: Uint8Array }): Promise<void>;

  /** Return the bytes stored under `integrity`, or `undefined` when this backend lacks them. */
  get(input: { integrity: string }): Promise<Uint8Array | undefined>;

  /** Report whether this backend holds a blob for `integrity`. */
  has(input: { integrity: string }): Promise<boolean>;

  /** Remove the blob for `integrity` (GC / post-migration cleanup). Idempotent. */
  delete(input: { integrity: string }): Promise<void>;

  /** List every integrity this backend currently holds (used by migration / GC). */
  list(): Promise<string[]>;
}
38
+
39
/**
 * Extension-point contract through which a blob-store plugin hands over its
 * {@link BlobStore} implementation.
 *
 * Which backend is active is chosen via `script_package_storage_config`;
 * the backend then resolves the registered store by its id.
 */
export interface BlobStoreExtensionPoint {
  /** Register `store` on behalf of the plugin described by `metadata`. */
  registerBlobStore(store: BlobStore, metadata: PluginMetadata): void;
}

/** Identifier under which the blob-store extension point is created. */
const BLOB_STORE_EXTENSION_POINT_ID = "script-packages.blobStoreExtensionPoint";

/** The extension point blob-store plugins register against. */
export const blobStoreExtensionPoint =
  createExtensionPoint<BlobStoreExtensionPoint>(BLOB_STORE_EXTENSION_POINT_ID);
@@ -0,0 +1,164 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { spawn } from "bun";
3
+ import {
4
+ mkdtemp,
5
+ mkdir,
6
+ writeFile,
7
+ readFile,
8
+ rm,
9
+ readdir,
10
+ symlink,
11
+ } from "node:fs/promises";
12
+ import { tmpdir } from "node:os";
13
+ import path from "node:path";
14
+ import { packDir, unpackInto } from "./cache-archive";
15
+
16
/**
 * Produce a gzip tar containing exactly one entry stored under `entryName`,
 * where the name may contain `..` or be absolute.
 *
 * @param cwd - Directory `tar` runs in; a relative `entryName` resolves against it.
 * @param entryName - Entry name to store verbatim.
 * @throws If `tar` exits non-zero.
 */
async function makeArchiveWithEntry(
  cwd: string,
  entryName: string,
): Promise<Uint8Array> {
  // `-P` (`--absolute-names`) keeps traversing / absolute names that tar would
  // otherwise strip: precisely the hostile shape the extractor must refuse.
  const tarArgs = ["-czf", "-", "-P", entryName];
  const child = spawn({
    cmd: ["tar", ...tarArgs],
    cwd,
    stdout: "pipe",
    stderr: "pipe",
  });

  const [archive, status] = await Promise.all([
    new Response(child.stdout).bytes(),
    child.exited,
  ]);
  if (status === 0) {
    return archive;
  }
  throw new Error("failed to build malicious archive");
}
36
+
37
describe("cache-archive pack/unpack", () => {
  // Fresh scratch directory per test; removed again in afterEach.
  let work: string;

  /** Create (recursively) a directory below `work` and return its full path. */
  const makeDir = async (...segments: string[]): Promise<string> => {
    const dir = path.join(work, ...segments);
    await mkdir(dir, { recursive: true });
    return dir;
  };

  beforeEach(async () => {
    work = await mkdtemp(path.join(tmpdir(), "cs-archive-"));
  });

  afterEach(async () => {
    await rm(work, { recursive: true, force: true });
  });

  test("round-trips a directory tree through tar+gzip", async () => {
    const entry = "pkg@1.0.0";
    const src = path.join(work, "src");
    const nested = await makeDir("src", entry, "sub");
    await writeFile(path.join(src, entry, "index.js"), "module.exports = 1;\n");
    await writeFile(path.join(nested, "x.txt"), "deep\n");

    const blob = await packDir({ parentDir: src, entryName: entry });
    expect(blob.byteLength).toBeGreaterThan(0);

    const dest = await makeDir("dest");
    await unpackInto({ targetDir: dest, bytes: blob });

    const indexText = await readFile(path.join(dest, entry, "index.js"), "utf8");
    expect(indexText).toBe("module.exports = 1;\n");
    const deepText = await readFile(
      path.join(dest, entry, "sub", "x.txt"),
      "utf8",
    );
    expect(deepText).toBe("deep\n");
  });

  test("throws on a corrupt archive", async () => {
    const dest = await makeDir("dest");
    const garbage = new Uint8Array([1, 2, 3, 4]);

    await expect(
      unpackInto({ targetDir: dest, bytes: garbage }),
    ).rejects.toThrow(/tar extract failed/i);
  });

  test("refuses an archive entry that traverses out with ..", async () => {
    // The archive's single entry is NAMED `../escape`. tar resolves that
    // against its cwd, so the source file sits one level above the cwd.
    const cwd = await makeDir("base", "inner");
    await writeFile(path.join(work, "base", "escape"), "pwned\n");
    const blob = await makeArchiveWithEntry(cwd, "../escape");

    const dest = await makeDir("dest");

    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/unsafe archive entry/i);

    // Had extraction run, `../escape` would have landed next to dest.
    const siblings = await readdir(work);
    expect(siblings).not.toContain("escape");
  });

  test("refuses a symlink entry with a safe name but an escaping target", async () => {
    // The link's NAME is harmless (`evil`: no `..`, not absolute) while its
    // TARGET escapes (`-> /etc`). A name-only listing pass would accept it,
    // and a later regular-file entry could then be written THROUGH the link
    // and out of targetDir. unpackInto has to reject any symlink entry.
    const src = await makeDir("linksrc");
    await symlink("/etc", path.join(src, "evil"));
    const blob = await packDir({ parentDir: work, entryName: "linksrc" });

    const dest = await makeDir("dest");
    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/symlink|link/i);

    // Nothing may have been materialized under dest.
    const extracted = await readdir(dest).catch(() => []);
    expect(extracted).not.toContain("linksrc");
  });

  test("refuses a relative symlink target that traverses with ..", async () => {
    const src = await makeDir("linksrc2");
    await symlink("../../../escape", path.join(src, "ln"));
    const blob = await packDir({ parentDir: work, entryName: "linksrc2" });

    const dest = await makeDir("dest2");
    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/symlink|link/i);
  });

  test("still round-trips a plain (link-free) directory after the link guard", async () => {
    const entry = "pkg@1.0.0";
    const src = path.join(work, "plainsrc");
    const entryDir = await makeDir("plainsrc", entry);
    await writeFile(path.join(entryDir, "index.js"), "module.exports = 2;\n");

    const blob = await packDir({ parentDir: src, entryName: entry });
    const dest = await makeDir("plaindest");
    await unpackInto({ targetDir: dest, bytes: blob });

    const indexText = await readFile(path.join(dest, entry, "index.js"), "utf8");
    expect(indexText).toBe("module.exports = 2;\n");
  });

  test("refuses an archive entry with an absolute path", async () => {
    const payloadDir = await makeDir("payload2");
    // The entry is stored under its absolute name (e.g. /tmp/.../abs.txt).
    const absName = path.join(payloadDir, "abs.txt");
    await writeFile(absName, "x\n");
    const blob = await makeArchiveWithEntry("/", absName);

    const dest = await makeDir("dest");
    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/unsafe archive entry/i);
  });
});
@@ -0,0 +1,192 @@
1
+ import { spawn } from "bun";
2
+
3
+ /**
4
+ * Archive helpers for the content-addressed distribution unit.
5
+ *
6
+ * The distributable blob for each `name@version` is a gzip-compressed tar
7
+ * of that package's Bun cache entry directory. On reconcile a host extracts
8
+ * every needed blob back into its `BUN_INSTALL_CACHE_DIR`, then runs
9
+ * `bun install --offline` which reconstructs `node_modules` with zero
10
+ * network (empirically verified). Bun does the hoisting, so we never have
11
+ * to reconstruct the flat tree ourselves - this keeps the model
12
+ * Bun-version-tolerant while preserving per-package delta sync.
13
+ *
14
+ * We shell to POSIX `tar` (universal on Linux/macOS containers) and use
15
+ * gzip (via tar's `-z`) rather than zstd so there's no external `zstd`
16
+ * binary dependency. The plan named zstd; gzip is the portable substitute.
17
+ */
18
+
19
/**
 * Spawn `tar` with `args` and resolve with everything it wrote to stdout.
 *
 * @param args - Arguments placed after the `tar` executable name.
 * @param cwd - Optional working directory for the child process.
 * @returns The raw stdout bytes.
 * @throws If `tar` exits non-zero; the message carries the exit code and the
 *   first 500 characters of stderr.
 */
async function runTar(args: string[], cwd?: string): Promise<Uint8Array> {
  const child = spawn({
    cmd: ["tar"].concat(args),
    cwd,
    stdout: "pipe",
    stderr: "pipe",
  });

  // Both pipes are read while waiting for the process to exit.
  const [output, diagnostics, status] = await Promise.all([
    new Response(child.stdout).bytes(),
    new Response(child.stderr).text(),
    child.exited,
  ]);

  if (status === 0) {
    return output;
  }
  throw new Error(`tar failed (exit ${status}): ${diagnostics.slice(0, 500)}`);
}
36
+
37
/**
 * Pack a single directory entry (`entryName`) located under `parentDir`
 * into a gzip-compressed tar and return the archive bytes. The archive
 * stores the entry by its relative name so it extracts back to the same
 * layout.
 *
 * @param parentDir - Directory `tar` runs in; `entryName` is resolved against it.
 * @param entryName - Name of the entry (relative to `parentDir`) to archive.
 * @returns The gzip-compressed tar bytes.
 * @throws If `tar` exits non-zero (propagated from `runTar`).
 */
export async function packDir({
  parentDir,
  entryName,
}: {
  parentDir: string;
  entryName: string;
}): Promise<Uint8Array> {
  // `--` ends option parsing, so an entry whose name begins with `-` is
  // archived as a file operand instead of being interpreted as tar options.
  return runTar(["-czf", "-", "--", entryName], parentDir);
}
51
+
52
/**
 * Zip-slip guard for one archive entry path.
 *
 * A path is unsafe when it is absolute (leading `/`, or a Windows drive
 * prefix such as `C:\` / `C:/`) or when any of its components, split on
 * either `/` or `\`, is exactly `..`. Surrounding whitespace is ignored, and
 * a blank line is treated as safe.
 *
 * @param entryPath - One line of a plain `tar -t` listing.
 * @returns A human-readable reason when the path is unsafe, otherwise
 *   `undefined`.
 */
function unsafeArchivePath(entryPath: string): string | undefined {
  const candidate = entryPath.trim();

  // tar can emit a trailing blank line; there is nothing to check on it.
  if (candidate.length === 0) {
    return undefined;
  }

  // Absolute (POSIX or Windows-style): would extract outside targetDir.
  const isAbsolute =
    candidate.startsWith("/") || /^[a-zA-Z]:[\\/]/.test(candidate);
  if (isAbsolute) {
    return `absolute path "${candidate}"`;
  }

  // A `..` component under either separator is path traversal.
  const climbsOut = candidate
    .split(/[\\/]/)
    .some((segment) => segment === "..");
  return climbsOut ? `parent-directory traversal in "${candidate}"` : undefined;
}
71
+
72
/**
 * Entry-type guard: accept only plain files and directories.
 *
 * Checking names alone ({@link unsafeArchivePath}) cannot see LINK TARGETS.
 * A symlink may carry an innocent NAME (`evil`) and an escaping TARGET
 * (`-> /etc`, `-> ../../..`); the name passes, and a later regular-file
 * entry can then be written THROUGH the link and out of `targetDir`.
 * Reconstructed Bun-cache trees consist of plain files and directories, so
 * symlinks, hardlinks, devices, fifos and anything else are refused
 * outright, independent of any tar extraction flag.
 *
 * @param line - One row of verbose `tar -tzvf` output. Its first character
 *   is read as the type flag (`-` file, `d` directory, `l` symlink,
 *   `h` hardlink, ...).
 * @returns A reason string for a rejected entry, or `undefined` for a
 *   file/directory row or a blank line.
 */
function unsafeArchiveEntryType(line: string): string | undefined {
  const row = line.trimEnd();

  // Trailing blank line of the listing.
  if (row.trim().length === 0) {
    return undefined;
  }

  const flag = row.charAt(0);
  switch (flag) {
    case "-": // regular file
    case "d": // directory
      return undefined;
    case "l": // symlink
    case "h": // hardlink
      return `link entry (type "${flag}") in "${row}"`;
    default:
      // Block/char device, fifo, socket, ...: never part of a package cache tree.
      return `non-regular entry (type "${flag}") in "${row}"`;
  }
}
100
+
101
/**
 * Extract a gzip-compressed tar blob into `targetDir`.
 *
 * Hardened against zip-slip on two axes, both checked BEFORE any bytes are
 * written (a violation aborts the whole extract, materializing nothing):
 *
 * 1. Entry PATHS must be relative and free of `..` components: a
 *    traversing / absolute name would write outside `targetDir`.
 * 2. Entry TYPES must be plain file or directory: a symlink/hardlink with
 *    a harmless name but an escaping target would let a later file write
 *    THROUGH it and escape `targetDir`. Reconstructed Bun-cache trees never
 *    contain links, so any link/device/fifo entry is rejected.
 *
 * We validate explicitly (rather than relying on a tar flag) so the behaviour
 * is identical across GNU and BSD/libarchive tar. Two listings of the same
 * bytes are taken: the plain one (`-tzf`) yields bare paths for the path
 * check, the verbose one (`-tzvf`) yields the type flag in column 0 for the
 * type check.
 *
 * @param targetDir - Existing directory to extract into (passed to `tar -C`).
 * @param bytes - The gzip-compressed tar blob.
 * @throws If either listing fails (e.g. a corrupt archive), if any entry is
 *   rejected by the path or type check, or if the extraction itself exits
 *   non-zero.
 */
export async function unpackInto({
  targetDir,
  bytes,
}: {
  targetDir: string;
  bytes: Uint8Array;
}): Promise<void> {
  // 1. List entries and validate BEFORE extract. Verbose rows look like
  //    `lrwxr-xr-x 0 user grp 0 <date> name -> target`, which is awkward to
  //    parse a path out of, so the plain listing feeds the path check and
  //    the verbose listing feeds the type check.
  const [pathListing, typeListing] = await Promise.all([
    runTarCapture(["-tzf", "-"], bytes),
    runTarCapture(["-tzvf", "-"], bytes),
  ]);
  for (const line of pathListing.split("\n")) {
    const reason = unsafeArchivePath(line);
    if (reason) {
      throw new Error(`refusing to extract unsafe archive entry: ${reason}`);
    }
  }
  for (const line of typeListing.split("\n")) {
    const reason = unsafeArchiveEntryType(line);
    if (reason) {
      throw new Error(`refusing to extract unsafe archive entry: ${reason}`);
    }
  }

  // 2. Extract. `--no-same-owner` makes extracted files belong to the
  //    extracting user even when running as root (where tar would otherwise
  //    restore the uid/gid recorded in the archive). Paths + types are
  //    already proven relative + confined + link-free above.
  const proc = spawn({
    cmd: ["tar", "-xzf", "-", "--no-same-owner", "-C", targetDir],
    stdin: bytes,
    stdout: "pipe",
    stderr: "pipe",
  });
  const [stderr, exitCode] = await Promise.all([
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  if (exitCode !== 0) {
    throw new Error(
      `tar extract failed (exit ${exitCode}): ${stderr.slice(0, 500)}`,
    );
  }
}
165
+
166
/**
 * Spawn `tar` with `args`, feed it `stdin`, and resolve with its stdout
 * decoded as text. Used for the listing passes of {@link unpackInto}.
 *
 * @param args - Arguments placed after the `tar` executable name.
 * @param stdin - Archive bytes piped to the child's standard input.
 * @returns Everything `tar` printed on stdout.
 * @throws If `tar` exits non-zero (for example on a corrupt archive). The
 *   message reads "tar extract failed ..." even though this helper only
 *   lists, and carries the exit code plus the first 500 characters of stderr.
 */
async function runTarCapture(
  args: string[],
  stdin: Uint8Array,
): Promise<string> {
  const child = spawn({
    cmd: ["tar"].concat(args),
    stdin,
    stdout: "pipe",
    stderr: "pipe",
  });

  // Both output pipes are read while waiting for the process to exit.
  const [listing, diagnostics, status] = await Promise.all([
    new Response(child.stdout).text(),
    new Response(child.stderr).text(),
    child.exited,
  ]);

  if (status === 0) {
    return listing;
  }
  throw new Error(
    `tar extract failed (exit ${status}): ${diagnostics.slice(0, 500)}`,
  );
}
@@ -0,0 +1,64 @@
1
+ import { readdir } from "node:fs/promises";
2
+
3
+ /**
4
+ * Map manifest entries to their on-disk Bun cache entry directory names.
5
+ *
6
+ * Bun extracts each package into `<cacheDir>/<name>@<version>@@@<n>` (the
7
+ * `@@@<n>` suffix is an internal dedupe counter). We discover the actual
8
+ * entry dir by listing the cache and matching the `<name>@<version>`
9
+ * prefix, rather than hardcoding the suffix - keeping us tolerant of Bun's
10
+ * internal counter. Scoped packages (`@scope/name`) live under a `@scope/`
11
+ * subdir in the cache.
12
+ */
13
+
14
+ export interface CacheEntryLocation {
15
+ /** Directory the entry sits *under* (its parent), for tar's cwd. */
16
+ parentDir: string;
17
+ /** The entry dir name relative to `parentDir`. */
18
+ entryName: string;
19
+ }
20
+
21
/**
 * Directory-name prefix of a cache entry: `<name>@<version>@@@`. The
 * counter that follows `@@@` on disk is deliberately left out so any
 * counter value matches.
 */
function specPrefix(name: string, version: string): string {
  return [name, version].join("@") + "@@@";
}
24
+
25
/**
 * Find the cache entry dir for `name@version`.
 *
 * Unscoped packages are looked up directly in `cacheDir`. Scoped packages
 * (`@scope/name`) are looked up in `<cacheDir>/@scope`, matching on the bare
 * `name` part. In both cases the match is by the `<name>@<version>@@@`
 * prefix, so the counter after `@@@` does not need to be known.
 *
 * @param cacheDir - Root of the Bun install cache.
 * @param name - Package name, optionally scoped (`@scope/name`).
 * @param version - Exact resolved version.
 * @returns The entry's parent dir + dir name, or `undefined` if the cache
 *   doesn't contain it (caller treats as a resolve error). A scoped name
 *   that is not exactly `@scope/name` (no `/`, an empty name part, or extra
 *   `/` segments) also yields `undefined` rather than being searched for
 *   under a mangled prefix.
 */
export async function findCacheEntry({
  cacheDir,
  name,
  version,
}: {
  cacheDir: string;
  name: string;
  version: string;
}): Promise<CacheEntryLocation | undefined> {
  if (!name.startsWith("@")) {
    const match = await firstMatch(cacheDir, specPrefix(name, version));
    if (!match) return undefined;
    return { parentDir: cacheDir, entryName: match };
  }

  // Scoped: `<cacheDir>/@scope/name@version@@@n`
  const slash = name.indexOf("/");
  if (slash === -1) return undefined; // "@scope" alone: no package part
  const scope = name.slice(0, slash);
  const bare = name.slice(slash + 1);
  if (bare === "" || bare.includes("/")) return undefined; // malformed name

  const scopeDir = `${cacheDir}/${scope}`;
  const match = await firstMatch(scopeDir, specPrefix(bare, version));
  if (!match) return undefined;
  return { parentDir: scopeDir, entryName: match };
}
52
+
53
/**
 * Return the first name in `dir` (in the order `readdir` reports them) that
 * starts with `prefix`.
 *
 * Best-effort: any failure to list `dir` (for instance because it does not
 * exist) is treated the same as "no match".
 *
 * @returns The matching entry name, or `undefined` when nothing matches or
 *   the directory cannot be read.
 */
async function firstMatch(
  dir: string,
  prefix: string,
): Promise<string | undefined> {
  let entries: string[];
  try {
    entries = await readdir(dir);
  } catch {
    return undefined;
  }

  for (const entry of entries) {
    if (entry.startsWith(prefix)) {
      return entry;
    }
  }
  return undefined;
}