@checkstack/script-packages-backend 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +273 -0
- package/drizzle/0000_flashy_squadron_supreme.sql +63 -0
- package/drizzle/0001_flawless_drax.sql +15 -0
- package/drizzle/meta/0000_snapshot.json +395 -0
- package/drizzle/meta/0001_snapshot.json +491 -0
- package/drizzle/meta/_journal.json +20 -0
- package/drizzle.config.ts +7 -0
- package/package.json +32 -0
- package/src/atomic-symlink.test.ts +47 -0
- package/src/atomic-symlink.ts +66 -0
- package/src/blob-gc-runner.test.ts +120 -0
- package/src/blob-gc-runner.ts +139 -0
- package/src/blob-gc.test.ts +182 -0
- package/src/blob-gc.ts +161 -0
- package/src/blob-hash.test.ts +70 -0
- package/src/blob-hash.ts +56 -0
- package/src/blob-store-registry.test.ts +78 -0
- package/src/blob-store-registry.ts +75 -0
- package/src/blob-store.ts +51 -0
- package/src/cache-archive.test.ts +164 -0
- package/src/cache-archive.ts +192 -0
- package/src/cache-layout.ts +64 -0
- package/src/data-dir.test.ts +41 -0
- package/src/data-dir.ts +42 -0
- package/src/e2e-install-reconcile.test.ts +121 -0
- package/src/hooks.ts +20 -0
- package/src/index.ts +594 -0
- package/src/install-controller.test.ts +257 -0
- package/src/install-controller.ts +144 -0
- package/src/install-service.test.ts +104 -0
- package/src/install-service.ts +116 -0
- package/src/install-state-store.ts +131 -0
- package/src/lockfile.test.ts +60 -0
- package/src/lockfile.ts +0 -0
- package/src/npmrc.test.ts +48 -0
- package/src/npmrc.ts +42 -0
- package/src/package-types.test.ts +293 -0
- package/src/package-types.ts +408 -0
- package/src/parse-bun-lock.test.ts +62 -0
- package/src/parse-bun-lock.ts +59 -0
- package/src/reconcile-diff.test.ts +41 -0
- package/src/reconcile-diff.ts +26 -0
- package/src/reconcile-fs.ts +199 -0
- package/src/reconciler.test.ts +289 -0
- package/src/reconciler.ts +81 -0
- package/src/registry-client.test.ts +314 -0
- package/src/registry-client.ts +0 -0
- package/src/registry-request-config.ts +63 -0
- package/src/registry-token.test.ts +124 -0
- package/src/registry-token.ts +104 -0
- package/src/resolution-root.test.ts +82 -0
- package/src/resolution-root.ts +127 -0
- package/src/resolver.test.ts +133 -0
- package/src/resolver.ts +132 -0
- package/src/router.ts +273 -0
- package/src/schema.ts +166 -0
- package/src/size-cap.test.ts +32 -0
- package/src/size-cap.ts +40 -0
- package/src/storage-migration.test.ts +318 -0
- package/src/storage-migration.ts +213 -0
- package/src/stores.ts +533 -0
- package/src/tree-gc.test.ts +184 -0
- package/src/tree-gc.ts +160 -0
- package/src/tree-retirement.ts +81 -0
- package/src/type-acquisition-route.ts +178 -0
- package/tsconfig.json +23 -0
package/src/blob-hash.ts
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import type { ManifestEntry } from "@checkstack/script-packages-common";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Content hashing + verification for distributed blobs.
|
|
6
|
+
*
|
|
7
|
+
* A package's distributable blob is our gzip-tar of its Bun cache entry, NOT
|
|
8
|
+
* the upstream npm tarball — so the SRI `integrity` key (which hashes the
|
|
9
|
+
* npm tarball) does NOT cover the transported bytes. To detect corruption or
|
|
10
|
+
* tampering in transit (shared blob store on core, HTTP/WS on satellites) we
|
|
11
|
+
* additionally carry `blobSha256` (sha-256 of the blob) on each manifest
|
|
12
|
+
* entry and verify it before extracting.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/** Bytes as they arrive at the hash boundary from any blob source. */
export type BlobBytes = Uint8Array | ArrayBuffer;

/**
 * Coerce incoming blob bytes into a `Uint8Array` at the consume boundary.
 *
 * Blob sources do not agree on the view they return: the central resolver
 * and the Postgres codec yield a `Uint8Array`, whereas
 * `Bun.S3File.arrayBuffer()` (and any `Response.arrayBuffer()` transport)
 * yields a raw `ArrayBuffer`. Node's `crypto.Hash.update()` rejects a bare
 * `ArrayBuffer` ("Received an instance of ArrayBuffer"), so that case is
 * wrapped in a view here.
 *
 * @param bytes - Blob bytes in either accepted representation.
 * @returns `bytes` itself when it already is a `Uint8Array`; otherwise a new
 *   `Uint8Array` view over the given buffer. The view covers the same bytes,
 *   so a content hash computed from it is unchanged.
 */
export function toUint8Array(bytes: BlobBytes): Uint8Array {
  if (bytes instanceof Uint8Array) {
    // Already a view: hand back the very same instance.
    return bytes;
  }
  return new Uint8Array(bytes);
}
|
|
32
|
+
|
|
33
|
+
/**
 * SHA-256 of a blob's bytes, rendered as a hex string.
 *
 * @param bytes - Blob bytes; normalized through `toUint8Array` first because
 *   the hash object does not accept a bare `ArrayBuffer`.
 * @returns The hex-encoded SHA-256 digest.
 */
export function blobSha256(bytes: BlobBytes): string {
  const hasher = createHash("sha256");
  hasher.update(toUint8Array(bytes));
  return hasher.digest("hex");
}
|
|
37
|
+
|
|
38
|
+
/**
 * Check `bytes` against the `blobSha256` recorded on a manifest entry.
 *
 * @param entry - The manifest entry (only its `blobSha256` field is read).
 * @param bytes - The blob bytes about to be materialized.
 * @returns `{ ok: true }` when the computed hash equals the recorded one, or
 *   when the entry has no recorded hash (backward-safe: an entry that
 *   predates the field gives nothing to verify against). On a mismatch,
 *   `{ ok: false, expected, actual }` so the caller can report both hashes
 *   and refuse to materialize the blob.
 */
export function verifyBlobSha256({
  entry,
  bytes,
}: {
  entry: Pick<ManifestEntry, "blobSha256">;
  bytes: BlobBytes;
}): { ok: true } | { ok: false; expected: string; actual: string } {
  const expected = entry.blobSha256;
  // A missing (or empty) recorded hash means there is nothing to compare.
  if (!expected) {
    return { ok: true };
  }
  const actual = blobSha256(bytes);
  return actual === expected
    ? { ok: true }
    : { ok: false, expected, actual };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import type { BlobStore } from "./blob-store";
|
|
3
|
+
import { createBlobStoreRegistry } from "./blob-store-registry";
|
|
4
|
+
|
|
5
|
+
/**
 * In-memory `BlobStore` test double backed by a `Map`.
 *
 * @param id - Backend id reported by the store.
 * @param seed - Optional initial contents: integrity key -> text, stored as
 *   the UTF-8 encoding of the text.
 */
function memStore(id: string, seed: Record<string, string> = {}): BlobStore {
  const encoder = new TextEncoder();
  const blobs = new Map<string, Uint8Array>();
  for (const [integrity, text] of Object.entries(seed)) {
    blobs.set(integrity, encoder.encode(text));
  }

  return {
    id,
    put: async ({
      integrity,
      bytes,
    }: {
      integrity: string;
      bytes: Uint8Array;
    }): Promise<void> => {
      blobs.set(integrity, bytes);
    },
    get: async ({ integrity }: { integrity: string }) => blobs.get(integrity),
    has: async ({ integrity }: { integrity: string }) => blobs.has(integrity),
    delete: async ({ integrity }: { integrity: string }): Promise<void> => {
      blobs.delete(integrity);
    },
    list: async () => Array.from(blobs.keys()),
  };
}
|
|
28
|
+
|
|
29
|
+
// Unit tests for the registry returned by `createBlobStoreRegistry`, driven
// entirely by `memStore` doubles so no real backend is involved.
describe("BlobStoreRegistry", () => {
  test("registers and resolves stores by id", () => {
    const reg = createBlobStoreRegistry();
    reg.register(memStore("postgres"));
    reg.register(memStore("s3"));
    // Sorted so the assertion does not depend on the order ids() returns.
    expect(reg.ids().sort()).toEqual(["postgres", "s3"]);
    expect(reg.has("s3")).toBe(true);
    expect(reg.get("postgres").id).toBe("postgres");
  });

  test("throws a helpful error for an unregistered backend", () => {
    const reg = createBlobStoreRegistry();
    reg.register(memStore("postgres"));
    // The message must say "not registered" and list the available ids.
    expect(() => reg.get("s3")).toThrow(/not registered.*postgres/i);
  });

  test("reads from the active backend when present", async () => {
    const reg = createBlobStoreRegistry();
    // Both backends hold the key with different payloads, so the payload
    // read back tells us which backend actually served it.
    reg.register(memStore("postgres", { "sha-1": "from-pg" }));
    reg.register(memStore("s3", { "sha-1": "from-s3" }));
    const res = await reg.readWithFallback({
      integrity: "sha-1",
      activeBackendId: "s3",
    });
    expect(res?.servedBy).toBe("s3");
    expect(new TextDecoder().decode(res?.bytes)).toBe("from-s3");
  });

  test("falls back to another backend when the active one lacks the blob", async () => {
    const reg = createBlobStoreRegistry();
    // Only the non-active backend has the blob (mid-migration shape).
    reg.register(memStore("postgres", { "sha-2": "only-in-pg" }));
    reg.register(memStore("s3"));
    const res = await reg.readWithFallback({
      integrity: "sha-2",
      activeBackendId: "s3",
    });
    expect(res?.servedBy).toBe("postgres");
    expect(new TextDecoder().decode(res?.bytes)).toBe("only-in-pg");
  });

  test("returns undefined when no backend has the blob", async () => {
    const reg = createBlobStoreRegistry();
    reg.register(memStore("postgres"));
    const res = await reg.readWithFallback({
      integrity: "missing",
      activeBackendId: "postgres",
    });
    expect(res).toBeUndefined();
  });
});
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import type { BlobStore } from "./blob-store";
|
|
2
|
+
|
|
3
|
+
/**
 * Collects every registered {@link BlobStore} (one per store plugin) and
 * resolves the active one by id. Also offers a read-with-fallback used
 * during a storage migration: when the active backend doesn't yet hold a
 * blob, fall back to the other registered backends so script execution
 * never breaks mid-migration.
 */
export interface BlobStoreRegistry {
  /**
   * Add `store` to the registry under its `id`. In
   * `createBlobStoreRegistry`, registering a second store with an id that is
   * already present replaces the earlier one.
   */
  register(store: BlobStore): void;
  /** All registered store ids (for the admin backend selector). */
  ids(): string[];
  /** Whether a store with this id has been registered. */
  has(id: string): boolean;
  /** Resolve a specific store by id. Throws if not registered. */
  get(id: string): BlobStore;
  /**
   * Read a blob from `activeBackendId`, falling back across the other
   * registered backends if the active one lacks it. Returns the bytes +
   * the id of the backend that served them, or undefined if none have it.
   */
  readWithFallback(input: {
    /** Integrity key of the blob to read. */
    integrity: string;
    /** Id of the backend to try first. */
    activeBackendId: string;
  }): Promise<{ bytes: Uint8Array; servedBy: string } | undefined>;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Build an empty `BlobStoreRegistry` that keeps its stores in an in-memory
 * `Map` keyed by store id.
 */
export function createBlobStoreRegistry(): BlobStoreRegistry {
  const byId = new Map<string, BlobStore>();

  // Ask one store for the blob; tag a hit with the id that served it.
  const readFrom = async (
    id: string,
    store: BlobStore,
    integrity: string,
  ): Promise<{ bytes: Uint8Array; servedBy: string } | undefined> => {
    const bytes = await store.get({ integrity });
    return bytes === undefined ? undefined : { bytes, servedBy: id };
  };

  return {
    register: (store: BlobStore): void => {
      // Keyed by id: a later registration with the same id replaces the
      // earlier one.
      byId.set(store.id, store);
    },

    ids: (): string[] => Array.from(byId.keys()),

    has: (id: string): boolean => byId.has(id),

    get: (id: string): BlobStore => {
      const found = byId.get(id);
      if (found !== undefined) {
        return found;
      }
      // List what IS registered so a misconfigured id is easy to diagnose.
      throw new Error(
        `Blob store backend "${id}" is not registered. Available: ${
          Array.from(byId.keys()).join(", ") || "(none)"
        }`,
      );
    },

    readWithFallback: async ({
      integrity,
      activeBackendId,
    }: {
      integrity: string;
      activeBackendId: string;
    }) => {
      // Preferred path: the active backend, when it is registered at all.
      const active = byId.get(activeBackendId);
      const primary =
        active === undefined
          ? undefined
          : await readFrom(activeBackendId, active, integrity);
      if (primary !== undefined) {
        return primary;
      }

      // Fallback across the rest (migration in flight, partial state),
      // in registration order.
      for (const [id, store] of byId) {
        if (id !== activeBackendId) {
          const hit = await readFrom(id, store, integrity);
          if (hit !== undefined) {
            return hit;
          }
        }
      }
      return undefined;
    },
  };
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { createExtensionPoint } from "@checkstack/backend-api";
|
|
2
|
+
import type { PluginMetadata } from "@checkstack/common";
|
|
3
|
+
|
|
4
|
+
/**
 * Pluggable, content-addressed blob persistence for script-package
 * artifacts. Blobs are keyed by their integrity hash (the npm tarball /
 * Bun-cache entry per `name@version`), so the integrity is the stable
 * identity across backends - only the locator differs.
 *
 * Two built-ins ship as plugins: `script-packages-store-postgres` (the
 * default, Postgres large-objects, zero extra infra) and
 * `script-packages-store-s3` (preferred when configured). Adding a third
 * later is just another plugin implementing this interface; no schema
 * change.
 *
 * Implementations MUST be content-addressed and idempotent: `put` of an
 * already-present integrity is a no-op (or overwrite with identical bytes).
 */
export interface BlobStore {
  /** Stable backend id recorded in `script_package_blob.backend`. */
  readonly id: string;

  /** Store the (already-compressed) bytes for `integrity`. Idempotent. */
  put(input: { integrity: string; bytes: Uint8Array }): Promise<void>;

  /**
   * Fetch the bytes for `integrity`, or undefined if this backend lacks it.
   * A missing blob is signalled by `undefined`, not by a rejection; the
   * registry's read-with-fallback relies on that to move on to the next
   * backend.
   */
  get(input: { integrity: string }): Promise<Uint8Array | undefined>;

  /** Whether this backend holds `integrity`. */
  has(input: { integrity: string }): Promise<boolean>;

  /** Delete the blob for `integrity` (GC / post-migration cleanup). Idempotent. */
  delete(input: { integrity: string }): Promise<void>;

  /** Every integrity this backend currently holds (for migration / GC). */
  list(): Promise<string[]>;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Extension point a blob-store plugin registers its implementation with.
 * The active backend is selected via `script_package_storage_config`; the
 * backend resolves the registered store by id.
 */
export interface BlobStoreExtensionPoint {
  /**
   * Register `store` as an available blob backend.
   *
   * NOTE(review): `metadata` presumably identifies the plugin doing the
   * registration - confirm against the `PluginMetadata` definition.
   */
  registerBlobStore(store: BlobStore, metadata: PluginMetadata): void;
}

/**
 * Extension-point handle for {@link BlobStoreExtensionPoint}, created under
 * the key `"script-packages.blobStoreExtensionPoint"`.
 */
export const blobStoreExtensionPoint =
  createExtensionPoint<BlobStoreExtensionPoint>(
    "script-packages.blobStoreExtensionPoint",
  );
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { spawn } from "bun";
|
|
3
|
+
import {
|
|
4
|
+
mkdtemp,
|
|
5
|
+
mkdir,
|
|
6
|
+
writeFile,
|
|
7
|
+
readFile,
|
|
8
|
+
rm,
|
|
9
|
+
readdir,
|
|
10
|
+
symlink,
|
|
11
|
+
} from "node:fs/promises";
|
|
12
|
+
import { tmpdir } from "node:os";
|
|
13
|
+
import path from "node:path";
|
|
14
|
+
import { packDir, unpackInto } from "./cache-archive";
|
|
15
|
+
|
|
16
|
+
/**
 * Build a gzip tar whose single entry is `entryName` (allowing `..`/abs).
 *
 * @param cwd - Directory tar runs in; `entryName` is resolved against it.
 * @param entryName - Entry name to store verbatim, e.g. `../escape` or an
 *   absolute path.
 * @returns The gzip-compressed archive bytes read from tar's stdout.
 * @throws Error when tar exits non-zero; the message includes the exit code
 *   and the first 500 characters of tar's stderr.
 */
async function makeArchiveWithEntry(
  cwd: string,
  entryName: string,
): Promise<Uint8Array> {
  // `-P`/`--absolute-names` lets us store traversing or absolute names that
  // tar would otherwise strip — exactly the malicious shape we defend against.
  const proc = spawn({
    cmd: ["tar", "-czf", "-", "-P", entryName],
    cwd,
    stdout: "pipe",
    stderr: "pipe",
  });
  // Drain stderr alongside stdout: it is piped, so leaving it unread both
  // discards tar's diagnostics and risks stalling tar on a full pipe.
  const [bytes, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).bytes(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  if (exitCode !== 0) {
    throw new Error(
      `failed to build malicious archive (exit ${exitCode}): ${stderr.slice(0, 500)}`,
    );
  }
  return bytes;
}
|
|
36
|
+
|
|
37
|
+
// Integration tests for `packDir` / `unpackInto`. They shell out to the real
// `tar` binary and work inside a fresh temp directory per test.
describe("cache-archive pack/unpack", () => {
  // Per-test scratch directory; created in beforeEach, removed in afterEach.
  let work: string;

  beforeEach(async () => {
    work = await mkdtemp(path.join(tmpdir(), "cs-archive-"));
  });
  afterEach(async () => {
    await rm(work, { recursive: true, force: true });
  });

  test("round-trips a directory tree through tar+gzip", async () => {
    // Source tree: <work>/src/pkg@1.0.0/{index.js, sub/x.txt}
    const src = path.join(work, "src");
    const entry = "pkg@1.0.0";
    const entryDir = path.join(src, entry);
    await mkdir(path.join(entryDir, "sub"), { recursive: true });
    await writeFile(path.join(entryDir, "index.js"), "module.exports = 1;\n");
    await writeFile(path.join(entryDir, "sub", "x.txt"), "deep\n");

    const blob = await packDir({ parentDir: src, entryName: entry });
    expect(blob.byteLength).toBeGreaterThan(0);

    const dest = path.join(work, "dest");
    await mkdir(dest, { recursive: true });
    await unpackInto({ targetDir: dest, bytes: blob });

    // The entry must reappear under dest with the same relative layout.
    expect(await readFile(path.join(dest, entry, "index.js"), "utf8")).toBe(
      "module.exports = 1;\n",
    );
    expect(await readFile(path.join(dest, entry, "sub", "x.txt"), "utf8")).toBe(
      "deep\n",
    );
  });

  test("throws on a corrupt archive", async () => {
    const dest = path.join(work, "dest");
    await mkdir(dest, { recursive: true });
    // Four arbitrary bytes are not a gzip stream, so tar must fail.
    await expect(
      unpackInto({ targetDir: dest, bytes: new Uint8Array([1, 2, 3, 4]) }),
    ).rejects.toThrow(/tar extract failed/i);
  });

  test("refuses an archive entry that traverses out with ..", async () => {
    // Build an archive whose entry NAME is `../escape`. tar resolves the
    // name relative to its cwd, so the file lives one level up from cwd.
    const base = path.join(work, "base");
    const cwd = path.join(base, "inner");
    await mkdir(cwd, { recursive: true });
    await writeFile(path.join(base, "escape"), "pwned\n");
    const blob = await makeArchiveWithEntry(cwd, "../escape");

    const dest = path.join(work, "dest");
    await mkdir(dest, { recursive: true });

    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/unsafe archive entry/i);

    // Nothing was written outside dest (the sibling "escape" must not appear).
    const siblings = await readdir(work);
    expect(siblings).not.toContain("escape");
  });

  test("refuses a symlink entry with a safe name but an escaping target", async () => {
    // A symlink entry whose NAME is harmless (`evil`, no `..`/abs) but whose
    // TARGET escapes (`-> /etc`). The old name-only listing pass let it
    // through, then a later regular-file entry could write THROUGH the link
    // and escape targetDir. unpackInto must reject any symlink entry outright.
    const src = path.join(work, "linksrc");
    await mkdir(src, { recursive: true });
    await symlink("/etc", path.join(src, "evil"));
    const blob = await packDir({ parentDir: work, entryName: "linksrc" });

    const dest = path.join(work, "dest");
    await mkdir(dest, { recursive: true });
    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/symlink|link/i);

    // Nothing materialized: the link must not exist under dest.
    expect(
      await readdir(dest).catch(() => []),
    ).not.toContain("linksrc");
  });

  test("refuses a relative symlink target that traverses with ..", async () => {
    // Same as above, but the link target is relative and climbs with `..`.
    const src = path.join(work, "linksrc2");
    await mkdir(src, { recursive: true });
    await symlink("../../../escape", path.join(src, "ln"));
    const blob = await packDir({ parentDir: work, entryName: "linksrc2" });

    const dest = path.join(work, "dest2");
    await mkdir(dest, { recursive: true });
    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/symlink|link/i);
  });

  test("still round-trips a plain (link-free) directory after the link guard", async () => {
    // Regression guard: the link rejection must not break ordinary trees.
    const src = path.join(work, "plainsrc");
    await mkdir(path.join(src, "pkg@1.0.0"), { recursive: true });
    await writeFile(
      path.join(src, "pkg@1.0.0", "index.js"),
      "module.exports = 2;\n",
    );
    const blob = await packDir({ parentDir: src, entryName: "pkg@1.0.0" });
    const dest = path.join(work, "plaindest");
    await mkdir(dest, { recursive: true });
    await unpackInto({ targetDir: dest, bytes: blob });
    expect(
      await readFile(path.join(dest, "pkg@1.0.0", "index.js"), "utf8"),
    ).toBe("module.exports = 2;\n");
  });

  test("refuses an archive entry with an absolute path", async () => {
    const payloadDir = path.join(work, "payload2");
    await mkdir(payloadDir, { recursive: true });
    await writeFile(path.join(payloadDir, "abs.txt"), "x\n");
    // Absolute entry name (e.g. /tmp/.../abs.txt).
    const absName = path.join(payloadDir, "abs.txt");
    const blob = await makeArchiveWithEntry("/", absName);

    const dest = path.join(work, "dest");
    await mkdir(dest, { recursive: true });
    await expect(
      unpackInto({ targetDir: dest, bytes: blob }),
    ).rejects.toThrow(/unsafe archive entry/i);
  });
});
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { spawn } from "bun";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Archive helpers for the content-addressed distribution unit.
|
|
5
|
+
*
|
|
6
|
+
* The distributable blob for each `name@version` is a gzip-compressed tar
|
|
7
|
+
* of that package's Bun cache entry directory. On reconcile a host extracts
|
|
8
|
+
* every needed blob back into its `BUN_INSTALL_CACHE_DIR`, then runs
|
|
9
|
+
* `bun install --offline` which reconstructs `node_modules` with zero
|
|
10
|
+
* network (empirically verified). Bun does the hoisting, so we never have
|
|
11
|
+
* to reconstruct the flat tree ourselves - this keeps the model
|
|
12
|
+
* Bun-version-tolerant while preserving per-package delta sync.
|
|
13
|
+
*
|
|
14
|
+
* We shell to POSIX `tar` (universal on Linux/macOS containers) and use
|
|
15
|
+
* gzip (via tar's `-z`) rather than zstd so there's no external `zstd`
|
|
16
|
+
* binary dependency. The plan named zstd; gzip is the portable substitute.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
 * Run `tar` with `args` and collect its stdout as raw bytes.
 *
 * @param args - Arguments placed after the `tar` executable name.
 * @param cwd - Optional working directory for the child process.
 * @returns Everything tar wrote to stdout.
 * @throws Error when tar exits non-zero; the message carries the exit code
 *   and the first 500 characters of stderr.
 */
async function runTar(args: string[], cwd?: string): Promise<Uint8Array> {
  const child = spawn({
    cmd: ["tar", ...args],
    cwd,
    stdout: "pipe",
    stderr: "pipe",
  });

  // Read both pipes while waiting for the exit code.
  const archiveBytes = new Response(child.stdout).bytes();
  const errorText = new Response(child.stderr).text();
  const [output, diagnostics, code] = await Promise.all([
    archiveBytes,
    errorText,
    child.exited,
  ]);

  if (code === 0) {
    return output;
  }
  throw new Error(`tar failed (exit ${code}): ${diagnostics.slice(0, 500)}`);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Pack a single directory entry (`entryName`) located under `parentDir`
 * into a gzip-compressed tar streamed to stdout. The archive stores the
 * entry by its relative name so it extracts back to the same layout.
 *
 * @param parentDir - Directory tar runs in (the entry's parent).
 * @param entryName - Name of the entry relative to `parentDir`.
 * @returns The gzip-compressed tar bytes.
 * @throws Error (from `runTar`) when tar exits non-zero.
 */
export async function packDir({
  parentDir,
  entryName,
}: {
  parentDir: string;
  entryName: string;
}): Promise<Uint8Array> {
  // `--` ends option parsing so an entry whose name begins with `-` is
  // archived as a file operand instead of being read as a tar option.
  return runTar(["-czf", "-", "--", entryName], parentDir);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Zip-slip guard for one archive entry path.
 *
 * A path is unsafe when it is absolute (POSIX `/...` or Windows-style
 * `X:\...` / `X:/...`) or when any of its components, split on either `/`
 * or `\`, is exactly `..`.
 *
 * @param entryPath - One line of the plain tar listing.
 * @returns A human-readable reason when the path is unsafe; `undefined` when
 *   it is safe or when the line is blank after trimming.
 */
function unsafeArchivePath(entryPath: string): string | undefined {
  const candidate = entryPath.trim();
  if (candidate.length === 0) {
    // tar can emit a trailing blank line
    return undefined;
  }

  // Absolute (POSIX or Windows-style) — would extract outside targetDir.
  const posixAbsolute = candidate.startsWith("/");
  const windowsAbsolute = /^[a-zA-Z]:[\\/]/.test(candidate);
  if (posixAbsolute || windowsAbsolute) {
    return `absolute path "${candidate}"`;
  }

  // Any `..` segment (handle both / and \ separators) — path traversal.
  const climbsOut = candidate.split(/[\\/]/).some((part) => part === "..");
  return climbsOut
    ? `parent-directory traversal in "${candidate}"`
    : undefined;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Reject any archive entry that is NOT a plain file or directory.
 *
 * The path check alone cannot see LINK TARGETS: a symlink with a harmless
 * NAME (e.g. `evil`) but an escaping TARGET (`-> /etc`, `-> ../../..`)
 * passes it, after which a later regular-file entry can be written THROUGH
 * the link and land outside `targetDir`. Reconstructed Bun-cache trees are
 * plain files + dirs, so every symlink/hardlink/device/fifo entry is
 * rejected outright (defence in depth, independent of any tar extraction
 * flag).
 *
 * @param line - One row of `tar -tzvf` verbose output. Its first character
 *   is the POSIX type flag (`-` file, `d` dir, `l` symlink, `h` hardlink,
 *   etc.) on both GNU and BSD/libarchive tar.
 * @returns The offending reason, or `undefined` for a file/dir row or a
 *   blank line.
 */
function unsafeArchiveEntryType(line: string): string | undefined {
  const row = line.trimEnd();
  if (row.trim() === "") {
    // trailing blank line
    return undefined;
  }

  const flag = row[0];
  switch (flag) {
    case "-": // regular file
    case "d": // directory
      return undefined;
    case "l": // symlink
    case "h": // hardlink
      return `link entry (type "${flag}") in "${row}"`;
    default:
      // Anything else (block/char device "b"/"c", fifo "p", socket "s", …)
      // is never part of a package cache tree → reject.
      return `non-regular entry (type "${flag}") in "${row}"`;
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Extract a gzip-compressed tar blob into `targetDir`.
 *
 * Hardened against zip-slip on two axes, both checked BEFORE any bytes are
 * written (a violation aborts the whole extract, materializing nothing):
 *
 *  1. Entry PATHS must be relative and free of `..` components — a
 *     traversing / absolute name would write outside `targetDir`.
 *  2. Entry TYPES must be plain file or directory — a symlink/hardlink with
 *     a harmless name but an escaping target would let a later file write
 *     THROUGH it and escape `targetDir`. Reconstructed Bun-cache trees never
 *     contain links, so any link/device/fifo entry is rejected.
 *
 * We validate explicitly (rather than relying on a tar flag) so the behaviour
 * is identical across GNU and BSD/libarchive tar. Two listings of the same
 * bytes are taken concurrently: the plain listing (`-tzf`) yields bare paths
 * for check 1, and the verbose listing (`-tzvf`) yields the type flag in
 * column 0 for check 2.
 *
 * @param targetDir - Existing directory to extract into (passed to `-C`).
 * @param bytes - The gzip-compressed tar blob.
 * @throws Error `refusing to extract unsafe archive entry: …` when a path or
 *   type check fails, or `tar extract failed (exit N): …` when tar itself
 *   fails while listing or extracting (e.g. a corrupt archive).
 */
export async function unpackInto({
  targetDir,
  bytes,
}: {
  targetDir: string;
  bytes: Uint8Array;
}): Promise<void> {
  // 1. List entries and validate BEFORE extract. `-tzvf` rows look like
  //    `lrwxr-xr-x 0 user grp 0 <date> name -> target`, which is awkward to
  //    parse for the name, so the plain listing feeds the path check and the
  //    verbose listing feeds the type check.
  const [pathListing, typeListing] = await Promise.all([
    runTarCapture(["-tzf", "-"], bytes),
    runTarCapture(["-tzvf", "-"], bytes),
  ]);
  for (const line of pathListing.split("\n")) {
    const reason = unsafeArchivePath(line);
    if (reason) {
      throw new Error(`refusing to extract unsafe archive entry: ${reason}`);
    }
  }
  for (const line of typeListing.split("\n")) {
    const reason = unsafeArchiveEntryType(line);
    if (reason) {
      throw new Error(`refusing to extract unsafe archive entry: ${reason}`);
    }
  }

  // 2. Extract. `--no-same-owner` keeps tar from restoring the archive's
  //    uid/gid when running as root, so extracted files belong to the
  //    extracting user; paths + types are already proven relative +
  //    confined + link-free above.
  const proc = spawn({
    cmd: ["tar", "-xzf", "-", "--no-same-owner", "-C", targetDir],
    stdin: bytes,
    stdout: "pipe",
    stderr: "pipe",
  });
  const [stderr, exitCode] = await Promise.all([
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  if (exitCode !== 0) {
    throw new Error(
      `tar extract failed (exit ${exitCode}): ${stderr.slice(0, 500)}`,
    );
  }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Run `tar` with the blob fed to stdin and return what it printed to stdout
 * as text. Used by the listing pass in `unpackInto`.
 *
 * @param args - Arguments placed after the `tar` executable name.
 * @param stdin - Archive bytes piped to tar's stdin.
 * @returns tar's stdout decoded as text.
 * @throws Error `tar extract failed (exit N): …` on a non-zero exit (e.g. a
 *   corrupt archive), with the first 500 characters of stderr.
 */
async function runTarCapture(
  args: string[],
  stdin: Uint8Array,
): Promise<string> {
  const child = spawn({
    cmd: ["tar", ...args],
    stdin,
    stdout: "pipe",
    stderr: "pipe",
  });

  // Read both pipes while waiting for the exit code.
  const listingText = new Response(child.stdout).text();
  const errorText = new Response(child.stderr).text();
  const [listing, diagnostics, code] = await Promise.all([
    listingText,
    errorText,
    child.exited,
  ]);

  if (code === 0) {
    return listing;
  }
  throw new Error(
    `tar extract failed (exit ${code}): ${diagnostics.slice(0, 500)}`,
  );
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { readdir } from "node:fs/promises";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Map manifest entries to their on-disk Bun cache entry directory names.
|
|
5
|
+
*
|
|
6
|
+
* Bun extracts each package into `<cacheDir>/<name>@<version>@@@<n>` (the
|
|
7
|
+
* `@@@<n>` suffix is an internal dedupe counter). We discover the actual
|
|
8
|
+
* entry dir by listing the cache and matching the `<name>@<version>`
|
|
9
|
+
* prefix, rather than hardcoding the suffix - keeping us tolerant of Bun's
|
|
10
|
+
* internal counter. Scoped packages (`@scope/name`) live under a `@scope/`
|
|
11
|
+
* subdir in the cache.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * Where a package's cache entry directory sits on disk, split into the two
 * pieces `packDir` takes: the directory to run tar in and the entry name
 * relative to it.
 */
export interface CacheEntryLocation {
  /** Directory the entry sits *under* (its parent), for tar's cwd. */
  parentDir: string;
  /** The entry dir name relative to `parentDir`. */
  entryName: string;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Directory-name prefix for a cache entry: `<name>@<version>@@@`. The
 * trailing `@@@` is included so a match pins the full version and leaves
 * only the trailing counter open.
 */
function specPrefix(name: string, version: string): string {
  return [name, version].join("@") + "@@@";
}
|
|
24
|
+
|
|
25
|
+
/**
 * Find the cache entry dir for `name@version`. Returns undefined if the
 * cache doesn't contain it (caller treats as a resolve error).
 *
 * Unscoped packages are looked up directly in `cacheDir`. Scoped packages
 * (`@scope/name`) are looked up in `<cacheDir>/@scope` by their bare name.
 *
 * A scoped name that is not exactly `@scope/name` with both parts non-empty
 * resolves to undefined. Without that guard, `"@scope"` would be searched
 * with the prefix `undefined@<version>@@@` and could match the unrelated
 * package `@scope/undefined`.
 *
 * @param cacheDir - Root of the Bun install cache.
 * @param name - Package name, optionally scoped.
 * @param version - Exact resolved version.
 * @returns The entry's parent directory and directory name, or undefined.
 */
export async function findCacheEntry({
  cacheDir,
  name,
  version,
}: {
  cacheDir: string;
  name: string;
  version: string;
}): Promise<CacheEntryLocation | undefined> {
  let parentDir = cacheDir;
  let bareName = name;

  if (name.startsWith("@")) {
    // Scoped: `<cacheDir>/@scope/name@version@@@n`
    const slash = name.indexOf("/");
    // Reject "@scope" (no slash), "@/name" (empty scope), "@scope/" (empty
    // name) rather than searching with a nonsensical prefix.
    if (slash <= 1 || slash === name.length - 1) return undefined;
    parentDir = `${cacheDir}/${name.slice(0, slash)}`;
    // Everything after the first slash; a stray extra "/" can never match a
    // single directory name, so such names resolve to undefined.
    bareName = name.slice(slash + 1);
  }

  const entryName = await firstMatch(parentDir, specPrefix(bareName, version));
  return entryName === undefined ? undefined : { parentDir, entryName };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Return the first name in `dir` that starts with `prefix`.
 *
 * @param dir - Directory to list.
 * @param prefix - Required leading text of the entry name.
 * @returns The first matching name in the order `readdir` reports them, or
 *   undefined when nothing matches or the directory cannot be listed (any
 *   `readdir` failure is treated as "not found").
 */
async function firstMatch(
  dir: string,
  prefix: string,
): Promise<string | undefined> {
  let listing: string[];
  try {
    listing = await readdir(dir);
  } catch {
    // Unreadable or missing directory: nothing to match.
    return undefined;
  }
  for (const entry of listing) {
    if (entry.startsWith(prefix)) {
      return entry;
    }
  }
  return undefined;
}
|