@openparachute/hub 0.7.4 → 0.7.5-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Deploy hand-off for the Surface Git Transport (Phase 0b, design doc
3
+ * 2026-06-30-surface-git-transport.md §5 step 5 + §7).
4
+ *
5
+ * After a successful `git push` to `/git/<name>` (receive-pack), the hub
6
+ * NOTIFIES the surface module over HTTP so it can pull + build + serve the new
7
+ * source. This is the settled "service-to-service via HTTP, not shell-out"
8
+ * seam: the `post-receive` hook does NOT build the pushed tree (that would run
9
+ * attacker-influenceable code as the hub/git user — RCE §7); the exec authority
10
+ * stays inside surface-host's own sandbox. The hub only sends an authenticated
11
+ * signal + a short-lived, narrowly-scoped read credential.
12
+ *
13
+ * Two hub-minted tokens ride this hand-off (both SHORT-LIVED + UNREGISTERED —
14
+ * they expire in minutes and are consumed inline, mirroring the H4
15
+ * credential-delivery provisioning-token pattern in admin-connections.ts):
16
+ *
17
+ * 1. notify-auth — a `surface:admin` bearer (aud `surface`) that
18
+ * authenticates the hub→surface-host POST. surface-host validates it with
19
+ * the SAME `enforceScope(surface:admin)` it uses for the hub's credential
20
+ * deliveries, so a random on-box process can't forge a push-notify.
21
+ * 2. pull-token — a `surface:<name>:read` bearer (aud `surface`) that
22
+ * surface-host presents back to THIS hub's `/git/<name>` endpoint to
23
+ * `git clone` the freshly-pushed source. Least-privilege: read on exactly
24
+ * the one surface, valid only long enough to clone.
25
+ *
26
+ * Modular by design (§1): surface-host pulls over the network (not a shared
27
+ * disk), so the seam already works when hub + surface-host are separate
28
+ * containers. `clone_url` is the hub's own loopback origin today; a cloud
29
+ * deploy supplies the internal hub URL instead. The token's `iss` is the hub
30
+ * issuer, which is a member of the hub's own bound-origin set, so the clone
31
+ * validates when it comes back in over loopback.
32
+ */
33
+ import type { Database } from "bun:sqlite";
34
+ import { signAccessToken } from "./jwt-sign.ts";
35
+
36
/**
 * Provenance identity stamped on the hub-internal notify + pull tokens.
 * `NOTIFY_SUBJECT` is passed as the `sub` and `NOTIFY_CLIENT_ID` as the
 * `clientId` of BOTH tokens minted by `notifySurfacePushed`.
 */
const NOTIFY_SUBJECT = "surface-git-transport";
const NOTIFY_CLIENT_ID = "surface-git-transport";

/** aud of both minted tokens — surface-host declares `aud: "surface"`. */
const SURFACE_AUDIENCE = "surface";

/**
 * notify-auth TTL, in seconds. The POST is fired immediately; a small window
 * covers a momentarily-busy loopback without leaving a usable credential
 * lying around.
 */
const NOTIFY_TTL_SECONDS = 120;

/**
 * pull-token TTL, in seconds. Long enough for surface-host to
 * `git clone --depth 1` a source surface right after the notify lands, short
 * enough that a leaked token is near-useless. Both TTLs here MUST stay well
 * under the hub's registered-mint threshold (admin-connections
 * REGISTERED_MINT_TTL_THRESHOLD, 600s) so these fire-and-forget tokens remain
 * unregistered-by-policy — bumping either past it without registering them
 * would leak unrevocable tokens.
 */
const PULL_TTL_SECONDS = 300;

/**
 * Bound the notify HTTP call (milliseconds) so a wedged surface-host can't
 * hang the caller. Fed to `AbortSignal.timeout` on the notify fetch.
 */
const NOTIFY_FETCH_TIMEOUT_MS = 10_000;
61
+
62
/**
 * Minimal logger seam for `notifySurfacePushed`. `console` satisfies this
 * shape and is used when no logger is injected.
 */
export interface GitNotifyLog {
  /** Failure paths: token minting failed, notify rejected, or transport error. */
  warn: (...args: unknown[]) => void;
  /** Non-failure outcomes: surface module not installed, or notify delivered. */
  info: (...args: unknown[]) => void;
}
66
+
67
/** Injected collaborators for `notifySurfacePushed` (all I/O goes through these). */
export interface NotifySurfacePushedDeps {
  /** Hub DB — for `signAccessToken`'s active-signing-key lookup. */
  db: Database;
  /**
   * Hub issuer (the `iss` claim), resolved per-request via `resolveIssuer`
   * (`oauthDeps(req).issuer`). Both minted tokens carry it; the pull token
   * validates against the hub's own bound-origin set on the clone-back.
   */
  issuer: string;
  /**
   * Resolve a module's loopback origin by short name (`makeResolveModuleOrigin`
   * over services.json). Returns null when the surface module isn't installed —
   * in which case there's nothing to notify and we no-op.
   */
  resolveModuleOrigin: (short: string) => string | null;
  /**
   * Origin surface-host should `git clone` from — the hub's own loopback origin
   * today (`http://127.0.0.1:<port>`). The `/git/<name>` suffix is appended
   * here so the module gets a ready-to-use URL.
   */
  cloneBaseOrigin: string;
  /** HTTP client used for the notify POST — defaults to the global `fetch`. */
  fetchImpl?: typeof fetch;
  /**
   * Clock override. When provided it is forwarded to the token signer as
   * `now`; when omitted the signer is called without a `now` option.
   */
  now?: () => Date;
  /** Logger — defaults to `console`. */
  log?: GitNotifyLog;
  /** Test seam — defaults to the real `signAccessToken`. */
  signToken?: typeof signAccessToken;
}
94
+
95
/**
 * Notify surface-host that surface `<name>` was pushed. Best-effort +
 * fire-and-forget from the caller's perspective: this never throws (the git
 * transport handler already returned the push response to the client); every
 * failure path — including a throwing `resolveModuleOrigin` — logs and returns.
 *
 * Steps: resolve the surface module origin → mint a `surface:admin`
 * notify-auth token and a `surface:<name>:read` pull token → POST
 * `{ surface, clone_url, pull_token }` to `<origin>/surface/api/git-pushed`.
 *
 * @param name Surface name that was pushed; used in the pull-token scope, the
 *   clone URL and log lines.
 * @param deps Injected collaborators (DB, issuer, origin resolver, fetch, …).
 * @returns A small outcome for tests/log assertions; production ignores it.
 *   `reason` is one of `surface-module-not-installed`, `resolve-failed`,
 *   `mint-failed`, `notify-rejected:<status>` or `notify-error`.
 */
export async function notifySurfacePushed(
  name: string,
  deps: NotifySurfacePushedDeps,
): Promise<{ notified: boolean; reason?: string }> {
  const log = deps.log ?? console;
  const sign = deps.signToken ?? signAccessToken;

  // The resolver is injected code; guard it so the "never throws" contract
  // holds even when it fails.
  let moduleOrigin: string | null;
  try {
    moduleOrigin = deps.resolveModuleOrigin("surface");
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.warn(`[git-notify] failed to resolve surface module origin for "${name}": ${msg}`);
    return { notified: false, reason: "resolve-failed" };
  }
  if (!moduleOrigin) {
    // No surface module installed on this hub — nothing to serve the push.
    log.info(`[git-notify] surface module not installed; skipping notify for "${name}"`);
    return { notified: false, reason: "surface-module-not-installed" };
  }

  let notifyAuth: string;
  let pullToken: string;
  try {
    const now = deps.now;
    // Claims shared by both tokens; `now` is only passed when a clock was injected.
    const common = {
      sub: NOTIFY_SUBJECT,
      clientId: NOTIFY_CLIENT_ID,
      issuer: deps.issuer,
      audience: SURFACE_AUDIENCE,
      ...(now !== undefined ? { now } : {}),
    };
    notifyAuth = (
      await sign(deps.db, { ...common, scopes: ["surface:admin"], ttlSeconds: NOTIFY_TTL_SECONDS })
    ).token;
    pullToken = (
      await sign(deps.db, {
        ...common,
        scopes: [`surface:${name}:read`],
        ttlSeconds: PULL_TTL_SECONDS,
      })
    ).token;
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.warn(`[git-notify] failed to mint notify tokens for "${name}": ${msg}`);
    return { notified: false, reason: "mint-failed" };
  }

  // Strip trailing slashes so the joined URLs never contain `//`.
  const cloneUrl = `${deps.cloneBaseOrigin.replace(/\/+$/, "")}/git/${name}`;
  const endpoint = `${moduleOrigin.replace(/\/+$/, "")}/surface/api/git-pushed`;
  const fetchImpl = deps.fetchImpl ?? fetch;

  try {
    const res = await fetchImpl(endpoint, {
      method: "POST",
      headers: {
        authorization: `Bearer ${notifyAuth}`,
        "content-type": "application/json",
      },
      body: JSON.stringify({ surface: name, clone_url: cloneUrl, pull_token: pullToken }),
      signal: AbortSignal.timeout(NOTIFY_FETCH_TIMEOUT_MS),
    });
    if (!res.ok) {
      let detail = `HTTP ${res.status}`;
      try {
        const text = (await res.text()).trim();
        // Cap the echoed body so a verbose error page can't flood the log.
        if (text) detail += `: ${text.slice(0, 300)}`;
      } catch {
        // best-effort detail
      }
      log.warn(`[git-notify] surface-host rejected push notify for "${name}" (${detail})`);
      return { notified: false, reason: `notify-rejected:${res.status}` };
    }
    // The success body is not needed; discard it so the connection is released
    // instead of waiting on an unread stream.
    void res.body?.cancel().catch(() => {});
    log.info(`[git-notify] notified surface-host of push to "${name}"`);
    return { notified: true };
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.warn(`[git-notify] push notify to surface-host failed for "${name}": ${msg}`);
    return { notified: false, reason: "notify-error" };
  }
}
@@ -0,0 +1,515 @@
1
+ /**
2
+ * Hub-authenticated git smart-HTTP transport — the Surface Git Transport
3
+ * substrate (Phase 0a, design doc 2026-06-30-surface-git-transport.md).
4
+ *
5
+ * The hub provides ONE general primitive: an authenticated `git http-backend`
6
+ * endpoint at `/git/<name>/*` backed by a bare repo per `<name>`. A client
7
+ * (agent, human, or a standalone Claude Code session) authenticates with a
8
+ * hub-issued JWT carrying `surface:<name>:write` (push) or `surface:<name>:read`
9
+ * (fetch) and does a plain `git push` / `git clone`. Surfaces are the first
10
+ * consumer; "hub-authenticated git" generalizes to any module that wants
11
+ * versioned, authenticated, file-shaped content movement.
12
+ *
13
+ * What this layer does NOT do (by deliberate trust boundary, §7): it never
14
+ * BUILDS or executes the pushed tree. The hub only receives + stores bytes;
15
+ * the `post-receive` hook here is a Phase-0a placeholder that logs the refs.
16
+ * Building pushed source is surface-host's sandboxed job (Phase 0b) — keeping
17
+ * the RCE surface out of the substrate is the whole point of the split.
18
+ *
19
+ * The mechanism (grounded in git's smart-HTTP protocol):
20
+ * 1. Discovery `GET /git/<name>/info/refs?service=git-(upload|receive)-pack`
21
+ * then transfer `POST /git/<name>/git-(upload|receive)-pack`.
22
+ * Scope keys PURELY off the service/path — no pack parsing:
23
+ * receive-pack ⇒ write, upload-pack ⇒ read.
24
+ * 2. The 401 dance: an unauthenticated request gets `401` +
25
+ * `WWW-Authenticate` (LOAD-BEARING — git won't invoke its credential
26
+ * helper / retry without it). Enforced at BOTH the info/refs GET and the
27
+ * transfer POST.
28
+ * 3. Bearer or Basic: git ≥2.46 sends `Authorization: Bearer <jwt>`; older
29
+ * git uses Basic with `x-access-token:<jwt>` (GitHub's compat trick).
30
+ * Both are accepted.
31
+ * 4. The gate validates the JWT (signature → hub keys; `iss` ∈ the
32
+ * multi-origin hub-bound set; revocation — the existing
33
+ * `validateAccessToken` path) and checks the scope, then streams the
34
+ * request + response bodies through `git http-backend` with CGI env.
35
+ * Never buffers whole packs.
36
+ */
37
+ import type { Database } from "bun:sqlite";
38
+ import { spawnSync } from "node:child_process";
39
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
40
+ import { join } from "node:path";
41
+ import { validateAccessToken } from "./jwt-sign.ts";
42
+
43
/**
 * Logger seam — defaults to `console`. Only the two levels the transport
 * actually emits are required.
 */
export interface GitTransportLog {
  /** Failures: repo provisioning, backend spawn, post-push notify. */
  warn: (...args: unknown[]) => void;
  /** Informational: repo provisioned, relayed backend stderr. */
  info: (...args: unknown[]) => void;
}
48
+
49
/** Injected collaborators + per-request context for `handleGitTransport`. */
export interface GitTransportDeps {
  /** Hub DB handle — for signature/kid lookup + revocation in `validateAccessToken`. */
  db: Database;
  /**
   * Directory holding the bare repos. Each surface lives at
   * `<gitRoot>/<name>.git`. Production: `<CONFIG_DIR>/hub/git`. Tests point
   * this at a tmpdir. Also handed to the backend as `GIT_PROJECT_ROOT`.
   */
  gitRoot: string;
  /**
   * The SET of origins this hub legitimately answers on
   * (`buildHubBoundOrigins` — loopback ∪ expose-state ∪ platform ∪ per-request
   * issuer). Passed straight to `validateAccessToken` as the `iss` allow-set so
   * a credential minted under a still-valid prior origin keeps validating
   * across an origin switch. SECURITY: must come ONLY from
   * `buildHubBoundOrigins`, never a raw request Host (the signature is verified
   * against the hub's own key first, so this is an additive `iss` relaxation
   * only — see `validateAccessToken`).
   */
  knownIssuers: () => readonly string[];
  /**
   * Resolved peer address, surfaced to the backend as REMOTE_ADDR (an empty
   * string is sent when this is null/undefined).
   */
  peerAddr?: string | null;
  /**
   * Fired AFTER a `git-receive-pack` POST subprocess exits 0 — i.e. a push
   * landed (the refs are updated + the post-receive hook has run by the time
   * `http-backend` exits). The deploy hand-off (design §5 step 5): the hub
   * notifies surface-host over HTTP + a hub JWT, NEVER a shell-out that builds
   * the pushed tree (that exec authority belongs to the module's sandbox, not
   * this substrate). Fire-and-forget + best-effort: a notify failure never
   * affects the push response the client already received. Keyed off the
   * subprocess exit, not the streamed response, so it observes the true push
   * outcome. Phase 0b wires this in hub-server.ts; tests inject a spy.
   *
   * Precision note: this fires on every SUCCESSFUL receive-pack, not strictly
   * per ref-update — a no-op re-push (no new objects) still exits 0 and
   * notifies. surface-host's re-pull→re-build→re-serve is idempotent, so the
   * worst case is a redundant rebuild of identical bytes.
   */
  onPushed?: (name: string) => void | Promise<void>;
  /** Logger — defaults to `console`. */
  log?: GitTransportLog;
}
90
+
91
/**
 * Allowed surface names: one alphanumeric character followed by up to 63
 * alphanumerics, `_` or `-`. Slashes and dots are excluded on purpose so a
 * parsed name can never step outside `gitRoot`; the length cap keeps a hostile
 * name from inflating a filesystem path. A trailing `.git` on the URL segment
 * is removed before this pattern is applied, so `/git/foo.git/...` and
 * `/git/foo/...` both yield `foo`.
 */
const SURFACE_NAME_RE = /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,63}$/;

/** Authority a request needs; derived only from the git service/path. */
type Access = "read" | "write";

interface ParsedGitPath {
  /** Canonical surface name, with any trailing `.git` removed. */
  name: string;
  /** Everything after the name, e.g. `info/refs` or `git-receive-pack`. */
  gitSubpath: string;
}

/**
 * Split `/git/<name>/<gitSubpath>` into its parts (`<name>` may end in `.git`).
 *
 * @param pathname URL pathname of the incoming request.
 * @returns The parsed route, or null when the path is not a well-formed, safe
 *   git route. Callers answer 404 for null, deliberately not distinguishing a
 *   malformed name from an unknown surface so existing names are not leaked.
 */
export function parseGitPath(pathname: string): ParsedGitPath | null {
  const prefix = "/git/";
  if (!pathname.startsWith(prefix)) return null;

  const tail = pathname.slice(prefix.length);
  const cut = tail.indexOf("/");
  // `/git/<name>` without a subpath (or with an empty name) is never a real
  // smart-HTTP request.
  if (cut <= 0) return null;

  const segment = tail.slice(0, cut);
  const gitSubpath = tail.slice(cut + 1);
  if (gitSubpath === "") return null;

  const suffix = ".git";
  const name = segment.endsWith(suffix)
    ? segment.slice(0, segment.length - suffix.length)
    : segment;
  if (!SURFACE_NAME_RE.test(name)) return null;

  // Belt and braces: no `..` segment may reach `git http-backend`, even though
  // it confines itself to GIT_PROJECT_ROOT. Legitimate subpaths (`info/refs`,
  // `git-upload-pack`, `git-receive-pack`, `objects/...`) never contain one.
  if (gitSubpath.split("/").includes("..")) return null;

  return { name, gitSubpath };
}
134
+
135
/**
 * Required authority for a request: write for receive-pack (push), read for
 * upload-pack (fetch) and any other discovery/dumb path. Keys purely off the
 * service param / path — no pack inspection.
 *
 * Classification is by path SUFFIX rather than exact equality. `git
 * http-backend` selects its service by matching the end of PATH_INFO, so a
 * nested subpath such as `x/git-receive-pack` must be gated as a push here
 * too; otherwise the gate and the backend could disagree about whether a
 * request writes.
 *
 * @param gitSubpath Path after the surface name (e.g. `info/refs`).
 * @param serviceParam Value of the `service` query parameter, or null.
 * @returns `"write"` for anything that can reach receive-pack, else `"read"`.
 */
export function requiredAccess(gitSubpath: string, serviceParam: string | null): Access {
  const segments = gitSubpath.split("/");
  const last = segments[segments.length - 1];
  if (last === "git-receive-pack") return "write";
  if (last === "git-upload-pack") return "read";
  if (gitSubpath === "info/refs" || gitSubpath.endsWith("/info/refs")) {
    // Discovery advertises whichever service the client asked for.
    return serviceParam === "git-receive-pack" ? "write" : "read";
  }
  // Dumb-HTTP object/ref fetches (objects/*, HEAD, packed-refs) are read-only.
  return "read";
}
149
+
150
/**
 * Pull the presented JWT out of the `Authorization` header, accepting either
 * `Bearer <jwt>` or HTTP Basic.
 *
 * Basic credential shapes understood (GitHub-compat, §6.3):
 *   - `x-access-token:<jwt>`  → token is the password (the documented form);
 *   - `<jwt>:x-oauth-basic`   → token is the username (legacy);
 *   - `<jwt>:`                → token is the username (empty password).
 * Any other non-empty password is also returned as the token.
 *
 * @param req Incoming request.
 * @returns The token string, or null when no usable credential is present.
 */
export function extractToken(req: Request): string | null {
  const authorization = req.headers.get("authorization");
  if (!authorization) return null;

  const bearerMatch = /^Bearer\s+(.+)$/i.exec(authorization);
  if (bearerMatch?.[1]) return bearerMatch[1].trim();

  const basicMatch = /^Basic\s+(.+)$/i.exec(authorization);
  if (!basicMatch?.[1]) return null;

  let credentials: string;
  try {
    credentials = Buffer.from(basicMatch[1].trim(), "base64").toString("utf8");
  } catch {
    return null;
  }

  // Split on the FIRST colon only: the password may itself contain colons.
  const sep = credentials.indexOf(":");
  const hasSep = sep !== -1;
  const user = hasSep ? credentials.slice(0, sep) : credentials;
  const pass = hasSep ? credentials.slice(sep + 1) : "";

  if (user === "x-access-token") return pass || null;
  if (pass && pass !== "x-oauth-basic") return pass;
  return user || null;
}
181
+
182
/**
 * Build the 401 sent for a missing or invalid credential.
 *
 * The `WWW-Authenticate` challenges are LOAD-BEARING: git will not call its
 * credential helper or retry without them. Two challenges are offered —
 * `Bearer` (git ≥2.46 native + modern helpers) and `Basic` (older git, which
 * retries via a helper using `x-access-token:<jwt>`) — so the widest range of
 * clients re-authenticates.
 *
 * @param reason Human-readable cause, echoed in the plain-text body.
 */
function unauthorized(reason: string): Response {
  const headers = new Headers();
  headers.set("content-type", "text/plain; charset=utf-8");
  headers.set("cache-control", "no-store");
  const challenges = ["Bearer", 'Basic realm="Parachute Surface Git"'];
  for (const challenge of challenges) {
    headers.append("www-authenticate", challenge);
  }
  const body = `Unauthorized: ${reason}\n`;
  return new Response(body, { status: 401, headers });
}
198
+
199
/**
 * Build the 403 sent when a VALID credential lacks the required scope.
 *
 * This is intentionally not a 401: prompting the same identity again grants
 * no extra authority, so a 401 would merely spin the credential helper. The
 * `WWW-Authenticate: ... insufficient_scope` header keeps the reason
 * machine-readable (RFC 6750), mirroring `adminAuthErrorResponse`.
 *
 * @param scope The scope the token was missing; echoed in body and header.
 */
function forbidden(scope: string): Response {
  const challenge = `Bearer error="insufficient_scope", scope="${scope}"`;
  const headers = {
    "content-type": "text/plain; charset=utf-8",
    "cache-control": "no-store",
    "www-authenticate": challenge,
  };
  const body = `Forbidden: token missing required scope ${scope}\n`;
  return new Response(body, { status: 403, headers });
}
216
+
217
/**
 * Ensure `<gitRoot>/<name>.git` exists as an exportable bare repo, creating it
 * on first authenticated access (Phase 1 will add a real registry; this keeps
 * it simple now). Only ever called AFTER the auth gate passes, so
 * unauthenticated probing can never provision a repo.
 *
 * Provisioning is RESUMABLE. The post-receive hook is written last, so its
 * presence is the "fully provisioned" marker. If an earlier attempt died after
 * `git init` but before the config/hook steps, the next call finishes the job
 * instead of returning a half-configured repo forever (`git init` on an
 * existing repository is safe and does not overwrite what is already there).
 * A side effect: a hook that was deleted by hand is re-created on next access.
 *
 * `http.receivepack = true` is REQUIRED for push: `git http-backend` enables
 * upload-pack from `GIT_HTTP_EXPORT_ALL` alone but refuses receive-pack unless
 * the repo opts in explicitly.
 *
 * @param gitRoot Directory that holds all bare repos.
 * @param name Validated surface name (no path separators).
 * @param log Logger for the one-time provisioning message.
 * @returns Absolute-or-relative path of the repo dir (`join(gitRoot, name + ".git")`).
 * @throws Error when `git init` or `git config` fails, or the hook cannot be written.
 */
function ensureBareRepo(gitRoot: string, name: string, log: GitTransportLog): string {
  const repoDir = join(gitRoot, `${name}.git`);
  const hooksDir = join(repoDir, "hooks");
  // Fast path: the hook is the last provisioning step, so it marks completion.
  if (existsSync(join(hooksDir, "post-receive"))) return repoDir;

  mkdirSync(gitRoot, { recursive: true });
  const init = spawnSync("git", ["init", "--bare", repoDir], { encoding: "utf8" });
  if (init.status !== 0) {
    // `status` is null when git could not be spawned at all; `error` says why.
    throw new Error(`git init --bare failed: ${init.stderr || init.error?.message || "unknown"}`);
  }
  const cfg = spawnSync("git", ["-C", repoDir, "config", "http.receivepack", "true"], {
    encoding: "utf8",
  });
  if (cfg.status !== 0) {
    throw new Error(
      `git config http.receivepack failed: ${cfg.stderr || cfg.error?.message || "unknown"}`,
    );
  }
  // `git init` normally creates hooks/ from its template, but a stripped-down
  // template directory may not — make sure the hook has somewhere to land.
  mkdirSync(hooksDir, { recursive: true });
  writePostReceiveHook(repoDir, name);
  log.info(`[git-transport] provisioned bare repo for surface "${name}" at ${repoDir}`);
  return repoDir;
}
245
+
246
/**
 * Phase-0a placeholder hook: log the received refs (to stdout, relayed to the
 * pusher as `remote:` lines, and appended to `post-receive.log` in the repo
 * dir for verification). Phase 0b replaces the body with an HTTP + hub-JWT
 * notify to surface-host (NEVER a shell-out that builds the pushed tree — §5/§7).
 *
 * NOTE(review): in this file the deploy notify is driven by the `onPushed`
 * callback in `handleGitTransport`, not by this hook — the hook written here
 * still only logs. Confirm whether the "Phase 0b replaces the body" sentence
 * above is still the plan.
 *
 * @param repoDir Bare repo directory; the hook is written to
 *   `<repoDir>/hooks/post-receive`, which must already exist.
 * @param name Surface name, baked into the script text at write time.
 */
function writePostReceiveHook(repoDir: string, name: string): void {
  // `${name}` is interpolated by JS when the file is written; `$refname`,
  // `$oldrev` and `$newrev` are shell variables read from the hook's stdin.
  // `\\n` emits a literal backslash-n, which printf expands at hook run time.
  // The log path is relative — presumably resolved against the repo dir, which
  // is where git runs hooks for a bare repo (TODO confirm).
  const hook = `#!/bin/sh
# Parachute Surface Git Transport — Phase 0a placeholder.
# Logs received refs only. Phase 0b: notify surface-host over HTTP + a hub JWT
# (never build the pushed tree in this process — that exec authority belongs to
# the module's sandbox, not the substrate).
while read -r oldrev newrev refname; do
  printf '[parachute] surface %s received %s (%s..%s)\\n' "${name}" "$refname" "$oldrev" "$newrev"
  printf '%s %s %s\\n' "$oldrev" "$newrev" "$refname" >> post-receive.log
done
`;
  const hookPath = join(repoDir, "hooks", "post-receive");
  // 0o755: git only runs hooks that are executable.
  writeFileSync(hookPath, hook, { mode: 0o755 });
}
266
+
267
/**
 * Locate the blank line that terminates a CGI header block.
 *
 * Recognizes both `\n\n` (separator length 2) and `\r\n\r\n` (length 4) and
 * reports whichever starts at the lowest byte offset. Exported for unit
 * testing.
 *
 * @param buf Bytes read from the backend so far.
 * @returns `idx` (offset where the separator starts) and `sepLen`, or null
 *   when no complete separator is in `buf` yet.
 */
export function findHeaderEnd(buf: Uint8Array): { idx: number; sepLen: number } | null {
  const LF = 0x0a;
  const CR = 0x0d;
  const lastStart = buf.length - 1;
  for (let pos = 0; pos < lastStart; pos += 1) {
    const here = buf[pos];
    if (here === LF) {
      if (buf[pos + 1] === LF) return { idx: pos, sepLen: 2 };
      continue;
    }
    // Reads past the end yield `undefined`, which never equals a byte value,
    // so no explicit bounds check is needed for the 4-byte form.
    if (here === CR && buf[pos + 1] === LF && buf[pos + 2] === CR && buf[pos + 3] === LF) {
      return { idx: pos, sepLen: 4 };
    }
  }
  return null;
}
287
+
288
/**
 * Parse CGI response headers (the block before the first blank line) into an
 * HTTP status + Headers. `Status: NNN reason` maps to the HTTP status (default
 * 200 when absent or out of the 100–599 range); every other `Key: Value` line
 * is forwarded verbatim.
 *
 * Lines that cannot be represented as an HTTP header — no colon, an empty
 * name, or a name/value `Headers.append` rejects — are skipped rather than
 * thrown on, so one malformed backend line cannot fail the whole response.
 * Exported for unit testing.
 *
 * @param headerBlock Decoded header text, lines separated by `\n` or `\r\n`.
 * @returns The resolved status code and the forwarded headers.
 */
export function parseCgiHeaders(headerBlock: string): { status: number; headers: Headers } {
  const headers = new Headers();
  let status = 200;
  for (const rawLine of headerBlock.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.length === 0) continue;
    const colon = line.indexOf(":");
    if (colon === -1) continue;
    const key = line.slice(0, colon).trim();
    const value = line.slice(colon + 1).trim();
    if (key.toLowerCase() === "status") {
      // Only the leading number matters; the reason phrase is discarded.
      const code = Number.parseInt(value.split(/\s+/)[0] ?? "", 10);
      if (Number.isFinite(code) && code >= 100 && code < 600) status = code;
      continue;
    }
    try {
      headers.append(key, value);
    } catch {
      // Invalid header name/value (e.g. a name containing a space): drop it.
    }
  }
  return { status, headers };
}
313
+
314
/** Upper bound on bytes buffered while looking for the end of the CGI headers. */
const MAX_CGI_HEADER_BYTES = 64 * 1024;

/**
 * Join two byte arrays into a newly allocated one (`a` followed by `b`).
 * Neither input is modified.
 */
function concatBytes(a: Uint8Array, b: Uint8Array): Uint8Array {
  const merged = new Uint8Array(a.length + b.length);
  merged.set(a);
  merged.set(b, a.length);
  return merged;
}
322
+
323
/**
 * Read the CGI header block off `stdout`, then return a Response whose body
 * STREAMS the remainder (leftover bytes already read + the rest of the stream).
 * Never buffers the whole pack — only the small header block is accumulated.
 *
 * Answers `502` instead of proxying when the backend output is unusable:
 *   - more than `MAX_CGI_HEADER_BYTES` arrive with no blank line;
 *   - the stream ends without a single byte (backend died before answering) —
 *     previously this produced a misleading `200` with an empty body;
 *   - the backend reports a 1xx status, which a `Response` cannot carry.
 * For 204/205/304 the Response is built with a null body, because the
 * `Response` constructor rejects a body stream on those statuses.
 *
 * @param stdout The backend's stdout stream.
 * @returns The proxied (or 502) response.
 */
async function cgiResponse(stdout: ReadableStream<Uint8Array>): Promise<Response> {
  const reader = stdout.getReader();
  const badGateway = (message: string): Response =>
    new Response(message, {
      status: 502,
      headers: { "content-type": "text/plain; charset=utf-8" },
    });

  let buf: Uint8Array = new Uint8Array(0);
  let boundary: { idx: number; sepLen: number } | null = null;
  for (;;) {
    const { done, value } = await reader.read();
    if (value && value.length > 0) {
      buf = concatBytes(buf, value);
      boundary = findHeaderEnd(buf);
      if (boundary) break;
      if (buf.length > MAX_CGI_HEADER_BYTES) {
        reader.cancel().catch(() => {});
        return badGateway("bad gateway: git http-backend emitted no CGI header block\n");
      }
    }
    if (done) break;
  }

  // Stream ended with nothing at all: there is no CGI response to forward.
  if (boundary === null && buf.length === 0) {
    return badGateway("bad gateway: git http-backend emitted no CGI header block\n");
  }

  // Without a boundary the stream has ended, so everything read is header text.
  const headerEnd = boundary ? boundary.idx : buf.length;
  const sepLen = boundary ? boundary.sepLen : 0;
  const headerBlock = new TextDecoder().decode(buf.slice(0, headerEnd));
  const leftover = buf.slice(headerEnd + sepLen);
  const { status, headers } = parseCgiHeaders(headerBlock);

  if (status < 200) {
    reader.cancel().catch(() => {});
    return badGateway(`bad gateway: git http-backend returned unsupported status ${status}\n`);
  }
  if (status === 204 || status === 205 || status === 304) {
    // Null-body statuses: drop whatever the backend still wants to write.
    reader.cancel().catch(() => {});
    return new Response(null, { status, headers });
  }

  const body = new ReadableStream<Uint8Array>({
    start(controller) {
      // Bytes that arrived in the same chunk as the headers go out first.
      if (leftover.length > 0) controller.enqueue(leftover);
      if (boundary === null) controller.close();
    },
    async pull(controller) {
      const { done, value } = await reader.read();
      if (done) {
        controller.close();
        return;
      }
      if (value && value.length > 0) controller.enqueue(value);
    },
    cancel(reason) {
      // Client went away: stop reading from the backend.
      reader.cancel(reason).catch(() => {});
    },
  });
  return new Response(body, { status, headers });
}
374
+
375
/**
 * Handle a `/git/<name>/*` request: parse → auth-gate → ensure bare repo →
 * stream-proxy to `git http-backend`. Always returns a Response (the caller
 * gates on the `/git/` prefix). A null `parseGitPath` 404s.
 *
 * Responses produced here, before the backend is involved:
 *   - 404 malformed/unsafe path;
 *   - 401 (+ `WWW-Authenticate`) missing or invalid token;
 *   - 403 valid token without the required `surface:<name>:(read|write)` scope;
 *   - 500 repo provisioning or backend spawn failure.
 *
 * @param req Incoming HTTP request.
 * @param deps Injected collaborators and per-request context.
 * @returns The backend's CGI output translated to a streaming Response, or one
 *   of the error responses above.
 */
export async function handleGitTransport(req: Request, deps: GitTransportDeps): Promise<Response> {
  const log = deps.log ?? console;
  const url = new URL(req.url);
  const parsed = parseGitPath(url.pathname);
  if (!parsed) return new Response("not found", { status: 404 });
  const { name, gitSubpath } = parsed;

  const serviceParam = url.searchParams.get("service");
  const access = requiredAccess(gitSubpath, serviceParam);

  // --- Auth gate (BEFORE touching the filesystem or spawning anything) ------
  const token = extractToken(req);
  if (!token) return unauthorized("a hub access token is required");

  let sub: string;
  let scopes: string[];
  try {
    const validated = await validateAccessToken(deps.db, token, deps.knownIssuers());
    const subClaim = validated.payload.sub;
    if (typeof subClaim !== "string" || subClaim.length === 0) {
      return unauthorized("token missing required `sub` claim");
    }
    sub = subClaim;
    // `scope` is a space-delimited string claim; anything else means no scopes.
    const scopeClaim = (validated.payload as { scope?: unknown }).scope;
    scopes = typeof scopeClaim === "string" ? scopeClaim.split(/\s+/).filter(Boolean) : [];
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    return unauthorized(`invalid token: ${msg}`);
  }

  // Authority check. Write requires `surface:<name>:write`. Read is satisfied
  // by either `surface:<name>:read` OR `surface:<name>:write` (write ⊇ read —
  // a writer can always fetch, matching GitHub's model).
  const writeScope = `surface:${name}:write`;
  const readScope = `surface:${name}:read`;
  const ok =
    access === "write"
      ? scopes.includes(writeScope)
      : scopes.includes(readScope) || scopes.includes(writeScope);
  if (!ok) return forbidden(access === "write" ? writeScope : readScope);

  // --- Provision (first access) + proxy -------------------------------------
  try {
    ensureBareRepo(deps.gitRoot, name, log);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.warn(`[git-transport] repo provisioning failed for "${name}": ${msg}`);
    return new Response("internal error: could not provision surface repo\n", {
      status: 500,
      headers: { "content-type": "text/plain; charset=utf-8" },
    });
  }

  // Minimal CGI env — we deliberately do NOT inherit the hub's full process
  // env (no hub secrets reach the subprocess). REMOTE_USER is the validated
  // token subject only. GIT_PROTOCOL passes the client's protocol negotiation
  // (v2) through; QUERY_STRING/CONTENT_TYPE/REQUEST_METHOD are standard CGI.
  const query = url.search.startsWith("?") ? url.search.slice(1) : url.search;
  const env: Record<string, string> = {
    PATH: process.env.PATH ?? "",
    GIT_PROJECT_ROOT: deps.gitRoot,
    GIT_HTTP_EXPORT_ALL: "1",
    PATH_INFO: `/${name}.git/${gitSubpath}`,
    REQUEST_METHOD: req.method,
    QUERY_STRING: query,
    CONTENT_TYPE: req.headers.get("content-type") ?? "",
    REMOTE_USER: sub,
    REMOTE_ADDR: deps.peerAddr ?? "",
    GIT_PROTOCOL: req.headers.get("git-protocol") ?? "",
  };
  // Set CONTENT_LENGTH only for non-chunked bodies. Large pushes use chunked
  // transfer (no Content-Length): the smart-service POST path reads the
  // self-delimiting pkt-line/pack stream off stdin to its natural end, so we
  // simply pipe the request body and let stdin EOF terminate it — never
  // buffering the pack to compute a length.
  const contentLength = req.headers.get("content-length");
  if (contentLength) env.CONTENT_LENGTH = contentLength;
  // git clients gzip larger smart-HTTP request bodies and mark them with
  // `Content-Encoding: gzip`. The body is piped through untouched, and
  // `git http-backend` only inflates it when the CGI variable
  // HTTP_CONTENT_ENCODING says so — without this, such requests reach the
  // service as compressed garbage and the fetch fails.
  const contentEncoding = req.headers.get("content-encoding");
  if (contentEncoding) env.HTTP_CONTENT_ENCODING = contentEncoding;

  let proc: ReturnType<typeof Bun.spawn>;
  try {
    proc = Bun.spawn(["git", "http-backend"], {
      env,
      // Stream the request body straight to the backend's stdin (Bun pumps it
      // concurrently with our stdout read — no deadlock, no buffering). GET
      // discovery has no body.
      stdin: req.body ?? "ignore",
      stdout: "pipe",
      stderr: "pipe",
    });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    log.warn(`[git-transport] failed to spawn git http-backend: ${msg}`);
    return new Response("internal error: git http-backend unavailable\n", {
      status: 500,
      headers: { "content-type": "text/plain; charset=utf-8" },
    });
  }

  // Drain stderr in the background — surfaces hook output + backend errors in
  // the hub log without blocking the response stream.
  void (async () => {
    try {
      const text = await new Response(proc.stderr as ReadableStream<Uint8Array>).text();
      if (text.trim().length > 0) log.info(`[git-transport] ${name}: ${text.trim()}`);
    } catch {
      // stderr drain is best-effort.
    }
  })();

  // Deploy hand-off (§5 step 5). On a SUCCESSFUL push (receive-pack POST exits
  // 0 → refs updated, post-receive ran), notify the surface module so it pulls
  // + builds + serves. Fire-and-forget, observed off the subprocess exit (not
  // the streamed response), and fully decoupled from the client's response:
  // a notify error is logged, never surfaced to the pusher. The hub NEVER
  // builds here — `onPushed` only sends an authenticated HTTP notify.
  if (access === "write" && gitSubpath === "git-receive-pack" && deps.onPushed) {
    const onPushed = deps.onPushed;
    void (async () => {
      let code: number;
      try {
        code = await proc.exited;
      } catch {
        return; // subprocess vanished — nothing to notify about
      }
      if (code !== 0) return;
      try {
        await onPushed(name);
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        log.warn(`[git-transport] post-push notify failed for "${name}": ${msg}`);
      }
    })();
  }

  return cgiResponse(proc.stdout as ReadableStream<Uint8Array>);
}