@usecontextlayer/pggit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,1362 @@
1
+ import { gunzipSync } from "node:zlib";
2
+ import { Hono } from "hono";
3
+ import { cors } from "hono/cors";
4
+ import { AsyncLocalStorage } from "node:async_hooks";
5
+ import { performance } from "node:perf_hooks";
6
+ import { Kysely, sql } from "kysely";
7
+ import { PostgresJSDialect } from "kysely-postgres-js";
8
+
9
+ //#region src/instrument.ts
10
/** Async-local slot holding the collector of the request currently being served. */
const als = new AsyncLocalStorage();
/** Collectors of finished requests, appended in completion order. */
const collected = [];
/**
 * Create an empty per-request collector.
 *
 * @param method HTTP method of the request being instrumented.
 * @param path Request path being instrumented.
 * @returns A collector with fresh counter/phase maps, an empty query list, an
 *   empty label, and "request" as the current phase.
 */
function newCollector(method, path) {
	const counters = /* @__PURE__ */ new Map();
	const phaseMs = /* @__PURE__ */ new Map();
	const queries = [];
	return {
		counters,
		current: "request",
		label: "",
		method,
		path,
		phaseMs,
		queries
	};
}
23
/**
 * Execute `fn` with a brand-new collector installed as the async-local store.
 * The collector is appended to `collected` once `fn` settles, whether it
 * fulfilled or rejected.
 *
 * @param meta Object carrying the request's `method` and `path`.
 * @param fn The work to run inside the collector's scope.
 * @returns Whatever `fn` resolves to; a rejection from `fn` propagates.
 */
async function runRequest(meta, fn) {
	const collector = newCollector(meta.method, meta.path);
	const tracked = async () => {
		try {
			return await fn();
		} finally {
			// Recorded on both success and failure.
			collected.push(collector);
		}
	};
	return als.run(collector, tracked);
}
34
/**
 * Time `fn` and add its wall-clock duration to the active collector's
 * `phaseMs` entry for `name`. While `fn` runs, the collector's `current` phase
 * is `name`; the previous phase is restored afterwards, even on failure.
 * With no active collector, `fn` is simply invoked.
 *
 * @param name Phase name to accumulate the elapsed time under.
 * @param fn The work to measure.
 * @returns Whatever `fn` resolves to.
 */
async function withPhase(name, fn) {
	const active = als.getStore();
	if (!active) return fn();
	const outerPhase = active.current;
	active.current = name;
	const startedAt = performance.now();
	try {
		return await fn();
	} finally {
		const spent = performance.now() - startedAt;
		const soFar = active.phaseMs.get(name) ?? 0;
		active.phaseMs.set(name, soFar + spent);
		active.current = outerPhase;
	}
}
49
/**
 * Add `n` (default 1) to the active collector's counter named `metric`.
 * Does nothing when no collector is active.
 */
function count(metric, n = 1) {
	const active = als.getStore();
	if (active) {
		const { counters } = active;
		const before = counters.get(metric) ?? 0;
		counters.set(metric, before + n);
	}
}
54
/** Set the active collector's `label` to `name`; does nothing when no collector is active. */
function label(name) {
	const active = als.getStore();
	if (!active) return;
	active.label = name;
}
58
/**
 * Append a query record to the active collector, tagged with the phase that
 * was current when the call was made. Does nothing when no collector is active.
 *
 * @param sql The SQL text of the query.
 * @param durationMs How long the query took, in milliseconds.
 */
function recordQuery(sql, durationMs) {
	const active = als.getStore();
	if (active) {
		const entry = {
			durationMs,
			phase: active.current,
			sql
		};
		active.queries.push(entry);
	}
}
67
+
68
+ //#endregion
69
+ //#region src/protocol/errors.ts
70
/**
 * Raised at the git wire boundary for a request that is malformed or asks for
 * something unsupported: a bad command list, an unknown command, an
 * unsupported object-format or filter, or a body in an encoding we don't
 * accept. The fault lies with the CLIENT, so the HTTP layer answers with a 400
 * carrying the message. An internal failure (an object missing mid-serve, a DB
 * error) is a different thing and remains a 500. The error type is what
 * carries the status decision.
 */
class GitProtocolError extends Error {
	name = "GitProtocolError";
	constructor(message) {
		super(message);
	}
}
84
/**
 * Raised when a fetch `want` names an object this repo does not have. That is
 * a CLIENT condition (a stale or force-pushed tip, a lost promisor blob), not
 * an internal fault. Real git upload-pack reports it IN-BAND as
 * `ERR upload-pack: not our ref <oid>` — an HTTP 200 protocol error the client
 * reads — so it must never surface as a 500. The absent OIDs travel on `oids`,
 * and `handleFetch` turns the error into the ERR pkt-line. A plain `Error`
 * from the serve path is a genuine backend fault and still propagates to a 500.
 */
class WantNotFoundError extends Error {
	oids;
	name = "WantNotFoundError";
	constructor(oids) {
		const missing = oids.join(" ");
		super(`upload-pack: not our ref ${missing}`);
		this.oids = oids;
	}
}
100
+
101
+ //#endregion
102
+ //#region src/protocol/pkt-line.ts
103
/**
 * pkt-line framing for the git wire protocol. Each pkt-line starts with a
 * 4-character hex length that counts itself, followed by `length - 4` bytes of
 * payload. Three lengths carry no payload and act as markers: `0000` (flush),
 * `0001` (delim) and `0002` (response-end). See gitprotocol-common and design
 * spec §5.
 */
const FLUSH_PKT = Buffer.from("0000", "latin1");
const DELIM_PKT = Buffer.from("0001", "latin1");
const RESPONSE_END_PKT = Buffer.from("0002", "latin1");
/** Upper bound on a payload we emit (git's conservative writer cap). */
const WRITER_MAX_PAYLOAD = 65515;
/** Upper bound on a payload we accept when reading (git's LARGE_PACKET_DATA_MAX). */
const READER_MAX_PAYLOAD = 65516;
/**
 * Wrap `payload` in a data pkt-line: `<4-hex length><payload>`.
 *
 * @param payload Buffer of at most WRITER_MAX_PAYLOAD bytes.
 * @returns A new Buffer holding the length prefix followed by the payload.
 * @throws Error when the payload is larger than the writer cap.
 */
function encodePktLine(payload) {
	const size = payload.length;
	if (size > WRITER_MAX_PAYLOAD) throw new Error(`pkt-line: payload ${size} exceeds writer cap ${WRITER_MAX_PAYLOAD}`);
	// The declared length includes the four prefix characters themselves.
	const header = Buffer.from((size + 4).toString(16).padStart(4, "0"), "latin1");
	return Buffer.concat([header, payload]);
}
122
/**
 * Serialize one packet of any kind. A "data" packet is framed through
 * `encodePktLine`; "flush", "delim" and "response-end" map to their fixed
 * four-byte markers. Any other `type` yields `undefined`.
 */
function encodePkt(pkt) {
	const { type } = pkt;
	if (type === "data") return encodePktLine(pkt.payload);
	if (type === "flush") return FLUSH_PKT;
	if (type === "delim") return DELIM_PKT;
	if (type === "response-end") return RESPONSE_END_PKT;
}
131
/**
 * Read the 4-character hex length prefix found at `offset` in `buf`.
 *
 * @returns The declared length as a number.
 * @throws GitProtocolError when the four characters are not all hex digits
 *   (either case), including when fewer than four bytes are available.
 */
function parseLen(buf, offset) {
	const prefix = buf.toString("latin1", offset, offset + 4);
	const wellFormed = /^[0-9a-f]{4}$/i.test(prefix);
	if (wellFormed) return Number.parseInt(prefix, 16);
	throw new GitProtocolError(`pkt-line: invalid length prefix ${JSON.stringify(prefix)}`);
}
136
/**
 * Split a buffer into packets. Safe for streaming use: when the buffer ends in
 * the middle of a packet, the unconsumed tail is handed back in `rest` so the
 * caller can prepend it to the next chunk.
 *
 * When `opts.stopAtFlush` is set, decoding stops at the first flush. That flush
 * is NOT added to `packets`, and everything after it is returned in `rest`.
 * The receive-pack request is cut this way: pkt-line command list, flush, then
 * the raw (un-framed) pack.
 *
 * `flushed` is true only when a flush ended decoding in `stopAtFlush` mode. The
 * parser relies on it to reject an unterminated command list on a COMPLETE
 * request body, where waiting for more bytes is not possible.
 *
 * @throws GitProtocolError on the reserved length 0003, on a declared payload
 *   above READER_MAX_PAYLOAD, or (via `parseLen`) on a non-hex length prefix.
 */
function decodePktStream(buf, opts = {}) {
	const packets = [];
	let cursor = 0;
	for (;;) {
		if (cursor + 4 > buf.length) break;
		const declared = parseLen(buf, cursor);
		if (declared === 0) {
			cursor += 4;
			if (opts.stopAtFlush) return {
				flushed: true,
				packets,
				rest: buf.subarray(cursor)
			};
			packets.push({ type: "flush" });
			continue;
		}
		if (declared === 1 || declared === 2) {
			packets.push({ type: declared === 1 ? "delim" : "response-end" });
			cursor += 4;
			continue;
		}
		if (declared === 3) throw new GitProtocolError("pkt-line: reserved length 0003");
		const bodyLen = declared - 4;
		if (bodyLen > READER_MAX_PAYLOAD) throw new GitProtocolError(`pkt-line: declared payload ${bodyLen} exceeds reader bound ${READER_MAX_PAYLOAD}`);
		const end = cursor + declared;
		// Incomplete trailing packet: leave it in `rest`.
		if (end > buf.length) break;
		packets.push({
			payload: buf.subarray(cursor + 4, end),
			type: "data"
		});
		cursor = end;
	}
	return {
		flushed: false,
		packets,
		rest: buf.subarray(cursor)
	};
}
190
+
191
+ //#endregion
192
+ //#region src/protocol/capabilities.ts
193
const AGENT = "pggit/0.0.0";
/**
 * Refuse a client that negotiates an object hash other than sha1. pggit is
 * SHA-1 only (the charter) and assumes 40-hex / 20-byte OIDs throughout, so a
 * sha256 client would otherwise fail deep in the parser on a 64-hex OID.
 * Rejecting here gives a clear message instead. When no `object-format`
 * capability is present the client is accepted, since sha1 is git's default.
 * Only the first `object-format=` capability in the list is examined.
 *
 * @param caps Capability strings sent by the client.
 * @throws GitProtocolError when that capability is anything but `object-format=sha1`.
 */
function assertSupportedObjectFormat(caps) {
	for (const cap of caps) {
		if (!cap.startsWith("object-format=")) continue;
		if (cap === "object-format=sha1") return;
		throw new GitProtocolError(`unsupported ${cap} — only object-format=sha1 is supported`);
	}
}
205
+
206
+ //#endregion
207
+ //#region src/protocol/sideband.ts
208
const MAX_BAND_DATA = 65514;
/**
 * Carry `data` on sideband channel `band`. The data is cut into slices of at
 * most MAX_BAND_DATA bytes and each slice is sent as one pkt-line whose payload
 * is `[band byte | slice]`. The result is the concatenation of those pkt-lines
 * and deliberately has NO trailing flush: section framing belongs to the caller
 * (the `packfile\n` header for fetch, the bare report for push), which appends
 * its own flush. Empty `data` produces an empty Buffer.
 */
function encodeSideband(band, data) {
	const bandByte = Buffer.from([band]);
	const framed = [];
	let start = 0;
	while (start < data.length) {
		const end = start + MAX_BAND_DATA;
		framed.push(encodePktLine(Buffer.concat([bandByte, data.subarray(start, end)])));
		start = end;
	}
	return Buffer.concat(framed);
}
223
+
224
+ //#endregion
225
+ //#region src/protocol/receive-pack.ts
226
const ZERO_OID$1 = "0".repeat(40);
/**
 * Byte cap on a ref name, enforced at the boundary. `git_ref`'s PK is a btree
 * on (repo_id, name), and an index entry past roughly 2704 bytes overflows,
 * which Postgres reports as an opaque storage error. This cap is far above any
 * real ref name yet safely below the btree limit, so an over-long name fails
 * loudly and in-band (`ng`) rather than as an HTTP 500 after the pack has
 * already been ingested and orphaned.
 */
const MAX_REF_NAME_BYTES = 2e3;
const RECEIVE_CAPS = [
	"report-status",
	"delete-refs",
	"side-band-64k",
	"atomic",
	"object-format=sha1",
	`agent=${AGENT}`
];
/**
 * Build the v0 ref advertisement for receive-pack (push). The capability list
 * rides after a NUL on the first line only. For an empty repo — the dominant
 * first-push state — a synthetic `0{40} capabilities^{}` line is emitted so the
 * client still has a line to read the push capabilities from.
 *
 * @param refs Objects with `oid` and `name`, in advertisement order.
 * @returns The pkt-line encoded advertisement, terminated by a flush.
 */
function encodeReceivePackAdvertisement(refs) {
	const capStr = RECEIVE_CAPS.join(" ");
	const advertised = refs.length === 0
		? [`${ZERO_OID$1} capabilities^{}\0${capStr}\n`]
		: refs.map((r, i) => {
			const base = `${r.oid} ${r.name}`;
			return i === 0 ? `${base}\0${capStr}\n` : `${base}\n`;
		});
	const framed = advertised.map((text) => encodePktLine(Buffer.from(text)));
	framed.push(encodePkt({ type: "flush" }));
	return Buffer.concat(framed);
}
257
/** A lowercase 40-hex SHA-1 object id, as it appears in a push command line. */
const COMMAND_OID = /^[0-9a-f]{40}$/;
/**
 * Parse the receive-pack POST body: a pkt-line command list, a flush, then the
 * raw packfile. Each command line is `<old-oid> <new-oid> <ref>`; capabilities
 * follow a NUL on a command line (the first one, in practice).
 *
 * Both object ids are validated here, at the wire boundary, so a malformed id
 * is a client error raised before any pack ingestion instead of reaching the
 * backend as an arbitrary string.
 *
 * @param body The complete request body as a Buffer.
 * @returns `{ caps, commands, pack }` — capability strings, the parsed
 *   `{ oldOid, newOid, ref }` commands, and the bytes after the flush.
 * @throws GitProtocolError when a non-empty body has no terminating flush, when
 *   a command line does not have exactly three non-empty fields, or when either
 *   object id is not 40 lowercase hex characters.
 */
function parseReceivePack(body) {
	const { packets, rest, flushed } = decodePktStream(body, { stopAtFlush: true });
	if (!flushed && body.length > 0) throw new GitProtocolError("receive-pack: command list not terminated by a flush (truncated or length-overrunning pkt-line)");
	const commands = [];
	let caps = [];
	for (const p of packets) {
		if (p.type !== "data") continue;
		let line = p.payload.toString("utf8").replace(/\n$/, "");
		const nul = line.indexOf("\0");
		if (nul >= 0) {
			// Everything after the NUL is the space-separated capability list.
			caps = line.slice(nul + 1).split(" ").filter(Boolean);
			line = line.slice(0, nul);
		}
		const parts = line.split(" ");
		const [oldOid, newOid, ref] = parts;
		if (parts.length !== 3 || !oldOid || !newOid || !ref) throw new GitProtocolError(`receive-pack: malformed command line ${JSON.stringify(line)}`);
		if (!COMMAND_OID.test(oldOid) || !COMMAND_OID.test(newOid)) throw new GitProtocolError(`receive-pack: malformed object id in command line ${JSON.stringify(line)}`);
		commands.push({
			newOid,
			oldOid,
			ref
		});
	}
	return {
		caps,
		commands,
		pack: rest
	};
}
289
/**
 * Build the report-status reply: an `unpack <status>` line, then one
 * `ok <ref>` or `ng <ref> <reason>` line per command result, then a flush.
 * A failed result without a `reason` is reported as "failed". When
 * side-band-64k was negotiated, the whole report (its flush included) is
 * carried on band 1 and followed by an outer flush.
 *
 * @param unpack Status text for the `unpack` line.
 * @param results Per-command `{ ok, ref, reason? }` outcomes.
 * @param useSideband Whether to wrap the report in sideband band 1.
 */
function encodeReportStatus(unpack, results, useSideband) {
	const texts = [`unpack ${unpack}\n`];
	for (const r of results) texts.push(r.ok ? `ok ${r.ref}\n` : `ng ${r.ref} ${r.reason ?? "failed"}\n`);
	const framed = texts.map((text) => encodePktLine(Buffer.from(text)));
	framed.push(encodePkt({ type: "flush" }));
	const report = Buffer.concat(framed);
	return useSideband ? Buffer.concat([encodeSideband(1, report), encodePkt({ type: "flush" })]) : report;
}
304
/**
 * Serve a receive-pack POST. The pack (when present) is ingested first, then
 * the ref commands are applied under CAS — atomically if the client negotiated
 * `atomic` — and a report-status reply is produced. If unpacking fails, every
 * ref is reported failed. If an atomic push has any rejected command, every ref
 * is `ng` and none is applied. Non-fast-forward updates are accepted by
 * default: CAS guards concurrency, not ancestry (spec §3.6).
 *
 * @param body The complete request body.
 * @param backend Storage backend providing `ingest`, `isConnected`,
 *   `applyRefUpdates` and, optionally, `syncRefSnapshot`.
 * @returns The encoded report-status response.
 */
async function handleReceivePack(body, backend) {
	const { commands, caps, pack } = parseReceivePack(body);
	assertSupportedObjectFormat(caps);
	const useSideband = caps.includes("side-band-64k");
	const atomic = caps.includes("atomic");
	// Every command reported as failed, with a per-index reason.
	const rejectAll = (reasonFor) => commands.map((c, i) => ({
		ok: false,
		reason: reasonFor(i),
		ref: c.ref
	}));
	const nameTooLong = commands.map((c) => Buffer.byteLength(c.ref, "utf8") > MAX_REF_NAME_BYTES);
	// Skip ingestion only when there are commands and all of them have over-long names.
	const anyApplicable = nameTooLong.length === 0 || nameTooLong.includes(false);
	let unpackStatus = "ok";
	if (pack.length > 0 && anyApplicable) {
		try {
			await backend.ingest(pack);
		} catch (e) {
			const detail = e instanceof Error ? e.message : "unpack failed";
			// The status is emitted on a single pkt-line, so newlines are flattened.
			unpackStatus = detail.replace(/\n/g, " ");
		}
	}
	if (unpackStatus !== "ok") {
		return encodeReportStatus(unpackStatus, rejectAll(() => "unpacker error"), useSideband);
	}
	// Deletes (zero new oid) and over-long names need no connectivity check.
	const connected = await Promise.all(commands.map((c, i) => {
		const skipCheck = nameTooLong[i] || c.newOid === ZERO_OID$1;
		return skipCheck ? true : backend.isConnected(c.newOid);
	}));
	const reasons = commands.map((_, i) => {
		if (nameTooLong[i]) return "funny refname (too long to store)";
		return connected[i] ? null : "missing necessary objects";
	});
	if (atomic && reasons.some((r) => r !== null)) {
		return encodeReportStatus(unpackStatus, rejectAll((i) => reasons[i] ?? "atomic transaction failed"), useSideband);
	}
	const accepted = commands.filter((_, i) => reasons[i] === null);
	const oks = await backend.applyRefUpdates(accepted, atomic);
	// `oks` is indexed over the accepted commands only; walk it in step.
	let applied = 0;
	const results = commands.map((c, i) => {
		const reason = reasons[i];
		if (reason !== null) return {
			ok: false,
			reason,
			ref: c.ref
		};
		if (oks[applied++]) return {
			ok: true,
			ref: c.ref
		};
		return {
			ok: false,
			reason: atomic ? "atomic transaction failed" : "stale ref (compare-and-swap failed)",
			ref: c.ref
		};
	});
	for (const [i, c] of commands.entries()) {
		if (!results[i]?.ok) continue;
		try {
			await backend.syncRefSnapshot?.(c.ref, c.newOid);
		} catch (err) {
			// Best effort: the ref update is already applied, so only log.
			console.error(`pggit: snapshot refresh failed for ${c.ref} (the push is already applied):`, err);
		}
	}
	return encodeReportStatus(unpackStatus, results, useSideband);
}
370
+
371
+ //#endregion
372
+ //#region src/protocol/v2.ts
373
/**
 * Build the v2 capability advertisement (the GET info/refs body without its
 * HTTP framing). Only what is actually honored is advertised (spec §4):
 * ls-refs with `unborn`, and fetch with the `filter` (partial clone) and
 * `include-tag` (auto-follow annotated tags) features. Shallow and ref-in-want
 * are left out — they have no milestone owner, and advertising them would send
 * clients down unimplemented paths.
 *
 * @returns One pkt-line per capability followed by a flush.
 */
function encodeAdvertisement() {
	const advertised = [
		"version 2",
		`agent=${AGENT}`,
		"ls-refs=unborn",
		"fetch=filter include-tag",
		"object-format=sha1"
	];
	const framed = [];
	for (const cap of advertised) framed.push(encodePktLine(Buffer.from(`${cap}\n`)));
	framed.push(encodePkt({ type: "flush" }));
	return Buffer.concat(framed);
}
390
/**
 * Decode a v2 request body shaped `command=… <caps> 0001 <args> 0000`.
 * Data lines before the delim are either the `command=` line or capabilities;
 * data lines after it are arguments. One trailing newline is stripped from
 * each line.
 *
 * @returns `{ args, capabilities, command }`; `command` is "" when no
 *   `command=` line was present.
 * @throws GitProtocolError when bytes remain after the last whole packet.
 */
function parseV2Request(body) {
	const decoded = decodePktStream(body);
	const trailing = decoded.rest.length;
	if (trailing > 0) throw new GitProtocolError(`pkt-line: ${trailing} trailing bytes after the request — incomplete or length-overrunning packet`);
	const request = {
		args: [],
		capabilities: [],
		command: ""
	};
	let inArgs = false;
	for (const pkt of decoded.packets) {
		switch (pkt.type) {
			case "delim":
				inArgs = true;
				break;
			case "data": {
				const text = pkt.payload.toString("utf8").replace(/\n$/, "");
				if (inArgs) request.args.push(text);
				else if (text.startsWith("command=")) request.command = text.slice("command=".length);
				else request.capabilities.push(text);
				break;
			}
		}
	}
	return request;
}
415
/**
 * Fetch features pggit deliberately does NOT advertise (see
 * encodeAdvertisement). A client that uses one anyway must FAIL LOUDLY rather
 * than be silently served an empty result (the charter). `ref-in-want`
 * (`want-ref`) and the `shallow`/`deepen` family are the unimplemented ones.
 */
const UNSUPPORTED_FETCH_ARG = /^(want-ref|deepen|shallow)\b/;
/** A lowercase 40-hex SHA-1 object id. */
const OID = /^[0-9a-f]{40}$/;
/**
 * Interpret the argument lines of a v2 `fetch` request.
 *
 * Object ids on both `want` and `have` lines are validated here, at the wire
 * boundary, so a malformed id is reported as a client error instead of being
 * handed to the backend. Argument lines that are not recognized are ignored.
 *
 * @param req Parsed v2 request; only `req.args` is read.
 * @returns `{ done, filter, haves, includeTag, wants }`; `filter` is
 *   `undefined` when the client sent none.
 * @throws GitProtocolError for an unadvertised feature (`want-ref`, `deepen*`,
 *   `shallow`) or a `want`/`have` whose object id is not 40 lowercase hex characters.
 */
function parseFetch(req) {
	const wants = [];
	const haves = [];
	let done = false;
	let filter;
	let includeTag = false;
	for (const arg of req.args) {
		if (UNSUPPORTED_FETCH_ARG.test(arg)) throw new GitProtocolError(`fetch: unsupported feature ${JSON.stringify(arg.split(" ")[0])} — pggit does not advertise it`);
		if (arg.startsWith("want ")) {
			const oid = arg.slice(5);
			if (!OID.test(oid)) throw new GitProtocolError(`fetch: malformed want object id ${JSON.stringify(oid)}`);
			wants.push(oid);
		} else if (arg.startsWith("have ")) {
			const oid = arg.slice(5);
			if (!OID.test(oid)) throw new GitProtocolError(`fetch: malformed have object id ${JSON.stringify(oid)}`);
			haves.push(oid);
		} else if (arg.startsWith("filter ")) filter = arg.slice(7);
		else if (arg === "include-tag") includeTag = true;
		else if (arg === "done") done = true;
	}
	return {
		done,
		filter,
		haves,
		includeTag,
		wants
	};
}
446
/**
 * Encode the ls-refs response: one pkt-line per entry, then a flush. An entry
 * that has an `unborn` key is written as `unborn <name>`, any other as
 * `<oid> <name>`. A truthy `symrefTarget` adds ` symref-target:<target>` and a
 * truthy `peeled` adds ` peeled:<oid>`.
 */
function encodeLsRefsResponse(entries) {
	const framed = [];
	for (const e of entries) {
		const fields = ["unborn" in e ? `unborn ${e.name}` : `${e.oid} ${e.name}`];
		if (e.symrefTarget) fields.push(`symref-target:${e.symrefTarget}`);
		if ("peeled" in e && e.peeled) fields.push(`peeled:${e.peeled}`);
		framed.push(encodePktLine(Buffer.from(`${fields.join(" ")}\n`)));
	}
	framed.push(encodePkt({ type: "flush" }));
	return Buffer.concat(framed);
}
456
/**
 * Produce the pkt-lines of an `acknowledgments` section (no terminator): the
 * section header, then either a single `NAK` (nothing in common and not
 * ready) or one `ACK <oid>` per common object followed by `ready` when set.
 */
function acknowledgmentLines(common, ready) {
	const texts = ["acknowledgments\n"];
	if (!ready && common.length === 0) {
		texts.push("NAK\n");
	} else {
		for (const oid of common) texts.push(`ACK ${oid}\n`);
		if (ready) texts.push("ready\n");
	}
	return Buffer.concat(texts.map((text) => encodePktLine(Buffer.from(text))));
}
466
/**
 * Encode the fetch reply for a negotiation round that carries no pack: the
 * `acknowledgments` section closed by a flush. The client then sends more
 * haves or `done` (spec §4 shape b).
 */
function encodeAcknowledgments(common, ready) {
	const section = acknowledgmentLines(common, ready);
	const terminator = encodePkt({ type: "flush" });
	return Buffer.concat([section, terminator]);
}
474
/**
 * Encode the fetch reply sent when the server turns `ready` during
 * negotiation: the acknowledgments section including `ready`, a delim-pkt, and
 * then the packfile section. git requires the pack to follow `ready` within
 * the same response rather than in a later round.
 */
function encodeReadyWithPack(common, pack) {
	const acks = acknowledgmentLines(common, true);
	const separator = encodePkt({ type: "delim" });
	const packSection = encodePackfileResponse(pack);
	return Buffer.concat([acks, separator, packSection]);
}
486
/**
 * Encode a v2 error reply as one `ERR <message>` pkt-line. git's packet reader
 * recognizes the `ERR ` prefix and the client dies with
 * `remote error: <message>`. This is the in-band channel for a request that
 * cannot be served (for example a `want` the repo lacks): the client reads it
 * as an HTTP-200 protocol error, NOT a transport 500.
 */
function encodeErr(message) {
	const text = `ERR ${message}\n`;
	return encodePktLine(Buffer.from(text));
}
495
/**
 * Encode the packfile section of a fetch reply (the clone path, where the
 * client sent `done`): the `packfile` section header, the pack carried on
 * sideband band 1, then a flush.
 */
function encodePackfileResponse(pack) {
	const header = encodePktLine(Buffer.from("packfile\n"));
	const payload = encodeSideband(1, pack);
	const terminator = encodePkt({ type: "flush" });
	return Buffer.concat([header, payload, terminator]);
}
506
+
507
+ //#endregion
508
+ //#region src/protocol/upload-pack.ts
509
/**
 * Serve a v2 `ls-refs` command. The request is labelled "ls-refs" and the work
 * is timed under the "ref-advertise" phase.
 *
 * HEAD is listed first when it is a symref and passes the prefix filter: with
 * its oid when the target ref exists, or as an unborn entry when the client
 * asked for both `unborn` and `symrefs`. Every ref passing the prefix filter
 * follows, with its peeled oid when `peel` was requested and one is known.
 * An empty `ref-prefix` list matches every name.
 *
 * @param req Parsed v2 request; only `req.args` is read.
 * @param backend Backend providing `listRefs()` and `getSymref(name)`.
 * @returns The encoded ls-refs response.
 */
async function handleLsRefs(req, backend) {
	label("ls-refs");
	return withPhase("ref-advertise", async () => {
		const { args } = req;
		const wantPeel = args.includes("peel");
		const wantSymrefs = args.includes("symrefs");
		const wantUnborn = args.includes("unborn");
		const prefixes = [];
		for (const arg of args) {
			if (arg.startsWith("ref-prefix ")) prefixes.push(arg.slice("ref-prefix ".length));
		}
		const matches = (name) => prefixes.length === 0 || prefixes.some((p) => name.startsWith(p));
		const refs = await backend.listRefs();
		const byName = new Map(refs.map((r) => [r.name, r.oid]));
		const headTarget = await backend.getSymref("HEAD");
		const entries = [];
		if (headTarget && matches("HEAD")) {
			const headOid = byName.get(headTarget);
			if (headOid) {
				entries.push({
					name: "HEAD",
					oid: headOid,
					symrefTarget: wantSymrefs ? headTarget : void 0
				});
			} else if (wantUnborn && wantSymrefs) {
				entries.push({
					name: "HEAD",
					symrefTarget: headTarget,
					unborn: true
				});
			}
		}
		for (const { name, oid, peeled } of refs) {
			if (!matches(name)) continue;
			entries.push(wantPeel && peeled ? { name, oid, peeled } : { name, oid });
		}
		return encodeLsRefsResponse(entries);
	});
}
546
/**
 * Map the wire filter spec onto a walk option. Only the common `blob:none`
 * (blobless partial clone) is honored, by leaving blobs out. Every other
 * filter (`tree:0`, `blob:limit=…`, …) is answered with the FULL closure. The
 * protocol permits a server to send more than the filter asked for — the
 * client accepts the superset and has nothing left to fetch lazily — so
 * over-serving lets the clone finish where a hard rejection would abort it,
 * without implementing every filter spec.
 *
 * @param filter The filter spec from the request, or `undefined`.
 * @returns `true` only for `blob:none`.
 */
function filterOmitsBlobs(filter) {
	const BLOBLESS = "blob:none";
	return filter === BLOBLESS;
}
557
/**
 * Serve a v2 `fetch` command. The request is labelled "fetch".
 *
 * When the client sent `done`, the reply is the packfile section alone.
 * Otherwise the backend decides whether it is ready: if not, the reply is an
 * acknowledgments-only round; if so, acknowledgments with `ready` followed by
 * the pack. A `WantNotFoundError` raised while deciding or building the pack is
 * answered in-band with an ERR pkt-line; any other error propagates.
 *
 * @param req Parsed v2 request.
 * @param backend Backend providing `commonHaves`, `readyToGiveUp` and `buildPack`.
 * @returns The encoded fetch response.
 */
async function handleFetch(req, backend) {
	label("fetch");
	const parsed = parseFetch(req);
	const omitBlobs = filterOmitsBlobs(parsed.filter);
	const common = await backend.commonHaves(parsed.haves);
	const buildPack = () => backend.buildPack(parsed.wants, common, omitBlobs, parsed.includeTag);
	try {
		if (parsed.done) return encodePackfileResponse(await buildPack());
		const ready = await backend.readyToGiveUp(parsed.wants, common);
		if (!ready) return encodeAcknowledgments(common, false);
		return encodeReadyWithPack(common, await buildPack());
	} catch (err) {
		if (!(err instanceof WantNotFoundError)) throw err;
		return encodeErr(err.message);
	}
}
573
/**
 * Route a v2 upload-pack POST body to the `ls-refs` or `fetch` handler after
 * checking the negotiated object format.
 *
 * @throws GitProtocolError for any other command (including a missing one).
 */
async function handleUploadPack(body, backend) {
	const req = parseV2Request(body);
	assertSupportedObjectFormat(req.capabilities);
	switch (req.command) {
		case "ls-refs": return handleLsRefs(req, backend);
		case "fetch": return handleFetch(req, backend);
		default: throw new GitProtocolError(`upload-pack: unsupported command ${JSON.stringify(req.command)}`);
	}
}
581
+
582
+ //#endregion
583
+ //#region src/object/format-error.ts
584
/**
 * Raised when stored git object content cannot be parsed. `code` is a short
 * machine-readable tag identifying the kind of malformation; `message` is the
 * human-readable description.
 */
class GitFormatError extends Error {
	code;
	name = "GitFormatError";
	constructor(code, message) {
		super(message);
		this.code = code;
	}
}
592
+
593
+ //#endregion
594
+ //#region src/object/object.ts
595
/**
 * Collect the values of the leading `key <value>` header lines whose key is in
 * `keys`. Scanning stops at the first empty line (the end of the headers).
 *
 * @param content Raw object content as a Buffer.
 * @param keys Set of header names to pick up.
 * @returns The matching values, in order of appearance.
 */
function headerOids(content, keys) {
	const found = [];
	const lines = content.toString("latin1").split("\n");
	for (let i = 0; i < lines.length && lines[i] !== ""; i += 1) {
		const line = lines[i];
		const sep = line.indexOf(" ");
		if (sep <= 0) continue;
		if (keys.has(line.slice(0, sep))) found.push(line.slice(sep + 1));
	}
	return found;
}
605
/**
 * Parse a tree object's content, which is `<mode> <name>\0<20-byte oid>`
 * repeated back to back.
 *
 * @param content Raw tree content as a Buffer.
 * @returns `{ mode, name, oid }` per entry, with `oid` as 40 hex characters.
 * @throws GitFormatError ("malformed-tree") when an entry lacks its space or
 *   NUL separator in the right order, or its 20 oid bytes run past the end.
 */
function treeEntries(content) {
	const OID_BYTES = 20;
	const entries = [];
	for (let pos = 0; pos < content.length;) {
		const space = content.indexOf(32, pos);
		const nul = content.indexOf(0, pos);
		const next = nul + 1 + OID_BYTES;
		const wellFormed = space >= 0 && nul >= 0 && space <= nul && next <= content.length;
		if (!wellFormed) throw new GitFormatError("malformed-tree", `tree: malformed entry at offset ${pos}`);
		entries.push({
			mode: content.subarray(pos, space).toString("latin1"),
			name: content.subarray(space + 1, nul).toString("utf8"),
			oid: content.subarray(nul + 1, next).toString("hex")
		});
		pos = next;
	}
	return entries;
}
625
/** True when a tree entry's mode string denotes a subtree (directory) rather than a blob or gitlink. */
function isTreeEntryMode(mode) {
	const SUBTREE_MODE = "40000";
	return mode === SUBTREE_MODE;
}
629
/**
 * Return the root tree OID named by a commit's `tree` header (the first one,
 * should there be several).
 *
 * @throws GitFormatError ("missing-tree-header") when no `tree` header is found.
 */
function commitTreeOid(content) {
	const treeOids = headerOids(content, /* @__PURE__ */ new Set(["tree"]));
	const first = treeOids[0];
	if (first) return first;
	throw new GitFormatError("missing-tree-header", "commitTreeOid: commit has no tree header");
}
635
+
636
+ //#endregion
637
+ //#region src/repo-view/build-file-list.ts
638
/** Mode of a gitlink/submodule entry: it points at a commit in another repo, so there is no blob here. */
const GITLINK_MODE$1 = "160000";
/**
 * Build the flat path→blob index of a commit's tree — the equivalent of
 * `git ls-tree -r` for that commit, read straight from the object store. The
 * result holds one entry per blob with its full path from the root, its raw
 * mode and its blob oid. Subtrees are descended into; gitlinks (submodules)
 * are left out because their target is not in this repo. Blob CONTENT is never
 * read: it stays in git_object and is joined at query time (§4.5 collapse), so
 * this walk reads commits and trees only.
 *
 * @param read Async function resolving an oid to an object with a `content` Buffer.
 * @param commitOid OID of the commit whose tree is listed.
 * @returns `{ files }`, each file being `{ blobOid, mode, path }`.
 */
async function buildFileList(read, commitOid) {
	const commit = await read(commitOid);
	const files = [];
	async function collect(treeOid, prefix) {
		const { content } = await read(treeOid);
		for (const { mode, name, oid } of treeEntries(content)) {
			const path = prefix + name;
			if (isTreeEntryMode(mode)) {
				await collect(oid, `${path}/`);
				continue;
			}
			if (mode === GITLINK_MODE$1) continue;
			files.push({
				blobOid: oid,
				mode,
				path
			});
		}
	}
	await collect(commitTreeOid(commit.content), "");
	return { files };
}
665
+
666
+ //#endregion
667
+ //#region src/repo-view/config.ts
668
/**
 * Predicate selecting the refs that get a queryable file snapshot: branches
 * only. Tags, notes and `refs/pull/*` are excluded. Widening the projection
 * later is a one-line change here.
 */
const SNAPSHOT_REFS = (refName) => {
	const BRANCH_NAMESPACE = "refs/heads/";
	return refName.startsWith(BRANCH_NAMESPACE);
};
673
+
674
+ //#endregion
675
+ //#region src/repo-view/rebuild.ts
676
const ZERO_OID = "0".repeat(40);
/**
 * Bring `refName`'s file snapshot up to date after a push has applied it.
 * Refs that are not branches are ignored (§ SNAPSHOT_REFS). A delete (zero
 * oid) drops the snapshot. Otherwise the new tip's tree is walked — the
 * objects are already present post-ingest — and the snapshot is rebuilt from
 * the resulting file list. This runs after the push has committed, so a
 * failure here never rolls back the git operation (the projection can be
 * rebuilt from the packs).
 *
 * @param deps Provides `snapshots` (`dropRefSnapshot`, `rebuildRefSnapshot`)
 *   and `objects` (`getObject`).
 * @param repoId Repository the ref belongs to.
 * @param refName Full ref name that was updated.
 * @param newOid The ref's new oid, or the zero oid for a delete.
 * @throws Error when an object needed for the walk is missing.
 */
async function syncRefSnapshot(deps, repoId, refName, newOid) {
	if (!SNAPSHOT_REFS(refName)) return;
	if (newOid === ZERO_OID) {
		await deps.snapshots.dropRefSnapshot(repoId, refName);
		return;
	}
	// Object lookup that treats a miss as a hard error.
	async function read(oid) {
		const obj = await deps.objects.getObject(repoId, oid);
		if (obj) return obj;
		throw new Error(`repo-view: object ${oid} missing while building ${refName}`);
	}
	await deps.snapshots.rebuildRefSnapshot(repoId, refName, await buildFileList(read, newOid));
}
697
+
698
+ //#endregion
699
+ //#region src/database/postgres.ts
700
/** Marker printed in front of each logged event, by level. */
const EVENT_SIGNS = {
	error: "🔴",
	query: "🟢"
};
/**
 * Wrap a porsager client in a Kysely instance. Each query or error event is
 * recorded into the active request collector (SQL text and duration); when
 * NODE_ENV is "development" the event is also written with `console.debug`.
 *
 * @param pg The porsager `postgres` client handed to the dialect.
 * @returns The configured Kysely instance.
 */
function initKysely(pg) {
	const log = (event) => {
		const { level } = event;
		if (level !== "query" && level !== "error") return;
		recordQuery(event.query.sql, event.queryDurationMillis);
		if (process.env.NODE_ENV !== "development") return;
		console.debug(`${EVENT_SIGNS[level]} ${event.queryDurationMillis}ms ${event.query.sql}`);
	};
	const dialect = new PostgresJSDialect({ postgres: pg });
	return new Kysely({
		dialect,
		log
	});
}
716
+
717
+ //#endregion
718
+ //#region src/database/copy-insert.ts
719
/** Start of a binary COPY payload: the 11-byte `PGCOPY\n\xff\r\n\0` signature followed by 8 zero bytes. */
const HEADER = Buffer.concat([Buffer.from("PGCOPY\n\xff\r\n\0", "latin1"), Buffer.alloc(8)]);
/** End of a binary COPY payload: a 16-bit big-endian -1. */
const TRAILER = Buffer.from([255, 255]);
737
/**
 * Encode one field's value as the raw bytes of its binary COPY representation
 * (without the length prefix).
 *
 * @param field `{ t, v }` where `t` is one of "int2", "int4", "int8", "bytea"
 *   or "text" and `v` is the value: a number for int2/int4, anything `BigInt()`
 *   accepts for int8, a Buffer for bytea (returned as-is), a string for text
 *   (UTF-8 encoded).
 * @returns The encoded bytes as a Buffer.
 * @throws TypeError when `t` is not a supported type tag. Without this guard
 *   the function would return `undefined` and the caller would fail later with
 *   an unrelated "cannot read length of undefined".
 */
function encodeValue(field) {
	switch (field.t) {
		case "int2": {
			const b = Buffer.alloc(2);
			b.writeInt16BE(field.v);
			return b;
		}
		case "int4": {
			const b = Buffer.alloc(4);
			b.writeInt32BE(field.v);
			return b;
		}
		case "int8": {
			const b = Buffer.alloc(8);
			// BigInt keeps 64-bit values exact when they are supplied as bigint or string.
			b.writeBigInt64BE(BigInt(field.v));
			return b;
		}
		case "bytea": return field.v;
		case "text": return Buffer.from(field.v, "utf8");
		default: throw new TypeError(`encodeValue: unsupported field type ${JSON.stringify(field.t)}`);
	}
}
758
/**
 * Serialize `rows` into a single PGCOPY binary payload: the header, then for
 * each row a 16-bit field count followed by every field as
 * `<int32 length><raw bytes>`, and finally the trailer.
 *
 * @param rows Array of rows, each an array of `{ t, v }` fields.
 * @returns The complete payload as one Buffer.
 */
function encodeBinaryCopy(rows) {
	const int16 = (n) => {
		const b = Buffer.alloc(2);
		b.writeInt16BE(n);
		return b;
	};
	const int32 = (n) => {
		const b = Buffer.alloc(4);
		b.writeInt32BE(n);
		return b;
	};
	const chunks = [HEADER];
	for (const row of rows) {
		chunks.push(int16(row.length));
		for (const field of row) {
			const bytes = encodeValue(field);
			chunks.push(int32(bytes.length), bytes);
		}
	}
	chunks.push(TRAILER);
	return Buffer.concat(chunks);
}
776
/**
 * Bulk-load `rows` into `target`: create a temp staging table with `target`'s
 * column shape (dropped on commit), COPY the rows into it in binary format,
 * then `INSERT … SELECT … ON CONFLICT DO NOTHING` from staging into `target`.
 * Does nothing for an empty `rows`.
 *
 * `tx` must be a transaction-scoped porsager `Sql`. `target` and `columns` are
 * internal constants (never client input) and are interpolated into the SQL
 * as identifiers.
 *
 * @param tx Transaction-scoped porsager client.
 * @param target Name of the destination table.
 * @param columns Column names, in the order fields appear in each row.
 * @param rows Rows to load, each an array of `{ t, v }` fields.
 */
async function copyInsert(tx, target, columns, rows) {
	if (rows.length === 0) return;
	const cols = columns.join(", ");
	const staging = `copy_stg_${target}`;
	await tx.unsafe(`create temp table ${staging} on commit drop as select ${cols} from ${target} with no data`);
	const sink = await tx`copy ${tx(staging)} (${tx.unsafe(cols)}) from stdin (format binary)`.writable();
	// Settle once the stream has finished, or on the first stream/write error.
	await new Promise((resolve, reject) => {
		sink.on("error", reject);
		sink.on("finish", () => resolve());
		sink.write(encodeBinaryCopy(rows), (err) => {
			if (!err) sink.end();
			else reject(err);
		});
	});
	await tx.unsafe(`insert into ${target} (${cols}) select ${cols} from ${staging} on conflict do nothing`);
}
798
+
799
+ //#endregion
800
+ //#region src/object/edges.ts
801
/** Mode of a gitlink entry: it points at a commit in *another* repo, so it is
 * neither a blob nor an edge here. */
const GITLINK_MODE = "160000";
/**
 * List the blob OIDs that sit directly in a tree (the other half of the §4.3
 * standing rule: blobs are enumerated from tree content, never stored as edges).
 *
 * An entry counts as a blob unless it is a subtree (per the original notes,
 * `deriveEdges` records those as kind-3 edges) or a gitlink (`160000`, a submodule
 * commit living in another repo). Connectivity relies on this to learn which
 * blobs a present tree requires, because no tree→blob edge exists to reveal a
 * missing one.
 *
 * @param {*} content raw tree object content, as accepted by `treeEntries`.
 * @returns {Array} the `oid` of every blob entry, in tree order.
 */
function treeBlobOids(content) {
  const isBlobEntry = ({ mode }) => !(isTreeEntryMode(mode) || mode === GITLINK_MODE);
  return treeEntries(content)
    .filter(isBlobEntry)
    .map(({ oid }) => oid);
}
814
+
815
+ //#endregion
816
+ //#region src/pack/object-header.ts
817
/**
 * Numeric object-type codes carried in a pack object header — the variable-length
 * (type, uncompressed size) prefix of every object entry in a packfile.
 *
 * Header layout, per gitformat-pack: the first byte is `[c|ttt|ssss]`
 * (continuation bit, 3-bit type, low 4 size bits); every continuation byte adds 7
 * more size bits, least-significant group first.
 *
 * Note for size decoding: use `*` / `Math.floor`, never `<<` / `>>` — JS bitwise
 * operators work on 32 bits and would corrupt sizes ≥ 2³¹.
 */
const PACK_OBJ_TYPE = {
  // Keys are kept in alphabetical order, not numeric order.
  BLOB: 3,
  COMMIT: 1,
  OFS_DELTA: 6, // delta entry
  REF_DELTA: 7, // delta entry
  TAG: 4,
  TREE: 2
};
836
+
837
+ //#endregion
838
+ //#region src/store/reachability.ts
839
/** Objects looked up per round-trip when chunking tree/blob existence queries. */
const LOOKUP_BATCH = 1e3;
/**
 * Split `items` into consecutive batches of at most `size` elements.
 *
 * @param {Array} items the list to split (not mutated).
 * @param {number} size maximum batch length; must be greater than zero.
 * @returns {Array<Array>} the batches in order; `[]` for an empty `items`.
 * @throws {RangeError} when there are items to split and `size` is not a positive
 *   number. Previously a zero or negative `size` looped forever and `NaN`
 *   returned `[[]]`, silently dropping every item.
 */
function batches(items, size) {
  const out = [];
  if (items.length === 0) return out;
  // `!(size > 0)` also rejects NaN and undefined, which `size <= 0` would miss.
  if (!(size > 0)) throw new RangeError(`batches: size must be a positive number, got ${String(size)}`);
  for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
  return out;
}
847
/**
 * Compute everything reachable from `roots` over the stored DAG, split into the
 * objects that exist and those that do not. This is the single reachability
 * engine shared by connectivity, clone, and incremental fetch, so they can never
 * disagree.
 *
 * How it works:
 *  1. A recursive CTE seeds from `roots` and follows `git_edge` (all stored kinds
 *     1,2,3,5) for the commit/tree/tag closure; the LEFT JOIN against
 *     `git_object` tells which closure members are present.
 *  2. Blobs are not edges (§4.3). Unless `omitBlobs` is set, they are enumerated
 *     from the content of every present tree (mode-aware, via `treeBlobOids`) and
 *     their presence is checked in batches of `LOOKUP_BATCH`.
 *
 * The `::bigint` / `::bytea` casts and the `VALUES (…::bytea)` seed pin types in
 * the raw CTE (the porsager driver can't bind a raw `bytea[]`, OQ-13); the array
 * lookups use Kysely's `in`-expansion instead.
 *
 * @param {*} db Kysely instance the queries run on.
 * @param {*} id the repo's `repo_id`.
 * @param {string[]} roots hex OIDs to start from.
 * @param {boolean} omitBlobs skip the blob enumeration step.
 * @returns {Promise<{ missing: Set<string>, present: Set<string> }>} hex OIDs.
 */
async function reachableClosure(db, id, roots, omitBlobs) {
  const present = new Set();
  const missing = new Set();
  if (roots.length === 0) {
    return { missing, present };
  }

  const seedRows = roots.map((root) => sql`(${Buffer.from(root, "hex")}::bytea)`);
  const walked = await sql`
    with recursive closure(oid) as (
      select oid from (values ${sql.join(seedRows)}) as roots(oid)
      union
      select e.child from git_edge e
      join closure c on e.parent = c.oid
      where e.repo_id = ${id}::bigint
    )
    select c.oid, o.type
    from closure c
    left join git_object o on o.repo_id = ${id}::bigint and o.oid = c.oid
  `.execute(db);

  // Partition the closure; remember present trees (as raw bytes) for the blob pass.
  const presentTrees = [];
  for (const row of walked.rows) {
    const oidHex = row.oid.toString("hex");
    if (row.type === null) {
      missing.add(oidHex);
      continue;
    }
    present.add(oidHex);
    if (row.type === PACK_OBJ_TYPE.TREE) {
      presentTrees.push(row.oid);
    }
  }
  if (omitBlobs || presentTrees.length === 0) {
    return { missing, present };
  }

  // Every blob named by a present tree is a candidate, present or not.
  const wantedBlobs = new Set();
  for (const oidBatch of batches(presentTrees, LOOKUP_BATCH)) {
    const treeRows = await db
      .selectFrom("git_object")
      .select("content")
      .where("repo_id", "=", id)
      .where("oid", "in", oidBatch)
      .execute();
    for (const tree of treeRows) {
      for (const blobOid of treeBlobOids(tree.content)) {
        wantedBlobs.add(blobOid);
      }
    }
  }
  if (wantedBlobs.size === 0) {
    return { missing, present };
  }

  const storedBlobs = new Set();
  for (const hexBatch of batches([...wantedBlobs], LOOKUP_BATCH)) {
    const oidBytes = hexBatch.map((hex) => Buffer.from(hex, "hex"));
    const found = await db
      .selectFrom("git_object")
      .select("oid")
      .where("repo_id", "=", id)
      .where("oid", "in", oidBytes)
      .execute();
    for (const row of found) {
      storedBlobs.add(row.oid.toString("hex"));
    }
  }

  for (const blobOid of wantedBlobs) {
    if (storedBlobs.has(blobOid)) {
      present.add(blobOid);
    } else {
      missing.add(blobOid);
    }
  }
  return { missing, present };
}
910
+
911
+ //#endregion
912
+ //#region src/store/repo-resolver.ts
913
/**
 * Build a memoizing resolver from a wire repo name to its `repos.id` surrogate.
 * The object and ref stores both key on the bigint `repo_id`, so each builds one
 * of these as its name→id boundary.
 *
 * Caching policy:
 *  - A found id is cached for the resolver's lifetime. The mapping is immutable
 *    once a repo exists (ids are `generated always`, names are unique), which
 *    keeps the per-object hot path (getObject) at one point-read, not a join.
 *  - A miss is NEVER cached: the name may be created by a later push, and a
 *    cached `null` would mask it.
 *
 * Reads use `resolveRepoId` (lookup; `null` ⇒ never written, i.e. empty).
 * Writes use `ensureRepoId` (race-safe get-or-create).
 *
 * @param {*} db Kysely instance with access to the `repos` table.
 * @returns {{ ensureRepoId(name: string): Promise<*>, resolveRepoId(name: string): Promise<*> }}
 */
function createRepoResolver(db) {
  const idsByName = new Map();

  /** Query builder selecting the `id` of the repo called `name`. */
  const selectIdByName = (name) => db.selectFrom("repos").select("id").where("name", "=", name);

  /** Store a resolved id and hand it back. */
  const remember = (name, id) => {
    idsByName.set(name, id);
    return id;
  };

  return {
    /** The repo's id, creating the row if absent. Race-safe under concurrent
     * first-pushes: a conflicting insert returns nothing, so the winner's row is
     * re-read. The leading select avoids a no-op write on the common (exists) path. */
    async ensureRepoId(name) {
      const known = idsByName.get(name);
      if (known !== undefined) return known;

      const found = await selectIdByName(name).executeTakeFirst();
      if (found) return remember(name, found.id);

      const inserted = await db
        .insertInto("repos")
        .values({ name })
        .onConflict((oc) => oc.doNothing())
        .returning("id")
        .executeTakeFirst();
      const id = inserted?.id ?? (await selectIdByName(name).executeTakeFirstOrThrow()).id;
      return remember(name, id);
    },
    /** The repo's id, or `null` if it has never been written to. */
    async resolveRepoId(name) {
      const known = idsByName.get(name);
      if (known !== undefined) return known;

      const found = await selectIdByName(name).executeTakeFirst();
      return found ? remember(name, found.id) : null;
    }
  };
}
955
+
956
+ //#endregion
957
+ //#region src/store/gc.ts
958
/** Default per-batch DELETE cap when the caller omits `batchLimit`. Large enough to
 * sweep a typical force-commit orphan set in one or two batches, small enough to
 * bound the dead-tuple burst and lock duration per transaction (§7). */
const DEFAULT_BATCH_LIMIT = 1e4;
/** OIDs loaded per COPY round-trip into the live table (the live set can be the whole
 * reachable tree, so it streams in bounded batches, never one giant payload). */
const LIVE_LOAD_BATCH = 1e4;
/**
 * Build the GC over a porsager client (the same wire→DB boundary the object and ref
 * stores take). `gc(repo, opts)` reclaims a single repo's unreachable-and-old-enough
 * objects offline; reachable objects are always retained.
 *
 * @param {*} pg porsager `postgres` client (pooled).
 * @returns {{ gc(repo: string, opts: object): Promise<{ deletedEdges: number, deletedObjects: number }> }}
 *   `opts` fields read here: `graceSeconds`, `batchLimit` (defaults to
 *   `DEFAULT_BATCH_LIMIT`), `maintain` (anything but `false` enables the
 *   post-sweep VACUUM/REINDEX), and the test seam `_hooks.afterLiveSet`.
 */
function createGc(pg) {
  const repos = createRepoResolver(initKysely(pg));
  return {
    async gc(repo, opts) {
      const id = await repos.resolveRepoId(repo);
      // A repo that was never written has nothing to collect.
      if (id === null) {
        return {
          deletedEdges: 0,
          deletedObjects: 0
        };
      }
      const batchLimit = opts.batchLimit ?? DEFAULT_BATCH_LIMIT;
      // Per-repo scratch table holding the live OID set for the anti-join.
      const live = `gc_live_${id}`;
      await pg.unsafe(`create unlogged table if not exists ${live} (oid bytea primary key)`);
      try {
        await pg.unsafe(`truncate ${live}`);
        await loadLive(live, await liveSet(id));
        await opts._hooks?.afterLiveSet?.();
        const deletedObjects = await sweepObjects(id, live, opts.graceSeconds, batchLimit);
        const deletedEdges = await sweepEdges(id, batchLimit);
        if (opts.maintain !== false && deletedObjects + deletedEdges > 0) await maintain();
        return {
          deletedEdges,
          deletedObjects
        };
      } finally {
        await pg.unsafe(`drop table if exists ${live}`);
      }
    }
  };
  /**
   * The live set: the reachable closure from every ref tip, read under ONE
   * REPEATABLE READ snapshot so the ref-tip read and the multi-statement closure
   * walk cannot interleave with a concurrent push's ref update (§5 defense (a)).
   *
   * `reachableClosure` is the shared engine and takes a `Kysely`, but the
   * kysely-postgres-js dialect drives queries by calling `.reserve()` on its
   * `postgres` client for EACH query — so a plain pooled Kysely would scatter the
   * closure's statements across connections (no shared snapshot), and a
   * transaction-scoped `Sql` has no `.reserve()` at all. So we pin ONE porsager
   * connection, open a REPEATABLE READ transaction on it, and back a Kysely with a
   * shim whose `reserve()` always returns that pinned connection with a no-op
   * `release()` — every closure statement then runs on the one snapshotted
   * connection. The transaction is read-only; it commits (releasing the snapshot)
   * before the sweep's own short write transactions begin.
   *
   * On any failure the transaction is rolled back before the connection is
   * released, so the connection is not handed back with a transaction still open.
   */
  async function liveSet(id) {
    const conn = await pg.reserve();
    try {
      await conn`begin isolation level repeatable read`;
      try {
        const pinned = pinnedKysely(conn);
        const rows = await conn`
          select oid, peeled_oid from git_ref where repo_id = ${id} and oid is not null
        `;
        const tips = new Set();
        for (const r of rows) {
          if (r.oid) tips.add(r.oid.toString("hex"));
          if (r.peeled_oid) tips.add(r.peeled_oid.toString("hex"));
        }
        const { present } = await reachableClosure(pinned, id, [...tips], false);
        await conn`commit`;
        return present;
      } catch (err) {
        // Best-effort: end the open (possibly aborted) transaction. A rollback
        // failure is deliberately ignored so the original error is what surfaces.
        try {
          await conn`rollback`;
        } catch {
          // nothing more to do here; the original error is rethrown below
        }
        throw err;
      }
    } finally {
      conn.release();
    }
  }
  /** A Kysely pinned to a single porsager connection: its dialect `reserve()`s the
   * same connection for every statement (so a multi-statement read shares one MVCC
   * snapshot) and `release()` is a no-op (the caller owns the connection's lifetime).
   * The shim is a callable with a `reserve` property, the shape the dialect probes
   * for (`isPostgresJSSql`). */
  function pinnedKysely(conn) {
    const nonReleasing = new Proxy(conn, {
      get: (target, prop) => prop === "release" ? () => {} : Reflect.get(target, prop, target)
    });
    return initKysely(Object.assign(() => {
      throw new Error("pggit gc: pinned client used as a tagged template");
    }, { reserve: async () => nonReleasing }));
  }
  /** Bulk-load the live OID set into the UNLOGGED `live` table via binary COPY (the
   * one bytea-safe bulk path, copy-insert.ts), batched so the payload stays bounded.
   * Each COPY runs in its own transaction so the staging temp table drops on commit. */
  async function loadLive(live, oids) {
    if (oids.size === 0) return;
    const all = [...oids];
    for (let i = 0; i < all.length; i += LIVE_LOAD_BATCH) {
      const chunk = all.slice(i, i + LIVE_LOAD_BATCH);
      await pg.begin(async (tx) => {
        await copyInsert(tx, live, ["oid"], chunk.map((hex) => [{
          t: "bytea",
          v: Buffer.from(hex, "hex")
        }]));
      });
    }
  }
  /** Batched object sweep. Postgres `DELETE` has no `LIMIT`, so each batch picks a
   * `LIMIT`-bounded set of victim OIDs then deletes them by PRIMARY KEY `(repo_id,
   * oid)`. The match is on the PK — NOT `ctid`: `ctid` is per-partition-relative, so
   * matching `ctid` across the HASH-partitioned table would delete same-ctid rows in
   * OTHER partitions (other tenants). The loop ends when a batch deletes nothing.
   * Each batch is its own (implicit) transaction, so `clock_timestamp()` re-evaluates
   * per batch and the grace cutoff advances. Returns total rows deleted. */
  async function sweepObjects(id, live, graceSeconds, batchLimit) {
    let total = 0;
    for (;;) {
      const deleted = await pg.unsafe(`with victims as (
        select o.oid from git_object o
        where o.repo_id = $1::bigint
        and not exists (select 1 from ${live} l where l.oid = o.oid)
        and o.created_at < clock_timestamp() - make_interval(secs => $2::float8)
        limit $3::int
        )
        delete from git_object o using victims v
        where o.repo_id = $1::bigint and o.oid = v.oid returning 1 as n`, [
        String(id),
        String(graceSeconds),
        String(batchLimit)
      ]);
      if (deleted.length === 0) break;
      total += deleted.length;
    }
    return total;
  }
  /** Batched edge sweep: delete every `git_edge` row whose PARENT object no longer
   * exists in `git_object` (a deleted object's outgoing edges). No FK cascade exists
   * (0003_git_edge.ts), so dangling edges must be swept explicitly. Anti-join on the
   * parent only: a surviving parent is reachable, so all its children are reachable
   * and present — its edges never dangle. Like the object sweep, each batch picks a
   * `LIMIT`-bounded victim set then deletes by PRIMARY KEY `(repo_id, parent, child)`
   * — never `ctid`, which is per-partition and would reach into other tenants. */
  async function sweepEdges(id, batchLimit) {
    let total = 0;
    for (;;) {
      const deleted = await pg.unsafe(`with victims as (
        select e.parent, e.child from git_edge e
        where e.repo_id = $1::bigint
        and not exists (
        select 1 from git_object o where o.repo_id = e.repo_id and o.oid = e.parent
        )
        limit $2::int
        )
        delete from git_edge e using victims v
        where e.repo_id = $1::bigint and e.parent = v.parent and e.child = v.child
        returning 1 as n`, [String(id), String(batchLimit)]);
      if (deleted.length === 0) break;
      total += deleted.length;
    }
    return total;
  }
  /** Post-sweep maintenance (best-effort): reclaim the dead tuples GC produced in
   * the heap + TOAST and refresh planner stats, then reindex the walk index.
   * `VACUUM` cannot run inside a transaction block, so these are standalone
   * statements run outside any txn. */
  async function maintain() {
    await pg.unsafe(`vacuum (analyze) git_object`);
    await pg.unsafe(`vacuum (analyze) git_edge`);
    await pg.unsafe(`reindex index git_edge_walk`);
  }
}
1123
+
1124
+ //#endregion
1125
+ //#region src/gc-scheduler.ts
1126
/**
 * Build the GC scheduler over a porsager client (the same wire→DB boundary the
 * stores take). It only ever invokes the per-repo GC primitive, which is
 * reachability-safe, so reachable objects are never touched.
 *
 * @param {*} pg porsager `postgres` client.
 * @param {{ graceSeconds: number, concurrency: number, intervalMs: number }} opts
 *   grace period handed to each GC, how many repos to GC at once, and the timer
 *   period used by `start()`.
 * @returns {{ drainOnce: Function, start: Function, stop: Function }}
 *   `drainOnce()` runs one poll+sweep pass; `start()` / `stop()` drive it on
 *   `intervalMs`.
 */
function createGcScheduler(pg, opts) {
  const gc = createGc(pg);
  let timer;
  let inFlight;

  /** The eligible repos for this pass (the §2 predicate): pushed at least once,
   * and either never GC'd or pushed since the last GC. */
  async function selectCandidates() {
    return pg`
      select r.id::text as id, r.name
      from repos r
      where r.last_pushed_at is not null
      and (r.last_gc_at is null or r.last_pushed_at > r.last_gc_at)
    `;
  }

  /**
   * GC a single candidate and stamp it.
   *
   * `t0 = clock_timestamp()` is captured BEFORE `gc()` opens its snapshot and is
   * written as `last_gc_at` only after the sweep. Any push committing after t0
   * re-stamps `last_pushed_at > t0` (the store stamps after commit), so the repo
   * re-qualifies next pass and no garbage is lost.
   *
   * A failure is ISOLATED: it is logged and `null` is returned. The repo keeps
   * its old `last_gc_at`, so it is retried next pass, and one poison repo never
   * aborts the rest of the pass. `maintain: false` because the drain leans on
   * autovacuum rather than a per-pass full-table VACUUM (gc.ts).
   */
  async function drainRepo(c) {
    try {
      const [clock] = await pg`select clock_timestamp()::text as t0`;
      if (!clock) {
        throw new Error("pggit gc-scheduler: clock_timestamp() returned no row");
      }
      const swept = await gc.gc(c.name, {
        graceSeconds: opts.graceSeconds,
        maintain: false
      });
      await pg`update repos set last_gc_at = ${clock.t0}::timestamptz where id = ${c.id}::bigint`;
      return {
        deletedEdges: swept.deletedEdges,
        deletedObjects: swept.deletedObjects,
        repo: c.name
      };
    } catch (err) {
      console.error(`pggit gc-scheduler: GC of repo ${JSON.stringify(c.name)} failed (retried next pass):`, err);
      return null;
    }
  }

  /** One drain pass: GC every eligible repo with bounded concurrency (the repos
   * are distinct, so a pass never double-GCs one). Resolves to one entry per repo
   * GC'd; a repo whose GC threw is left out and retried next pass. */
  async function drainOnce() {
    const candidates = await selectCandidates();
    const width = Math.max(1, opts.concurrency);
    const outcomes = await mapPool(candidates, width, drainRepo);
    return outcomes.filter((entry) => entry !== null);
  }

  /** Timer tick: start a pass unless one is still running (a slow pass simply
   * skips ticks). A pass failure is logged, never thrown into the timer. */
  function tick() {
    if (inFlight) return;
    inFlight = drainOnce()
      .catch((err) => {
        console.error("pggit gc-scheduler: drain pass failed:", err);
      })
      .finally(() => {
        inFlight = undefined;
      });
  }

  /** Run the drain every `intervalMs`. Because `tick` refuses to overlap passes,
   * two passes can never touch the same repo at once. The timer is `unref`'d so
   * it alone does not keep the process alive (the server's socket does).
   * Calling `start()` while already started does nothing. */
  function start() {
    if (timer) return;
    timer = setInterval(tick, opts.intervalMs);
    timer.unref?.();
  }

  /** Halt the background drain and AWAIT any pass already in flight, so a caller
   * may safely tear the connection pool down afterwards (no query runs into a
   * closed pool). Idempotent. */
  async function stop() {
    if (timer) {
      clearInterval(timer);
      timer = undefined;
    }
    await inFlight;
  }

  return {
    drainOnce,
    start,
    stop
  };
}
1213
/**
 * Run `fn` over `items` with at most `limit` calls in flight, preserving result
 * order. A bounded worker pool — the workers pull indices from a shared cursor —
 * so one slow item cannot head-of-line-block the rest.
 *
 * @param {Array} items inputs; `fn` is called once per element.
 * @param {number} limit maximum concurrent calls. Anything that is not a number
 *   ≥ 1 (0, negatives, `NaN`, `undefined`) is treated as 1; fractions are floored.
 *   Previously such a value spawned zero workers, so the function resolved with
 *   an array of holes and silently processed nothing.
 * @param {(item: *) => *} fn mapper; may be async.
 * @returns {Promise<Array>} results, index-aligned with `items`. Rejects with the
 *   first error thrown by `fn`.
 */
async function mapPool(items, limit, fn) {
  const results = new Array(items.length);
  // `limit >= 1` is false for NaN/undefined, so those fall back to one worker.
  const width = limit >= 1 ? Math.floor(limit) : 1;
  let cursor = 0;
  async function worker() {
    for (;;) {
      // Claiming the index is synchronous, so two workers never take the same one.
      const i = cursor++;
      if (i >= items.length) return;
      results[i] = await fn(items[i]);
    }
  }
  const workers = Array.from({ length: Math.min(width, items.length) }, () => worker());
  await Promise.all(workers);
  return results;
}
1230
+
1231
+ //#endregion
1232
+ //#region src/index.ts
1233
/** Bytes served for the upload-pack `info/refs` request: the `# service`
 * pkt-line, a flush packet, then the encoded advertisement. Built once at
 * module load. */
const UPLOAD_PACK_ADVERTISEMENT = (() => {
  const servicePreamble = encodePktLine(Buffer.from("# service=git-upload-pack\n"));
  const flush = encodePkt({ type: "flush" });
  const advertisement = encodeAdvertisement();
  return Buffer.concat([servicePreamble, flush, advertisement]);
})();
1238
/**
 * Copy exactly the bytes viewed by `buf` into a fresh buffer.
 *
 * A Buffer can be a window onto a larger backing store, so the slice is taken
 * from the view's own offset and length rather than returning `buf.buffer`.
 *
 * @param {Buffer} buf the view to copy.
 * @returns {ArrayBuffer} a copy of the viewed bytes.
 */
function toArrayBuffer(buf) {
  const { buffer, byteOffset, byteLength } = buf;
  const end = byteOffset + byteLength;
  return buffer.slice(byteOffset, end);
}
1241
/** The upload-pack advertisement converted once to the ArrayBuffer form that is
 * passed to `c.body(...)`. */
const ADVERTISEMENT_BODY = toArrayBuffer(UPLOAD_PACK_ADVERTISEMENT);
1242
/**
 * Read a smart-HTTP POST body, honoring `Content-Encoding`.
 *
 * Git gzips the upload-pack/receive-pack request body once it is large enough
 * (`remote-curl.c`), and `git http-backend` decompresses it server-side, so this
 * does the same. Any other declared encoding is a hard error: the bytes are
 * never handed raw to the pkt-line parser.
 *
 * @param {*} c request context exposing `req.arrayBuffer()` and `req.header(name)`.
 * @returns {Promise<Buffer>} the decoded body.
 * @throws {GitProtocolError} on an unsupported encoding or a gzip body that
 *   fails to decompress.
 */
async function readRequestBody(c) {
  const body = Buffer.from(await c.req.arrayBuffer());
  const declared = c.req.header("content-encoding")?.toLowerCase();

  switch (declared) {
    case undefined:
    case "identity":
      return body;
    case "gzip":
    case "x-gzip":
      try {
        return gunzipSync(body);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new GitProtocolError(`request body declared Content-Encoding ${JSON.stringify(declared)} but failed to gunzip: ${reason}`);
      }
    default:
      throw new GitProtocolError(`unsupported request Content-Encoding: ${JSON.stringify(declared)}`);
  }
}
1260
/**
 * Reject any fetch request that did not negotiate git protocol v2.
 *
 * Fetch is served over v2 ONLY (the charter). git asks for it with a
 * `Git-Protocol: version=2` header — a `:`-joined key list that git ≥ 2.26 sends
 * by default. A v0/v1 client sends no such header and cannot parse the v2
 * advertisement: it would read the `version 2` line + flush as an empty repo and
 * silently clone nothing. Failing loudly at the boundary (the app maps this
 * error to a 400) beats handing back an advertisement the client will misread.
 *
 * @param {string | undefined} header raw `Git-Protocol` header value, if any.
 * @throws {GitProtocolError} when `version=2` is not one of the header's keys.
 */
function assertProtocolV2(header) {
  const keys = (header ?? "").split(":").map((key) => key.trim());
  if (keys.includes("version=2")) {
    return;
  }
  throw new GitProtocolError("pggit serves fetch over git protocol v2 only; set protocol.version=2 (git ≥ 2.26 negotiates it by default)");
}
1271
/**
 * Bind the stores in `deps` to one repo, producing the backend object the
 * upload-pack handler is given. Every method forwards to `deps.objects` or
 * `deps.refs` with `repoId` as the first argument; `deps` is read at call time.
 *
 * @param {{ objects: *, refs: * }} deps object and ref stores.
 * @param {*} repoId repo identifier passed through to the stores.
 * @returns {object} `buildPack`, `commonHaves`, `getSymref`, `listRefs`, `readyToGiveUp`.
 */
function backendFor(deps, repoId) {
  return {
    buildPack(wants, haves, omitBlobs, includeTag) {
      return deps.objects.buildPack(repoId, wants, haves, omitBlobs, includeTag);
    },
    commonHaves(haves) {
      return deps.objects.commonHaves(repoId, haves);
    },
    getSymref(name) {
      return deps.refs.getSymref(repoId, name);
    },
    listRefs() {
      return deps.refs.listRefs(repoId);
    },
    readyToGiveUp(wants, common) {
      return deps.objects.readyToGiveUp(repoId, wants, common);
    }
  };
}
1280
/**
 * Bind the stores in `deps` to one repo, producing the backend object the
 * receive-pack handler is given.
 *
 * `applyRefUpdates` and `isConnected` forward their store's result; `ingest`
 * awaits `ingestPack` and discards its result. `syncRefSnapshot` is attached only
 * when `deps.snapshots` is configured.
 *
 * @param {{ objects: *, refs: *, snapshots?: * }} deps stores.
 * @param {*} repoId repo identifier passed through to the stores.
 * @returns {object} the receive backend.
 */
function receiveBackendFor(deps, repoId) {
  const backend = {
    applyRefUpdates(commands, atomic) {
      return deps.refs.applyRefUpdates(repoId, commands, atomic);
    },
    async ingest(pack) {
      await deps.objects.ingestPack(repoId, pack);
    },
    isConnected(oid) {
      return deps.objects.isConnected(repoId, oid);
    }
  };
  if (!deps.snapshots) {
    return backend;
  }
  // Capture the stores now; the snapshot sync keeps using these references.
  const snapshotDeps = {
    objects: deps.objects,
    snapshots: deps.snapshots
  };
  backend.syncRefSnapshot = (ref, newOid) => syncRefSnapshot(snapshotDeps, repoId, ref, newOid);
  return backend;
}
1297
/**
 * Build the v0 receive-pack ref advertisement body: the `# service` preamble
 * pkt-line, a flush packet, then the encoded ref list.
 *
 * @param {{ refs: * }} deps stores; only `deps.refs.listRefs` is used.
 * @param {*} repoId repo whose refs are advertised.
 * @returns {Promise<Buffer>} the complete response body.
 */
async function receivePackAdvertBody(deps, repoId) {
  // Refs are fetched first; the framing packets are only built once that succeeds.
  const refs = await deps.refs.listRefs(repoId);
  const servicePreamble = encodePktLine(Buffer.from("# service=git-receive-pack\n"));
  const flush = encodePkt({ type: "flush" });
  const refList = encodeReceivePackAdvertisement(refs);
  return Buffer.concat([servicePreamble, flush, refList]);
}
1306
/**
 * Build the git-remote Hono app (smart-HTTP, protocol v2 fetch). Mountable into
 * a host app via `host.route("/git", createGitApp(deps))`; the host owns the
 * Postgres lifecycle behind `deps`.
 *
 * Routes:
 *  - `GET  /health`                  → `ok`
 *  - `GET  /:repo/info/refs`         → advertisement for `?service=git-upload-pack`
 *    (v2 required) or `git-receive-pack`; any other service gets a 403.
 *  - `POST /:repo/git-upload-pack`   → fetch
 *  - `POST /:repo/git-receive-pack`  → push
 *
 * A `GitProtocolError` becomes a 400 carrying its message; any other error is
 * logged and answered with a generic 500.
 *
 * @param {*} deps stores handed to the per-repo backends.
 * @param {{ instrument?: boolean }} [opts] `instrument` wraps every request in `runRequest`.
 * @returns {Hono} the configured app.
 */
function createGitApp(deps, opts = {}) {
  /** Response headers shared by every git endpoint; only the content type varies. */
  const gitHeaders = (contentType) => ({
    "Cache-Control": "no-cache",
    "Content-Type": contentType
  });

  const app = new Hono();

  if (opts.instrument) {
    app.use((c, next) => {
      const meta = {
        method: c.req.method,
        path: c.req.path
      };
      return runRequest(meta, () => next());
    });
  }
  app.use(cors());

  app.onError((err, c) => {
    if (err instanceof GitProtocolError) {
      return c.text(err.message, 400);
    }
    console.error(err);
    return c.text("internal server error", 500);
  });

  app.get("/health", (c) => c.text("ok"));

  app.get("/:repo/info/refs", async (c) => {
    const service = c.req.query("service");
    switch (service) {
      case "git-upload-pack": {
        // Refuse clients that did not negotiate v2 before sending anything.
        assertProtocolV2(c.req.header("git-protocol"));
        return c.body(ADVERTISEMENT_BODY, 200, gitHeaders("application/x-git-upload-pack-advertisement"));
      }
      case "git-receive-pack": {
        const advert = await receivePackAdvertBody(deps, c.req.param("repo"));
        return c.body(toArrayBuffer(advert), 200, gitHeaders("application/x-git-receive-pack-advertisement"));
      }
      default:
        return c.text(`unsupported service ${JSON.stringify(service)}`, 403);
    }
  });

  app.post("/:repo/git-upload-pack", async (c) => {
    const request = await readRequestBody(c);
    const backend = backendFor(deps, c.req.param("repo"));
    const response = await handleUploadPack(request, backend);
    count("wireBytes", response.length);
    return c.body(toArrayBuffer(response), 200, gitHeaders("application/x-git-upload-pack-result"));
  });

  app.post("/:repo/git-receive-pack", async (c) => {
    const request = await readRequestBody(c);
    const backend = receiveBackendFor(deps, c.req.param("repo"));
    const response = await handleReceivePack(request, backend);
    return c.body(toArrayBuffer(response), 200, gitHeaders("application/x-git-receive-pack-result"));
  });

  return app;
}
1359
+
1360
+ //#endregion
1361
+ export { createGc, createGcScheduler, createGitApp };
1362
+ //# sourceMappingURL=index.mjs.map