@usecontextlayer/pggit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +132 -0
- package/dist/index.d.mts +274 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +1362 -0
- package/dist/index.mjs.map +1 -0
- package/dist/schema.d.mts +100 -0
- package/dist/schema.d.mts.map +1 -0
- package/dist/schema.mjs +1 -0
- package/package.json +91 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1362 @@
|
|
|
1
|
+
import { gunzipSync } from "node:zlib";
|
|
2
|
+
import { Hono } from "hono";
|
|
3
|
+
import { cors } from "hono/cors";
|
|
4
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
5
|
+
import { performance } from "node:perf_hooks";
|
|
6
|
+
import { Kysely, sql } from "kysely";
|
|
7
|
+
import { PostgresJSDialect } from "kysely-postgres-js";
|
|
8
|
+
|
|
9
|
+
//#region src/instrument.ts
|
|
10
|
+
/** Async-context slot holding the collector of the request currently being served. */
const als = new AsyncLocalStorage();
/** Collectors of finished requests, appended in completion order. */
const collected = [];
/**
 * Create an empty instrumentation collector for one request.
 *
 * @param method request method recorded on the collector
 * @param path request path recorded on the collector
 * @returns a collector with no counters, phase timings or queries, whose
 *          current phase is `"request"` and whose label is empty
 */
function newCollector(method, path) {
	const counters = new Map();
	const phaseMs = new Map();
	const collector = {
		counters,
		current: "request",
		label: "",
		method,
		path,
		phaseMs,
		queries: []
	};
	return collector;
}
|
|
23
|
+
/**
 * Execute `fn` with a brand-new collector installed as the async-context store.
 * The collector is appended to `collected` once `fn` settles, whether it
 * fulfilled or rejected.
 *
 * @param meta object whose `method` and `path` seed the collector
 * @param fn the request body to run
 * @returns whatever `fn` resolves to (its rejection propagates)
 */
async function runRequest(meta, fn) {
	const collector = newCollector(meta.method, meta.path);
	const tracked = async () => {
		try {
			return await fn();
		} finally {
			collected.push(collector);
		}
	};
	return als.run(collector, tracked);
}
|
|
34
|
+
/**
 * Time `fn` and add its wall-clock duration to the active collector's
 * `phaseMs[name]`. While `fn` runs, the collector's `current` phase is `name`;
 * the previous phase is restored afterwards. Without an active collector `fn`
 * is simply invoked.
 *
 * @param name phase name to accumulate under
 * @param fn work to measure
 * @returns the result of `fn`
 */
async function withPhase(name, fn) {
	const active = als.getStore();
	if (!active) return fn();
	const outerPhase = active.current;
	active.current = name;
	const startedAt = performance.now();
	try {
		return await fn();
	} finally {
		const spent = performance.now() - startedAt;
		const soFar = active.phaseMs.get(name) ?? 0;
		active.phaseMs.set(name, soFar + spent);
		active.current = outerPhase;
	}
}
|
|
49
|
+
/**
 * Add `n` (default 1) to the named counter of the active collector.
 * Does nothing when no collector is active.
 */
function count(metric, n = 1) {
	const active = als.getStore();
	if (active) {
		const { counters } = active;
		const before = counters.get(metric) ?? 0;
		counters.set(metric, before + n);
	}
}
|
|
54
|
+
/** Set the active collector's `label`; does nothing when no collector is active. */
function label(name) {
	const active = als.getStore();
	if (!active) return;
	active.label = name;
}
|
|
58
|
+
/**
 * Append a query record (`durationMs`, the collector's current phase, `sql`)
 * to the active collector. Does nothing when no collector is active.
 */
function recordQuery(sql, durationMs) {
	const active = als.getStore();
	if (!active) return;
	const entry = {
		durationMs,
		phase: active.current,
		sql
	};
	active.queries.push(entry);
}
|
|
67
|
+
|
|
68
|
+
//#endregion
|
|
69
|
+
//#region src/protocol/errors.ts
|
|
70
|
+
/**
 * Raised at the git wire boundary for a request that is malformed or asks for
 * something unsupported (bad command list, unknown command, unsupported
 * object-format or filter, an unaccepted body encoding). This is the client's
 * fault: the HTTP layer turns it into a 400 carrying the message, whereas an
 * internal failure (missing object mid-serve, DB error) remains a 500.
 */
var GitProtocolError = class extends Error {
	name = "GitProtocolError";
	constructor(message) {
		super(message);
	}
};
|
|
84
|
+
/**
 * Raised when a fetch `want` names an object the repo does not have (stale or
 * force-pushed tip, lost promisor blob). That is a client condition: real git
 * upload-pack reports it in-band as `ERR upload-pack: not our ref <oid>` on an
 * HTTP 200, so it must not surface as a 500. `oids` holds the absent object
 * ids; `handleFetch` converts the error to the ERR pkt-line. Any other `Error`
 * from the serve path still propagates as a 500.
 */
var WantNotFoundError = class extends Error {
	oids;
	name = "WantNotFoundError";
	constructor(oids) {
		const listed = oids.join(" ");
		super(`upload-pack: not our ref ${listed}`);
		this.oids = oids;
	}
};
|
|
100
|
+
|
|
101
|
+
//#endregion
|
|
102
|
+
//#region src/protocol/pkt-line.ts
|
|
103
|
+
/**
 * pkt-line framing for the git wire protocol. Every pkt-line starts with four
 * hex digits giving the total length (prefix included), followed by
 * `length - 4` payload bytes. Three lengths carry no payload and act as
 * markers: `0000` flush, `0001` delim, `0002` response-end.
 * See gitprotocol-common + design spec §5.
 */
const FLUSH_PKT = Buffer.from("0000", "latin1");
const DELIM_PKT = Buffer.from("0001", "latin1");
const RESPONSE_END_PKT = Buffer.from("0002", "latin1");
/** Upper bound on the payload size this writer emits (git's conservative writer cap). */
const WRITER_MAX_PAYLOAD = 65515;
/** Upper bound on the payload size accepted when reading (git's LARGE_PACKET_DATA_MAX). */
const READER_MAX_PAYLOAD = 65516;
/**
 * Wrap `payload` in a data pkt-line: four lowercase hex length digits, then the bytes.
 *
 * @param payload bytes to frame
 * @returns a new Buffer holding prefix + payload
 * @throws {Error} when the payload is larger than the writer cap
 */
function encodePktLine(payload) {
	const size = payload.length;
	if (size > WRITER_MAX_PAYLOAD) throw new Error(`pkt-line: payload ${size} exceeds writer cap ${WRITER_MAX_PAYLOAD}`);
	const hexLength = (size + 4).toString(16).padStart(4, "0");
	const header = Buffer.from(hexLength, "latin1");
	return Buffer.concat([header, payload]);
}
|
|
122
|
+
/**
 * Serialize one packet descriptor: a `data` packet is framed around its
 * `payload`; `flush`, `delim` and `response-end` map to their fixed 4-byte
 * markers. Any other `type` yields `undefined`.
 */
function encodePkt(pkt) {
	const { type } = pkt;
	if (type === "data") return encodePktLine(pkt.payload);
	if (type === "flush") return FLUSH_PKT;
	if (type === "delim") return DELIM_PKT;
	if (type === "response-end") return RESPONSE_END_PKT;
}
|
|
131
|
+
/**
 * Read the 4-character hex length prefix located at `offset` in `buf`.
 *
 * @returns the prefix as a number
 * @throws {GitProtocolError} when the four characters are not all hex digits
 *         (either case is accepted)
 */
function parseLen(buf, offset) {
	const prefix = buf.toString("latin1", offset, offset + 4);
	const wellFormed = /^[0-9a-f]{4}$/i.test(prefix);
	if (wellFormed) return Number.parseInt(prefix, 16);
	throw new GitProtocolError(`pkt-line: invalid length prefix ${JSON.stringify(prefix)}`);
}
|
|
136
|
+
/**
 * Split `buf` into pkt-line packets.
 *
 * Decoding is streaming-safe: when the buffer ends inside a packet, the
 * incomplete tail is returned untouched in `rest` so the caller can prepend it
 * to the next chunk.
 *
 * `opts.stopAtFlush` makes decoding return at the first flush packet. The flush
 * is consumed but not added to `packets`, and `rest` starts right after it.
 * receive-pack relies on this: command pkt-lines, a flush, then the raw
 * (un-framed) pack bytes.
 *
 * `flushed` is true only when such a flush ended decoding in `stopAtFlush`
 * mode; callers holding a complete body use it to reject an unterminated
 * command list.
 *
 * @param buf bytes to decode
 * @param opts optional `{ stopAtFlush }`
 * @returns `{ flushed, packets, rest }`
 * @throws {GitProtocolError} on an invalid prefix, the reserved length `0003`,
 *         or a declared payload above the reader bound
 */
function decodePktStream(buf, opts = {}) {
	// Lengths 0, 1 and 2 are the payload-free marker packets, indexed by length.
	const markerTypes = ["flush", "delim", "response-end"];
	const packets = [];
	let cursor = 0;
	const finish = (flushed) => ({
		flushed,
		packets,
		rest: buf.subarray(cursor)
	});
	while (buf.length - cursor >= 4) {
		const len = parseLen(buf, cursor);
		if (len < 3) {
			cursor += 4;
			if (len === 0 && opts.stopAtFlush) return finish(true);
			packets.push({ type: markerTypes[len] });
			continue;
		}
		if (len === 3) throw new GitProtocolError("pkt-line: reserved length 0003");
		const payloadLen = len - 4;
		if (payloadLen > READER_MAX_PAYLOAD) throw new GitProtocolError(`pkt-line: declared payload ${payloadLen} exceeds reader bound ${READER_MAX_PAYLOAD}`);
		const end = cursor + len;
		// Packet is not fully buffered yet: leave it in `rest`.
		if (end > buf.length) break;
		packets.push({
			payload: buf.subarray(cursor + 4, end),
			type: "data"
		});
		cursor = end;
	}
	return finish(false);
}
|
|
190
|
+
|
|
191
|
+
//#endregion
|
|
192
|
+
//#region src/protocol/capabilities.ts
|
|
193
|
+
/** Agent string advertised in capability lists. */
const AGENT = "pggit/0.0.0";
/**
 * Refuse a client that negotiates an object hash other than sha1. pggit is
 * SHA-1 only (the charter) and assumes 40-hex / 20-byte OIDs throughout, so a
 * sha256 client would otherwise fail deep in the parser on a 64-hex OID.
 * A capability list without any `object-format=` entry is accepted, since
 * git's default is sha1. Only the first `object-format=` entry is examined.
 *
 * @param caps capability strings sent by the client
 * @throws {GitProtocolError} when the declared format is not `sha1`
 */
function assertSupportedObjectFormat(caps) {
	const declared = caps.find((cap) => cap.startsWith("object-format="));
	if (declared === void 0 || declared === "object-format=sha1") return;
	throw new GitProtocolError(`unsupported ${declared} — only object-format=sha1 is supported`);
}
|
|
205
|
+
|
|
206
|
+
//#endregion
|
|
207
|
+
//#region src/protocol/sideband.ts
|
|
208
|
+
/** Most data bytes carried by one sideband pkt-line (one byte below the writer cap, leaving room for the band byte). */
const MAX_BAND_DATA = 65514;
/**
 * Multiplex `data` onto sideband `band`. The data is cut into slices of at
 * most MAX_BAND_DATA bytes and each slice is emitted as a pkt-line whose
 * payload is `[band byte | slice]`. No flush is appended: the caller owns the
 * section framing (the `packfile\n` header for fetch, the bare report for
 * push) and adds its own flush. Empty `data` produces an empty Buffer.
 *
 * @param band sideband channel number written as the first payload byte
 * @param data bytes to send
 * @returns the concatenated band pkt-lines
 */
function encodeSideband(band, data) {
	const bandByte = Buffer.from([band]);
	const frames = [];
	let start = 0;
	while (start < data.length) {
		const end = start + MAX_BAND_DATA;
		const slice = data.subarray(start, end);
		frames.push(encodePktLine(Buffer.concat([bandByte, slice])));
		start = end;
	}
	return Buffer.concat(frames);
}
|
|
223
|
+
|
|
224
|
+
//#endregion
|
|
225
|
+
//#region src/protocol/receive-pack.ts
|
|
226
|
+
/** The all-zero object id (40 hex zeros). */
const ZERO_OID$1 = "0".repeat(40);
/**
 * Longest ref name (in bytes) accepted at the boundary. `git_ref`'s primary key
 * is a btree on (repo_id, name) whose index entry overflows past ~2704 bytes,
 * which Postgres reports as an opaque storage error. This cap is far above any
 * real ref name yet safely below the btree limit, so an over-long name fails
 * loudly and in-band (`ng`) instead of as an HTTP 500 that has already
 * orphaned the ingested pack.
 */
const MAX_REF_NAME_BYTES = 2e3;
/** Capabilities advertised for receive-pack (push). */
const RECEIVE_CAPS = [
	"report-status",
	"delete-refs",
	"side-band-64k",
	"atomic",
	"object-format=sha1",
	`agent=${AGENT}`
];
/**
 * Build the v0 ref advertisement for receive-pack (push). The capability list
 * rides after a NUL on the first line. An empty repo — the dominant first-push
 * state — has no ref line to carry it, so a synthetic
 * `0{40} capabilities^{}` line is emitted instead.
 *
 * @param refs entries with `oid` and `name`
 * @returns the advertisement pkt-lines followed by a flush
 */
function encodeReceivePackAdvertisement(refs) {
	const capStr = RECEIVE_CAPS.join(" ");
	const advertised = refs.length === 0 ? [{
		name: "capabilities^{}",
		oid: ZERO_OID$1
	}] : refs;
	const frames = [];
	advertised.forEach((ref, index) => {
		const base = `${ref.oid} ${ref.name}`;
		const text = index === 0 ? `${base}\0${capStr}\n` : `${base}\n`;
		frames.push(encodePktLine(Buffer.from(text)));
	});
	frames.push(encodePkt({ type: "flush" }));
	return Buffer.concat(frames);
}
|
|
257
|
+
/**
 * Parse a receive-pack POST body. The body is a pkt-line command list (each
 * line `<old> <new> <ref>`, with the capability list after a NUL), a flush,
 * and then the raw packfile bytes.
 *
 * @param body the complete request body
 * @returns `{ caps, commands, pack }` where `pack` is everything after the flush
 * @throws {GitProtocolError} when a non-empty body has no terminating flush, or
 *         a command line does not consist of exactly three non-empty
 *         space-separated fields
 */
function parseReceivePack(body) {
	const decoded = decodePktStream(body, { stopAtFlush: true });
	if (body.length > 0 && !decoded.flushed) throw new GitProtocolError("receive-pack: command list not terminated by a flush (truncated or length-overrunning pkt-line)");
	let caps = [];
	const commands = [];
	for (const packet of decoded.packets) {
		if (packet.type !== "data") continue;
		const text = packet.payload.toString("utf8").replace(/\n$/, "");
		const nulAt = text.indexOf("\0");
		const hasCaps = nulAt >= 0;
		if (hasCaps) caps = text.slice(nulAt + 1).split(" ").filter(Boolean);
		const line = hasCaps ? text.slice(0, nulAt) : text;
		const fields = line.split(" ");
		const wellFormed = fields.length === 3 && fields.every(Boolean);
		if (!wellFormed) throw new GitProtocolError(`receive-pack: malformed command line ${JSON.stringify(line)}`);
		const [oldOid, newOid, ref] = fields;
		commands.push({
			newOid,
			oldOid,
			ref
		});
	}
	return {
		caps,
		commands,
		pack: decoded.rest
	};
}
|
|
289
|
+
/**
 * Encode a report-status response: `unpack <status>`, then one `ok <ref>` or
 * `ng <ref> <reason>` line per result (reason defaults to `failed`), then a
 * flush. With side-band-64k negotiated the entire report, its flush included,
 * is carried on band 1 and followed by an outer flush.
 *
 * @param unpack status text placed after `unpack `
 * @param results entries with `ok`, `ref` and optionally `reason`
 * @param useSideband whether to wrap the report in sideband band 1
 */
function encodeReportStatus(unpack, results, useSideband) {
	const texts = [`unpack ${unpack}\n`];
	for (const result of results) {
		texts.push(result.ok ? `ok ${result.ref}\n` : `ng ${result.ref} ${result.reason ?? "failed"}\n`);
	}
	const frames = texts.map((text) => encodePktLine(Buffer.from(text)));
	frames.push(encodePkt({ type: "flush" }));
	const report = Buffer.concat(frames);
	return useSideband ? Buffer.concat([encodeSideband(1, report), encodePkt({ type: "flush" })]) : report;
}
|
|
304
|
+
/**
 * Handle a receive-pack POST: ingest the pack (if any), then apply the ref
 * commands under CAS — atomically when the client negotiated `atomic` — and
 * report status. A failed unpack fails every ref; an atomic failure ng's every
 * ref (none applied). Non-ff is accepted by default (CAS guards concurrency,
 * not ancestry — spec §3.6).
 *
 * Unpack failure is tracked separately from the status text, so an ingest
 * error whose message happens to be `ok` (or is empty) can never be mistaken
 * for a successful unpack; such a failure is reported as `unpack failed`.
 *
 * @param body the complete request body
 * @param backend storage backend providing `ingest`, `isConnected`,
 *        `applyRefUpdates` and optionally `syncRefSnapshot`
 * @returns the encoded report-status response
 * @throws {GitProtocolError} for a malformed body or unsupported object format
 */
async function handleReceivePack(body, backend) {
	const { commands, caps, pack } = parseReceivePack(body);
	assertSupportedObjectFormat(caps);
	const useSideband = caps.includes("side-band-64k");
	const atomic = caps.includes("atomic");
	const nameTooLong = commands.map((c) => Buffer.byteLength(c.ref, "utf8") > MAX_REF_NAME_BYTES);
	// Skip ingesting when every command is already doomed by an over-long ref name.
	const anyApplicable = nameTooLong.length === 0 || nameTooLong.some((t) => !t);
	let unpackError = null;
	if (pack.length > 0 && anyApplicable) {
		try {
			await backend.ingest(pack);
		} catch (e) {
			const message = (e instanceof Error ? e.message : "unpack failed").replace(/\n/g, " ");
			// "ok" is the wire's success token and an empty status is unreadable:
			// never let the error text itself decide whether the unpack succeeded.
			unpackError = message === "" || message === "ok" ? "unpack failed" : message;
		}
	}
	if (unpackError !== null) {
		const failed = commands.map((c) => ({
			ok: false,
			reason: "unpacker error",
			ref: c.ref
		}));
		return encodeReportStatus(unpackError, failed, useSideband);
	}
	// Deletes and over-long names need no connectivity check.
	const connected = await Promise.all(commands.map((c, i) => nameTooLong[i] || c.newOid === ZERO_OID$1 ? Promise.resolve(true) : backend.isConnected(c.newOid)));
	const reasons = commands.map((_, i) => nameTooLong[i] ? "funny refname (too long to store)" : connected[i] ? null : "missing necessary objects");
	if (atomic && reasons.some((r) => r !== null)) {
		const failed = commands.map((c, i) => ({
			ok: false,
			reason: reasons[i] ?? "atomic transaction failed",
			ref: c.ref
		}));
		return encodeReportStatus("ok", failed, useSideband);
	}
	const oks = await backend.applyRefUpdates(commands.filter((_, i) => reasons[i] === null), atomic);
	// `oks` is indexed over the filtered (applicable) commands only.
	let applied = 0;
	const results = commands.map((c, i) => {
		const reason = reasons[i];
		if (reason !== null) return {
			ok: false,
			reason,
			ref: c.ref
		};
		return oks[applied++] ? {
			ok: true,
			ref: c.ref
		} : {
			ok: false,
			reason: atomic ? "atomic transaction failed" : "stale ref (compare-and-swap failed)",
			ref: c.ref
		};
	});
	for (const [i, c] of commands.entries()) {
		if (!results[i]?.ok) continue;
		try {
			await backend.syncRefSnapshot?.(c.ref, c.newOid);
		} catch (err) {
			// Best effort: the push is already applied, so only log.
			console.error(`pggit: snapshot refresh failed for ${c.ref} (the push is already applied):`, err);
		}
	}
	return encodeReportStatus("ok", results, useSideband);
}
|
|
370
|
+
|
|
371
|
+
//#endregion
|
|
372
|
+
//#region src/protocol/v2.ts
|
|
373
|
+
/**
 * Build the protocol-v2 capability advertisement (the GET info/refs body
 * without HTTP framing). Only honored features are advertised (spec §4):
 * ls-refs with `unborn`, and fetch with `filter` (partial clone) and
 * `include-tag` (auto-follow annotated tags). shallow and ref-in-want are left
 * out — they have no milestone owner, and advertising them would steer
 * clients onto unimplemented paths.
 *
 * @returns one pkt-line per capability followed by a flush
 */
function encodeAdvertisement() {
	const capabilityLines = [
		"version 2",
		`agent=${AGENT}`,
		"ls-refs=unborn",
		"fetch=filter include-tag",
		"object-format=sha1"
	];
	const frames = [];
	for (const line of capabilityLines) frames.push(encodePktLine(Buffer.from(`${line}\n`)));
	frames.push(encodePkt({ type: "flush" }));
	return Buffer.concat(frames);
}
|
|
390
|
+
/**
 * Decode a protocol-v2 request body of the shape
 * `command=… <caps> 0001 <args> 0000`.
 *
 * Data lines before the delim are the command (`command=` prefix) or
 * capabilities; data lines after it are arguments. One trailing newline is
 * stripped from each line. Flush and response-end packets are skipped.
 *
 * @param body the complete request body
 * @returns `{ args, capabilities, command }` (`command` is `""` when absent)
 * @throws {GitProtocolError} when bytes remain after the last whole packet
 */
function parseV2Request(body) {
	const { packets, rest } = decodePktStream(body);
	if (rest.length > 0) throw new GitProtocolError(`pkt-line: ${rest.length} trailing bytes after the request — incomplete or length-overrunning packet`);
	const request = {
		args: [],
		capabilities: [],
		command: ""
	};
	let inArgs = false;
	for (const packet of packets) {
		switch (packet.type) {
			case "delim":
				inArgs = true;
				break;
			case "data": {
				const line = packet.payload.toString("utf8").replace(/\n$/, "");
				if (inArgs) request.args.push(line);
				else if (line.startsWith("command=")) request.command = line.slice("command=".length);
				else request.capabilities.push(line);
				break;
			}
			default: break;
		}
	}
	return request;
}
|
|
415
|
+
/**
 * Fetch features pggit deliberately does NOT advertise (encodeAdvertisement):
 * a client that drives one anyway must FAIL LOUDLY, never be silently dropped
 * to an empty result (the charter). `ref-in-want` (`want-ref`) and the
 * `shallow`/`deepen` family are the unimplemented ones.
 */
const UNSUPPORTED_FETCH_ARG = /^(want-ref|deepen|shallow)\b/;
/** A full lowercase 40-hex SHA-1 object id. */
const OID = /^[0-9a-f]{40}$/;
/**
 * Interpret the argument lines of a v2 `fetch` request.
 *
 * Recognized lines: `want <oid>`, `have <oid>`, `filter <spec>`,
 * `include-tag` and `done`. Other lines are ignored, except the unsupported
 * features above, which are rejected. Both `want` and `have` object ids are
 * validated here at the boundary, so nothing that is not a 40-hex id reaches
 * the backend.
 *
 * @param req parsed v2 request; only `req.args` is read
 * @returns `{ done, filter, haves, includeTag, wants }` — `filter` is
 *          `undefined` when no filter line was sent
 * @throws {GitProtocolError} for an unsupported feature or a malformed
 *         `want`/`have` object id
 */
function parseFetch(req) {
	const wants = [];
	const haves = [];
	let done = false;
	let filter;
	let includeTag = false;
	for (const arg of req.args) {
		if (UNSUPPORTED_FETCH_ARG.test(arg)) throw new GitProtocolError(`fetch: unsupported feature ${JSON.stringify(arg.split(" ")[0])} — pggit does not advertise it`);
		if (arg.startsWith("want ")) {
			const oid = arg.slice(5);
			if (!OID.test(oid)) throw new GitProtocolError(`fetch: malformed want object id ${JSON.stringify(oid)}`);
			wants.push(oid);
		} else if (arg.startsWith("have ")) {
			const oid = arg.slice(5);
			// Same boundary check as `want`: git itself dies on a non-OID have.
			if (!OID.test(oid)) throw new GitProtocolError(`fetch: malformed have object id ${JSON.stringify(oid)}`);
			haves.push(oid);
		} else if (arg.startsWith("filter ")) filter = arg.slice(7);
		else if (arg === "include-tag") includeTag = true;
		else if (arg === "done") done = true;
	}
	return {
		done,
		filter,
		haves,
		includeTag,
		wants
	};
}
|
|
446
|
+
/**
 * Encode an ls-refs response: one pkt-line per entry, then a flush. A line is
 * `<oid> <name>`, or `unborn <name>` for an entry carrying an `unborn` key,
 * optionally followed by ` symref-target:<target>` and ` peeled:<oid>`.
 */
function encodeLsRefsResponse(entries) {
	const frames = [];
	for (const entry of entries) {
		const fields = ["unborn" in entry ? `unborn ${entry.name}` : `${entry.oid} ${entry.name}`];
		if (entry.symrefTarget) fields.push(`symref-target:${entry.symrefTarget}`);
		if ("peeled" in entry && entry.peeled) fields.push(`peeled:${entry.peeled}`);
		frames.push(encodePktLine(Buffer.from(`${fields.join(" ")}\n`)));
	}
	frames.push(encodePkt({ type: "flush" }));
	return Buffer.concat(frames);
}
|
|
456
|
+
/**
 * Build the pkt-lines of an `acknowledgments` section: the section header,
 * then either a lone `NAK` (nothing in common and not ready) or one
 * `ACK <oid>` per common object followed by `ready` when `ready` is set.
 * No delim or flush is appended.
 */
function acknowledgmentLines(common, ready) {
	const texts = ["acknowledgments\n"];
	const nothingToAck = common.length === 0 && !ready;
	if (nothingToAck) texts.push("NAK\n");
	else {
		for (const oid of common) texts.push(`ACK ${oid}\n`);
		if (ready) texts.push("ready\n");
	}
	return Buffer.concat(texts.map((text) => encodePktLine(Buffer.from(text))));
}
|
|
466
|
+
/**
 * Encode the fetch response for a negotiation round that carries no pack: the
 * `acknowledgments` section followed by a flush. The client answers with more
 * haves or `done` (spec §4 shape b).
 */
function encodeAcknowledgments(common, ready) {
	const section = acknowledgmentLines(common, ready);
	const terminator = encodePkt({ type: "flush" });
	return Buffer.concat([section, terminator]);
}
|
|
474
|
+
/**
 * Encode the fetch response sent when the server turns `ready` during
 * negotiation: the acknowledgments section ending in `ready`, a delim-pkt,
 * then the packfile section. git requires the pack to follow `ready` within
 * the same response rather than in a later round.
 */
function encodeReadyWithPack(common, pack) {
	const acks = acknowledgmentLines(common, true);
	const separator = encodePkt({ type: "delim" });
	const packSection = encodePackfileResponse(pack);
	return Buffer.concat([acks, separator, packSection]);
}
|
|
486
|
+
/**
 * A v2 error response: a single `ERR <message>` pkt-line. git's packet reader
 * recognizes the `ERR ` prefix and the client dies with
 * `remote error: <message>` — the in-band channel for a request that cannot be
 * served (e.g. a `want` the repo does not have): an HTTP-200 protocol error the
 * client can read, NOT a transport 500.
 *
 * The message is unbounded (a not-our-ref error lists every missing OID), so a
 * line that would exceed the pkt-line writer cap is truncated and ends in
 * `...`. Without this, `encodePktLine` would throw and turn the in-band error
 * into a 500.
 *
 * @param message human-readable error text
 * @returns the encoded ERR pkt-line
 */
function encodeErr(message) {
	const line = Buffer.from(`ERR ${message}\n`);
	if (line.length <= WRITER_MAX_PAYLOAD) return encodePktLine(line);
	const tail = Buffer.from("...\n");
	const head = line.subarray(0, WRITER_MAX_PAYLOAD - tail.length);
	return encodePktLine(Buffer.concat([head, tail]));
}
|
|
495
|
+
/**
 * Encode the fetch response that delivers a pack: the `packfile` section
 * header, the pack bytes multiplexed on sideband band 1, and a closing flush.
 */
function encodePackfileResponse(pack) {
	const header = encodePktLine(Buffer.from("packfile\n"));
	const banded = encodeSideband(1, pack);
	const terminator = encodePkt({ type: "flush" });
	return Buffer.concat([header, banded, terminator]);
}
|
|
506
|
+
|
|
507
|
+
//#endregion
|
|
508
|
+
//#region src/protocol/upload-pack.ts
|
|
509
|
+
/**
 * Serve a v2 `ls-refs` command, timed under the `ref-advertise` phase.
 *
 * Honored arguments: `peel`, `symrefs`, `unborn` and any number of
 * `ref-prefix <prefix>` lines (no prefix means every ref matches). HEAD is
 * listed first when the backend has a HEAD symref and `HEAD` matches the
 * prefixes: with its target's oid when the target exists, or as an unborn
 * entry when the target is missing and the client sent both `unborn` and
 * `symrefs`. The backend's refs follow, filtered by prefix.
 *
 * @param req parsed v2 request; only `req.args` is read
 * @param backend backend providing `listRefs()` and `getSymref(name)`
 * @returns the encoded ls-refs response
 */
async function handleLsRefs(req, backend) {
	label("ls-refs");
	return withPhase("ref-advertise", async () => {
		const { args } = req;
		const wantPeel = args.includes("peel");
		const wantSymrefs = args.includes("symrefs");
		const wantUnborn = args.includes("unborn");
		const prefixMarker = "ref-prefix ";
		const prefixes = [];
		for (const arg of args) if (arg.startsWith(prefixMarker)) prefixes.push(arg.slice(prefixMarker.length));
		const matches = (name) => prefixes.length === 0 || prefixes.some((prefix) => name.startsWith(prefix));
		const refs = await backend.listRefs();
		const oidByName = new Map(refs.map((ref) => [ref.name, ref.oid]));
		const entries = [];
		const headTarget = await backend.getSymref("HEAD");
		if (headTarget && matches("HEAD")) {
			const headOid = oidByName.get(headTarget);
			if (headOid) {
				entries.push({
					name: "HEAD",
					oid: headOid,
					symrefTarget: wantSymrefs ? headTarget : void 0
				});
			} else if (wantUnborn && wantSymrefs) {
				entries.push({
					name: "HEAD",
					symrefTarget: headTarget,
					unborn: true
				});
			}
		}
		for (const { name, oid, peeled } of refs) {
			if (!matches(name)) continue;
			const entry = {
				name,
				oid
			};
			if (wantPeel && peeled) entry.peeled = peeled;
			entries.push(entry);
		}
		return encodeLsRefsResponse(entries);
	});
}
|
|
546
|
+
/**
 * Map the wire filter spec onto the single walk option that is implemented.
 * Only `blob:none` (blobless partial clone) leaves blobs out; every other
 * filter (`tree:0`, `blob:limit=…`, …) and the absence of a filter serve the
 * FULL closure. The protocol allows a server to send more than a filter asks
 * for — the client accepts the superset and has nothing left to lazily fetch —
 * so over-serving completes a clone that a hard rejection would abort, without
 * implementing every filter spec.
 *
 * @param filter the filter spec, or `undefined`
 * @returns true only for `blob:none`
 */
function filterOmitsBlobs(filter) {
	const BLOBLESS = "blob:none";
	return filter === BLOBLESS;
}
|
|
557
|
+
/**
 * Serve a v2 `fetch` command.
 *
 * With `done`, the pack is built and returned in a packfile section. Without
 * it, the backend decides via `readyToGiveUp`: not ready yields an
 * acknowledgments-only round, ready yields acknowledgments plus the pack in
 * the same response. A `WantNotFoundError` raised while serving is answered
 * in-band as an ERR pkt-line; any other error propagates.
 *
 * @param req parsed v2 request
 * @param backend backend providing `commonHaves`, `readyToGiveUp` and `buildPack`
 * @returns the encoded fetch response
 */
async function handleFetch(req, backend) {
	label("fetch");
	const parsed = parseFetch(req);
	const omitBlobs = filterOmitsBlobs(parsed.filter);
	const common = await backend.commonHaves(parsed.haves);
	const buildPack = () => backend.buildPack(parsed.wants, common, omitBlobs, parsed.includeTag);
	try {
		if (parsed.done) return encodePackfileResponse(await buildPack());
		const ready = await backend.readyToGiveUp(parsed.wants, common);
		if (!ready) return encodeAcknowledgments(common, false);
		return encodeReadyWithPack(common, await buildPack());
	} catch (err) {
		if (!(err instanceof WantNotFoundError)) throw err;
		return encodeErr(err.message);
	}
}
|
|
573
|
+
/**
 * Parse a v2 upload-pack POST body, check the negotiated object format, and
 * route the request to the `ls-refs` or `fetch` handler.
 *
 * @throws {GitProtocolError} for any other command
 */
async function handleUploadPack(body, backend) {
	const req = parseV2Request(body);
	assertSupportedObjectFormat(req.capabilities);
	switch (req.command) {
		case "ls-refs": return handleLsRefs(req, backend);
		case "fetch": return handleFetch(req, backend);
		default: throw new GitProtocolError(`upload-pack: unsupported command ${JSON.stringify(req.command)}`);
	}
}
|
|
581
|
+
|
|
582
|
+
//#endregion
|
|
583
|
+
//#region src/object/format-error.ts
|
|
584
|
+
/** Error for a git object whose bytes are not in the expected format; `code` is a machine-readable tag. */
var GitFormatError = class extends Error {
	code;
	name = "GitFormatError";
	constructor(code, message) {
		super(message);
		this.code = code;
	}
};
|
|
592
|
+
|
|
593
|
+
//#endregion
|
|
594
|
+
//#region src/object/object.ts
|
|
595
|
+
/**
 * Collect the values of the leading `key <value>` header lines whose key is in
 * `keys`. Scanning stops at the first empty line. Lines without a space, or
 * starting with one, are skipped.
 *
 * @param content raw object content (decoded as latin1)
 * @param keys set of header keys to pick
 * @returns the matching values, in order of appearance
 */
function headerOids(content, keys) {
	const lines = content.toString("latin1").split("\n");
	const found = [];
	for (let i = 0; i < lines.length && lines[i] !== ""; i++) {
		const line = lines[i];
		const gap = line.indexOf(" ");
		if (gap <= 0) continue;
		if (keys.has(line.slice(0, gap))) found.push(line.slice(gap + 1));
	}
	return found;
}
|
|
605
|
+
/**
 * Decode a tree object's content, a sequence of
 * `<mode> <name>\0<20-byte oid>` records.
 *
 * @param content raw tree content
 * @returns `{ mode, name, oid }` per record, `oid` as 40 hex characters
 * @throws {GitFormatError} (`malformed-tree`) when a record lacks its space or
 *         NUL, has them out of order, or is cut short
 */
function treeEntries(content) {
	const parsed = [];
	for (let pos = 0; pos < content.length;) {
		const space = content.indexOf(32, pos);
		const nul = content.indexOf(0, pos);
		const oidEnd = nul + 21;
		const intact = space >= 0 && nul >= 0 && space <= nul && oidEnd <= content.length;
		if (!intact) throw new GitFormatError("malformed-tree", `tree: malformed entry at offset ${pos}`);
		parsed.push({
			mode: content.subarray(pos, space).toString("latin1"),
			name: content.subarray(space + 1, nul).toString("utf8"),
			oid: content.subarray(nul + 1, oidEnd).toString("hex")
		});
		pos = oidEnd;
	}
	return parsed;
}
|
|
625
|
+
/** Whether a tree entry's mode denotes a subtree (directory) rather than a blob or gitlink. */
function isTreeEntryMode(mode) {
	const SUBTREE_MODE = "40000";
	return mode === SUBTREE_MODE;
}
|
|
629
|
+
/**
 * Return a commit's root tree OID, i.e. the value of its first `tree` header.
 *
 * @param content raw commit content
 * @throws {GitFormatError} (`missing-tree-header`) when no `tree` header is found
 */
function commitTreeOid(content) {
	const treeOids = headerOids(content, new Set(["tree"]));
	const first = treeOids[0];
	if (first) return first;
	throw new GitFormatError("missing-tree-header", "commitTreeOid: commit has no tree header");
}
|
|
635
|
+
|
|
636
|
+
//#endregion
|
|
637
|
+
//#region src/repo-view/build-file-list.ts
|
|
638
|
+
/** Mode of a gitlink (submodule) entry: it points at a commit in another repo, so there is no blob here. */
const GITLINK_MODE$1 = "160000";
/**
 * Produce the flat path→blob index of a commit's tree (what `git ls-tree -r`
 * shows for a commit, read straight from the object store). Each blob yields
 * one entry with its full path from the root, its raw mode and its blob oid.
 * Subtrees are descended into depth-first in entry order; gitlinks are left
 * out. Blob CONTENT is never read — it lives in git_object and is joined at
 * query time (§4.5 collapse) — so the walk only reads the commit and trees.
 *
 * @param read async function returning the object for an oid (its `content` is used)
 * @param commitOid oid of the commit to index
 * @returns `{ files }` where each file is `{ blobOid, mode, path }`
 */
async function buildFileList(read, commitOid) {
	const commit = await read(commitOid);
	const files = [];
	async function collect(treeOid, dir) {
		const { content } = await read(treeOid);
		for (const { mode, name, oid } of treeEntries(content)) {
			const path = dir + name;
			if (isTreeEntryMode(mode)) {
				await collect(oid, `${path}/`);
				continue;
			}
			if (mode === GITLINK_MODE$1) continue;
			files.push({
				blobOid: oid,
				mode,
				path
			});
		}
	}
	await collect(commitTreeOid(commit.content), "");
	return { files };
}
|
|
665
|
+
|
|
666
|
+
//#endregion
|
|
667
|
+
//#region src/repo-view/config.ts
|
|
668
|
+
/**
 * Predicate selecting the refs that get a queryable file snapshot: only names
 * under `refs/heads/` (branches). Everything else — tags, notes, `refs/pull/*` —
 * is rejected. Widening the projection later is a one-line change here.
 */
const SNAPSHOT_REFS = (refName) => {
	const BRANCH_PREFIX = "refs/heads/";
	return refName.startsWith(BRANCH_PREFIX);
};
|
|
673
|
+
|
|
674
|
+
//#endregion
|
|
675
|
+
//#region src/repo-view/rebuild.ts
|
|
676
|
+
/** The all-zero OID (40 `0` characters) that marks a ref deletion. */
const ZERO_OID = "0".repeat(40);
/**
 * Bring `refName`'s file snapshot in line with a ref update that was just applied.
 *
 * - refs rejected by `SNAPSHOT_REFS` are ignored;
 * - a zero `newOid` (delete) drops the snapshot;
 * - otherwise the file list of the new tip is built through
 *   `deps.objects.getObject` and handed to `deps.snapshots.rebuildRefSnapshot`.
 *
 * @throws {Error} when an object needed for the walk is not in the object store.
 */
async function syncRefSnapshot(deps, repoId, refName, newOid) {
	if (!SNAPSHOT_REFS(refName)) return;
	const isDelete = newOid === ZERO_OID;
	if (isDelete) {
		await deps.snapshots.dropRefSnapshot(repoId, refName);
		return;
	}
	// Object loader for the tree walk: a missing object is a hard failure.
	async function loadObject(oid) {
		const found = await deps.objects.getObject(repoId, oid);
		if (found) return found;
		throw new Error(`repo-view: object ${oid} missing while building ${refName}`);
	}
	const fileList = await buildFileList(loadObject, newOid);
	await deps.snapshots.rebuildRefSnapshot(repoId, refName, fileList);
}
|
|
697
|
+
|
|
698
|
+
//#endregion
|
|
699
|
+
//#region src/database/postgres.ts
|
|
700
|
+
/** Console marker per Kysely log level, used by the development-only debug line. */
const EVENT_SIGNS = {
	error: "🔴",
	query: "🟢"
};
/**
 * Build a Kysely instance on top of a porsager `postgres` client.
 *
 * Every `query`/`error` log event is forwarded to `recordQuery` (SQL text and
 * duration); when `NODE_ENV` is `development` the same event is also printed
 * with `console.debug`.
 */
function initKysely(pg) {
	const dialect = new PostgresJSDialect({ postgres: pg });
	const log = (event) => {
		const { level } = event;
		if (level !== "query" && level !== "error") return;
		recordQuery(event.query.sql, event.queryDurationMillis);
		if (process.env.NODE_ENV !== "development") return;
		console.debug(`${EVENT_SIGNS[level]} ${event.queryDurationMillis}ms ${event.query.sql}`);
	};
	return new Kysely({
		dialect,
		log
	});
}
|
|
716
|
+
|
|
717
|
+
//#endregion
|
|
718
|
+
//#region src/database/copy-insert.ts
|
|
719
|
+
/** PGCOPY binary file header: the 11-byte signature `PGCOPY\n\xff\r\n\0`
 * followed by 8 zero bytes. */
const HEADER = Buffer.concat([Buffer.from("PGCOPY\n\xff\r\n\0", "latin1"), Buffer.alloc(8)]);
/** PGCOPY binary trailer: a 16-bit big-endian -1 (`0xFFFF`). */
const TRAILER = Buffer.from([255, 255]);
|
|
737
|
+
/**
 * Encode one tagged field `{ t, v }` as the raw bytes of its binary COPY value
 * (without the length prefix, which the caller adds).
 *
 * - `int2` / `int4` / `int8`: big-endian two's-complement of `v` (`int8` goes
 *   through `BigInt(v)`).
 * - `bytea`: `v` itself, returned as-is (not copied).
 * - `text`: the UTF-8 bytes of `v`.
 *
 * @throws {TypeError} when `field.t` is not one of the supported tags. Without
 *   this the function would return `undefined` and the caller would fail later
 *   with an opaque error while reading `.length`.
 */
function encodeValue(field) {
	switch (field.t) {
		case "int2": {
			const b = Buffer.alloc(2);
			b.writeInt16BE(field.v);
			return b;
		}
		case "int4": {
			const b = Buffer.alloc(4);
			b.writeInt32BE(field.v);
			return b;
		}
		case "int8": {
			const b = Buffer.alloc(8);
			b.writeBigInt64BE(BigInt(field.v));
			return b;
		}
		case "bytea": return field.v;
		case "text": return Buffer.from(field.v, "utf8");
		default: throw new TypeError(`encodeValue: unsupported COPY field type ${JSON.stringify(field.t)}`);
	}
}
|
|
758
|
+
/**
 * Serialize `rows` into a single PGCOPY binary payload.
 *
 * Layout: `HEADER`, then for each row an int16 field count followed by every
 * field as `<int32 byte length><bytes from encodeValue>`, then `TRAILER`.
 */
function encodeBinaryCopy(rows) {
	const int16 = (n) => {
		const out = Buffer.alloc(2);
		out.writeInt16BE(n);
		return out;
	};
	const int32 = (n) => {
		const out = Buffer.alloc(4);
		out.writeInt32BE(n);
		return out;
	};
	const chunks = [HEADER];
	for (const row of rows) {
		chunks.push(int16(row.length));
		for (const field of row) {
			const payload = encodeValue(field);
			chunks.push(int32(payload.length), payload);
		}
	}
	chunks.push(TRAILER);
	return Buffer.concat(chunks);
}
|
|
776
|
+
/**
 * Bulk-insert `rows` into `target` through a binary COPY into a staging table.
 *
 * Steps, all on `tx`:
 *   1. create a temp table `copy_stg_<target>` (`on commit drop`) with the shape
 *      of `target`'s `columns`, empty;
 *   2. stream the binary-encoded rows into it with `COPY … FROM STDIN`;
 *   3. `INSERT … SELECT … ON CONFLICT DO NOTHING` from staging into `target`.
 *
 * An empty `rows` is a no-op (no statement is issued).
 *
 * `tx` must be a transaction-scoped porsager `Sql`. `target` and `columns` are
 * internal constants (never client input); they are interpolated as SQL
 * identifiers.
 *
 * NOTE(review): the staging name is fixed per `target` and is created without
 * `if not exists`, so a second call for the same `target` inside the same
 * transaction looks like it would fail on the existing temp table — confirm
 * callers never do that.
 */
async function copyInsert(tx, target, columns, rows) {
	if (rows.length === 0) return;
	const cols = columns.join(", ");
	const staging = `copy_stg_${target}`;
	await tx.unsafe(`create temp table ${staging} on commit drop as select ${cols} from ${target} with no data`);
	const writable = await tx`copy ${tx(staging)} (${tx.unsafe(cols)}) from stdin (format binary)`.writable();
	// Resolve on the stream's "finish" (emitted after `end()`), reject on any
	// stream error or on a failed write; the whole payload is sent in one write.
	await new Promise((resolve, reject) => {
		writable.on("error", reject);
		writable.on("finish", () => resolve());
		writable.write(encodeBinaryCopy(rows), (err) => {
			if (err) reject(err);
			else writable.end();
		});
	});
	// Rows that already exist in `target` are silently skipped.
	await tx.unsafe(`insert into ${target} (${cols}) select ${cols} from ${staging} on conflict do nothing`);
}
|
|
798
|
+
|
|
799
|
+
//#endregion
|
|
800
|
+
//#region src/object/edges.ts
|
|
801
|
+
/** Mode of a gitlink entry: a submodule commit that lives in *another* repo, so
 * it is neither a blob nor an edge in this one. */
const GITLINK_MODE = "160000";
/**
 * OIDs of the blobs referenced directly by one tree.
 *
 * Every entry of `content` counts as a blob unless it is a subtree (those are
 * covered by stored edges) or a gitlink (`160000`). Connectivity checks rely on
 * this because blobs are never stored as edges, so a tree's content is the only
 * place a required blob can be discovered.
 */
function treeBlobOids(content) {
	const blobs = [];
	for (const entry of treeEntries(content)) {
		if (isTreeEntryMode(entry.mode) || entry.mode === GITLINK_MODE) continue;
		blobs.push(entry.oid);
	}
	return blobs;
}
|
|
814
|
+
|
|
815
|
+
//#endregion
|
|
816
|
+
//#region src/pack/object-header.ts
|
|
817
|
+
/**
 * Pack object header: a variable-length encoding of (type, uncompressed size)
 * that prefixes every object entry in a packfile.
 *
 * First byte: `[c|ttt|ssss]` — continuation bit `c`, 3-bit type `ttt`, low 4 bits
 * of size. Each continuation byte contributes 7 more size bits, least-significant
 * group first. See gitformat-pack.
 *
 * Size arithmetic uses `*`/`Math.floor`, NOT `<<`/`>>` — JS bitwise ops are
 * 32-bit and would corrupt object sizes ≥ 2³¹.
 *
 * `PACK_OBJ_TYPE` maps each object kind to its numeric type code (the `ttt`
 * field). Keys are listed alphabetically, not by value; value 5 is not assigned
 * here.
 */
const PACK_OBJ_TYPE = {
	BLOB: 3,
	COMMIT: 1,
	// Delta whose base is addressed by an offset within the same pack.
	OFS_DELTA: 6,
	// Delta whose base is addressed by object id.
	REF_DELTA: 7,
	TAG: 4,
	TREE: 2
};
|
|
836
|
+
|
|
837
|
+
//#endregion
|
|
838
|
+
//#region src/store/reachability.ts
|
|
839
|
+
/** Objects looked up per round-trip when chunking tree/blob existence queries. */
|
|
840
|
+
const LOOKUP_BATCH = 1e3;
|
|
841
|
+
/**
 * Split `items` into consecutive batches of at most `size` elements, in order.
 * An empty `items` yields an empty list.
 *
 * @throws {RangeError} when `size` is not a number ≥ 1. A zero or negative step
 *   would never advance the cursor (endless loop), and `NaN` would silently
 *   return a single empty batch.
 */
function batches(items, size) {
	if (!(size >= 1)) throw new RangeError(`batches: size must be >= 1, got ${String(size)}`);
	const out = [];
	for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
	return out;
}
|
|
847
|
+
/**
 * The objects reachable from `roots` over the stored DAG — the ONE reachability
 * engine shared by connectivity, clone, and incremental fetch (so they can never
 * disagree). A recursive CTE walks `git_edge` (all stored kinds 1,2,3,5) for the
 * commit/tree/tag closure; the LEFT JOIN marks which are present. Blobs are not
 * edges (§4.3), so unless `omitBlobs` they are enumerated from each present tree's
 * content (mode-aware) and their presence checked. Returns the reachable set
 * partitioned into present / missing. `::bigint`/`::bytea` casts and the
 * `VALUES (…::bytea)` seed pin types in the raw CTE (the porsager driver can't
 * bind a raw `bytea[]`, OQ-13); array lookups use Kysely's `in`-expansion.
 *
 * @param db Kysely instance every statement is executed on.
 * @param id repo id; bound as `::bigint` in the CTE and used as `repo_id` filter.
 * @param roots hex OID strings seeding the walk; an empty list returns two empty sets.
 * @param omitBlobs when truthy, the blob phase is skipped entirely.
 * @returns `{ missing, present }` — two `Set`s of hex OID strings.
 */
async function reachableClosure(db, id, roots, omitBlobs) {
	const present = /* @__PURE__ */ new Set();
	const missing = /* @__PURE__ */ new Set();
	if (roots.length === 0) return {
		missing,
		present
	};
	// Phase 1: edge closure. `union` (not `union all`) de-duplicates OIDs, which
	// also terminates the recursion; `o.type` is null for closure members that
	// have no row in git_object.
	const closure = await sql`
		with recursive closure(oid) as (
			select oid from (values ${sql.join(roots.map((r) => sql`(${Buffer.from(r, "hex")}::bytea)`))}) as roots(oid)
			union
			select e.child from git_edge e
			join closure c on e.parent = c.oid
			where e.repo_id = ${id}::bigint
		)
		select c.oid, o.type
		from closure c
		left join git_object o on o.repo_id = ${id}::bigint and o.oid = c.oid
	`.execute(db);
	// Present trees are kept as the raw `oid` values from the rows (not hex) so
	// they can be fed straight back into the `in (...)` lookup below.
	const treeOids = [];
	for (const r of closure.rows) {
		const hex = r.oid.toString("hex");
		if (r.type === null) missing.add(hex);
		else {
			present.add(hex);
			if (r.type === PACK_OBJ_TYPE.TREE) treeOids.push(r.oid);
		}
	}
	if (omitBlobs || treeOids.length === 0) return {
		missing,
		present
	};
	// Phase 2: collect the blob OIDs named by every present tree, reading tree
	// content in batches of LOOKUP_BATCH. The Set de-duplicates shared blobs.
	const blobCandidates = /* @__PURE__ */ new Set();
	for (const batch of batches(treeOids, LOOKUP_BATCH)) {
		const trees = await db.selectFrom("git_object").select("content").where("repo_id", "=", id).where("oid", "in", batch).execute();
		for (const t of trees) for (const blob of treeBlobOids(t.content)) blobCandidates.add(blob);
	}
	if (blobCandidates.size === 0) return {
		missing,
		present
	};
	// Phase 3: check which candidate blobs exist, again in batches; candidates
	// are converted from hex for the lookup and back to hex for the result sets.
	const presentBlobs = /* @__PURE__ */ new Set();
	for (const batch of batches([...blobCandidates], LOOKUP_BATCH)) {
		const rows = await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "in", batch.map((h) => Buffer.from(h, "hex"))).execute();
		for (const r of rows) presentBlobs.add(r.oid.toString("hex"));
	}
	for (const b of blobCandidates) (presentBlobs.has(b) ? present : missing).add(b);
	return {
		missing,
		present
	};
}
|
|
910
|
+
|
|
911
|
+
//#endregion
|
|
912
|
+
//#region src/store/repo-resolver.ts
|
|
913
|
+
/**
 * Memoizing name→id boundary between wire repo names and the `repos.id`
 * surrogate that the object and ref stores key on.
 *
 * Found ids are cached for the lifetime of the resolver (a repo's name→id
 * mapping does not change once the row exists), which keeps hot per-object
 * paths at a single point-read. Misses are deliberately NOT cached: a later
 * push may create the repo, and a remembered `null` would hide it.
 *
 * - `resolveRepoId(name)` — lookup only; `null` means the repo was never written.
 * - `ensureRepoId(name)` — get-or-create, safe under concurrent first pushes.
 */
function createRepoResolver(db) {
	const idsByName = new Map();
	const remember = (name, id) => {
		idsByName.set(name, id);
		return id;
	};
	const selectId = (name) => db.selectFrom("repos").select("id").where("name", "=", name);
	/** The repo's id, creating the row when absent. The plain SELECT comes first
	 * so the common (already exists) path never writes; on a lost insert race the
	 * `on conflict do nothing` insert returns no row and the id is re-read. */
	async function ensureRepoId(name) {
		const hit = idsByName.get(name);
		if (hit !== void 0) return hit;
		const existing = await selectId(name).executeTakeFirst();
		if (existing) return remember(name, existing.id);
		const inserted = await db.insertInto("repos").values({ name }).onConflict((oc) => oc.doNothing()).returning("id").executeTakeFirst();
		if (inserted?.id != null) return remember(name, inserted.id);
		const winner = await selectId(name).executeTakeFirstOrThrow();
		return remember(name, winner.id);
	}
	/** The repo's id, or `null` when no row exists (never cached). */
	async function resolveRepoId(name) {
		const hit = idsByName.get(name);
		if (hit !== void 0) return hit;
		const row = await selectId(name).executeTakeFirst();
		return row ? remember(name, row.id) : null;
	}
	return {
		ensureRepoId,
		resolveRepoId
	};
}
|
|
955
|
+
|
|
956
|
+
//#endregion
|
|
957
|
+
//#region src/store/gc.ts
|
|
958
|
+
/** Default per-batch DELETE cap when the caller omits `batchLimit`. Large enough to
|
|
959
|
+
* sweep a typical force-commit orphan set in one or two batches, small enough to
|
|
960
|
+
* bound the dead-tuple burst and lock duration per transaction (§7). */
|
|
961
|
+
const DEFAULT_BATCH_LIMIT = 1e4;
|
|
962
|
+
/** OIDs loaded per COPY round-trip into the live table (the live set can be the whole
|
|
963
|
+
* reachable tree, so it streams in bounded batches, never one giant payload). */
|
|
964
|
+
const LIVE_LOAD_BATCH = 1e4;
|
|
965
|
+
/**
 * Build the GC over a porsager client (the same wire→DB boundary the object and ref
 * stores take). `gc(repo, opts)` reclaims a single repo's unreachable-and-old-enough
 * objects offline; reachable objects are always retained.
 *
 * `gc` resolves to `{ deletedEdges, deletedObjects }`; an unknown repo yields zeros
 * without touching the database further. Options read here: `graceSeconds`,
 * `batchLimit` (defaults to `DEFAULT_BATCH_LIMIT`), `maintain` (post-sweep
 * VACUUM/REINDEX unless `false`) and the test hook `_hooks.afterLiveSet`.
 */
function createGc(pg) {
	const repos = createRepoResolver(initKysely(pg));
	return { async gc(repo, opts) {
		const id = await repos.resolveRepoId(repo);
		if (id === null) return {
			deletedEdges: 0,
			deletedObjects: 0
		};
		const batchLimit = opts.batchLimit ?? DEFAULT_BATCH_LIMIT;
		// Per-repo scratch table holding the live OID set; dropped in `finally`.
		const live = `gc_live_${id}`;
		await pg.unsafe(`create unlogged table if not exists ${live} (oid bytea primary key)`);
		try {
			// A leftover table from an interrupted run may still hold stale rows.
			await pg.unsafe(`truncate ${live}`);
			await loadLive(live, await liveSet(id));
			await opts._hooks?.afterLiveSet?.();
			const deletedObjects = await sweepObjects(id, live, opts.graceSeconds, batchLimit);
			const deletedEdges = await sweepEdges(id, batchLimit);
			if (opts.maintain !== false && deletedObjects + deletedEdges > 0) await maintain();
			return {
				deletedEdges,
				deletedObjects
			};
		} finally {
			await pg.unsafe(`drop table if exists ${live}`);
		}
	} };
	/**
	 * The live set: the reachable closure from every ref tip, read under ONE
	 * REPEATABLE READ snapshot so the ref-tip read and the multi-statement closure
	 * walk cannot interleave with a concurrent push's ref update (§5 defense (a)).
	 *
	 * `reachableClosure` is the shared engine and takes a `Kysely`, but the
	 * kysely-postgres-js dialect drives queries by calling `.reserve()` on its
	 * `postgres` client for EACH query — so a plain pooled Kysely would scatter the
	 * closure's statements across connections (no shared snapshot), and a
	 * transaction-scoped `Sql` has no `.reserve()` at all. So we pin ONE porsager
	 * connection, open a REPEATABLE READ transaction on it, and back a Kysely with a
	 * shim whose `reserve()` always returns that pinned connection with a no-op
	 * `release()` — every closure statement then runs on the one snapshotted
	 * connection. The transaction is read-only; it commits (releasing the snapshot)
	 * before the sweep's own short write transactions begin.
	 *
	 * If anything fails after `begin`, the transaction is rolled back before the
	 * connection is released, so the pool never receives a connection that is
	 * still inside an open or aborted transaction.
	 */
	async function liveSet(id) {
		const conn = await pg.reserve();
		try {
			await conn`begin isolation level repeatable read`;
			try {
				const pinned = pinnedKysely(conn);
				const rows = await conn`
					select oid, peeled_oid from git_ref where repo_id = ${id} and oid is not null
				`;
				const tips = /* @__PURE__ */ new Set();
				for (const r of rows) {
					if (r.oid) tips.add(r.oid.toString("hex"));
					if (r.peeled_oid) tips.add(r.peeled_oid.toString("hex"));
				}
				const { present } = await reachableClosure(pinned, id, [...tips], false);
				await conn`commit`;
				return present;
			} catch (err) {
				try {
					await conn`rollback`;
				} catch {
					// The connection itself is broken; the original failure is the
					// one worth reporting.
				}
				throw err;
			}
		} finally {
			conn.release();
		}
	}
	/** A Kysely pinned to a single porsager connection: its dialect `reserve()`s the
	 * same connection for every statement (so a multi-statement read shares one MVCC
	 * snapshot) and `release()` is a no-op (the caller owns the connection's lifetime).
	 * The shim is a callable with a `reserve` property, the shape the dialect probes
	 * for (`isPostgresJSSql`). */
	function pinnedKysely(conn) {
		const nonReleasing = new Proxy(conn, { get: (target, prop) => prop === "release" ? () => {} : Reflect.get(target, prop, target) });
		return initKysely(Object.assign(() => {
			throw new Error("pggit gc: pinned client used as a tagged template");
		}, { reserve: async () => nonReleasing }));
	}
	/** Bulk-load the live OID set into the UNLOGGED `live` table via binary COPY (the
	 * one bytea-safe bulk path, copy-insert.ts), batched so the payload stays bounded.
	 * Each COPY runs in its own transaction so the staging temp table drops on commit. */
	async function loadLive(live, oids) {
		if (oids.size === 0) return;
		const all = [...oids];
		for (let i = 0; i < all.length; i += LIVE_LOAD_BATCH) {
			const chunk = all.slice(i, i + LIVE_LOAD_BATCH);
			await pg.begin(async (tx) => {
				await copyInsert(tx, live, ["oid"], chunk.map((hex) => [{
					t: "bytea",
					v: Buffer.from(hex, "hex")
				}]));
			});
		}
	}
	/** Batched object sweep. Postgres `DELETE` has no `LIMIT`, so each batch picks a
	 * `LIMIT`-bounded set of victim OIDs then deletes them by PRIMARY KEY `(repo_id,
	 * oid)`. The match is on the PK — NOT `ctid`: `ctid` is per-partition-relative, so
	 * matching `ctid` across the HASH-partitioned table would delete same-ctid rows in
	 * OTHER partitions (other tenants). The loop ends when a batch deletes nothing.
	 * Each batch is its own (implicit) transaction, so `clock_timestamp()` re-evaluates
	 * per batch and the grace cutoff advances. Returns total rows deleted. */
	async function sweepObjects(id, live, graceSeconds, batchLimit) {
		let total = 0;
		for (;;) {
			const deleted = await pg.unsafe(`with victims as (
				select o.oid from git_object o
				where o.repo_id = $1::bigint
				and not exists (select 1 from ${live} l where l.oid = o.oid)
				and o.created_at < clock_timestamp() - make_interval(secs => $2::float8)
				limit $3::int
				)
				delete from git_object o using victims v
				where o.repo_id = $1::bigint and o.oid = v.oid returning 1 as n`, [
				String(id),
				String(graceSeconds),
				String(batchLimit)
			]);
			if (deleted.length === 0) break;
			total += deleted.length;
		}
		return total;
	}
	/** Batched edge sweep: delete every `git_edge` row whose PARENT object no longer
	 * exists in `git_object` (a deleted object's outgoing edges). No FK cascade exists
	 * (0003_git_edge.ts), so dangling edges must be swept explicitly. Anti-join on the
	 * parent only: a surviving parent is reachable, so all its children are reachable
	 * and present — its edges never dangle. Like the object sweep, each batch picks a
	 * `LIMIT`-bounded victim set then deletes by PRIMARY KEY `(repo_id, parent, child)`
	 * — never `ctid`, which is per-partition and would reach into other tenants. */
	async function sweepEdges(id, batchLimit) {
		let total = 0;
		for (;;) {
			const deleted = await pg.unsafe(`with victims as (
				select e.parent, e.child from git_edge e
				where e.repo_id = $1::bigint
				and not exists (
				select 1 from git_object o where o.repo_id = e.repo_id and o.oid = e.parent
				)
				limit $2::int
				)
				delete from git_edge e using victims v
				where e.repo_id = $1::bigint and e.parent = v.parent and e.child = v.child
				returning 1 as n`, [String(id), String(batchLimit)]);
			if (deleted.length === 0) break;
			total += deleted.length;
		}
		return total;
	}
	/** Post-sweep maintenance (best-effort): reclaim the dead tuples GC produced in
	 * the heap + TOAST and refresh planner stats, then reindex the walk index.
	 * `VACUUM` cannot run inside a transaction block, so these are standalone
	 * statements run outside any txn. */
	async function maintain() {
		await pg.unsafe(`vacuum (analyze) git_object`);
		await pg.unsafe(`vacuum (analyze) git_edge`);
		await pg.unsafe(`reindex index git_edge_walk`);
	}
}
|
|
1123
|
+
|
|
1124
|
+
//#endregion
|
|
1125
|
+
//#region src/gc-scheduler.ts
|
|
1126
|
+
/**
 * Build the GC scheduler over a porsager client (the same wire→DB boundary the
 * stores take). `drainOnce()` runs one poll+sweep pass; `start()`/`stop()` drive
 * it on `intervalMs`. Reachable objects are never touched — it only invokes the
 * per-repo GC primitive, which is reachability-safe.
 *
 * Options read here: `graceSeconds` (passed through to the GC), `concurrency`
 * (repos collected in parallel per pass; anything that is not a number ≥ 1,
 * including a missing value, falls back to 1) and `intervalMs`.
 */
function createGcScheduler(pg, opts) {
	const gc = createGc(pg);
	let timer;
	let inFlight;
	/** The eligible repos for this pass — the §2 predicate: pushed to at least
	 * once, and either never collected or pushed to since the last collection. */
	async function selectCandidates() {
		return pg`
			select r.id::text as id, r.name
			from repos r
			where r.last_pushed_at is not null
			and (r.last_gc_at is null or r.last_pushed_at > r.last_gc_at)
		`;
	}
	/**
	 * GC one candidate. `t0 = clock_timestamp()` is captured BEFORE `gc()` opens its
	 * snapshot, then written as `last_gc_at` after the sweep: any push committing
	 * after t0 re-stamps `last_pushed_at > t0` (the store stamps after commit) and
	 * re-qualifies the repo next pass (no lost garbage). A per-repo failure is
	 * ISOLATED — logged and skipped (the repo keeps its old `last_gc_at`, so it
	 * re-qualifies and is retried next pass) — so one poison repo never aborts the
	 * rest of the pass. `maintain: false`: the drain leans on autovacuum, never a
	 * per-pass full-table VACUUM (gc.ts).
	 */
	async function drainRepo(c) {
		try {
			const [t] = await pg`select clock_timestamp()::text as t0`;
			if (!t) throw new Error("pggit gc-scheduler: clock_timestamp() returned no row");
			const { deletedObjects, deletedEdges } = await gc.gc(c.name, {
				graceSeconds: opts.graceSeconds,
				maintain: false
			});
			await pg`update repos set last_gc_at = ${t.t0}::timestamptz where id = ${c.id}::bigint`;
			return {
				deletedEdges,
				deletedObjects,
				repo: c.name
			};
		} catch (err) {
			console.error(`pggit gc-scheduler: GC of repo ${JSON.stringify(c.name)} failed (retried next pass):`, err);
			return null;
		}
	}
	/** One drain pass: GC every eligible repo (bounded concurrency, distinct repos so
	 * a pass never double-GCs one). Returns an entry per repo GC'd this pass — a repo
	 * whose GC threw is skipped (not in the summary) and retried next pass. */
	async function drainOnce() {
		const candidates = await selectCandidates();
		// `Math.max(1, undefined)` is NaN, which would make the pool spawn zero
		// workers and the pass silently collect nothing; compare instead so any
		// value that is not a number >= 1 degrades to a single worker.
		const concurrency = opts.concurrency >= 1 ? opts.concurrency : 1;
		return (await mapPool(candidates, concurrency, drainRepo)).filter((e) => e !== null);
	}
	/** Run the drain on `intervalMs`. The `inFlight` guard ensures passes never
	 * overlap — so two passes can never touch the same repo at once — and a slow pass
	 * simply skips the next tick. A pass failure is logged, never thrown into the
	 * timer. The timer is `unref`'d so it alone does not keep the process alive (the
	 * server's socket does). */
	function start() {
		if (timer) return;
		timer = setInterval(() => {
			if (inFlight) return;
			inFlight = drainOnce().catch((err) => {
				console.error("pggit gc-scheduler: drain pass failed:", err);
			}).finally(() => {
				inFlight = void 0;
			});
		}, opts.intervalMs);
		timer.unref?.();
	}
	/** Halt the background drain and AWAIT any pass already in flight, so a caller may
	 * safely tear the connection pool down afterwards (no query runs into a closed
	 * pool). Idempotent. */
	async function stop() {
		if (timer) {
			clearInterval(timer);
			timer = void 0;
		}
		await inFlight;
	}
	return {
		drainOnce,
		start,
		stop
	};
}
|
|
1213
|
+
/**
 * Run `fn` over `items` with at most `limit` calls in flight, preserving result
 * order (`results[i]` is the outcome of `fn(items[i])`).
 *
 * A bounded worker pool — up to `limit` workers pull indices from a shared
 * cursor — so one slow item cannot head-of-line-block the rest.
 *
 * A `limit` that is not a number ≥ 1 (`0`, negative, `NaN`, `undefined`) falls
 * back to a single worker. Previously such a limit spawned no workers at all and
 * the function returned an array of holes without ever calling `fn`.
 *
 * The returned promise rejects as soon as any `fn` call rejects.
 */
async function mapPool(items, limit, fn) {
	const results = new Array(items.length);
	const width = limit >= 1 ? limit : 1;
	let cursor = 0;
	async function worker() {
		for (;;) {
			// Claiming the index is synchronous, so no two workers share one.
			const i = cursor++;
			if (i >= items.length) return;
			results[i] = await fn(items[i]);
		}
	}
	const workers = Array.from({ length: Math.min(width, items.length) }, worker);
	await Promise.all(workers);
	return results;
}
|
|
1230
|
+
|
|
1231
|
+
//#endregion
|
|
1232
|
+
//#region src/index.ts
|
|
1233
|
+
const UPLOAD_PACK_ADVERTISEMENT = Buffer.concat([
|
|
1234
|
+
encodePktLine(Buffer.from("# service=git-upload-pack\n")),
|
|
1235
|
+
encodePkt({ type: "flush" }),
|
|
1236
|
+
encodeAdvertisement()
|
|
1237
|
+
]);
|
|
1238
|
+
/**
 * Copy exactly the bytes viewed by `buf` into a standalone ArrayBuffer.
 * Slicing by the view's offset and length matters because a Buffer may be a
 * window onto a larger backing buffer.
 */
function toArrayBuffer(buf) {
	const { buffer, byteOffset, byteLength } = buf;
	const end = byteOffset + byteLength;
	return buffer.slice(byteOffset, end);
}
|
|
1241
|
+
const ADVERTISEMENT_BODY = toArrayBuffer(UPLOAD_PACK_ADVERTISEMENT);
|
|
1242
|
+
/**
 * Read the body of a smart-HTTP POST, undoing its `Content-Encoding`.
 *
 * Git gzips large upload-pack/receive-pack request bodies (`remote-curl.c`) and
 * `git http-backend` inflates them server-side, so this does the same. The
 * header value is compared case-insensitively:
 *   - absent or `identity` → the raw bytes;
 *   - `gzip` / `x-gzip`    → the gunzipped bytes;
 *   - anything else        → rejected, never handed raw to the pkt-line parser.
 *
 * @throws {GitProtocolError} on an unsupported encoding or a body that fails to gunzip.
 */
async function readRequestBody(c) {
	const body = Buffer.from(await c.req.arrayBuffer());
	const declared = c.req.header("content-encoding")?.toLowerCase();
	switch (declared) {
		case void 0:
		case "identity":
			return body;
		case "gzip":
		case "x-gzip":
			try {
				return gunzipSync(body);
			} catch (err) {
				const reason = err instanceof Error ? err.message : String(err);
				throw new GitProtocolError(`request body declared Content-Encoding ${JSON.stringify(declared)} but failed to gunzip: ${reason}`);
			}
		default:
			throw new GitProtocolError(`unsupported request Content-Encoding: ${JSON.stringify(declared)}`);
	}
}
|
|
1260
|
+
/**
 * Guard for the fetch endpoints, which speak git protocol v2 only.
 *
 * `header` is the request's `Git-Protocol` value: a `:`-separated key list that
 * must contain `version=2` (surrounding whitespace ignored). A v0/v1 client
 * sends no such key and would misread the v2 advertisement as an empty repo and
 * silently clone nothing — so the unnegotiated case is refused loudly here.
 *
 * @throws {GitProtocolError} when `version=2` is not among the keys (the app's
 *   error handler answers that with a 400).
 */
function assertProtocolV2(header) {
	const keys = (header ?? "").split(":");
	for (const key of keys) {
		if (key.trim() === "version=2") return;
	}
	throw new GitProtocolError("pggit serves fetch over git protocol v2 only; set protocol.version=2 (git ≥ 2.26 negotiates it by default)");
}
|
|
1271
|
+
/**
 * Fetch-side backend for one repo: every operation forwards to the matching
 * `deps.objects` / `deps.refs` method with `repoId` prepended to its arguments.
 */
function backendFor(deps, repoId) {
	return {
		buildPack(wants, haves, omitBlobs, includeTag) {
			return deps.objects.buildPack(repoId, wants, haves, omitBlobs, includeTag);
		},
		commonHaves(haves) {
			return deps.objects.commonHaves(repoId, haves);
		},
		getSymref(name) {
			return deps.refs.getSymref(repoId, name);
		},
		listRefs() {
			return deps.refs.listRefs(repoId);
		},
		readyToGiveUp(wants, common) {
			return deps.objects.readyToGiveUp(repoId, wants, common);
		}
	};
}
|
|
1280
|
+
/**
 * Push-side backend for one repo. `applyRefUpdates`, `ingest` and `isConnected`
 * forward to `deps.refs` / `deps.objects` with `repoId` prepended; `ingest`
 * resolves to nothing regardless of what `ingestPack` returns.
 *
 * `syncRefSnapshot` is attached only when `deps.snapshots` is provided, so a
 * host without the snapshot projection gets a backend without that hook.
 */
function receiveBackendFor(deps, repoId) {
	const backend = {
		applyRefUpdates(commands, atomic) {
			return deps.refs.applyRefUpdates(repoId, commands, atomic);
		},
		async ingest(pack) {
			await deps.objects.ingestPack(repoId, pack);
		},
		isConnected(oid) {
			return deps.objects.isConnected(repoId, oid);
		}
	};
	if (!deps.snapshots) return backend;
	const snapshotDeps = {
		objects: deps.objects,
		snapshots: deps.snapshots
	};
	backend.syncRefSnapshot = (ref, newOid) => syncRefSnapshot(snapshotDeps, repoId, ref, newOid);
	return backend;
}
|
|
1297
|
+
/**
 * Body of the v0 receive-pack ref advertisement: the `# service=git-receive-pack`
 * pkt-line, a flush packet, then the encoded advertisement of the repo's refs
 * (as listed by `deps.refs.listRefs`).
 */
async function receivePackAdvertBody(deps, repoId) {
	const refs = await deps.refs.listRefs(repoId);
	const preamble = encodePktLine(Buffer.from("# service=git-receive-pack\n"));
	const flush = encodePkt({ type: "flush" });
	const advertisement = encodeReceivePackAdvertisement(refs);
	return Buffer.concat([
		preamble,
		flush,
		advertisement
	]);
}
|
|
1306
|
+
/**
 * Create the git-remote Hono app (smart-HTTP, protocol v2 fetch). Mount it in
 * a host app with `host.route("/git", createGitApp(deps))`; the host owns the
 * Postgres lifecycle behind `deps`.
 *
 * Routes: `GET /health`, `GET /:repo/info/refs`, `POST /:repo/git-upload-pack`
 * and `POST /:repo/git-receive-pack`. When `opts.instrument` is truthy, each
 * request runs inside `runRequest` with its method and path.
 */
function createGitApp(deps, opts = {}) {
	// Headers shared by every git response; only the content type varies.
	const gitHeaders = (contentType) => ({
		"Cache-Control": "no-cache",
		"Content-Type": contentType
	});
	const app = new Hono();
	if (opts.instrument) {
		app.use((c, next) => {
			const meta = {
				method: c.req.method,
				path: c.req.path
			};
			return runRequest(meta, () => next());
		});
	}
	app.use(cors());
	app.onError((err, c) => {
		// Protocol violations are reported to the client as 400; anything else is logged and hidden behind a 500.
		if (!(err instanceof GitProtocolError)) {
			console.error(err);
			return c.text("internal server error", 500);
		}
		return c.text(err.message, 400);
	});
	app.get("/health", (c) => c.text("ok"));
	app.get("/:repo/info/refs", async (c) => {
		const service = c.req.query("service");
		switch (service) {
			case "git-upload-pack":
				// Throws GitProtocolError unless the Git-Protocol header lists version=2.
				assertProtocolV2(c.req.header("git-protocol"));
				return c.body(ADVERTISEMENT_BODY, 200, gitHeaders("application/x-git-upload-pack-advertisement"));
			case "git-receive-pack": {
				const advert = await receivePackAdvertBody(deps, c.req.param("repo"));
				return c.body(toArrayBuffer(advert), 200, gitHeaders("application/x-git-receive-pack-advertisement"));
			}
			default:
				return c.text(`unsupported service ${JSON.stringify(service)}`, 403);
		}
	});
	app.post("/:repo/git-upload-pack", async (c) => {
		const requestBody = await readRequestBody(c);
		const backend = backendFor(deps, c.req.param("repo"));
		const out = await handleUploadPack(requestBody, backend);
		count("wireBytes", out.length);
		return c.body(toArrayBuffer(out), 200, gitHeaders("application/x-git-upload-pack-result"));
	});
	app.post("/:repo/git-receive-pack", async (c) => {
		const requestBody = await readRequestBody(c);
		const backend = receiveBackendFor(deps, c.req.param("repo"));
		const out = await handleReceivePack(requestBody, backend);
		return c.body(toArrayBuffer(out), 200, gitHeaders("application/x-git-receive-pack-result"));
	});
	return app;
}
|
|
1359
|
+
|
|
1360
|
+
//#endregion
|
|
1361
|
+
export { createGc, createGcScheduler, createGitApp };
|
|
1362
|
+
//# sourceMappingURL=index.mjs.map
|