@usecontextlayer/pggit 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,8 +1,9 @@
1
- import { gunzipSync } from "node:zlib";
1
+ import { createInflate, deflateSync, gunzipSync } from "node:zlib";
2
2
  import { Hono } from "hono";
3
3
  import { cors } from "hono/cors";
4
4
  import { AsyncLocalStorage } from "node:async_hooks";
5
5
  import { performance } from "node:perf_hooks";
6
+ import { createHash } from "node:crypto";
6
7
  import { Kysely, sql } from "kysely";
7
8
  import { PostgresJSDialect } from "kysely-postgres-js";
8
9
 
@@ -592,6 +593,14 @@ var GitFormatError = class extends Error {
592
593
 
593
594
  //#endregion
594
595
  //#region src/object/object.ts
596
/**
 * Compute a git object ID.
 *
 * Git hashes the loose-object representation: the header
 * `"<type> <byteLength>\0"` immediately followed by the raw content, digested
 * with SHA-1.
 *
 * @param {string} type - Object type name written into the header.
 * @param {Buffer} content - Raw object bytes; `content.length` is used as the
 *   byte length in the header.
 * @returns {string} The 40-char lowercase hex digest.
 */
function computeOid(type, content) {
  const hash = createHash("sha1");
  hash.update(Buffer.from(`${type} ${content.length}\0`, "latin1"));
  hash.update(content);
  return hash.digest("hex");
}
595
604
  /** OIDs in the leading `key <oid>` headers (up to the blank line) for given keys. */
596
605
  function headerOids(content, keys) {
597
606
  const oids = [];
@@ -626,12 +635,33 @@ function treeEntries(content) {
626
635
  function isTreeEntryMode(mode) {
627
636
  return mode === "40000";
628
637
  }
638
/**
 * The OID of every entry in a tree, whatever the entry's mode, in the order
 * `treeEntries` returns them.
 *
 * @param {Buffer} content - Raw tree object content, as accepted by `treeEntries`.
 * @returns {string[]} One OID per tree entry.
 */
function treeEntryOids(content) {
  const oids = [];
  for (const { oid } of treeEntries(content)) oids.push(oid);
  return oids;
}
642
/**
 * Parent OIDs of a commit: the `parent` headers only, never the `tree` header.
 * Used for ancestry walks.
 *
 * @param {Buffer} content - Raw commit object content.
 * @returns {string[]} Whatever `headerOids` yields for the `parent` key.
 */
function commitParents(content) {
  const parentKeys = new Set(["parent"]);
  return headerOids(content, parentKeys);
}
629
646
  /** A commit's root tree OID. Every commit has exactly one `tree` header. */
630
647
  function commitTreeOid(content) {
631
648
  const [tree] = headerOids(content, /* @__PURE__ */ new Set(["tree"]));
632
649
  if (!tree) throw new GitFormatError("missing-tree-header", "commitTreeOid: commit has no tree header");
633
650
  return tree;
634
651
  }
652
/**
 * The OIDs an object points at directly — the basis of reachability
 * enumeration (fetch, connectivity):
 *   - blob   → nothing;
 *   - commit → its `tree` and `parent` headers;
 *   - tag    → its `object` header (the target);
 *   - tree   → every entry.
 *
 * @param {string} type - One of `"blob"`, `"commit"`, `"tag"`, `"tree"`.
 * @param {Buffer} content - Raw object content.
 * @returns {string[] | undefined} The referenced OIDs; `undefined` for any
 *   other `type`.
 */
function referencedOids(type, content) {
  if (type === "blob") return [];
  if (type === "commit") return headerOids(content, new Set(["tree", "parent"]));
  if (type === "tag") return headerOids(content, new Set(["object"]));
  if (type === "tree") return treeEntryOids(content);
  return undefined;
}
635
665
 
636
666
  //#endregion
637
667
  //#region src/repo-view/build-file-list.ts
@@ -796,10 +826,217 @@ async function copyInsert(tx, target, columns, rows) {
796
826
  await tx.unsafe(`insert into ${target} (${cols}) select ${cols} from ${staging} on conflict do nothing`);
797
827
  }
798
828
 
829
+ //#endregion
830
+ //#region src/store/repo-resolver.ts
831
/**
 * Build a memoizing resolver from a wire repo name to its `repos.id`
 * surrogate. The object and ref stores both key on the bigint `repo_id`, so
 * each of them builds one resolver as its name→id boundary.
 *
 * Caching policy:
 *   - a found id is cached for the resolver's lifetime (the mapping is
 *     immutable once a repo exists: ids are `generated always`, names are
 *     unique), which keeps the hot path at a single point-read;
 *   - a miss is NEVER cached — a later push may create the repo, and a cached
 *     `null` would mask it.
 *
 * @param db - Kysely-style query builder over the `repos` table.
 * @returns {{ensureRepoId: Function, resolveRepoId: Function}} `resolveRepoId`
 *   for reads (lookup only), `ensureRepoId` for writes (get-or-create).
 */
function createRepoResolver(db) {
  const idByName = new Map();

  // Point-read builder for one repo name; executed by the callers below.
  const lookup = (name) => db.selectFrom("repos").select("id").where("name", "=", name);

  const remember = (name, id) => {
    idByName.set(name, id);
    return id;
  };

  /**
   * The repo's id, creating the row if absent. Race-safe under concurrent
   * first-pushes: the insert is `on conflict do nothing`, and when it returns
   * no row (another writer won) the id is re-read. The leading select avoids
   * a no-op write on the common "already exists" path.
   */
  async function ensureRepoId(name) {
    const known = idByName.get(name);
    if (known !== undefined) return known;

    const existing = await lookup(name).executeTakeFirst();
    if (existing) return remember(name, existing.id);

    const inserted = await db
      .insertInto("repos")
      .values({ name })
      .onConflict((oc) => oc.doNothing())
      .returning("id")
      .executeTakeFirst();
    const id = inserted?.id ?? (await lookup(name).executeTakeFirstOrThrow()).id;
    return remember(name, id);
  }

  /** The repo's id, or `null` if it has never been written to. */
  async function resolveRepoId(name) {
    const known = idByName.get(name);
    if (known !== undefined) return known;

    const row = await lookup(name).executeTakeFirst();
    return row ? remember(name, row.id) : null;
  }

  return { ensureRepoId, resolveRepoId };
}
873
+
874
+ //#endregion
875
+ //#region src/repo-view/repo-file-projection.ts
876
/**
 * Write-only maintainer of `repo_file`, the slim per-branch-tip
 * `path → (mode, blob_oid)` index that is pggit's public read surface.
 *
 * There is deliberately no read method here: consumers query
 * `repo_file ⋈ git_object` (on `oid = blob_oid`) with direct SQL
 * (docs/2026-06-26-read-surface-sharpening-design.md). The projection is
 * derived from the canonical objects — no duplicate blob bytes, no orphan
 * reaper (§4.5) — so it can be dropped and rebuilt at will; this module only
 * rebuilds or drops it on push.
 *
 * Note that the `repoId` parameter of every method is the WIRE REPO NAME; it
 * is resolved to the bigint surrogate through a memoizing repo resolver.
 *
 * @param pg - Postgres client; used for `pg.begin(...)` transactions and
 *   passed to `initKysely` for the query builder.
 * @returns {{clearRepo: Function, dropRefSnapshot: Function, rebuildRefSnapshot: Function}}
 */
function createRepoFileProjection(pg) {
  const db = initKysely(pg);
  const repos = createRepoResolver(db);

  /**
   * Drop a repo's entire projection (all branches) — the clean slate for a
   * full rebuild. A repo that was never written resolves to `null` and is a
   * no-op. No blob bytes to reap; the index is the only state.
   */
  async function clearRepo(repoId) {
    const id = await repos.resolveRepoId(repoId);
    if (id !== null) {
      await db.deleteFrom("repo_file").where("repo_id", "=", id).execute();
    }
  }

  /** Drop `refName`'s snapshot (branch deleted). No-op for an unknown repo. */
  async function dropRefSnapshot(repoId, refName) {
    const id = await repos.resolveRepoId(repoId);
    if (id !== null) {
      await db.deleteFrom("repo_file").where("repo_id", "=", id).where("ref_name", "=", refName).execute();
    }
  }

  /**
   * Replace `refName`'s snapshot with `fileList.files` in one transaction:
   * delete the ref's current rows, then bulk-insert the new ones. Only the
   * path→blob_oid index is stored; the blobs already live in `git_object`.
   */
  async function rebuildRefSnapshot(repoId, refName, fileList) {
    const id = await repos.ensureRepoId(repoId);
    // Typed cells in the same order as the column list handed to copyInsert.
    const rows = fileList.files.map((file) => [
      { t: "int8", v: id },
      { t: "text", v: refName },
      { t: "text", v: file.path },
      { t: "text", v: file.mode },
      { t: "bytea", v: Buffer.from(file.blobOid, "hex") }
    ]);
    await pg.begin(async (tx) => {
      await tx`delete from repo_file where repo_id = ${id} and ref_name = ${refName}`;
      const columns = ["repo_id", "ref_name", "path", "mode", "blob_oid"];
      await copyInsert(tx, "repo_file", columns, rows);
    });
  }

  return { clearRepo, dropRefSnapshot, rebuildRefSnapshot };
}
943
+
799
944
  //#endregion
800
945
  //#region src/object/edges.ts
946
/**
 * Edge kinds stored in `git_edge.kind`.
 *
 * tree→blob (it would be `4`) is deliberately NOT a kind: blobs are enumerated
 * from tree content and never stored as edges (§4.3), so `4` stays
 * reserved/unused.
 *
 * Frozen because these numbers are persisted and shared by every derivation
 * and query in the module; an accidental write must not silently change them.
 */
const EDGE_KIND = Object.freeze({
  COMMIT_PARENT: 2,
  COMMIT_TREE: 1,
  TAG_TARGET: 5,
  TREE_SUBTREE: 3
});
801
957
  /** A tree entry pointing at a commit in *another* repo — no blob, no edge here. */
802
958
  const GITLINK_MODE = "160000";
959
+ const WELL_FORMED_OID = /^[0-9a-f]{40}$/;
960
/**
 * Validate an OID parsed from a commit/tag header and hand it back.
 *
 * `commitParents`/`commitTreeOid`/`referencedOids` take whatever follows the
 * header key verbatim, so a forged object could carry a non-OID there and
 * yield a bogus edge child; this rejects it loudly at the ingest boundary
 * (§5.1). Tree-entry OIDs are exempt: `treeEntries` already guarantees a
 * 20-byte value, and the `bytea CHECK(length(child)=20)` constraint is the
 * database-level backstop for every edge.
 *
 * @param {string} oid - Candidate object id.
 * @param {string} context - Prefix for the error message (what was being parsed).
 * @returns {string} `oid`, unchanged, when it matches `WELL_FORMED_OID`.
 * @throws {GitFormatError} `malformed-oid` otherwise.
 */
function assertOid(oid, context) {
  if (WELL_FORMED_OID.test(oid)) return oid;
  const shown = JSON.stringify(oid);
  throw new GitFormatError("malformed-oid", `${context}: not a well-formed object id: ${shown}`);
}
972
/**
 * Count the `key value` lines in an object's header block, i.e. the lines
 * before the first blank line (which ends a commit/tag's headers).
 *
 * @param {Buffer} content - Raw object content; decoded as latin1.
 * @param {string} key - Header key to count; a line matches when it starts
 *   with `key` followed by a single space.
 * @returns {number} Number of matching header lines.
 */
function countHeader(content, key) {
  const prefix = `${key} `;
  const lines = content.toString("latin1").split("\n");
  const blankAt = lines.indexOf("");
  const headerLines = blankAt === -1 ? lines : lines.slice(0, blankAt);
  return headerLines.filter((line) => line.startsWith(prefix)).length;
}
983
/**
 * fsck-grade structural validation at the ingest boundary (§5.1, invariant
 * §10.2): rejects the malformed objects that OID well-formedness and tree
 * parsing do not catch.
 *
 *   - commit: at most one `tree` header (git fsck: multipleTrees). Otherwise
 *     `commitTreeOid` would silently take the first and record an edge to a
 *     tree the object does not actually root.
 *   - tag: exactly one `object` header. Zero yields no `kind=5` edge and
 *     silently breaks peeling and connectivity; more than one yields several
 *     divergent `kind=5` edges and a nondeterministic `peeled_oid`.
 *
 * Other structural guarantees are enforced elsewhere: `assertOid` on every
 * referenced OID, a present root `tree` (`commitTreeOid`, which also rejects
 * a zero-tree commit), and a well-formed tree body (`treeEntries` throws).
 * The store calls this once per object before derivation, inside the ingest
 * transaction, so a malformed push aborts before any row lands.
 *
 * @param {string} type - Object type name; only `"commit"` and `"tag"` are checked.
 * @param {Buffer} content - Raw object content.
 * @throws {GitFormatError} `multiple-tree-headers`, `missing-tag-object` or
 *   `multiple-tag-objects`.
 */
function validateObject(type, content) {
  if (type === "commit") {
    if (countHeader(content, "tree") > 1) {
      throw new GitFormatError("multiple-tree-headers", "commit carries more than one tree header");
    }
    return;
  }
  if (type !== "tag") return;

  const objectHeaders = countHeader(content, "object");
  if (objectHeaders < 1) {
    throw new GitFormatError("missing-tag-object", "annotated tag has no object header");
  }
  if (objectHeaders > 1) {
    throw new GitFormatError("multiple-tag-objects", "annotated tag carries more than one object header");
  }
}
1007
/**
 * The edges an object contributes to `git_edge`; the object's own OID is the
 * implied parent (the §4.3 standing rule, mode-aware):
 *   - commit → its tree (kind 1) first, then each parent (kind 2);
 *   - tree   → its subtrees ONLY (mode `40000` → kind 3). Blobs and gitlinks
 *     (`160000`, a commit living in another repo) are not edges:
 *     `isTreeEntryMode` admits only `40000`, so both are dropped;
 *   - tag    → its target (kind 5);
 *   - blob   → nothing.
 *
 * Commit and tag OIDs pass through `assertOid`; tree-entry OIDs are used as
 * returned by `treeEntries`. The store inserts this derivation alongside the
 * object row in the same transaction (§10.1).
 *
 * @param {string} type - Object type name.
 * @param {Buffer} content - Raw object content.
 * @returns {Array<{child: string, kind: number}> | undefined} Edges in the
 *   order above; `undefined` for any other `type`.
 * @throws {GitFormatError} When a header OID is malformed or a commit has no tree.
 */
function deriveEdges(type, content) {
  const edge = (child, kind) => ({ child, kind });

  if (type === "blob") return [];

  if (type === "commit") {
    const treeEdge = edge(assertOid(commitTreeOid(content), "commit tree"), EDGE_KIND.COMMIT_TREE);
    const parentEdges = commitParents(content).map((parent) =>
      edge(assertOid(parent, "commit parent"), EDGE_KIND.COMMIT_PARENT),
    );
    return [treeEdge, ...parentEdges];
  }

  if (type === "tag") {
    return referencedOids("tag", content).map((target) =>
      edge(assertOid(target, "tag target"), EDGE_KIND.TAG_TARGET),
    );
  }

  if (type === "tree") {
    const edges = [];
    for (const entry of treeEntries(content)) {
      if (isTreeEntryMode(entry.mode)) edges.push(edge(entry.oid, EDGE_KIND.TREE_SUBTREE));
    }
    return edges;
  }

  return undefined;
}
803
1040
  /**
804
1041
  * The blob OIDs directly in a tree — the §4.3 standing rule's other half: blobs
805
1042
  * are enumerated from tree content, never stored as edges. A tree entry is a blob
@@ -833,13 +1070,297 @@ const PACK_OBJ_TYPE = {
833
1070
  TAG: 4,
834
1071
  TREE: 2
835
1072
  };
1073
/**
 * Encode a pack object-entry header: a variable-length (type, size) pair.
 *
 * The first byte holds the continuation flag (0x80), the type code in bits
 * 4–6 and the low 4 bits of `size`; each following byte holds 7 more size
 * bits, least-significant group first, with 0x80 set while more bytes follow.
 *
 * @param {number} type - Numeric pack type code.
 * @param {number} size - Uncompressed size in bytes.
 * @returns {Buffer} The encoded header bytes.
 */
function encodeObjectHeader(type, size) {
  const CONTINUE = 128;
  let remaining = Math.floor(size / 16);
  const bytes = [(type << 4) | (size % 16) | (remaining > 0 ? CONTINUE : 0)];
  while (remaining > 0) {
    const low7 = remaining % 128;
    remaining = Math.floor(remaining / 128);
    bytes.push(remaining > 0 ? low7 | CONTINUE : low7);
  }
  return Buffer.from(bytes);
}
1086
/**
 * Decode a pack object-entry header starting at `offset` — the inverse of
 * `encodeObjectHeader`.
 *
 * Size is accumulated with multiplication rather than `<<`, so it is not
 * truncated to 32 bits.
 *
 * @param {Buffer} buf - Buffer holding the header.
 * @param {number} offset - Index of the header's first byte.
 * @returns {{bytesRead: number, size: number, type: number}} Header length in
 *   bytes, the decoded size, and the 3-bit type code.
 */
function decodeObjectHeader(buf, offset) {
  let cursor = offset;
  let byte = buf.readUInt8(cursor++);
  const type = (byte >> 4) & 7;
  let size = byte & 15;
  // The first byte carried 4 size bits, so the next group is worth 16×.
  for (let scale = 16; byte & 128; scale *= 128) {
    byte = buf.readUInt8(cursor++);
    size += (byte & 127) * scale;
  }
  return { bytesRead: cursor - offset, size, type };
}
1104
+
1105
+ //#endregion
1106
+ //#region src/pack/delta.ts
1107
/**
 * Apply a git delta to its base, producing the target object.
 *
 * The delta begins with two LEB128 varints (source size, target size), then a
 * stream of instructions: a COPY (high bit set — copy a run from the base at
 * a given offset/size) or an INSERT (1..127 literal bytes that follow). See
 * gitformat-pack "Deltified representation". Deltas are only ever READ and
 * applied here; the serve path emits none (spec §3.4).
 *
 * Every instruction is bounds-checked before it runs. `Buffer.copy` clamps an
 * out-of-range source or target silently, so without these checks a forged
 * delta could advance the output cursor over bytes that were never written
 * and still satisfy the final size check, yielding a zero-padded object.
 *
 * @param {Buffer} base - The base object's content.
 * @param {Buffer} delta - The inflated delta payload.
 * @returns {Buffer} The reconstructed target content.
 * @throws {GitFormatError} `delta-truncated` (the delta ends mid-varint or
 *   mid-instruction), `delta-base-size-mismatch`, `delta-copy-out-of-range`
 *   (a COPY reads past the end of the base), `delta-target-size-mismatch`
 *   (output would exceed or falls short of the declared size), or
 *   `delta-reserved-opcode`.
 */
function applyDelta(base, delta) {
  let pos = 0;
  const readByte = () => {
    if (pos >= delta.length) throw new GitFormatError("delta-truncated", "delta: unexpected end of delta data");
    return delta[pos++];
  };
  const readVarint = () => {
    let result = 0;
    // Multiplication, not `<<`, so sizes above 2³¹ are not truncated.
    let scale = 1;
    let byte;
    do {
      byte = readByte();
      result += (byte & 127) * scale;
      scale *= 128;
    } while (byte & 128);
    return result;
  };
  const sourceSize = readVarint();
  const targetSize = readVarint();
  if (base.length !== sourceSize) throw new GitFormatError("delta-base-size-mismatch", `delta: base size ${base.length} ≠ declared ${sourceSize}`);
  const out = Buffer.alloc(targetSize);
  let outPos = 0;
  const ensureRoom = (length) => {
    if (outPos + length > targetSize) throw new GitFormatError("delta-target-size-mismatch", `delta: produced ${outPos + length} bytes, declared ${targetSize}`);
  };
  while (pos < delta.length) {
    const op = readByte();
    if (op & 128) {
      // COPY: the low 7 bits select which offset/size bytes are present.
      let copyOffset = 0;
      if (op & 1) copyOffset |= readByte();
      if (op & 2) copyOffset |= readByte() << 8;
      if (op & 4) copyOffset |= readByte() << 16;
      // The top offset byte is added arithmetically: `<< 24` could go negative.
      if (op & 8) copyOffset += readByte() * 2 ** 24;
      let copySize = 0;
      if (op & 16) copySize |= readByte();
      if (op & 32) copySize |= readByte() << 8;
      if (op & 64) copySize |= readByte() << 16;
      // A size of zero encodes 0x10000.
      if (copySize === 0) copySize = 65536;
      if (copyOffset + copySize > base.length) throw new GitFormatError("delta-copy-out-of-range", `delta: copy of ${copySize} bytes at offset ${copyOffset} exceeds base size ${base.length}`);
      ensureRoom(copySize);
      base.copy(out, outPos, copyOffset, copyOffset + copySize);
      outPos += copySize;
    } else if (op !== 0) {
      // INSERT: `op` literal bytes follow the opcode.
      if (pos + op > delta.length) throw new GitFormatError("delta-truncated", `delta: insert of ${op} bytes runs past end of delta data`);
      ensureRoom(op);
      delta.copy(out, outPos, pos, pos + op);
      outPos += op;
      pos += op;
    } else throw new GitFormatError("delta-reserved-opcode", "delta: reserved opcode 0x00");
  }
  if (outPos !== targetSize) throw new GitFormatError("delta-target-size-mismatch", `delta: produced ${outPos} bytes, declared ${targetSize}`);
  return out;
}
1159
+
1160
+ //#endregion
1161
+ //#region src/pack/read-pack.ts
1162
/** Numeric pack type code → object type name, for the four undeltified types. */
const CODE_TO_TYPE$1 = Object.fromEntries([
  [PACK_OBJ_TYPE.COMMIT, "commit"],
  [PACK_OBJ_TYPE.TREE, "tree"],
  [PACK_OBJ_TYPE.BLOB, "blob"],
  [PACK_OBJ_TYPE.TAG, "tag"]
]);
1168
/**
 * Inflate exactly one zlib stream at the front of `buf`.
 *
 * This is the seam for walking the back-to-back zlib streams of a packfile:
 * besides the inflated data it reports how many COMPRESSED bytes the stream
 * occupied, taken from the inflater's `bytesWritten` once it has ended (input
 * consumed up to Z_STREAM_END; trailing bytes are left untouched — verified
 * empirically against node:zlib).
 *
 * @param {Buffer} buf - Bytes starting at a zlib stream; may extend past it.
 * @returns {Promise<{compressedLength: number, data: Buffer}>}
 *   Rejects with `GitFormatError` (`inflate-failed`) if zlib reports an error.
 */
function inflateOne(buf) {
  return new Promise((resolve, reject) => {
    const inflater = createInflate();
    const pieces = [];
    inflater.on("data", (piece) => {
      pieces.push(piece);
    });
    inflater.on("error", (err) => {
      const reason = err instanceof Error ? err.message : String(err);
      reject(new GitFormatError("inflate-failed", `pack: zlib inflate failed: ${reason}`));
    });
    inflater.on("end", () => {
      resolve({
        compressedLength: inflater.bytesWritten,
        data: Buffer.concat(pieces)
      });
    });
    inflater.end(buf);
  });
}
1188
/**
 * Decode the pack OFS_DELTA "offset encoding", which is NOT plain LEB128:
 * groups are most-significant first and every continuation adds 1 before
 * shifting, so each value has exactly one encoding. Uses arithmetic rather
 * than `<<` so offsets ≥ 2³¹ stay correct.
 *
 * @param {Buffer} buf - Buffer holding the encoded offset.
 * @param {number} offset - Index of the first encoded byte.
 * @returns {{bytesRead: number, value: number}} Encoded length and the
 *   decoded value.
 */
function readOffsetVarint(buf, offset) {
  let cursor = offset;
  let byte = buf.readUInt8(cursor++);
  let value = byte & 127;
  while (byte & 128) {
    byte = buf.readUInt8(cursor++);
    value = (value + 1) * 128 + (byte & 127);
  }
  return { bytesRead: cursor - offset, value };
}
1207
/**
 * Parse a v2 packfile into its objects, resolving OFS_DELTA and REF_DELTA
 * entries (including delta chains).
 *
 * Bases normally come from the same pack. A REF_DELTA whose base is NOT in
 * the pack — a thin pack, as `git push` sends by default — is resolved via
 * `resolveExternalBase` (the Postgres store on push ingest). Without a
 * resolver, an external base is a hard error.
 *
 * Two passes:
 *   1. walk the entries in file order, inflating each payload and checking it
 *      against the size declared in its entry header (for a delta entry that
 *      is the size of the inflated delta, not of the final object);
 *   2. record every undeltified object, then resolve deltas in rounds until
 *      none are pending. A round that makes no progress means some base can
 *      never be found.
 *
 * @param {Buffer} pack - The complete pack, including its 20-byte SHA-1 trailer.
 * @param {(oid: string) => Promise<{type: string, content: Buffer} | null>} [resolveExternalBase]
 *   Looks up a base object outside the pack. The `{type, content}` result
 *   shape is inferred from how the result is used here — TODO confirm
 *   against the store.
 * @returns {Promise<Array<{content: Buffer, oid: string, type: string}>>}
 *   The objects, in pack order.
 * @throws {GitFormatError} `bad-magic`, `truncated-header`,
 *   `unsupported-version`, `trailer-mismatch`, `unknown-object-type`,
 *   `size-mismatch`, `trailing-bytes`, `unresolved-base`, plus whatever
 *   `inflateOne` and `applyDelta` raise.
 */
async function readPack(pack, resolveExternalBase) {
  count("readPackCalls");
  if (pack.subarray(0, 4).toString("latin1") !== "PACK") throw new GitFormatError("bad-magic", "pack: bad magic");
  // Magic + version + object count; anything shorter cannot be read as a header.
  if (pack.length < 12) throw new GitFormatError("truncated-header", `pack: truncated header (${pack.length} bytes, need 12)`);
  const version = pack.readUInt32BE(4);
  if (version !== 2) throw new GitFormatError("unsupported-version", `pack: unsupported version ${version}`);
  const objectCount = pack.readUInt32BE(8);
  const trailerOffset = pack.length - 20;
  const actualTrailer = createHash("sha1").update(pack.subarray(0, trailerOffset)).digest();
  if (!pack.subarray(trailerOffset).equals(actualTrailer)) throw new GitFormatError("trailer-mismatch", "pack: trailer SHA-1 mismatch");

  const entries = new Map();
  const order = [];
  let offset = 12;
  // Inflate the zlib stream at `offset`, verify it against the size declared
  // in the entry header, and step past the compressed bytes.
  const inflateEntry = async (size) => {
    const { data, compressedLength } = await inflateOne(pack.subarray(offset));
    count("bytesInflated", data.length);
    if (data.length !== size) throw new GitFormatError("size-mismatch", `pack: size mismatch (header ${size}, inflated ${data.length})`);
    offset += compressedLength;
    return data;
  };
  for (let i = 0; i < objectCount; i++) {
    const start = offset;
    const { type, size, bytesRead } = decodeObjectHeader(pack, offset);
    offset += bytesRead;
    if (type === PACK_OBJ_TYPE.OFS_DELTA) {
      // The base is addressed as a distance back from this entry's own start.
      const { value: negOffset, bytesRead: ob } = readOffsetVarint(pack, offset);
      offset += ob;
      const delta = await inflateEntry(size);
      entries.set(start, {
        baseOffset: start - negOffset,
        delta,
        kind: "ofs"
      });
    } else if (type === PACK_OBJ_TYPE.REF_DELTA) {
      // The base is addressed by its raw 20-byte object id.
      const baseOid = pack.subarray(offset, offset + 20).toString("hex");
      offset += 20;
      const delta = await inflateEntry(size);
      entries.set(start, {
        baseOid,
        delta,
        kind: "ref"
      });
    } else {
      const typeName = CODE_TO_TYPE$1[type];
      if (!typeName) throw new GitFormatError("unknown-object-type", `pack: unknown object type ${type}`);
      const content = await inflateEntry(size);
      entries.set(start, {
        content,
        kind: "base",
        type: typeName
      });
    }
    order.push(start);
  }
  if (offset !== trailerOffset) throw new GitFormatError("trailing-bytes", `pack: consumed ${offset} bytes, expected ${trailerOffset} before trailer`);

  const resolved = new Map();
  const byOid = new Map();
  // Memoizes external lookups, misses (`null`) included, for this pack only.
  const externalCache = new Map();
  const fetchExternal = async (oid) => {
    const cached = externalCache.get(oid);
    if (cached !== void 0) return cached;
    const fetched = resolveExternalBase ? await resolveExternalBase(oid) : null;
    externalCache.set(oid, fetched);
    return fetched;
  };
  const record = (off, type, content) => {
    const obj = {
      content,
      oid: computeOid(type, content),
      type
    };
    resolved.set(off, obj);
    byOid.set(obj.oid, obj);
  };
  for (const off of order) {
    const entry = entries.get(off);
    if (entry?.kind === "base") record(off, entry.type, entry.content);
  }
  let pending = order.filter((off) => !resolved.has(off));
  while (pending.length > 0) {
    const stillPending = [];
    for (const off of pending) {
      const entry = entries.get(off);
      if (!entry || entry.kind === "base") continue;
      const base = entry.kind === "ofs" ? resolved.get(entry.baseOffset) ?? null : byOid.get(entry.baseOid) ?? await fetchExternal(entry.baseOid);
      if (!base) {
        // The base may itself be a delta resolved later; retry next round.
        stillPending.push(off);
        continue;
      }
      // A delta's result has the same type as its base.
      record(off, base.type, applyDelta(base.content, entry.delta));
    }
    if (stillPending.length === pending.length) {
      // No progress this round: at least one base is unreachable.
      const off = stillPending[0];
      const entry = entries.get(off);
      throw new GitFormatError("unresolved-base", `pack: ref-delta base ${entry?.kind === "ref" ? entry.baseOid : `offset ${entry?.kind === "ofs" ? entry.baseOffset : "?"}`} not found in pack or store`);
    }
    pending = stillPending;
  }
  return order.map((off) => resolved.get(off));
}
1313
+
1314
+ //#endregion
1315
+ //#region src/pack/write-pack.ts
1316
/** Object type name → numeric pack type code, used when writing pack entries. */
const TYPE_CODE = Object.fromEntries([
  ["blob", PACK_OBJ_TYPE.BLOB],
  ["commit", PACK_OBJ_TYPE.COMMIT],
  ["tag", PACK_OBJ_TYPE.TAG],
  ["tree", PACK_OBJ_TYPE.TREE]
]);
1322
/**
 * Build the 12-byte pack header: `PACK` magic, version 2, object count (both
 * numbers as big-endian uint32).
 *
 * The count is fixed up front, so a streaming encoder must know its object
 * total before the first object (the row-store's closure provides it without
 * reading content).
 *
 * @param {number} objectCount - Number of objects the pack will contain.
 * @returns {Buffer} The 12 header bytes.
 */
function packHeader(objectCount) {
  const magic = Buffer.from("PACK", "latin1");
  const fields = Buffer.alloc(8);
  fields.writeUInt32BE(2, 0);
  fields.writeUInt32BE(objectCount, 4);
  return Buffer.concat([magic, fields]);
}
1334
/**
 * Encode one packed object: its varint (type, uncompressed size) header
 * followed by the zlib-deflated content. Always undeltified — deltas are
 * never emitted (spec §3.4 asymmetric kernel). Records the deflate input and
 * output byte counts via `count`.
 *
 * @param {string} type - Object type name; looked up in `TYPE_CODE`.
 * @param {Buffer} content - Raw object content.
 * @returns {Buffer} Header bytes followed by the deflated content.
 */
function packObject(type, content) {
  const compressed = deflateSync(content);
  count("deflateInputBytes", content.length);
  count("deflateOutputBytes", compressed.length);
  const header = encodeObjectHeader(TYPE_CODE[type], content.length);
  return Buffer.concat([header, compressed]);
}
1342
/**
 * Serialize objects into a self-contained, **undeltified** v2 packfile: the
 * header, each object's (header + deflated content), then a trailing SHA-1 of
 * all preceding bytes.
 *
 * The serve path streams the same primitives object-by-object (object-store
 * `buildPack`); this all-at-once form builds test packs and the empty pack.
 *
 * @param {Array<{type: string, content: Buffer}>} objects - Objects to pack, in order.
 * @returns {Buffer} The complete pack including its 20-byte trailer.
 */
function writePack(objects) {
  count("writePackCalls");
  const body = Buffer.concat([
    packHeader(objects.length),
    ...Array.from(objects, (obj) => packObject(obj.type, obj.content))
  ]);
  const trailer = createHash("sha1").update(body).digest();
  return Buffer.concat([body, trailer]);
}
836
1357
 
837
1358
  //#endregion
838
1359
  //#region src/store/reachability.ts
839
1360
  /** Objects looked up per round-trip when chunking tree/blob existence queries. */
840
1361
  const LOOKUP_BATCH = 1e3;
841
1362
  /** Split `items` into consecutive batches of at most `size`. */
842
- function batches(items, size) {
1363
+ function batches$1(items, size) {
843
1364
  const out = [];
844
1365
  for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
845
1366
  return out;
@@ -888,7 +1409,7 @@ async function reachableClosure(db, id, roots, omitBlobs) {
888
1409
  present
889
1410
  };
890
1411
  const blobCandidates = /* @__PURE__ */ new Set();
891
- for (const batch of batches(treeOids, LOOKUP_BATCH)) {
1412
+ for (const batch of batches$1(treeOids, LOOKUP_BATCH)) {
892
1413
  const trees = await db.selectFrom("git_object").select("content").where("repo_id", "=", id).where("oid", "in", batch).execute();
893
1414
  for (const t of trees) for (const blob of treeBlobOids(t.content)) blobCandidates.add(blob);
894
1415
  }
@@ -897,7 +1418,7 @@ async function reachableClosure(db, id, roots, omitBlobs) {
897
1418
  present
898
1419
  };
899
1420
  const presentBlobs = /* @__PURE__ */ new Set();
900
- for (const batch of batches([...blobCandidates], LOOKUP_BATCH)) {
1421
+ for (const batch of batches$1([...blobCandidates], LOOKUP_BATCH)) {
901
1422
  const rows = await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "in", batch.map((h) => Buffer.from(h, "hex"))).execute();
902
1423
  for (const r of rows) presentBlobs.add(r.oid.toString("hex"));
903
1424
  }
@@ -907,48 +1428,551 @@ async function reachableClosure(db, id, roots, omitBlobs) {
907
1428
  present
908
1429
  };
909
1430
  }
1431
+ /** Does `want`'s commit/tag ancestry (edge kinds 2,5) reach any oid in `common`?
1432
+ * The ancestry-only CTE that underpins `readyToGiveUp`. */
1433
+ async function ancestryReachesCommon(db, id, want, commonBufs) {
1434
+ if (commonBufs.length === 0) return false;
1435
+ const commons = sql.join(commonBufs.map((b) => sql`(${b}::bytea)`));
1436
+ return (await sql`
1437
+ with recursive anc(oid) as (
1438
+ select ${Buffer.from(want, "hex")}::bytea
1439
+ union
1440
+ select e.child from git_edge e
1441
+ join anc a on e.parent = a.oid
1442
+ where e.repo_id = ${id}::bigint
1443
+ and e.kind in (${EDGE_KIND.COMMIT_PARENT}, ${EDGE_KIND.TAG_TARGET})
1444
+ )
1445
+ select exists (
1446
+ select 1 from anc join (values ${commons}) as c(oid) on c.oid = anc.oid
1447
+ ) as reached
1448
+ `.execute(db)).rows[0]?.reached ?? false;
1449
+ }
910
1450
 
911
1451
  //#endregion
912
- //#region src/store/repo-resolver.ts
1452
+ //#region src/store/object-store.ts
1453
+ /** Objects fetched per round-trip when streaming content into a served pack. */
1454
+ const PACK_BATCH = 1e3;
913
1455
  /**
914
- * Resolves a wire repo name to its `repos.id` surrogate, memoized. The object and
915
- * ref stores both key on the bigint `repo_id`, so each builds one of these as its
916
- * name→id boundary.
1456
+ * A stored object at/over this size is read in size-bounded chunks, never in one
1457
+ * round-trip. The porsager driver decodes a `bytea` RESULT from its text form
1458
+ * (`\x` + hex, DOUBLE the byte length), so a value over ~256MiB would build a JS
1459
+ * string past V8's max length and throw on the SERVE path — the read-side mirror of
1460
+ * the ingest string-cap that binary COPY fixed (a07/blb01). Kept well under the cap
1461
+ * so the doubled hex of a single chunk stays safely below it.
1462
+ */
1463
+ const BIG_OBJECT_BYTES = 2e8;
1464
+ const READ_CHUNK_BYTES = 1e8;
1465
+ /** Split `items` into consecutive batches of at most `size`. */
1466
+ function batches(items, size) {
1467
+ const out = [];
1468
+ for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
1469
+ return out;
1470
+ }
1471
+ const TYPE_TO_CODE = {
1472
+ blob: PACK_OBJ_TYPE.BLOB,
1473
+ commit: PACK_OBJ_TYPE.COMMIT,
1474
+ tag: PACK_OBJ_TYPE.TAG,
1475
+ tree: PACK_OBJ_TYPE.TREE
1476
+ };
1477
+ const CODE_TO_TYPE = /* @__PURE__ */ new Map([
1478
+ [PACK_OBJ_TYPE.BLOB, "blob"],
1479
+ [PACK_OBJ_TYPE.COMMIT, "commit"],
1480
+ [PACK_OBJ_TYPE.TAG, "tag"],
1481
+ [PACK_OBJ_TYPE.TREE, "tree"]
1482
+ ]);
1483
+ function typeFromCode(code) {
1484
+ const type = CODE_TO_TYPE.get(code);
1485
+ if (!type) throw new Error(`object-store: unknown git object type code ${code}`);
1486
+ return type;
1487
+ }
1488
+ /**
1489
+ * Postgres-backed git object store. Each immutable object is one row in the
1490
+ * per-repo, HASH-partitioned `git_object` (raw 20-byte `bytea` OID, pack type
1491
+ * code, raw inflated body lz4-TOASTed Postgres-side) — packs are a transport
1492
+ * encoding produced on serve and consumed on ingest, never stored. So a fetch is
1493
+ * a primary-key point-read, not a whole-pack re-inflate.
917
1494
  *
918
- * The mapping is immutable once a repo exists (ids are `generated always`, names
919
- * are unique), so a found id is cached for the resolver's lifetime keeping the
920
- * per-object hot path (getObject) at one point-read, not a join. Misses are NEVER
921
- * cached: a name the lookup didn't find may be created by a later push, and a
922
- * cached `null` would mask it.
1495
+ * The store is the wire→DB boundary: callers speak hex OIDs and the wire repo
1496
+ * name; OIDs are coerced hex↔raw here, and the repo name is resolved to its
1497
+ * bigint surrogate (memoized) here.
1498
+ */
1499
+ function createObjectStore(pg) {
1500
+ const db = initKysely(pg);
1501
+ const repos = createRepoResolver(db);
1502
+ const store = {
1503
+ /**
1504
+ * Build the served pack for a fetch: the want-closure minus the have-closure,
1505
+ * re-adding the explicit wants (promisor lazy-fetch roots — a partial clone may
1506
+ * want a blob reachable from a tree it already has, so it must not be
1507
+ * subtracted). The object count is known from the closure before any content is
1508
+ * read; content then streams in keyset batches into the pack encoder, so only
1509
+ * one batch of inflated content is ever held (never the whole repo).
1510
+ */
1511
+ async buildPack(repoId, wants, haves, omitBlobs, includeTag = false) {
1512
+ const id = await repos.resolveRepoId(repoId);
1513
+ if (id === null || wants.length === 0) return writePack([]);
1514
+ const served = await withPhase("closure", async () => {
1515
+ const want = await reachableClosure(db, id, wants, omitBlobs);
1516
+ if (want.missing.size > 0) throw new WantNotFoundError([...want.missing]);
1517
+ const have = haves.length > 0 ? await reachableClosure(db, id, haves, omitBlobs) : {
1518
+ missing: /* @__PURE__ */ new Set(),
1519
+ present: /* @__PURE__ */ new Set()
1520
+ };
1521
+ const set = /* @__PURE__ */ new Set();
1522
+ for (const o of want.present) if (!have.present.has(o)) set.add(o);
1523
+ if (omitBlobs) {
1524
+ for (const w of wants) if (want.present.has(w)) set.add(w);
1525
+ }
1526
+ if (includeTag) await augmentWithTags(id, set);
1527
+ return [...set];
1528
+ });
1529
+ return withPhase("pack-encode", async () => {
1530
+ const hash = createHash("sha1");
1531
+ const parts = [];
1532
+ const push = (chunk) => {
1533
+ hash.update(chunk);
1534
+ parts.push(chunk);
1535
+ };
1536
+ push(packHeader(served.length));
1537
+ for (const batch of batches(served, PACK_BATCH)) {
1538
+ const rows = await db.selectFrom("git_object").select(["oid", "type"]).select(sql`octet_length(content)`.as("size")).select(sql`case when octet_length(content) < ${BIG_OBJECT_BYTES} then content end`.as("content")).where("repo_id", "=", id).where("oid", "in", batch.map((h) => Buffer.from(h, "hex"))).execute();
1539
+ for (const r of rows) {
1540
+ const content = r.content ?? await readContentChunked(id, r.oid, r.size);
1541
+ push(packObject(typeFromCode(r.type), content));
1542
+ }
1543
+ }
1544
+ const pack = Buffer.concat([...parts, hash.digest()]);
1545
+ count("objectsServed", served.length);
1546
+ count("packBytes", pack.length);
1547
+ return pack;
1548
+ });
1549
+ },
1550
+ /** The subset of `haves` this repo actually has — the negotiation common set,
1551
+ * in one indexed lookup rather than a per-have probe. */
1552
+ async commonHaves(repoId, haves) {
1553
+ if (haves.length === 0) return [];
1554
+ const id = await repos.resolveRepoId(repoId);
1555
+ if (id === null) return [];
1556
+ const rows = await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "in", haves.map((h) => Buffer.from(h, "hex"))).execute();
1557
+ const present = new Set(rows.map((r) => r.oid.toString("hex")));
1558
+ return haves.filter((h) => present.has(h));
1559
+ },
1560
+ async getObject(repoId, oid) {
1561
+ count("getObjectCalls");
1562
+ const id = await repos.resolveRepoId(repoId);
1563
+ if (id === null) return null;
1564
+ const row = await db.selectFrom("git_object").select(["type"]).select(sql`octet_length(content)`.as("size")).select(sql`case when octet_length(content) < ${BIG_OBJECT_BYTES} then content end`.as("content")).where("repo_id", "=", id).where("oid", "=", Buffer.from(oid, "hex")).executeTakeFirst();
1565
+ if (!row) return null;
1566
+ const content = row.content ?? await readContentChunked(id, Buffer.from(oid, "hex"), row.size);
1567
+ count("objectBytesRead", content.length);
1568
+ return {
1569
+ content,
1570
+ type: typeFromCode(row.type)
1571
+ };
1572
+ },
1573
+ async hasObject(repoId, oid) {
1574
+ const id = await repos.resolveRepoId(repoId);
1575
+ if (id === null) return false;
1576
+ return await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "=", Buffer.from(oid, "hex")).executeTakeFirst() !== void 0;
1577
+ },
1578
+ /**
1579
+ * Ingest a received pack: parse it — resolving in-pack deltas, and thin-pack
1580
+ * REF_DELTA bases against objects already in this repo — then insert every
1581
+ * resolved object as a row.
1582
+ */
1583
+ async ingestPack(repoId, packBytes) {
1584
+ return { oids: await insertObjects(await repos.ensureRepoId(repoId), (await readPack(packBytes, (oid) => store.getObject(repoId, oid))).map((p) => ({
1585
+ content: p.content,
1586
+ type: p.type
1587
+ }))) };
1588
+ },
1589
+ /**
1590
+ * Connectivity check (spec §5.2): is every object reachable from `oid` present?
1591
+ * A push whose new tip fails this references an object the pack neither carried
1592
+ * nor delta-resolved, and must be rejected. Delegates to the one reachability
1593
+ * engine (`reachableClosure`) shared with clone/fetch, so connectivity and
1594
+ * serving can never disagree on what is reachable. Full-closure (matching the
1595
+ * old walk's scope); the bounded "new objects only" form is a deferred
1596
+ * optimization (OQ-14).
1597
+ */
1598
+ async isConnected(repoId, oid) {
1599
+ const id = await repos.resolveRepoId(repoId);
1600
+ if (id === null) return false;
1601
+ const { missing } = await reachableClosure(db, id, [oid], false);
1602
+ return missing.size === 0;
1603
+ },
1604
+ /** Seed objects directly (the differential harness + perf bench path): insert
1605
+ * each as a row, idempotently. Equivalent to `ingestPack` minus the pack codec. */
1606
+ async putPack(repoId, objects) {
1607
+ return { oids: await insertObjects(await repos.ensureRepoId(repoId), objects) };
1608
+ },
1609
+ /**
1610
+ * git's `ok_to_give_up`: ready once every want reaches a common have by commit/
1611
+ * tag ancestry (the haves form a cut below all wants, so the delta is well-
1612
+ * defined). One ancestry CTE (edge kinds 2,5) per want replaces `reachesCommon`'s
1613
+ * per-object BFS. Generation-number pruning is a deferred §6.4 lever.
1614
+ */
1615
+ async readyToGiveUp(repoId, wants, common) {
1616
+ if (common.length === 0) return false;
1617
+ const id = await repos.resolveRepoId(repoId);
1618
+ if (id === null) return false;
1619
+ const commonBufs = common.map((h) => Buffer.from(h, "hex"));
1620
+ for (const want of wants) if (!await ancestryReachesCommon(db, id, want, commonBufs)) return false;
1621
+ return true;
1622
+ }
1623
+ };
1624
+ /** Insert objects as rows + their derived edges, idempotent (re-sent objects are
1625
+ * skipped). Each object row and its complete edge set go in ONE transaction from
1626
+ * ONE derivation (§10.1) — so no object ever exists without its edges. Edge
1627
+ * derivation validates at the boundary and throws on malformed content (§5.1),
1628
+ * aborting the ingest before any row lands. Returns every object's hex OID, in
1629
+ * input order. */
1630
+ async function insertObjects(id, objects) {
1631
+ const entries = objects.map((obj) => {
1632
+ validateObject(obj.type, obj.content);
1633
+ const hex = computeOid(obj.type, obj.content);
1634
+ const oid = Buffer.from(hex, "hex");
1635
+ return {
1636
+ edges: deriveEdges(obj.type, obj.content).map((e) => ({
1637
+ child: Buffer.from(e.child, "hex"),
1638
+ kind: e.kind,
1639
+ parent: oid,
1640
+ repo_id: id
1641
+ })),
1642
+ hex,
1643
+ row: {
1644
+ content: obj.content,
1645
+ oid,
1646
+ repo_id: id,
1647
+ size: obj.content.length,
1648
+ type: TYPE_TO_CODE[obj.type]
1649
+ }
1650
+ };
1651
+ });
1652
+ if (entries.length === 0) return [];
1653
+ const objectRows = entries.map((e) => [
1654
+ {
1655
+ t: "int8",
1656
+ v: e.row.repo_id
1657
+ },
1658
+ {
1659
+ t: "bytea",
1660
+ v: e.row.oid
1661
+ },
1662
+ {
1663
+ t: "int2",
1664
+ v: e.row.type
1665
+ },
1666
+ {
1667
+ t: "int4",
1668
+ v: e.row.size
1669
+ },
1670
+ {
1671
+ t: "bytea",
1672
+ v: e.row.content
1673
+ }
1674
+ ]);
1675
+ const edgeRows = entries.flatMap((e) => e.edges.map((edge) => [
1676
+ {
1677
+ t: "int8",
1678
+ v: edge.repo_id
1679
+ },
1680
+ {
1681
+ t: "bytea",
1682
+ v: edge.parent
1683
+ },
1684
+ {
1685
+ t: "bytea",
1686
+ v: edge.child
1687
+ },
1688
+ {
1689
+ t: "int2",
1690
+ v: edge.kind
1691
+ }
1692
+ ]));
1693
+ await pg.begin(async (tx) => {
1694
+ await copyInsert(tx, "git_object", [
1695
+ "repo_id",
1696
+ "oid",
1697
+ "type",
1698
+ "size",
1699
+ "content"
1700
+ ], objectRows);
1701
+ await copyInsert(tx, "git_edge", [
1702
+ "repo_id",
1703
+ "parent",
1704
+ "child",
1705
+ "kind"
1706
+ ], edgeRows);
1707
+ });
1708
+ await pg`update repos set last_pushed_at = clock_timestamp() where id = ${id}::bigint`;
1709
+ return entries.map((e) => e.hex);
1710
+ }
1711
+ /**
1712
+ * Read a single object's `content` in size-bounded chunks via `substring`, so a
1713
+ * blob larger than V8's max string length never reaches the porsager driver as one
1714
+ * over-cap `\x`+hex string — the serve-side mirror of the binary COPY ingest. Used
1715
+ * only for objects at/over BIG_OBJECT_BYTES (smaller content comes back inline).
1716
+ */
1717
+ async function readContentChunked(id, oid, size) {
1718
+ const parts = [];
1719
+ for (let off = 0; off < size; off += READ_CHUNK_BYTES) {
1720
+ const len = Math.min(READ_CHUNK_BYTES, size - off);
1721
+ const row = await db.selectFrom("git_object").select(sql`substring(content from ${off + 1} for ${len})`.as("chunk")).where("repo_id", "=", id).where("oid", "=", oid).executeTakeFirstOrThrow();
1722
+ parts.push(row.chunk);
1723
+ }
1724
+ return Buffer.concat(parts);
1725
+ }
1726
+ /**
1727
+ * include-tag augmentation (§6.5): annotated tags whose peeled target is in the
1728
+ * served set get their tag OBJECTS added — transitively over `kind=5`, so a
1729
+ * tag-of-tag chain ships every tag object in it (each must be present for the
1730
+ * client's fsck). Annotated tags are few, so we fetch them all and filter by
1731
+ * served membership app-side rather than feeding the whole served set into SQL.
1732
+ * Mutates `served`. Peeled targets are already in `served` (they qualified the
1733
+ * tag), so re-adding the chain's terminal commit is a no-op.
1734
+ */
1735
+ async function augmentWithTags(id, served) {
1736
+ const qualifying = (await db.selectFrom("git_ref").select(["oid", "peeled_oid"]).where("repo_id", "=", id).where("oid", "is not", null).where("peeled_oid", "is not", null).execute()).filter((r) => r.peeled_oid !== null && served.has(r.peeled_oid.toString("hex"))).map((r) => r.oid.toString("hex"));
1737
+ if (qualifying.length === 0) return;
1738
+ const chain = await sql`
1739
+ with recursive tags(oid) as (
1740
+ select oid from (values ${sql.join(qualifying.map((r) => sql`(${Buffer.from(r, "hex")}::bytea)`))}) as roots(oid)
1741
+ union
1742
+ select e.child from git_edge e
1743
+ join tags t on e.parent = t.oid
1744
+ where e.repo_id = ${id}::bigint and e.kind = ${EDGE_KIND.TAG_TARGET}
1745
+ )
1746
+ select oid from tags
1747
+ `.execute(db);
1748
+ for (const r of chain.rows) served.add(r.oid.toString("hex"));
1749
+ }
1750
+ return store;
1751
+ }
1752
+
1753
+ //#endregion
1754
+ //#region src/store/refs-store.ts
1755
+ const isZero = (oid) => /^0{40}$/.test(oid);
1756
+ const toOid = (hex) => Buffer.from(hex, "hex");
1757
+ function classifyRefUpdate(cmd) {
1758
+ const create = isZero(cmd.oldOid);
1759
+ if (isZero(cmd.newOid)) return {
1760
+ kind: "delete",
1761
+ oldOid: create ? null : toOid(cmd.oldOid)
1762
+ };
1763
+ if (create) return {
1764
+ kind: "create",
1765
+ newOid: toOid(cmd.newOid)
1766
+ };
1767
+ return {
1768
+ kind: "update",
1769
+ newOid: toOid(cmd.newOid),
1770
+ oldOid: toOid(cmd.oldOid)
1771
+ };
1772
+ }
1773
+ /**
1774
+ * The peeled target of a ref oid: if it is an annotated tag, follow the `kind=5`
1775
+ * (tag→target) chain — while the current node is a tag — to its terminal non-tag
1776
+ * object. A branch or a lightweight tag (the oid is not a tag object) peels to
1777
+ * `null` → `ls-refs` emits no `peeled` line. git imposes NO depth bound on ref
1778
+ * peeling, so neither do we: the `is_tag` predicate terminates the recursion at the
1779
+ * first non-tag, and a content-addressed tag chain is acyclic (an oid cannot embed
1780
+ * its own hash) hence finite. Computed at ref-write, so the tag's edges + target
1781
+ * are already present (connectivity proved the chain on push). Replaces the
1782
+ * per-`ls-refs` app-side tag walk.
1783
+ */
1784
+ async function peelRef(exec, repoId, oid) {
1785
+ return (await sql`
1786
+ with recursive chain(oid, is_tag, depth) as (
1787
+ select o.oid, o.type = ${PACK_OBJ_TYPE.TAG}, 0
1788
+ from git_object o
1789
+ where o.repo_id = ${repoId}::bigint and o.oid = ${oid}::bytea
1790
+ union all
1791
+ select e.child, co.type = ${PACK_OBJ_TYPE.TAG}, c.depth + 1
1792
+ from chain c
1793
+ join git_edge e
1794
+ on e.repo_id = ${repoId}::bigint
1795
+ and e.parent = c.oid
1796
+ and e.kind = ${EDGE_KIND.TAG_TARGET}
1797
+ left join git_object co
1798
+ on co.repo_id = ${repoId}::bigint and co.oid = e.child
1799
+ where c.is_tag
1800
+ )
1801
+ select oid as peeled from chain where not is_tag and depth > 0
1802
+ order by depth desc limit 1
1803
+ `.execute(exec)).rows[0]?.peeled ?? null;
1804
+ }
1805
+ /** Sentinel thrown inside a transaction to roll an atomic batch all the way back. */
1806
+ var AtomicAbort = class extends Error {};
1807
+ /**
1808
+ * A repo is born with `HEAD → refs/heads/main`, mirroring `git init --bare`
1809
+ * (init.defaultBranch). Established lazily on the first ref write — a repo's birth
1810
+ * is its first push — and never overwritten (do-nothing on conflict). So once the
1811
+ * default branch exists `ls-refs` advertises HEAD and a clone checks it out;
1812
+ * before then HEAD dangles unadvertised, exactly like a bare repo whose HEAD
1813
+ * points at an unborn `main`.
1814
+ */
1815
+ const DEFAULT_HEAD_TARGET = "refs/heads/main";
1816
+ async function ensureHeadDefault(exec, repoId) {
1817
+ await exec.insertInto("git_ref").values({
1818
+ name: "HEAD",
1819
+ repo_id: repoId,
1820
+ symref_target: DEFAULT_HEAD_TARGET
1821
+ }).onConflict((oc) => oc.columns(["repo_id", "name"]).doNothing()).execute();
1822
+ }
1823
+ /**
1824
+ * Stamp the repo's GC-activity watermark (`repos.last_pushed_at`) — a ref change
1825
+ * makes the prior tip a reclaim candidate, so the self-scheduling drain must judge
1826
+ * the repo eligible (gc-scheduler.ts §2). Called only when a ref ROW actually
1827
+ * changed (not on a no-op success like deleting an absent ref), so non-mutating
1828
+ * traffic never re-triggers GC. A tiny single-row HOT update on the churn-tuned
1829
+ * `repos` (0004); `clock_timestamp()` is the server-side wall clock.
1830
+ */
1831
+ async function stampPushed(exec, repoId) {
1832
+ await exec.updateTable("repos").set({ last_pushed_at: sql`clock_timestamp()` }).where("id", "=", repoId).execute();
1833
+ }
1834
+ /**
1835
+ * Apply one ref change by compare-and-swap against the client's advertised old
1836
+ * oid, on the given executor (the db, or a transaction for an atomic batch).
1837
+ * Returns the report-status `ok` and whether a row actually changed (`mutated`).
1838
+ * Non-ff is accepted by default — CAS guards concurrency, not ancestry (spec §3.6).
1839
+ */
1840
+ async function casRefUpdate(exec, repoId, cmd) {
1841
+ const name = cmd.ref;
1842
+ const op = classifyRefUpdate(cmd);
1843
+ switch (op.kind) {
1844
+ case "create": {
1845
+ const peeled = await peelRef(exec, repoId, op.newOid);
1846
+ const mutated = (await exec.insertInto("git_ref").values({
1847
+ name,
1848
+ oid: op.newOid,
1849
+ peeled_oid: peeled,
1850
+ repo_id: repoId
1851
+ }).onConflict((oc) => oc.doNothing()).returningAll().execute()).length === 1;
1852
+ return {
1853
+ mutated,
1854
+ ok: mutated
1855
+ };
1856
+ }
1857
+ case "delete": {
1858
+ let q = exec.deleteFrom("git_ref").where("repo_id", "=", repoId).where("name", "=", name);
1859
+ if (op.oldOid !== null) q = q.where("oid", "=", op.oldOid);
1860
+ const mutated = (await q.returningAll().execute()).length === 1;
1861
+ return {
1862
+ mutated,
1863
+ ok: op.oldOid === null || mutated
1864
+ };
1865
+ }
1866
+ case "update": {
1867
+ const peeled = await peelRef(exec, repoId, op.newOid);
1868
+ const mutated = (await exec.updateTable("git_ref").set({
1869
+ oid: op.newOid,
1870
+ peeled_oid: peeled,
1871
+ symref_target: null
1872
+ }).where("repo_id", "=", repoId).where("name", "=", name).where("oid", "=", op.oldOid).returningAll().execute()).length === 1;
1873
+ return {
1874
+ mutated,
1875
+ ok: mutated
1876
+ };
1877
+ }
1878
+ }
1879
+ }
1880
+ /**
1881
+ * Postgres-backed git refs: direct refs (name → oid) and symbolic refs
1882
+ * (HEAD → refs/heads/...). Push applies ref changes through `applyRefUpdates`;
1883
+ * `setRef`/`setSymref` are the seeding helpers.
923
1884
  *
924
- * Reads resolve (lookup; `null` the repo has never been written, i.e. empty).
925
- * Writes ensure (race-safe get-or-create).
1885
+ * Like the object store, this is the wire→DB boundary: the repo name resolves to
1886
+ * its bigint surrogate (memoized) here, ref names cast to their branded column
1887
+ * type, and oids coerce hex↔raw `bytea`.
926
1888
  */
927
- function createRepoResolver(db) {
928
- const cache = /* @__PURE__ */ new Map();
1889
+ function createRefStore(pg) {
1890
+ const db = initKysely(pg);
1891
+ const repos = createRepoResolver(db);
929
1892
  return {
930
- /** The repo's id, creating the row if absent. Race-safe under concurrent
931
- * first-pushes, and avoids a no-op UPDATE on the common (exists) path. */
932
- async ensureRepoId(name) {
933
- const cached = cache.get(name);
934
- if (cached !== void 0) return cached;
935
- const existing = await db.selectFrom("repos").select("id").where("name", "=", name).executeTakeFirst();
936
- if (existing) {
937
- cache.set(name, existing.id);
938
- return existing.id;
1893
+ /**
1894
+ * Apply a batch of ref CAS updates. Non-atomic (the default push mode): each
1895
+ * ref is independent and the returned flags are per-command. Atomic
1896
+ * (`--atomic`): all-or-nothing in one transaction — if any CAS fails, every
1897
+ * command is rolled back and the result is all-false (spec §3.6).
1898
+ */
1899
+ async applyRefUpdates(repoId, commands, atomic) {
1900
+ const id = await repos.ensureRepoId(repoId);
1901
+ await ensureHeadDefault(db, id);
1902
+ if (!atomic) {
1903
+ const results = [];
1904
+ let mutated = false;
1905
+ for (const cmd of commands) {
1906
+ const r = await casRefUpdate(db, id, cmd);
1907
+ results.push(r.ok);
1908
+ if (r.mutated) mutated = true;
1909
+ }
1910
+ if (mutated) await stampPushed(db, id);
1911
+ return results;
939
1912
  }
940
- const id = (await db.insertInto("repos").values({ name }).onConflict((oc) => oc.doNothing()).returning("id").executeTakeFirst())?.id ?? (await db.selectFrom("repos").select("id").where("name", "=", name).executeTakeFirstOrThrow()).id;
941
- cache.set(name, id);
942
- return id;
1913
+ const ordered = [...commands].sort((a, b) => a.ref < b.ref ? -1 : a.ref > b.ref ? 1 : 0);
1914
+ let anyMutated = false;
1915
+ try {
1916
+ await db.transaction().execute(async (trx) => {
1917
+ for (const cmd of ordered) {
1918
+ const r = await casRefUpdate(trx, id, cmd);
1919
+ if (!r.ok) throw new AtomicAbort();
1920
+ if (r.mutated) anyMutated = true;
1921
+ }
1922
+ });
1923
+ } catch (error) {
1924
+ if (error instanceof AtomicAbort) return commands.map(() => false);
1925
+ throw error;
1926
+ }
1927
+ if (anyMutated) await stampPushed(db, id);
1928
+ return commands.map(() => true);
943
1929
  },
944
- /** The repo's id, or `null` if it has never been written to. */
945
- async resolveRepoId(name) {
946
- const cached = cache.get(name);
947
- if (cached !== void 0) return cached;
948
- const row = await db.selectFrom("repos").select("id").where("name", "=", name).executeTakeFirst();
949
- if (!row) return null;
950
- cache.set(name, row.id);
951
- return row.id;
1930
+ async getSymref(repoId, name) {
1931
+ const id = await repos.resolveRepoId(repoId);
1932
+ if (id === null) return null;
1933
+ return (await db.selectFrom("git_ref").select("symref_target").where("repo_id", "=", id).where("name", "=", name).executeTakeFirst())?.symref_target ?? null;
1934
+ },
1935
+ /** Direct refs (name → oid + peeled tag target), sorted by name. Excludes
1936
+ * symbolic refs. */
1937
+ async listRefs(repoId) {
1938
+ const id = await repos.resolveRepoId(repoId);
1939
+ if (id === null) return [];
1940
+ return (await db.selectFrom("git_ref").select([
1941
+ "name",
1942
+ "oid",
1943
+ "peeled_oid"
1944
+ ]).where("repo_id", "=", id).where("oid", "is not", null).orderBy("name").execute()).map((r) => ({
1945
+ name: r.name,
1946
+ oid: r.oid.toString("hex"),
1947
+ peeled: r.peeled_oid ? r.peeled_oid.toString("hex") : void 0
1948
+ }));
1949
+ },
1950
+ async setRef(repoId, name, oid) {
1951
+ const id = await repos.ensureRepoId(repoId);
1952
+ const value = toOid(oid);
1953
+ const peeled = await peelRef(db, id, value);
1954
+ await db.insertInto("git_ref").values({
1955
+ name,
1956
+ oid: value,
1957
+ peeled_oid: peeled,
1958
+ repo_id: id
1959
+ }).onConflict((oc) => oc.columns(["repo_id", "name"]).doUpdateSet({
1960
+ oid: value,
1961
+ peeled_oid: peeled,
1962
+ symref_target: null
1963
+ })).execute();
1964
+ },
1965
+ async setSymref(repoId, name, target) {
1966
+ const id = await repos.ensureRepoId(repoId);
1967
+ await db.insertInto("git_ref").values({
1968
+ name,
1969
+ repo_id: id,
1970
+ symref_target: target
1971
+ }).onConflict((oc) => oc.columns(["repo_id", "name"]).doUpdateSet({
1972
+ oid: null,
1973
+ peeled_oid: null,
1974
+ symref_target: target
1975
+ })).execute();
952
1976
  }
953
1977
  };
954
1978
  }
@@ -1268,6 +2292,21 @@ async function readRequestBody(c) {
1268
2292
  function assertProtocolV2(header) {
1269
2293
  if (!(header ?? "").split(":").map((s) => s.trim()).includes("version=2")) throw new GitProtocolError("pggit serves fetch over git protocol v2 only; set protocol.version=2 (git ≥ 2.26 negotiates it by default)");
1270
2294
  }
2295
+ const GIT_PATH_SUFFIXES = [
2296
+ ["/info/refs", "info/refs"],
2297
+ ["/git-upload-pack", "git-upload-pack"],
2298
+ ["/git-receive-pack", "git-receive-pack"]
2299
+ ];
2300
+ function parseGitPath(path) {
2301
+ for (const [suffix, service] of GIT_PATH_SUFFIXES) if (path.endsWith(suffix)) {
2302
+ const repoId = path.slice(1, -suffix.length);
2303
+ return repoId.length > 0 ? {
2304
+ repoId,
2305
+ service
2306
+ } : null;
2307
+ }
2308
+ return null;
2309
+ }
1271
2310
  function backendFor(deps, repoId) {
1272
2311
  return {
1273
2312
  buildPack: (wants, haves, omitBlobs, includeTag) => deps.objects.buildPack(repoId, wants, haves, omitBlobs, includeTag),
@@ -1305,7 +2344,8 @@ async function receivePackAdvertBody(deps, repoId) {
1305
2344
  }
1306
2345
  /**
1307
2346
  * Build the git-remote Hono app (smart-HTTP, protocol v2 fetch). Mountable into
1308
- * a host app via `host.route("/git", createGitApp(deps))`; the host owns the
2347
+ * a host app via `host.mount("/git", createGitApp(deps).fetch)` (mount, NOT route
2348
+ * mount strips the prefix so the catch-all parses a mount-relative path); the host owns the
1309
2349
  * Postgres lifecycle behind `deps`.
1310
2350
  */
1311
2351
  function createGitApp(deps, opts = {}) {
@@ -1321,7 +2361,9 @@ function createGitApp(deps, opts = {}) {
1321
2361
  return c.text("internal server error", 500);
1322
2362
  });
1323
2363
  app.get("/health", (c) => c.text("ok"));
1324
- app.get("/:repo/info/refs", async (c) => {
2364
+ app.get("/*", async (c) => {
2365
+ const parsed = parseGitPath(c.req.path);
2366
+ if (parsed?.service !== "info/refs") return c.notFound();
1325
2367
  const service = c.req.query("service");
1326
2368
  if (service === "git-upload-pack") {
1327
2369
  assertProtocolV2(c.req.header("git-protocol"));
@@ -1331,7 +2373,7 @@ function createGitApp(deps, opts = {}) {
1331
2373
  });
1332
2374
  }
1333
2375
  if (service === "git-receive-pack") {
1334
- const body = await receivePackAdvertBody(deps, c.req.param("repo"));
2376
+ const body = await receivePackAdvertBody(deps, parsed.repoId);
1335
2377
  return c.body(toArrayBuffer(body), 200, {
1336
2378
  "Cache-Control": "no-cache",
1337
2379
  "Content-Type": "application/x-git-receive-pack-advertisement"
@@ -1339,24 +2381,46 @@ function createGitApp(deps, opts = {}) {
1339
2381
  }
1340
2382
  return c.text(`unsupported service ${JSON.stringify(service)}`, 403);
1341
2383
  });
1342
- app.post("/:repo/git-upload-pack", async (c) => {
1343
- const out = await handleUploadPack(await readRequestBody(c), backendFor(deps, c.req.param("repo")));
1344
- count("wireBytes", out.length);
1345
- return c.body(toArrayBuffer(out), 200, {
1346
- "Cache-Control": "no-cache",
1347
- "Content-Type": "application/x-git-upload-pack-result"
1348
- });
1349
- });
1350
- app.post("/:repo/git-receive-pack", async (c) => {
1351
- const out = await handleReceivePack(await readRequestBody(c), receiveBackendFor(deps, c.req.param("repo")));
1352
- return c.body(toArrayBuffer(out), 200, {
1353
- "Cache-Control": "no-cache",
1354
- "Content-Type": "application/x-git-receive-pack-result"
1355
- });
2384
+ app.post("/*", async (c) => {
2385
+ const parsed = parseGitPath(c.req.path);
2386
+ if (!parsed) return c.notFound();
2387
+ if (parsed.service === "git-upload-pack") {
2388
+ const out = await handleUploadPack(await readRequestBody(c), backendFor(deps, parsed.repoId));
2389
+ count("wireBytes", out.length);
2390
+ return c.body(toArrayBuffer(out), 200, {
2391
+ "Cache-Control": "no-cache",
2392
+ "Content-Type": "application/x-git-upload-pack-result"
2393
+ });
2394
+ }
2395
+ if (parsed.service === "git-receive-pack") {
2396
+ const out = await handleReceivePack(await readRequestBody(c), receiveBackendFor(deps, parsed.repoId));
2397
+ return c.body(toArrayBuffer(out), 200, {
2398
+ "Cache-Control": "no-cache",
2399
+ "Content-Type": "application/x-git-receive-pack-result"
2400
+ });
2401
+ }
2402
+ return c.notFound();
1356
2403
  });
1357
2404
  return app;
1358
2405
  }
2406
+ /**
2407
+ * Build the git-app deps from a Postgres connection — the embed-into-a-host
2408
+ * factory. The host owns the `pg` lifecycle and mounts pggit with
2409
+ * `host.mount("/git", createGitApp(createGitDeps(pg)).fetch)` — `app.mount` strips
2410
+ * the mount prefix so the catch-all parse sees a mount-relative path (`app.route`
2411
+ * would not). `startServer` (server.ts) is the standalone equivalent of this same
2412
+ * composition. `snapshots` is always
2413
+ * included so a mounted host gets the queryable `repo_file` projection maintained
2414
+ * on push (the read surface).
2415
+ */
2416
+ function createGitDeps(pg) {
2417
+ return {
2418
+ objects: createObjectStore(pg),
2419
+ refs: createRefStore(pg),
2420
+ snapshots: createRepoFileProjection(pg)
2421
+ };
2422
+ }
1359
2423
 
1360
2424
  //#endregion
1361
- export { createGc, createGcScheduler, createGitApp };
2425
+ export { createGc, createGcScheduler, createGitApp, createGitDeps };
1362
2426
  //# sourceMappingURL=index.mjs.map