@usecontextlayer/pggit 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +14 -2
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1119 -55
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import { gunzipSync } from "node:zlib";
|
|
1
|
+
import { createInflate, deflateSync, gunzipSync } from "node:zlib";
|
|
2
2
|
import { Hono } from "hono";
|
|
3
3
|
import { cors } from "hono/cors";
|
|
4
4
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
5
5
|
import { performance } from "node:perf_hooks";
|
|
6
|
+
import { createHash } from "node:crypto";
|
|
6
7
|
import { Kysely, sql } from "kysely";
|
|
7
8
|
import { PostgresJSDialect } from "kysely-postgres-js";
|
|
8
9
|
|
|
@@ -592,6 +593,14 @@ var GitFormatError = class extends Error {
|
|
|
592
593
|
|
|
593
594
|
//#endregion
|
|
594
595
|
//#region src/object/object.ts
|
|
596
|
+
/**
 * Compute a git object ID: the SHA-1 over the loose-object encoding, i.e. the
 * header `"<type> <byteLength>\0"` immediately followed by the content bytes.
 *
 * @param type - Object type name written into the header (e.g. `"blob"`).
 * @param content - Object body; its `length` is the size recorded in the header.
 * @returns The 40-character lowercase hex digest.
 */
function computeOid(type, content) {
	const hasher = createHash("sha1");
	// Header first, then the body — both feed the same running digest.
	hasher.update(Buffer.from(`${type} ${content.length}\0`, "latin1"));
	hasher.update(content);
	return hasher.digest("hex");
}
|
|
595
604
|
/** OIDs in the leading `key <oid>` headers (up to the blank line) for given keys. */
|
|
596
605
|
function headerOids(content, keys) {
|
|
597
606
|
const oids = [];
|
|
@@ -626,12 +635,33 @@ function treeEntries(content) {
|
|
|
626
635
|
function isTreeEntryMode(mode) {
	// Strict comparison against the one accepted mode string; anything else
	// (other strings, numbers, zero-padded variants) is not a match.
	const subtreeMode = "40000";
	return mode === subtreeMode;
}
|
|
638
|
+
/** Collect the `oid` of every entry `treeEntries` yields for `content` (all entry kinds), preserving its order. */
function treeEntryOids(content) {
	const oids = [];
	for (const entry of treeEntries(content)) oids.push(entry.oid);
	return oids;
}
|
|
642
|
+
/** The OIDs carried by a commit's `parent` headers only — its `tree` header is deliberately left out (used for ancestry walks). */
function commitParents(content) {
	const wantedKeys = /* @__PURE__ */ new Set();
	wantedKeys.add("parent");
	return headerOids(content, wantedKeys);
}
|
|
629
646
|
/**
 * The OID from a commit's `tree` header (the first one `headerOids` reports).
 * Throws `GitFormatError("missing-tree-header")` when there is none.
 */
function commitTreeOid(content) {
	const treeOids = headerOids(content, /* @__PURE__ */ new Set(["tree"]));
	const rootTree = treeOids[0];
	if (rootTree) return rootTree;
	throw new GitFormatError("missing-tree-header", "commitTreeOid: commit has no tree header");
}
|
|
652
|
+
/**
 * The OIDs an object points at directly — the building block for reachability
 * enumeration (fetch, connectivity):
 *  - blob   → none;
 *  - commit → its `tree` and `parent` header OIDs;
 *  - tag    → its `object` header OID;
 *  - tree   → the OIDs of all its entries.
 * Any other `type` falls through and yields `undefined`.
 */
function referencedOids(type, content) {
	if (type === "blob") return [];
	if (type === "commit") return headerOids(content, /* @__PURE__ */ new Set(["tree", "parent"]));
	if (type === "tag") return headerOids(content, /* @__PURE__ */ new Set(["object"]));
	if (type === "tree") return treeEntryOids(content);
}
|
|
635
665
|
|
|
636
666
|
//#endregion
|
|
637
667
|
//#region src/repo-view/build-file-list.ts
|
|
@@ -796,10 +826,217 @@ async function copyInsert(tx, target, columns, rows) {
|
|
|
796
826
|
await tx.unsafe(`insert into ${target} (${cols}) select ${cols} from ${staging} on conflict do nothing`);
|
|
797
827
|
}
|
|
798
828
|
|
|
829
|
+
//#endregion
|
|
830
|
+
//#region src/store/repo-resolver.ts
|
|
831
|
+
/**
 * Build a memoizing resolver from a wire repo name to its `repos.id`.
 *
 * Found ids are cached for the lifetime of the returned resolver, so repeat
 * lookups cost no query. Misses are never cached: a name that is absent now
 * may be created by a later write, and a remembered `null` would hide it.
 *
 * @param db - Query builder used for the `repos` select/insert statements.
 * @returns `{ ensureRepoId, resolveRepoId }` — get-or-create for writes, plain
 *   lookup (`null` when absent) for reads.
 */
function createRepoResolver(db) {
	const idByName = /* @__PURE__ */ new Map();
	/** The `select id from repos where name = ?` query, not yet executed. */
	const selectByName = (name) => db.selectFrom("repos").select("id").where("name", "=", name);
	/** Cache `id` under `name` and hand it back. */
	const remember = (name, id) => {
		idByName.set(name, id);
		return id;
	};
	return {
		/** The repo's id, inserting the row when absent. Looks up first so the
		 * common (row exists) path issues no write; the insert ignores conflicts
		 * and re-selects if a concurrent writer created the row first. */
		async ensureRepoId(name) {
			const hit = idByName.get(name);
			if (hit !== undefined) return hit;
			const found = await selectByName(name).executeTakeFirst();
			if (found) return remember(name, found.id);
			const inserted = await db.insertInto("repos").values({ name }).onConflict((oc) => oc.doNothing()).returning("id").executeTakeFirst();
			const id = inserted?.id ?? (await selectByName(name).executeTakeFirstOrThrow()).id;
			return remember(name, id);
		},
		/** The repo's id, or `null` when no row exists for `name`. */
		async resolveRepoId(name) {
			const hit = idByName.get(name);
			if (hit !== undefined) return hit;
			const found = await selectByName(name).executeTakeFirst();
			return found ? remember(name, found.id) : null;
		}
	};
}
|
|
873
|
+
|
|
874
|
+
//#endregion
|
|
875
|
+
//#region src/repo-view/repo-file-projection.ts
|
|
876
|
+
/**
 * Write-only maintainer of the `repo_file` table: per repo and ref, one row per
 * path holding `(mode, blob_oid)`. This module exposes no read method; it only
 * drops or rebuilds rows. Blob bytes are not stored here — rows carry only the
 * blob's OID — so the projection can be dropped and rebuilt freely.
 *
 * The `repoId` argument of each method is the wire repo name; it is resolved
 * to the numeric `repo_id` through a memoizing repo resolver.
 *
 * @param pg - Postgres client; wrapped for query building and used directly
 *   for the rebuild transaction.
 */
function createRepoFileProjection(pg) {
	const db = initKysely(pg);
	const repos = createRepoResolver(db);

	/** One typed row for `copyInsert`, in column order. */
	const toRow = (id, refName, file) => [
		{ t: "int8", v: id },
		{ t: "text", v: refName },
		{ t: "text", v: file.path },
		{ t: "text", v: file.mode },
		// The hex OID is stored as raw bytes.
		{ t: "bytea", v: Buffer.from(file.blobOid, "hex") }
	];

	/** Delete every `repo_file` row of the repo (all refs). No-op for an unknown repo. */
	async function clearRepo(repoId) {
		const id = await repos.resolveRepoId(repoId);
		if (id === null) return;
		await db.deleteFrom("repo_file").where("repo_id", "=", id).execute();
	}

	/** Delete the rows of one ref (e.g. the branch was deleted). No-op for an unknown repo. */
	async function dropRefSnapshot(repoId, refName) {
		const id = await repos.resolveRepoId(repoId);
		if (id === null) return;
		await db.deleteFrom("repo_file").where("repo_id", "=", id).where("ref_name", "=", refName).execute();
	}

	/** Replace one ref's rows with `fileList.files`, creating the repo row if
	 * needed. The delete and the insert run inside a single `pg.begin` transaction. */
	async function rebuildRefSnapshot(repoId, refName, fileList) {
		const id = await repos.ensureRepoId(repoId);
		const rows = fileList.files.map((file) => toRow(id, refName, file));
		await pg.begin(async (tx) => {
			await tx`delete from repo_file where repo_id = ${id} and ref_name = ${refName}`;
			const columns = ["repo_id", "ref_name", "path", "mode", "blob_oid"];
			await copyInsert(tx, "repo_file", columns, rows);
		});
	}

	return { clearRepo, dropRefSnapshot, rebuildRefSnapshot };
}
|
|
943
|
+
|
|
799
944
|
//#endregion
|
|
800
945
|
//#region src/object/edges.ts
|
|
946
|
+
/**
|
|
947
|
+
* Edge kinds stored in `git_edge.kind`. tree→blob (would be `4`) is deliberately
|
|
948
|
+
* NOT a kind: blobs are enumerated from tree content, never stored as edges (§4.3),
|
|
949
|
+
* so `4` is reserved/unused.
|
|
950
|
+
*/
|
|
951
|
+
const EDGE_KIND = {
|
|
952
|
+
COMMIT_PARENT: 2,
|
|
953
|
+
COMMIT_TREE: 1,
|
|
954
|
+
TAG_TARGET: 5,
|
|
955
|
+
TREE_SUBTREE: 3
|
|
956
|
+
};
|
|
801
957
|
/** A tree entry pointing at a commit in *another* repo — no blob, no edge here. */
|
|
802
958
|
const GITLINK_MODE = "160000";
|
|
959
|
+
/** Exactly 40 lowercase hex characters. */
const WELL_FORMED_OID = /^[0-9a-f]{40}$/;
/**
 * Guard for an OID parsed out of a commit/tag header. The header parsers hand
 * back whatever text follows the key, so a forged object could smuggle a
 * non-OID into an edge; this rejects it loudly at the ingest boundary.
 *
 * @param oid - Candidate object id.
 * @param context - Short label prefixed to the error message.
 * @returns `oid` unchanged when it is well-formed.
 * @throws GitFormatError (`"malformed-oid"`) otherwise.
 */
function assertOid(oid, context) {
	if (WELL_FORMED_OID.test(oid)) return oid;
	throw new GitFormatError("malformed-oid", `${context}: not a well-formed object id: ${JSON.stringify(oid)}`);
}
|
|
972
|
+
/**
 * Count how many lines of the leading header block start with `"<key> "`.
 * The header block is every line before the first empty line; lines after it
 * are never inspected.
 *
 * @param content - Object body; decoded as latin1 before splitting on `\n`.
 * @param key - Header key to count (without the trailing space).
 */
function countHeader(content, key) {
	const lines = content.toString("latin1").split("\n");
	const needle = `${key} `;
	let matches = 0;
	// Stop at the first blank line: that is where the header block ends.
	for (let i = 0; i < lines.length && lines[i] !== ""; i += 1) {
		if (lines[i].startsWith(needle)) matches += 1;
	}
	return matches;
}
|
|
983
|
+
/**
 * Structural validation applied at the ingest boundary, covering the cases
 * that OID well-formedness and tree parsing do not:
 *  - a commit must not carry more than one `tree` header (otherwise the first
 *    would be used silently and the rest dropped);
 *  - an annotated tag must carry exactly one `object` header (zero yields no
 *    target edge; several yield divergent target edges).
 * A commit with zero `tree` headers is not rejected here. Other object types
 * pass unchecked.
 *
 * @throws GitFormatError with code `"multiple-tree-headers"`,
 *   `"missing-tag-object"` or `"multiple-tag-objects"`.
 */
function validateObject(type, content) {
	if (type === "commit") {
		if (countHeader(content, "tree") > 1) throw new GitFormatError("multiple-tree-headers", "commit carries more than one tree header");
		return;
	}
	if (type !== "tag") return;
	const objectHeaders = countHeader(content, "object");
	if (objectHeaders < 1) throw new GitFormatError("missing-tag-object", "annotated tag has no object header");
	if (objectHeaders > 1) throw new GitFormatError("multiple-tag-objects", "annotated tag carries more than one object header");
}
|
|
1007
|
+
/**
 * Derive the `git_edge` rows an object contributes (the object itself is the
 * parent of each edge):
 *  - blob   → none;
 *  - commit → its tree (`COMMIT_TREE`), then one `COMMIT_PARENT` per parent;
 *  - tag    → its target (`TAG_TARGET`);
 *  - tree   → only entries whose mode passes `isTreeEntryMode` (`TREE_SUBTREE`);
 *             blob and gitlink entries produce no edge.
 * Commit and tag children go through `assertOid`; tree-entry OIDs are used as
 * parsed. Any other `type` yields `undefined`.
 *
 * @returns Array of `{ child, kind }`.
 */
function deriveEdges(type, content) {
	if (type === "blob") return [];
	if (type === "commit") {
		// Tree edge is derived (and validated) before the parents.
		const treeEdge = {
			child: assertOid(commitTreeOid(content), "commit tree"),
			kind: EDGE_KIND.COMMIT_TREE
		};
		const parentEdges = commitParents(content).map((parent) => ({
			child: assertOid(parent, "commit parent"),
			kind: EDGE_KIND.COMMIT_PARENT
		}));
		return [treeEdge, ...parentEdges];
	}
	if (type === "tag") {
		return referencedOids("tag", content).map((target) => ({
			child: assertOid(target, "tag target"),
			kind: EDGE_KIND.TAG_TARGET
		}));
	}
	if (type === "tree") {
		const subtrees = treeEntries(content).filter((entry) => isTreeEntryMode(entry.mode));
		return subtrees.map((entry) => ({
			child: entry.oid,
			kind: EDGE_KIND.TREE_SUBTREE
		}));
	}
}
|
|
803
1040
|
/**
|
|
804
1041
|
* The blob OIDs directly in a tree — the §4.3 standing rule's other half: blobs
|
|
805
1042
|
* are enumerated from tree content, never stored as edges. A tree entry is a blob
|
|
@@ -833,13 +1070,297 @@ const PACK_OBJ_TYPE = {
|
|
|
833
1070
|
TAG: 4,
|
|
834
1071
|
TREE: 2
|
|
835
1072
|
};
|
|
1073
|
+
function encodeObjectHeader(type, size) {
	// First byte: continuation flag (bit 7), type code (bits 4-6), low 4 bits
	// of the size. Each following byte carries 7 more size bits, least
	// significant group first, with bit 7 set while more bytes follow.
	let remaining = Math.floor(size / 16);
	const lead = type << 4 | size % 16;
	const encoded = [remaining > 0 ? lead | 128 : lead];
	while (remaining > 0) {
		const low7 = remaining % 128;
		remaining = Math.floor(remaining / 128);
		encoded.push(remaining > 0 ? low7 | 128 : low7);
	}
	return Buffer.from(encoded);
}
|
|
1086
|
+
function decodeObjectHeader(buf, offset) {
	// Inverse of the variable-length (type, size) header: the first byte holds
	// the type in bits 4-6 and the low 4 size bits; while bit 7 is set, each
	// further byte contributes 7 more size bits at increasing significance.
	let cursor = offset;
	let current = buf.readUInt8(cursor);
	cursor += 1;
	const type = current >> 4 & 7;
	let size = current & 15;
	// Multiplication instead of shifts keeps sizes above 32 bits intact.
	for (let scale = 16; current & 128; scale *= 128) {
		current = buf.readUInt8(cursor);
		cursor += 1;
		size += (current & 127) * scale;
	}
	return {
		bytesRead: cursor - offset,
		size,
		type
	};
}
|
|
1104
|
+
|
|
1105
|
+
//#endregion
|
|
1106
|
+
//#region src/pack/delta.ts
|
|
1107
|
+
/**
 * Apply a git delta to its base, producing the target object.
 *
 * The delta starts with two little-endian base-128 varints (declared source
 * size, declared target size) followed by instructions:
 *  - COPY (high bit set): copy a run from the base; the low 7 bits select which
 *    offset (up to 4) and size (up to 3) bytes follow. A size of 0 means 65536.
 *  - INSERT (1..127): that many literal bytes follow.
 *  - 0x00 is reserved and rejected.
 *
 * Every read and copy is bounds-checked. `Buffer.copy` silently clamps
 * out-of-range spans, which would otherwise leave zero-filled gaps in the
 * output that can still satisfy the final size check.
 *
 * @param base - Fully resolved base object content.
 * @param delta - Inflated delta instruction stream.
 * @returns The reconstructed target content (`targetSize` bytes).
 * @throws GitFormatError `delta-truncated` when the stream ends mid-varint or
 *   mid-instruction, `delta-base-size-mismatch`, `delta-copy-out-of-range` when
 *   a COPY reaches past the base, `delta-reserved-opcode`, or
 *   `delta-target-size-mismatch` when output would exceed or fall short of the
 *   declared size.
 */
function applyDelta(base, delta) {
	let pos = 0;
	/** Next delta byte, or a format error when the stream is exhausted. */
	const readByte = () => {
		if (pos >= delta.length) throw new GitFormatError("delta-truncated", `delta: unexpected end of delta at byte ${pos}`);
		const byte = delta[pos];
		pos += 1;
		return byte;
	};
	const readVarint = () => {
		let result = 0;
		let scale = 1;
		let byte;
		do {
			byte = readByte();
			// Arithmetic rather than `<<` so values ≥ 2³¹ stay correct.
			result += (byte & 127) * scale;
			scale *= 128;
		} while (byte & 128);
		return result;
	};
	const sourceSize = readVarint();
	const targetSize = readVarint();
	if (base.length !== sourceSize) throw new GitFormatError("delta-base-size-mismatch", `delta: base size ${base.length} ≠ declared ${sourceSize}`);
	const out = Buffer.alloc(targetSize);
	let outPos = 0;
	/** Reject an instruction that would write past the declared target size. */
	const ensureRoom = (length) => {
		if (outPos + length > targetSize) throw new GitFormatError("delta-target-size-mismatch", `delta: produced ${outPos + length} bytes, declared ${targetSize}`);
	};
	while (pos < delta.length) {
		const op = readByte();
		if (op & 128) {
			let copyOffset = 0;
			if (op & 1) copyOffset |= readByte();
			if (op & 2) copyOffset |= readByte() << 8;
			if (op & 4) copyOffset |= readByte() << 16;
			// The top byte is added arithmetically: `<< 24` would go negative.
			if (op & 8) copyOffset += readByte() * 2 ** 24;
			let copySize = 0;
			if (op & 16) copySize |= readByte();
			if (op & 32) copySize |= readByte() << 8;
			if (op & 64) copySize |= readByte() << 16;
			if (copySize === 0) copySize = 65536;
			if (copyOffset + copySize > base.length) throw new GitFormatError("delta-copy-out-of-range", `delta: copy of ${copySize} bytes at offset ${copyOffset} exceeds base size ${base.length}`);
			ensureRoom(copySize);
			base.copy(out, outPos, copyOffset, copyOffset + copySize);
			outPos += copySize;
		} else if (op !== 0) {
			if (pos + op > delta.length) throw new GitFormatError("delta-truncated", `delta: insert of ${op} bytes runs past end of delta`);
			ensureRoom(op);
			delta.copy(out, outPos, pos, pos + op);
			outPos += op;
			pos += op;
		} else throw new GitFormatError("delta-reserved-opcode", "delta: reserved opcode 0x00");
	}
	if (outPos !== targetSize) throw new GitFormatError("delta-target-size-mismatch", `delta: produced ${outPos} bytes, declared ${targetSize}`);
	return out;
}
|
|
1159
|
+
|
|
1160
|
+
//#endregion
|
|
1161
|
+
//#region src/pack/read-pack.ts
|
|
1162
|
+
const CODE_TO_TYPE$1 = {
|
|
1163
|
+
[PACK_OBJ_TYPE.COMMIT]: "commit",
|
|
1164
|
+
[PACK_OBJ_TYPE.TREE]: "tree",
|
|
1165
|
+
[PACK_OBJ_TYPE.BLOB]: "blob",
|
|
1166
|
+
[PACK_OBJ_TYPE.TAG]: "tag"
|
|
1167
|
+
};
|
|
1168
|
+
/**
 * Inflate the single zlib stream that starts at the front of `buf`.
 *
 * Resolves with the inflated `data` and `compressedLength`, taken from the
 * inflater's `bytesWritten` once the stream has ended. The caller uses that
 * length to step to the next back-to-back stream in a packfile, so `buf` may
 * carry trailing bytes after the stream.
 *
 * @param buf - Buffer whose first bytes are a zlib stream.
 * @returns Promise of `{ compressedLength, data }`.
 * @throws (rejects) GitFormatError `inflate-failed` when zlib reports an error.
 */
function inflateOne(buf) {
	return new Promise((resolve, reject) => {
		const inflater = createInflate();
		const pieces = [];
		const onData = (piece) => {
			pieces.push(piece);
		};
		const onEnd = () => {
			resolve({
				compressedLength: inflater.bytesWritten,
				data: Buffer.concat(pieces)
			});
		};
		const onError = (e) => {
			const detail = e instanceof Error ? e.message : String(e);
			reject(new GitFormatError("inflate-failed", `pack: zlib inflate failed: ${detail}`));
		};
		inflater.on("data", onData);
		inflater.on("end", onEnd);
		inflater.on("error", onError);
		// Hand over the whole buffer and close the writable side in one call.
		inflater.end(buf);
	});
}
|
|
1188
|
+
/**
 * Decode the pack OFS_DELTA offset encoding starting at `offset`. This is not
 * plain base-128: every continuation step adds 1 before scaling by 128, which
 * makes each value have exactly one encoding. Multiplication is used instead
 * of `<<` so offsets of 2³¹ and above stay correct.
 *
 * @returns `{ bytesRead, value }`.
 */
function readOffsetVarint(buf, offset) {
	let cursor = offset;
	let byte = buf.readUInt8(cursor);
	cursor += 1;
	let value = byte & 127;
	while ((byte & 128) !== 0) {
		byte = buf.readUInt8(cursor);
		cursor += 1;
		value = (value + 1) * 128 + (byte & 127);
	}
	return {
		bytesRead: cursor - offset,
		value
	};
}
|
|
1207
|
+
/**
 * Parse a version-2 packfile into its objects, resolving OFS_DELTA and
 * REF_DELTA entries (including chains of deltas).
 *
 * Delta bases are looked up inside the pack first. A REF_DELTA whose base is
 * not in the pack (a thin pack) is fetched through `resolveExternalBase`;
 * without a resolver, or when the resolver returns nothing, the pack is
 * rejected.
 *
 * @param pack - The complete pack bytes, including the 20-byte SHA-1 trailer.
 * @param resolveExternalBase - Optional async `(oid) => { type, content } | null`
 *   used for REF_DELTA bases missing from the pack.
 * @returns The pack's objects as `{ content, oid, type }`, in pack order.
 * @throws GitFormatError for any malformed input: `bad-magic`,
 *   `truncated-pack` (too short to hold header and trailer, or the declared
 *   object count runs past the data), `unsupported-version`,
 *   `trailer-mismatch`, `unknown-object-type`, `size-mismatch`,
 *   `trailing-bytes`, `unresolved-base`, plus whatever the inflate and delta
 *   helpers raise.
 */
async function readPack(pack, resolveExternalBase) {
	count("readPackCalls");
	if (pack.subarray(0, 4).toString("latin1") !== "PACK") throw new GitFormatError("bad-magic", "pack: bad magic");
	// 12-byte header + 20-byte trailer is the smallest possible pack. Checking
	// here keeps the fixed-offset reads below from throwing a raw RangeError.
	if (pack.length < 32) throw new GitFormatError("truncated-pack", `pack: ${pack.length} bytes cannot hold a 12-byte header and a 20-byte trailer`);
	const version = pack.readUInt32BE(4);
	if (version !== 2) throw new GitFormatError("unsupported-version", `pack: unsupported version ${version}`);
	const objectCount = pack.readUInt32BE(8);
	const trailerOffset = pack.length - 20;
	const actualTrailer = createHash("sha1").update(pack.subarray(0, trailerOffset)).digest();
	if (!pack.subarray(trailerOffset).equals(actualTrailer)) throw new GitFormatError("trailer-mismatch", "pack: trailer SHA-1 mismatch");

	// Pass 1: walk the entries in order, inflating each payload. Entries are
	// keyed by their starting offset because OFS_DELTA refers to bases that way.
	const entries = /* @__PURE__ */ new Map();
	const order = [];
	let offset = 12;
	for (let i = 0; i < objectCount; i++) {
		// An entry must begin before the trailer; otherwise the declared count
		// is larger than the data and we would decode trailer bytes as objects.
		if (offset >= trailerOffset) throw new GitFormatError("truncated-pack", `pack: header declares ${objectCount} objects but data ends after ${i}`);
		const start = offset;
		const { type, size, bytesRead } = decodeObjectHeader(pack, offset);
		offset += bytesRead;
		if (type === PACK_OBJ_TYPE.OFS_DELTA) {
			// Base is addressed as a distance back from this entry's start.
			const { value: negOffset, bytesRead: ob } = readOffsetVarint(pack, offset);
			offset += ob;
			const { data, compressedLength } = await inflateOne(pack.subarray(offset));
			count("bytesInflated", data.length);
			offset += compressedLength;
			entries.set(start, {
				baseOffset: start - negOffset,
				delta: data,
				kind: "ofs"
			});
		} else if (type === PACK_OBJ_TYPE.REF_DELTA) {
			// Base is addressed by its raw 20-byte OID.
			const baseOid = pack.subarray(offset, offset + 20).toString("hex");
			offset += 20;
			const { data, compressedLength } = await inflateOne(pack.subarray(offset));
			count("bytesInflated", data.length);
			offset += compressedLength;
			entries.set(start, {
				baseOid,
				delta: data,
				kind: "ref"
			});
		} else {
			const typeName = CODE_TO_TYPE$1[type];
			if (!typeName) throw new GitFormatError("unknown-object-type", `pack: unknown object type ${type}`);
			const { data, compressedLength } = await inflateOne(pack.subarray(offset));
			count("bytesInflated", data.length);
			if (data.length !== size) throw new GitFormatError("size-mismatch", `pack: size mismatch (header ${size}, inflated ${data.length})`);
			offset += compressedLength;
			entries.set(start, {
				content: data,
				kind: "base",
				type: typeName
			});
		}
		order.push(start);
	}
	if (offset !== trailerOffset) throw new GitFormatError("trailing-bytes", `pack: consumed ${offset} bytes, expected ${trailerOffset} before trailer`);

	// Pass 2: resolve. `resolved` is keyed by pack offset (for OFS_DELTA),
	// `byOid` by object id (for REF_DELTA).
	const resolved = /* @__PURE__ */ new Map();
	const byOid = /* @__PURE__ */ new Map();
	// Memoizes external lookups, including misses (`null`), for this call.
	const externalCache = /* @__PURE__ */ new Map();
	const fetchExternal = async (oid) => {
		const cached = externalCache.get(oid);
		if (cached !== void 0) return cached;
		const fetched = resolveExternalBase ? await resolveExternalBase(oid) : null;
		externalCache.set(oid, fetched);
		return fetched;
	};
	const record = (off, type, content) => {
		const obj = {
			content,
			oid: computeOid(type, content),
			type
		};
		resolved.set(off, obj);
		byOid.set(obj.oid, obj);
	};
	// Undeltified entries need no base, so they are recorded first.
	for (const off of order) {
		const entry = entries.get(off);
		if (entry?.kind === "base") record(off, entry.type, entry.content);
	}
	// Sweep the remaining deltas repeatedly; each sweep resolves those whose
	// base became available. A sweep that makes no progress means some base
	// can never be found.
	let pending = order.filter((off) => !resolved.has(off));
	while (pending.length > 0) {
		const stillPending = [];
		for (const off of pending) {
			const entry = entries.get(off);
			if (!entry || entry.kind === "base") continue;
			const base = entry.kind === "ofs" ? resolved.get(entry.baseOffset) ?? null : byOid.get(entry.baseOid) ?? await fetchExternal(entry.baseOid);
			if (!base) {
				stillPending.push(off);
				continue;
			}
			// A delta's result has the same type as its base.
			record(off, base.type, applyDelta(base.content, entry.delta));
		}
		if (stillPending.length === pending.length) {
			const off = stillPending[0];
			const entry = entries.get(off);
			throw new GitFormatError("unresolved-base", `pack: ref-delta base ${entry?.kind === "ref" ? entry.baseOid : `offset ${entry?.kind === "ofs" ? entry.baseOffset : "?"}`} not found in pack or store`);
		}
		pending = stillPending;
	}
	return order.map((off) => resolved.get(off));
}
|
|
1313
|
+
|
|
1314
|
+
//#endregion
|
|
1315
|
+
//#region src/pack/write-pack.ts
|
|
1316
|
+
const TYPE_CODE = {
|
|
1317
|
+
blob: PACK_OBJ_TYPE.BLOB,
|
|
1318
|
+
commit: PACK_OBJ_TYPE.COMMIT,
|
|
1319
|
+
tag: PACK_OBJ_TYPE.TAG,
|
|
1320
|
+
tree: PACK_OBJ_TYPE.TREE
|
|
1321
|
+
};
|
|
1322
|
+
/**
 * Build the 12-byte pack header: the `PACK` magic, version 2, then the object
 * count, the latter two as big-endian unsigned 32-bit integers. The count sits
 * in the header, so an encoder has to know how many objects it will emit
 * before it writes the first one.
 *
 * @param objectCount - Number of objects the pack will contain.
 */
function packHeader(objectCount) {
	const magic = Buffer.from("PACK", "latin1");
	const fields = Buffer.alloc(8);
	fields.writeUInt32BE(2, 0);
	fields.writeUInt32BE(objectCount, 4);
	return Buffer.concat([magic, fields]);
}
|
|
1334
|
+
/**
 * Encode one undeltified pack entry: the variable-length (type, uncompressed
 * size) header followed by the zlib-deflated content. Also reports the
 * deflate input and output byte counts through `count`.
 */
function packObject(type, content) {
	const compressed = deflateSync(content);
	count("deflateInputBytes", content.length);
	count("deflateOutputBytes", compressed.length);
	const header = encodeObjectHeader(TYPE_CODE[type], content.length);
	return Buffer.concat([header, compressed]);
}
|
|
1342
|
+
/**
 * Serialize `objects` into one self-contained, undeltified version-2 pack:
 * the header, every object's encoded entry in input order, and finally the
 * SHA-1 of all preceding bytes as the trailer. This is the all-at-once form;
 * it is also what produces the empty pack.
 *
 * @param objects - Array of `{ type, content }`.
 * @returns The complete pack bytes.
 */
function writePack(objects) {
	count("writePackCalls");
	const header = packHeader(objects.length);
	const encoded = Array.from(objects, (obj) => packObject(obj.type, obj.content));
	const body = Buffer.concat([header, ...encoded]);
	const trailer = createHash("sha1").update(body).digest();
	return Buffer.concat([body, trailer]);
}
|
|
836
1357
|
|
|
837
1358
|
//#endregion
|
|
838
1359
|
//#region src/store/reachability.ts
|
|
839
1360
|
/** Objects looked up per round-trip when chunking tree/blob existence queries. */
|
|
840
1361
|
const LOOKUP_BATCH = 1e3;
|
|
841
1362
|
/** Split `items` into consecutive batches of at most `size`. */
|
|
842
|
-
function batches(items, size) {
|
|
1363
|
+
function batches$1(items, size) {
|
|
843
1364
|
const out = [];
|
|
844
1365
|
for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
|
|
845
1366
|
return out;
|
|
@@ -888,7 +1409,7 @@ async function reachableClosure(db, id, roots, omitBlobs) {
|
|
|
888
1409
|
present
|
|
889
1410
|
};
|
|
890
1411
|
const blobCandidates = /* @__PURE__ */ new Set();
|
|
891
|
-
for (const batch of batches(treeOids, LOOKUP_BATCH)) {
|
|
1412
|
+
for (const batch of batches$1(treeOids, LOOKUP_BATCH)) {
|
|
892
1413
|
const trees = await db.selectFrom("git_object").select("content").where("repo_id", "=", id).where("oid", "in", batch).execute();
|
|
893
1414
|
for (const t of trees) for (const blob of treeBlobOids(t.content)) blobCandidates.add(blob);
|
|
894
1415
|
}
|
|
@@ -897,7 +1418,7 @@ async function reachableClosure(db, id, roots, omitBlobs) {
|
|
|
897
1418
|
present
|
|
898
1419
|
};
|
|
899
1420
|
const presentBlobs = /* @__PURE__ */ new Set();
|
|
900
|
-
for (const batch of batches([...blobCandidates], LOOKUP_BATCH)) {
|
|
1421
|
+
for (const batch of batches$1([...blobCandidates], LOOKUP_BATCH)) {
|
|
901
1422
|
const rows = await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "in", batch.map((h) => Buffer.from(h, "hex"))).execute();
|
|
902
1423
|
for (const r of rows) presentBlobs.add(r.oid.toString("hex"));
|
|
903
1424
|
}
|
|
@@ -907,48 +1428,551 @@ async function reachableClosure(db, id, roots, omitBlobs) {
|
|
|
907
1428
|
present
|
|
908
1429
|
};
|
|
909
1430
|
}
|
|
1431
|
+
/**
 * Does the commit/tag ancestry of `want` reach any OID in `commonBufs`?
 *
 * Runs one recursive query over `git_edge`, following only commit→parent and
 * tag→target edges within repo `id`, then checks whether the visited set
 * intersects the given OIDs. This ancestry-only walk is what `readyToGiveUp`
 * is built on.
 *
 * @param db - Executor the query is run against.
 * @param id - Numeric repo id.
 * @param want - Hex OID to start from.
 * @param commonBufs - Raw OID buffers to look for; empty short-circuits to `false`.
 * @returns `true` when at least one common OID is reached, else `false`.
 */
async function ancestryReachesCommon(db, id, want, commonBufs) {
	if (commonBufs.length === 0) return false;
	// One `(oid::bytea)` tuple per common OID, for the VALUES list below.
	const commons = sql.join(commonBufs.map((buf) => sql`(${buf}::bytea)`));
	const query = sql`
with recursive anc(oid) as (
select ${Buffer.from(want, "hex")}::bytea
union
select e.child from git_edge e
join anc a on e.parent = a.oid
where e.repo_id = ${id}::bigint
and e.kind in (${EDGE_KIND.COMMIT_PARENT}, ${EDGE_KIND.TAG_TARGET})
)
select exists (
select 1 from anc join (values ${commons}) as c(oid) on c.oid = anc.oid
) as reached
`;
	const { rows } = await query.execute(db);
	return rows[0]?.reached ?? false;
}
|
|
910
1450
|
|
|
911
1451
|
//#endregion
|
|
912
|
-
//#region src/store/
|
|
1452
|
+
//#region src/store/object-store.ts
|
|
1453
|
+
/** How many objects are fetched per round-trip while content is read for a served pack. */
const PACK_BATCH = 1000;
/**
 * Stored objects at or above BIG_OBJECT_BYTES are read back in chunks of
 * READ_CHUNK_BYTES instead of in a single round-trip. The porsager driver
 * decodes a `bytea` RESULT from its text form (`\x` + hex, i.e. DOUBLE the byte
 * length), so a value over ~256MiB would build a JS string past V8's max length
 * and throw on the SERVE path — the read-side mirror of the ingest string-cap
 * that binary COPY fixed (a07/blb01). Both values sit well under that cap so the
 * doubled hex of one inline value or one chunk stays safely below it.
 */
const BIG_OBJECT_BYTES = 200e6;
const READ_CHUNK_BYTES = 100e6;
|
|
1465
|
+
/**
 * Split `items` into consecutive batches of at most `size` elements, in order.
 *
 * @param items array to partition (not mutated; each batch is a fresh slice)
 * @param size maximum batch length; must be greater than zero
 * @returns the list of batches; empty when `items` is empty
 * @throws {RangeError} when `size` is not greater than zero (0, negative, NaN) —
 *   such a step never advances the cursor, so the loop would either spin forever
 *   while growing `out`, or silently return garbage
 */
function batches(items, size) {
	if (!(size > 0)) throw new RangeError(`batches: size must be greater than zero, got ${size}`);
	const out = [];
	for (let i = 0; i < items.length; i += size) out.push(items.slice(i, i + size));
	return out;
}
|
|
1471
|
+
/** Git object type name → pack object type code. */
const TYPE_TO_CODE = {
	blob: PACK_OBJ_TYPE.BLOB,
	commit: PACK_OBJ_TYPE.COMMIT,
	tag: PACK_OBJ_TYPE.TAG,
	tree: PACK_OBJ_TYPE.TREE
};
/** Pack object type code → git object type name (the inverse of TYPE_TO_CODE). */
const CODE_TO_TYPE = /* @__PURE__ */ new Map(Object.entries(TYPE_TO_CODE).map(([name, code]) => [code, name]));
/**
 * Translate a stored pack type code back to its git object type name.
 *
 * @param code numeric type code as stored on an object row
 * @returns "blob", "commit", "tag" or "tree"
 * @throws {Error} when the code is not one of the four known object types
 */
function typeFromCode(code) {
	if (CODE_TO_TYPE.has(code)) return CODE_TO_TYPE.get(code);
	throw new Error(`object-store: unknown git object type code ${code}`);
}
|
|
1488
|
+
/**
 * Postgres-backed git object store. Each immutable object is one row in the
 * per-repo, HASH-partitioned `git_object` (raw 20-byte `bytea` OID, pack type
 * code, raw inflated body lz4-TOASTed Postgres-side) — packs are a transport
 * encoding produced on serve and consumed on ingest, never stored. So a fetch is
 * a primary-key point-read, not a whole-pack re-inflate.
 *
 * The store is the wire→DB boundary: callers speak hex OIDs and the wire repo
 * name; OIDs are coerced hex↔raw here, and the repo name is resolved to its
 * bigint surrogate (memoized) here.
 *
 * @param pg Postgres connection; used directly for transactions/COPY and wrapped
 *   by `initKysely` for query building
 * @returns the object-store API (`buildPack`, `commonHaves`, `getObject`,
 *   `hasObject`, `ingestPack`, `isConnected`, `putPack`, `readyToGiveUp`)
 */
function createObjectStore(pg) {
	const db = initKysely(pg);
	const repos = createRepoResolver(db);
	const store = {
		/**
		 * Build the served pack for a fetch: the want-closure minus the have-closure,
		 * re-adding the explicit wants (promisor lazy-fetch roots — a partial clone may
		 * want a blob reachable from a tree it already has, so it must not be
		 * subtracted). The object count is known from the closure before any content is
		 * read; content is then read in batches of PACK_BATCH and encoded object by
		 * object. The encoded pack is accumulated in memory and returned as one Buffer.
		 *
		 * @throws {WantNotFoundError} when a want is not present in the repo
		 * @throws {Error} when a batch returns fewer/more rows than requested — the
		 *   header already announced `served.length` objects, so emitting anything
		 *   else would hand the client a corrupt pack
		 */
		async buildPack(repoId, wants, haves, omitBlobs, includeTag = false) {
			const id = await repos.resolveRepoId(repoId);
			if (id === null || wants.length === 0) return writePack([]);
			const served = await withPhase("closure", async () => {
				const want = await reachableClosure(db, id, wants, omitBlobs);
				if (want.missing.size > 0) throw new WantNotFoundError([...want.missing]);
				const have = haves.length > 0 ? await reachableClosure(db, id, haves, omitBlobs) : {
					missing: /* @__PURE__ */ new Set(),
					present: /* @__PURE__ */ new Set()
				};
				const set = /* @__PURE__ */ new Set();
				for (const o of want.present) if (!have.present.has(o)) set.add(o);
				if (omitBlobs) {
					// Explicitly wanted objects are always served, even if a have reaches them.
					for (const w of wants) if (want.present.has(w)) set.add(w);
				}
				if (includeTag) await augmentWithTags(id, set);
				return [...set];
			});
			return withPhase("pack-encode", async () => {
				const hash = createHash("sha1");
				const parts = [];
				// Every emitted chunk feeds both the output and the trailing pack checksum.
				const push = (chunk) => {
					hash.update(chunk);
					parts.push(chunk);
				};
				push(packHeader(served.length));
				for (const batch of batches(served, PACK_BATCH)) {
					// Content comes back inline only below BIG_OBJECT_BYTES; larger rows yield
					// a null `content` and are read in chunks instead.
					const rows = await db.selectFrom("git_object").select(["oid", "type"]).select(sql`octet_length(content)`.as("size")).select(sql`case when octet_length(content) < ${BIG_OBJECT_BYTES} then content end`.as("content")).where("repo_id", "=", id).where("oid", "in", batch.map((h) => Buffer.from(h, "hex"))).execute();
					// The header count is already written; a short read must fail loudly
					// rather than produce a pack whose object count is wrong.
					if (rows.length !== batch.length) throw new Error(`object-store: expected ${batch.length} objects for the served pack but read ${rows.length}`);
					for (const r of rows) {
						const content = r.content ?? await readContentChunked(id, r.oid, r.size);
						push(packObject(typeFromCode(r.type), content));
					}
				}
				const pack = Buffer.concat([...parts, hash.digest()]);
				count("objectsServed", served.length);
				count("packBytes", pack.length);
				return pack;
			});
		},
		/** The subset of `haves` this repo actually has — the negotiation common set,
		 * in one indexed lookup rather than a per-have probe. Input order is kept. */
		async commonHaves(repoId, haves) {
			if (haves.length === 0) return [];
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return [];
			const rows = await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "in", haves.map((h) => Buffer.from(h, "hex"))).execute();
			const present = new Set(rows.map((r) => r.oid.toString("hex")));
			return haves.filter((h) => present.has(h));
		},
		/** Read one object by hex oid. Returns `{ content, type }`, or `null` when the
		 * repo or the object does not exist. Large content is read in chunks. */
		async getObject(repoId, oid) {
			count("getObjectCalls");
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return null;
			const oidBuf = Buffer.from(oid, "hex");
			const row = await db.selectFrom("git_object").select(["type"]).select(sql`octet_length(content)`.as("size")).select(sql`case when octet_length(content) < ${BIG_OBJECT_BYTES} then content end`.as("content")).where("repo_id", "=", id).where("oid", "=", oidBuf).executeTakeFirst();
			if (!row) return null;
			const content = row.content ?? await readContentChunked(id, oidBuf, row.size);
			count("objectBytesRead", content.length);
			return {
				content,
				type: typeFromCode(row.type)
			};
		},
		/** Whether the repo exists and holds an object with this hex oid. */
		async hasObject(repoId, oid) {
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return false;
			return await db.selectFrom("git_object").select("oid").where("repo_id", "=", id).where("oid", "=", Buffer.from(oid, "hex")).executeTakeFirst() !== void 0;
		},
		/**
		 * Ingest a received pack: parse it — resolving in-pack deltas, and thin-pack
		 * REF_DELTA bases against objects already in this repo — then insert every
		 * resolved object as a row. Creates the repo if it does not exist yet.
		 */
		async ingestPack(repoId, packBytes) {
			const id = await repos.ensureRepoId(repoId);
			const parsed = await readPack(packBytes, (oid) => store.getObject(repoId, oid));
			return { oids: await insertObjects(id, parsed.map((p) => ({
				content: p.content,
				type: p.type
			}))) };
		},
		/**
		 * Connectivity check (spec §5.2): is every object reachable from `oid` present?
		 * A push whose new tip fails this references an object the pack neither carried
		 * nor delta-resolved, and must be rejected. Delegates to the one reachability
		 * engine (`reachableClosure`) shared with clone/fetch, so connectivity and
		 * serving can never disagree on what is reachable. Full-closure (matching the
		 * old walk's scope); the bounded "new objects only" form is a deferred
		 * optimization (OQ-14).
		 */
		async isConnected(repoId, oid) {
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return false;
			const { missing } = await reachableClosure(db, id, [oid], false);
			return missing.size === 0;
		},
		/** Seed objects directly (the differential harness + perf bench path): insert
		 * each as a row, idempotently. Equivalent to `ingestPack` minus the pack codec. */
		async putPack(repoId, objects) {
			return { oids: await insertObjects(await repos.ensureRepoId(repoId), objects) };
		},
		/**
		 * git's `ok_to_give_up`: ready once every want reaches a common have by commit/
		 * tag ancestry (the haves form a cut below all wants, so the delta is well-
		 * defined). One ancestry CTE (edge kinds 2,5) per want replaces `reachesCommon`'s
		 * per-object BFS. Generation-number pruning is a deferred §6.4 lever.
		 */
		async readyToGiveUp(repoId, wants, common) {
			if (common.length === 0) return false;
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return false;
			const commonBufs = common.map((h) => Buffer.from(h, "hex"));
			// Sequential on purpose: stop at the first want that fails to reach the cut.
			for (const want of wants) if (!await ancestryReachesCommon(db, id, want, commonBufs)) return false;
			return true;
		}
	};
	/** Insert objects as rows + their derived edges, idempotent (re-sent objects are
	 * skipped). Each object row and its complete edge set go in ONE transaction from
	 * ONE derivation (§10.1) — so no object ever exists without its edges. Edge
	 * derivation validates at the boundary and throws on malformed content (§5.1),
	 * aborting the ingest before any row lands. Returns every object's hex OID, in
	 * input order. */
	async function insertObjects(id, objects) {
		const entries = objects.map((obj) => {
			validateObject(obj.type, obj.content);
			const hex = computeOid(obj.type, obj.content);
			const oid = Buffer.from(hex, "hex");
			return {
				edges: deriveEdges(obj.type, obj.content).map((e) => ({
					child: Buffer.from(e.child, "hex"),
					kind: e.kind,
					parent: oid,
					repo_id: id
				})),
				hex,
				row: {
					content: obj.content,
					oid,
					repo_id: id,
					size: obj.content.length,
					type: TYPE_TO_CODE[obj.type]
				}
			};
		});
		if (entries.length === 0) return [];
		// Typed cell lists in column order, as consumed by `copyInsert`.
		const objectRows = entries.map((e) => [
			{
				t: "int8",
				v: e.row.repo_id
			},
			{
				t: "bytea",
				v: e.row.oid
			},
			{
				t: "int2",
				v: e.row.type
			},
			{
				t: "int4",
				v: e.row.size
			},
			{
				t: "bytea",
				v: e.row.content
			}
		]);
		const edgeRows = entries.flatMap((e) => e.edges.map((edge) => [
			{
				t: "int8",
				v: edge.repo_id
			},
			{
				t: "bytea",
				v: edge.parent
			},
			{
				t: "bytea",
				v: edge.child
			},
			{
				t: "int2",
				v: edge.kind
			}
		]));
		await pg.begin(async (tx) => {
			await copyInsert(tx, "git_object", [
				"repo_id",
				"oid",
				"type",
				"size",
				"content"
			], objectRows);
			await copyInsert(tx, "git_edge", [
				"repo_id",
				"parent",
				"child",
				"kind"
			], edgeRows);
		});
		// Stamped after the object/edge transaction has committed.
		await pg`update repos set last_pushed_at = clock_timestamp() where id = ${id}::bigint`;
		return entries.map((e) => e.hex);
	}
	/**
	 * Read a single object's `content` in size-bounded chunks via `substring`, so a
	 * blob larger than V8's max string length never reaches the porsager driver as one
	 * over-cap `\x`+hex string — the serve-side mirror of the binary COPY ingest. Used
	 * only for objects at/over BIG_OBJECT_BYTES (smaller content comes back inline).
	 */
	async function readContentChunked(id, oid, size) {
		const parts = [];
		for (let off = 0; off < size; off += READ_CHUNK_BYTES) {
			const len = Math.min(READ_CHUNK_BYTES, size - off);
			// SQL `substring` positions are 1-based, hence `off + 1`.
			const row = await db.selectFrom("git_object").select(sql`substring(content from ${off + 1} for ${len})`.as("chunk")).where("repo_id", "=", id).where("oid", "=", oid).executeTakeFirstOrThrow();
			parts.push(row.chunk);
		}
		return Buffer.concat(parts);
	}
	/**
	 * include-tag augmentation (§6.5): annotated tags whose peeled target is in the
	 * served set get their tag OBJECTS added — transitively over `kind=5`, so a
	 * tag-of-tag chain ships every tag object in it (each must be present for the
	 * client's fsck). Annotated tags are few, so we fetch them all and filter by
	 * served membership app-side rather than feeding the whole served set into SQL.
	 * Mutates `served`. Peeled targets are already in `served` (they qualified the
	 * tag), so re-adding the chain's terminal commit is a no-op.
	 */
	async function augmentWithTags(id, served) {
		const qualifying = (await db.selectFrom("git_ref").select(["oid", "peeled_oid"]).where("repo_id", "=", id).where("oid", "is not", null).where("peeled_oid", "is not", null).execute()).filter((r) => r.peeled_oid !== null && served.has(r.peeled_oid.toString("hex"))).map((r) => r.oid.toString("hex"));
		if (qualifying.length === 0) return;
		const chain = await sql`
			with recursive tags(oid) as (
				select oid from (values ${sql.join(qualifying.map((r) => sql`(${Buffer.from(r, "hex")}::bytea)`))}) as roots(oid)
				union
				select e.child from git_edge e
				join tags t on e.parent = t.oid
				where e.repo_id = ${id}::bigint and e.kind = ${EDGE_KIND.TAG_TARGET}
			)
			select oid from tags
		`.execute(db);
		for (const r of chain.rows) served.add(r.oid.toString("hex"));
	}
	return store;
}
|
|
1752
|
+
|
|
1753
|
+
//#endregion
|
|
1754
|
+
//#region src/store/refs-store.ts
|
|
1755
|
+
/** True when `oid` is the 40-character all-zero hex oid (git's "no object" marker). */
const isZero = (oid) => /^0{40}$/.test(oid);
/** Decode a hex oid into its raw bytes. */
const toOid = (hex) => Buffer.from(hex, "hex");
/**
 * Classify a push command by its old/new oids.
 *
 * - new oid all-zero → `delete` (`oldOid` is `null` when the old oid is zero too)
 * - old oid all-zero → `create`
 * - otherwise        → `update`
 *
 * @param cmd command carrying hex `oldOid` and `newOid`
 * @returns `{ kind, ... }` with the non-zero oids decoded to raw buffers
 */
function classifyRefUpdate(cmd) {
	const oldIsZero = isZero(cmd.oldOid);
	if (isZero(cmd.newOid)) {
		const oldOid = oldIsZero ? null : toOid(cmd.oldOid);
		return { kind: "delete", oldOid };
	}
	const newOid = toOid(cmd.newOid);
	if (oldIsZero) return { kind: "create", newOid };
	return { kind: "update", newOid, oldOid: toOid(cmd.oldOid) };
}
|
|
1773
|
+
/**
 * Resolve the peeled target of a ref oid. When the oid is an annotated tag, the
 * `kind=5` (tag→target) edges are followed for as long as the current node is a
 * tag, and the terminal non-tag object is returned. A branch or lightweight tag
 * (the oid is not a tag object) peels to `null`, so `ls-refs` emits no `peeled`
 * line. No depth bound is applied, matching git: the `is_tag` predicate stops the
 * recursion at the first non-tag, and a content-addressed tag chain is acyclic
 * (an oid cannot embed its own hash) and therefore finite. Computed at ref-write,
 * when the tag's edges and target are already present (connectivity proved the
 * chain on push); this replaces the per-`ls-refs` app-side tag walk.
 *
 * @param exec executor (db or transaction) to run the query on
 * @param repoId repo surrogate id
 * @param oid raw oid of the ref target
 * @returns the raw peeled oid, or `null` when there is nothing to peel
 */
async function peelRef(exec, repoId, oid) {
	const query = sql`
		with recursive chain(oid, is_tag, depth) as (
			select o.oid, o.type = ${PACK_OBJ_TYPE.TAG}, 0
			from git_object o
			where o.repo_id = ${repoId}::bigint and o.oid = ${oid}::bytea
			union all
			select e.child, co.type = ${PACK_OBJ_TYPE.TAG}, c.depth + 1
			from chain c
			join git_edge e
			on e.repo_id = ${repoId}::bigint
			and e.parent = c.oid
			and e.kind = ${EDGE_KIND.TAG_TARGET}
			left join git_object co
			on co.repo_id = ${repoId}::bigint and co.oid = e.child
			where c.is_tag
		)
		select oid as peeled from chain where not is_tag and depth > 0
		order by depth desc limit 1
	`;
	const { rows } = await query.execute(exec);
	const terminal = rows[0];
	return terminal?.peeled ?? null;
}
|
|
1805
|
+
/**
 * Marker error: thrown from inside a transaction callback so the whole atomic
 * batch is rolled back; recognised by `instanceof` at the call site.
 */
var AtomicAbort = class AtomicAbort extends Error {};
|
|
1807
|
+
/**
 * Every repo is born with `HEAD → refs/heads/main`, mirroring `git init --bare`
 * (init.defaultBranch). The row is created lazily on the first ref write — a
 * repo's birth is its first push — and is never overwritten (conflict → do
 * nothing). Once the default branch exists `ls-refs` advertises HEAD and a clone
 * checks it out; until then HEAD dangles unadvertised, like a bare repo whose
 * HEAD points at an unborn `main`.
 */
const DEFAULT_HEAD_TARGET = "refs/heads/main";
/**
 * Insert the default symbolic HEAD row for `repoId` unless a HEAD row exists.
 *
 * @param exec query executor (db or transaction)
 * @param repoId repo surrogate id
 */
async function ensureHeadDefault(exec, repoId) {
	const headRow = {
		name: "HEAD",
		repo_id: repoId,
		symref_target: DEFAULT_HEAD_TARGET
	};
	const insert = exec.insertInto("git_ref").values(headRow);
	// An existing (repo_id, name) row wins: HEAD is only ever defaulted, never reset.
	const guarded = insert.onConflict((conflict) => conflict.columns(["repo_id", "name"]).doNothing());
	await guarded.execute();
}
|
|
1823
|
+
/**
 * Stamp the repo's GC-activity watermark, `repos.last_pushed_at`. A ref change
 * turns the previous tip into a reclaim candidate, so the self-scheduling drain
 * has to see the repo as eligible (gc-scheduler.ts §2). Callers invoke this only
 * when a ref ROW really changed (not for a no-op success such as deleting an
 * absent ref), so non-mutating traffic never re-triggers GC. It is a single-row
 * update on the churn-tuned `repos` table (0004); `clock_timestamp()` is the
 * server-side wall clock.
 *
 * @param exec query executor
 * @param repoId repo surrogate id
 */
async function stampPushed(exec, repoId) {
	const serverNow = sql`clock_timestamp()`;
	const update = exec.updateTable("repos").set({ last_pushed_at: serverNow }).where("id", "=", repoId);
	await update.execute();
}
|
|
1834
|
+
/**
 * Apply a single ref change as a compare-and-swap against the old oid the client
 * advertised, using the given executor (the db itself, or a transaction when the
 * change is part of an atomic batch). Non-fast-forward updates are accepted by
 * default — the CAS protects against concurrent writers, not ancestry (spec §3.6).
 *
 * @param exec query executor (db or transaction)
 * @param repoId repo surrogate id
 * @param cmd push command with `ref`, `oldOid` and `newOid`
 * @returns `{ ok, mutated }`: `ok` is the report-status flag, `mutated` tells
 *   whether a row was actually inserted, updated or deleted
 */
async function casRefUpdate(exec, repoId, cmd) {
	const name = cmd.ref;
	const op = classifyRefUpdate(cmd);
	if (op.kind === "create") {
		const peeled = await peelRef(exec, repoId, op.newOid);
		// An existing row for this name means the create lost the race: nothing inserted.
		const inserted = await exec.insertInto("git_ref").values({
			name,
			oid: op.newOid,
			peeled_oid: peeled,
			repo_id: repoId
		}).onConflict((oc) => oc.doNothing()).returningAll().execute();
		const mutated = inserted.length === 1;
		return { mutated, ok: mutated };
	}
	if (op.kind === "delete") {
		const byName = exec.deleteFrom("git_ref").where("repo_id", "=", repoId).where("name", "=", name);
		// Without an expected old oid the delete is unconditional.
		const guarded = op.oldOid === null ? byName : byName.where("oid", "=", op.oldOid);
		const removed = await guarded.returningAll().execute();
		const mutated = removed.length === 1;
		// Deleting an already-absent ref with no expected oid still reports success.
		return { mutated, ok: op.oldOid === null || mutated };
	}
	if (op.kind === "update") {
		const peeled = await peelRef(exec, repoId, op.newOid);
		const updated = await exec.updateTable("git_ref").set({
			oid: op.newOid,
			peeled_oid: peeled,
			symref_target: null
		}).where("repo_id", "=", repoId).where("name", "=", name).where("oid", "=", op.oldOid).returningAll().execute();
		const mutated = updated.length === 1;
		return { mutated, ok: mutated };
	}
}
|
|
1880
|
+
/**
 * Postgres-backed git refs: direct refs (name → oid) and symbolic refs
 * (HEAD → refs/heads/...). Pushes go through `applyRefUpdates`; `setRef` and
 * `setSymref` are the seeding helpers.
 *
 * Like the object store, this sits at the wire→DB boundary: the repo name is
 * resolved to its bigint surrogate (memoized) here, ref names cast to their
 * branded column type, and oids are converted between hex and raw `bytea`.
 *
 * @param pg Postgres connection, wrapped by `initKysely`
 * @returns the ref-store API
 */
function createRefStore(pg) {
	const db = initKysely(pg);
	const repos = createRepoResolver(db);
	/** Orders commands by ref name (plain code-unit comparison). */
	const byRefName = (a, b) => {
		if (a.ref < b.ref) return -1;
		if (a.ref > b.ref) return 1;
		return 0;
	};
	return {
		/**
		 * Apply a batch of ref CAS updates. Non-atomic (the default push mode): every
		 * ref is independent and one flag is returned per command. Atomic
		 * (`--atomic`): all-or-nothing inside one transaction — a single failed CAS
		 * rolls back every command and the result is all-false (spec §3.6).
		 */
		async applyRefUpdates(repoId, commands, atomic) {
			const id = await repos.ensureRepoId(repoId);
			await ensureHeadDefault(db, id);
			if (!atomic) {
				const results = [];
				let mutated = false;
				for (const cmd of commands) {
					const outcome = await casRefUpdate(db, id, cmd);
					results.push(outcome.ok);
					mutated = mutated || outcome.mutated;
				}
				if (mutated) await stampPushed(db, id);
				return results;
			}
			// The atomic batch is applied in ref-name order, not in wire order.
			const ordered = [...commands].sort(byRefName);
			let anyMutated = false;
			try {
				await db.transaction().execute(async (trx) => {
					for (const cmd of ordered) {
						const outcome = await casRefUpdate(trx, id, cmd);
						// Throwing aborts the transaction, undoing the earlier commands too.
						if (!outcome.ok) throw new AtomicAbort();
						if (outcome.mutated) anyMutated = true;
					}
				});
			} catch (error) {
				if (!(error instanceof AtomicAbort)) throw error;
				return commands.map(() => false);
			}
			if (anyMutated) await stampPushed(db, id);
			return commands.map(() => true);
		},
		/** Target of the symbolic ref `name`, or `null` when the repo or the ref is
		 * missing or the ref is not symbolic. */
		async getSymref(repoId, name) {
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return null;
			const row = await db.selectFrom("git_ref").select("symref_target").where("repo_id", "=", id).where("name", "=", name).executeTakeFirst();
			return row?.symref_target ?? null;
		},
		/** Direct refs (name → oid + peeled tag target), sorted by name. Symbolic
		 * refs are excluded. */
		async listRefs(repoId) {
			const id = await repos.resolveRepoId(repoId);
			if (id === null) return [];
			const rows = await db.selectFrom("git_ref").select([
				"name",
				"oid",
				"peeled_oid"
			]).where("repo_id", "=", id).where("oid", "is not", null).orderBy("name").execute();
			return rows.map((row) => ({
				name: row.name,
				oid: row.oid.toString("hex"),
				peeled: row.peeled_oid ? row.peeled_oid.toString("hex") : undefined
			}));
		},
		/** Seed helper: point `name` at hex `oid`, replacing any existing row
		 * (including a symbolic one). */
		async setRef(repoId, name, oid) {
			const id = await repos.ensureRepoId(repoId);
			const value = toOid(oid);
			const peeled = await peelRef(db, id, value);
			const insert = db.insertInto("git_ref").values({
				name,
				oid: value,
				peeled_oid: peeled,
				repo_id: id
			});
			await insert.onConflict((oc) => oc.columns(["repo_id", "name"]).doUpdateSet({
				oid: value,
				peeled_oid: peeled,
				symref_target: null
			})).execute();
		},
		/** Seed helper: make `name` a symbolic ref to `target`, clearing any direct
		 * oid stored under that name. */
		async setSymref(repoId, name, target) {
			const id = await repos.ensureRepoId(repoId);
			const insert = db.insertInto("git_ref").values({
				name,
				repo_id: id,
				symref_target: target
			});
			await insert.onConflict((oc) => oc.columns(["repo_id", "name"]).doUpdateSet({
				oid: null,
				peeled_oid: null,
				symref_target: target
			})).execute();
		}
	};
}
|
|
@@ -1268,6 +2292,21 @@ async function readRequestBody(c) {
|
|
|
1268
2292
|
/**
 * Reject a fetch that did not negotiate git protocol v2.
 *
 * @param header value of the `Git-Protocol` request header (may be absent); it is
 *   split on ":" and each part is trimmed before comparison
 * @throws {GitProtocolError} when no part equals `version=2`
 */
function assertProtocolV2(header) {
	const parts = (header ?? "").split(":");
	const speaksV2 = parts.some((part) => part.trim() === "version=2");
	if (speaksV2) return;
	throw new GitProtocolError("pggit serves fetch over git protocol v2 only; set protocol.version=2 (git ≥ 2.26 negotiates it by default)");
}
|
|
2295
|
+
/** Recognised smart-HTTP path endings, as `[path suffix, service name]` pairs. */
const GIT_PATH_SUFFIXES = [
	["/info/refs", "info/refs"],
	["/git-upload-pack", "git-upload-pack"],
	["/git-receive-pack", "git-receive-pack"]
];
/**
 * Split a request path into repo id and git service.
 *
 * @param path request path, e.g. `/org/repo/info/refs`; the first character is
 *   dropped as the leading slash
 * @returns `{ repoId, service }`, or `null` when the path ends in none of the
 *   known suffixes or nothing is left for the repo id
 */
function parseGitPath(path) {
	const match = GIT_PATH_SUFFIXES.find(([suffix]) => path.endsWith(suffix));
	if (match === undefined) return null;
	const [suffix, service] = match;
	// Everything between the leading slash and the service suffix names the repo.
	const repoId = path.slice(1, path.length - suffix.length);
	if (repoId.length === 0) return null;
	return { repoId, service };
}
|
|
1271
2310
|
function backendFor(deps, repoId) {
|
|
1272
2311
|
return {
|
|
1273
2312
|
buildPack: (wants, haves, omitBlobs, includeTag) => deps.objects.buildPack(repoId, wants, haves, omitBlobs, includeTag),
|
|
@@ -1305,7 +2344,8 @@ async function receivePackAdvertBody(deps, repoId) {
|
|
|
1305
2344
|
}
|
|
1306
2345
|
/**
|
|
1307
2346
|
* Build the git-remote Hono app (smart-HTTP, protocol v2 fetch). Mountable into
|
|
1308
|
-
* a host app via `host.
|
|
2347
|
+
* a host app via `host.mount("/git", createGitApp(deps).fetch)` (mount, NOT route —
|
|
2348
|
+
* mount strips the prefix so the catch-all parses a mount-relative path); the host owns the
|
|
1309
2349
|
* Postgres lifecycle behind `deps`.
|
|
1310
2350
|
*/
|
|
1311
2351
|
function createGitApp(deps, opts = {}) {
|
|
@@ -1321,7 +2361,9 @@ function createGitApp(deps, opts = {}) {
|
|
|
1321
2361
|
return c.text("internal server error", 500);
|
|
1322
2362
|
});
|
|
1323
2363
|
app.get("/health", (c) => c.text("ok"));
|
|
1324
|
-
app.get("
|
|
2364
|
+
app.get("/*", async (c) => {
|
|
2365
|
+
const parsed = parseGitPath(c.req.path);
|
|
2366
|
+
if (parsed?.service !== "info/refs") return c.notFound();
|
|
1325
2367
|
const service = c.req.query("service");
|
|
1326
2368
|
if (service === "git-upload-pack") {
|
|
1327
2369
|
assertProtocolV2(c.req.header("git-protocol"));
|
|
@@ -1331,7 +2373,7 @@ function createGitApp(deps, opts = {}) {
|
|
|
1331
2373
|
});
|
|
1332
2374
|
}
|
|
1333
2375
|
if (service === "git-receive-pack") {
|
|
1334
|
-
const body = await receivePackAdvertBody(deps,
|
|
2376
|
+
const body = await receivePackAdvertBody(deps, parsed.repoId);
|
|
1335
2377
|
return c.body(toArrayBuffer(body), 200, {
|
|
1336
2378
|
"Cache-Control": "no-cache",
|
|
1337
2379
|
"Content-Type": "application/x-git-receive-pack-advertisement"
|
|
@@ -1339,24 +2381,46 @@ function createGitApp(deps, opts = {}) {
|
|
|
1339
2381
|
}
|
|
1340
2382
|
return c.text(`unsupported service ${JSON.stringify(service)}`, 403);
|
|
1341
2383
|
});
|
|
1342
|
-
app.post("
|
|
1343
|
-
const
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
"
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
2384
|
+
app.post("/*", async (c) => {
|
|
2385
|
+
const parsed = parseGitPath(c.req.path);
|
|
2386
|
+
if (!parsed) return c.notFound();
|
|
2387
|
+
if (parsed.service === "git-upload-pack") {
|
|
2388
|
+
const out = await handleUploadPack(await readRequestBody(c), backendFor(deps, parsed.repoId));
|
|
2389
|
+
count("wireBytes", out.length);
|
|
2390
|
+
return c.body(toArrayBuffer(out), 200, {
|
|
2391
|
+
"Cache-Control": "no-cache",
|
|
2392
|
+
"Content-Type": "application/x-git-upload-pack-result"
|
|
2393
|
+
});
|
|
2394
|
+
}
|
|
2395
|
+
if (parsed.service === "git-receive-pack") {
|
|
2396
|
+
const out = await handleReceivePack(await readRequestBody(c), receiveBackendFor(deps, parsed.repoId));
|
|
2397
|
+
return c.body(toArrayBuffer(out), 200, {
|
|
2398
|
+
"Cache-Control": "no-cache",
|
|
2399
|
+
"Content-Type": "application/x-git-receive-pack-result"
|
|
2400
|
+
});
|
|
2401
|
+
}
|
|
2402
|
+
return c.notFound();
|
|
1356
2403
|
});
|
|
1357
2404
|
return app;
|
|
1358
2405
|
}
|
|
2406
|
+
/**
 * Assemble the git-app dependencies from one Postgres connection — the factory
 * for embedding pggit in a host app. The host owns the `pg` lifecycle and mounts
 * pggit with `host.mount("/git", createGitApp(createGitDeps(pg)).fetch)`:
 * `app.mount` strips the mount prefix, so the catch-all path parser sees a
 * mount-relative path (`app.route` would not). `startServer` (server.ts) is the
 * standalone equivalent of this composition. `snapshots` is always included, so
 * a mounted host gets the queryable `repo_file` projection maintained on push
 * (the read surface).
 *
 * @param pg Postgres connection shared by all three stores
 * @returns `{ objects, refs, snapshots }`
 */
function createGitDeps(pg) {
	const objects = createObjectStore(pg);
	const refs = createRefStore(pg);
	const snapshots = createRepoFileProjection(pg);
	return { objects, refs, snapshots };
}
|
|
1359
2423
|
|
|
1360
2424
|
//#endregion
|
|
1361
|
-
export { createGc, createGcScheduler, createGitApp };
|
|
2425
|
+
export { createGc, createGcScheduler, createGitApp, createGitDeps };
|
|
1362
2426
|
//# sourceMappingURL=index.mjs.map
|