@crewhaus/federation-discovery 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "@crewhaus/federation-discovery",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Federation peer lookup: DNS SRV + .well-known/crewhaus.json with TTL caching (Section 34)",
6
+ "main": "src/index.ts",
7
+ "types": "src/index.ts",
8
+ "exports": {
9
+ ".": "./src/index.ts"
10
+ },
11
+ "scripts": {
12
+ "test": "bun test src"
13
+ },
14
+ "dependencies": {
15
+ "@crewhaus/errors": "0.0.0"
16
+ },
17
+ "license": "Apache-2.0",
18
+ "author": {
19
+ "name": "Max Meier",
20
+ "email": "max@studiomax.io",
21
+ "url": "https://studiomax.io"
22
+ },
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/crewhaus/factory.git",
26
+ "directory": "packages/federation-discovery"
27
+ },
28
+ "homepage": "https://github.com/crewhaus/factory/tree/main/packages/federation-discovery#readme",
29
+ "bugs": {
30
+ "url": "https://github.com/crewhaus/factory/issues"
31
+ },
32
+ "publishConfig": {
33
+ "access": "restricted"
34
+ },
35
+ "files": [
36
+ "src",
37
+ "README.md",
38
+ "LICENSE",
39
+ "NOTICE"
40
+ ]
41
+ }
@@ -0,0 +1,212 @@
1
+ import { describe, expect, test } from "bun:test";
2
+
3
+ import {
4
+ FederationDiscoveryError,
5
+ type PeerRecord,
6
+ type SrvResolver,
7
+ type WellKnownFetcher,
8
+ createDiscovery,
9
+ discoverDeployment,
10
+ } from "./index";
11
+
12
/** A syntactically valid 64-character hex fingerprint shared by all tests. */
const HEX64 = "a".repeat(64);

/** A `.well-known/crewhaus.json` payload that passes every validation rule. */
const goodPayload = {
  endpoint: "https://federation.deployment-b.example",
  version: "crewhaus.federation.v1",
  supportedShapes: ["cli", "crew"],
  publicKeyFingerprint: HEX64,
};

/**
 * Build a fake fetcher that ignores the requested URL and always answers
 * with `status` and the JSON serialisation of `payload`.
 *
 * The payload is serialised on every call rather than once up front.
 */
function fetcherReturning(payload: unknown, status = 200): WellKnownFetcher {
  const respond: WellKnownFetcher = async () => {
    const body = JSON.stringify(payload);
    return { status, body };
  };
  return respond;
}
24
+
25
// Successful lookups: direct .well-known, SRV-assisted, and SRV failure fallback.
describe("createDiscovery — happy path (T2)", () => {
  test("falls back to .well-known when no SRV configured", async () => {
    const discovery = createDiscovery({ wellKnownFetcher: fetcherReturning(goodPayload) });

    const peer = await discovery.discover("deployment-b.example");

    expect(peer.endpoint).toBe("https://federation.deployment-b.example");
    expect(peer.version).toBe("crewhaus.federation.v1");
    expect(peer.supportedShapes).toEqual(["cli", "crew"]);
    expect(peer.publicKeyFingerprint).toBe(HEX64);
  });

  test("SRV-then-.well-known: SRV returns target, then well-known is fetched against that endpoint", async () => {
    const requestedUrls: string[] = [];
    const srvResolver: SrvResolver = async () => ({
      records: [{ priority: 10, weight: 5, port: 8443, name: "fed.deployment-b.example" }],
      ttl: 60,
    });
    // Records every URL it is asked for so the test can inspect the first one.
    const wellKnownFetcher: WellKnownFetcher = async (url) => {
      requestedUrls.push(url);
      return { status: 200, body: JSON.stringify(goodPayload) };
    };
    const discovery = createDiscovery({
      srvDomain: "internal.crewhaus",
      srvResolver,
      wellKnownFetcher,
    });

    const peer = await discovery.discover("deployment-b");

    expect(requestedUrls[0]).toBe(
      "https://fed.deployment-b.example:8443/.well-known/crewhaus.json",
    );
    // On an SRV hit the returned endpoint is the SRV-derived URL; the
    // endpoint advertised in the well-known body is overridden.
    expect(peer.endpoint).toBe("https://fed.deployment-b.example:8443");
    expect(peer.publicKeyFingerprint).toBe(HEX64);
  });

  test("SRV miss → falls through to direct .well-known fetch", async () => {
    const failingSrv: SrvResolver = async () => {
      throw new Error("ENOTFOUND");
    };
    const discovery = createDiscovery({
      srvDomain: "internal.crewhaus",
      srvResolver: failingSrv,
      wellKnownFetcher: fetcherReturning(goodPayload),
    });

    const peer = await discovery.discover("deployment-b.example");

    expect(peer.endpoint).toBe(goodPayload.endpoint);
  });
});
71
+
72
// Every rejection path: bad ids, bad HTTP status, bad JSON, bad record fields.
describe("createDiscovery — failure modes (T8)", () => {
  const PEER = "deployment-b.example";

  test("rejects malformed deployment id", async () => {
    const discovery = createDiscovery({ wellKnownFetcher: fetcherReturning(goodPayload) });

    await expect(discovery.discover("../bad")).rejects.toThrow(/invalid deployment id/);
    await expect(discovery.discover("")).rejects.toThrow(/non-empty/);
  });

  test("rejects non-200 .well-known", async () => {
    const unavailable: WellKnownFetcher = async () => ({ status: 503, body: "" });
    const discovery = createDiscovery({ wellKnownFetcher: unavailable });

    await expect(discovery.discover(PEER)).rejects.toThrow(/returned 503/);
  });

  test("rejects malformed JSON", async () => {
    const garbled: WellKnownFetcher = async () => ({ status: 200, body: "not json{" });
    const discovery = createDiscovery({ wellKnownFetcher: garbled });

    await expect(discovery.discover(PEER)).rejects.toThrow(/invalid JSON/);
  });

  test("rejects http:// endpoint (https only)", async () => {
    const insecurePayload = { ...goodPayload, endpoint: "http://insecure.example" };
    const discovery = createDiscovery({ wellKnownFetcher: fetcherReturning(insecurePayload) });

    await expect(discovery.discover(PEER)).rejects.toThrow(/must be https/);
  });

  test("rejects malformed publicKeyFingerprint", async () => {
    const shortFingerprint = { ...goodPayload, publicKeyFingerprint: "abc" };
    const discovery = createDiscovery({ wellKnownFetcher: fetcherReturning(shortFingerprint) });

    await expect(discovery.discover(PEER)).rejects.toThrow(
      /publicKeyFingerprint must be 64-char hex/,
    );
  });

  test("rejects record missing endpoint+version", async () => {
    const fingerprintOnly = { publicKeyFingerprint: HEX64 };
    const discovery = createDiscovery({ wellKnownFetcher: fetcherReturning(fingerprintOnly) });

    await expect(discovery.discover(PEER)).rejects.toThrow(/endpoint\+version are required/);
  });
});
124
+
125
// Positive and negative caching, TTL expiry against an injected clock, reset and stats.
describe("createDiscovery — caching (T9 property-style)", () => {
  /**
   * Fetcher that counts how often it is invoked. Answers with `goodPayload`
   * for status 200 and with an empty body for any other status.
   */
  function countingFetcher(status = 200): { fetcher: WellKnownFetcher; calls: () => number } {
    let invocations = 0;
    const fetcher: WellKnownFetcher = async () => {
      invocations++;
      return { status, body: status === 200 ? JSON.stringify(goodPayload) : "" };
    };
    return { fetcher, calls: () => invocations };
  }

  test("hits the cache before TTL expiry", async () => {
    const { fetcher, calls } = countingFetcher();
    const discovery = createDiscovery({ wellKnownFetcher: fetcher });

    for (let attempt = 0; attempt < 3; attempt++) {
      await discovery.discover("deployment-b.example");
    }

    expect(calls()).toBe(1);
  });

  test("re-fetches after TTL expires", async () => {
    let clock = 1000;
    const { fetcher, calls } = countingFetcher();
    const discovery = createDiscovery({ wellKnownFetcher: fetcher, now: () => clock });

    await discovery.discover("deployment-b.example");
    expect(calls()).toBe(1);

    clock += 5_000;
    await discovery.discover("deployment-b.example");
    expect(calls()).toBe(1); // still in TTL

    clock += 60_000;
    await discovery.discover("deployment-b.example");
    expect(calls()).toBe(2); // TTL expired
  });

  test("negative cache: failed lookup short-circuits within negativeTtlMs", async () => {
    let clock = 0;
    const { fetcher, calls } = countingFetcher(503);
    const discovery = createDiscovery({
      wellKnownFetcher: fetcher,
      now: () => clock,
      negativeTtlMs: 5_000,
    });

    await expect(discovery.discover("deployment-b.example")).rejects.toThrow();

    clock += 1_000;
    await expect(discovery.discover("deployment-b.example")).rejects.toThrow(/cached negative/);
    expect(calls()).toBe(1);

    clock += 5_000;
    await expect(discovery.discover("deployment-b.example")).rejects.toThrow(/returned 503/);
    expect(calls()).toBe(2);
  });

  test("reset() clears the cache", async () => {
    const { fetcher, calls } = countingFetcher();
    const discovery = createDiscovery({ wellKnownFetcher: fetcher });

    await discovery.discover("deployment-b.example");
    expect(calls()).toBe(1);

    discovery.reset();
    await discovery.discover("deployment-b.example");
    expect(calls()).toBe(2);
  });

  test("cacheStats reports current entries", async () => {
    const discovery = createDiscovery({ wellKnownFetcher: fetcherReturning(goodPayload) });

    for (const peer of ["deployment-b.example", "deployment-c.example"]) {
      await discovery.discover(peer);
    }

    expect(discovery.cacheStats().entries).toBe(2);
  });
});
199
+
200
// The one-shot helper must hand back exactly what a Discovery instance would.
describe("discoverDeployment top-level helper", () => {
  test("returns the resolved record", async () => {
    const expected: PeerRecord = {
      endpoint: goodPayload.endpoint,
      version: "crewhaus.federation.v1",
      supportedShapes: goodPayload.supportedShapes,
      publicKeyFingerprint: HEX64,
    };

    const actual: PeerRecord = await discoverDeployment("deployment-b.example", {
      wellKnownFetcher: fetcherReturning(goodPayload),
    });

    expect(actual).toEqual(expected);
  });
});
package/src/index.ts ADDED
@@ -0,0 +1,258 @@
1
+ /**
2
+ * @crewhaus/federation-discovery — Section 34
3
+ *
4
+ * Peer lookup for federation. Two methods:
5
+ *
6
+ * 1. DNS SRV — `_crewhaus._tcp.<deployment>.<domain>` returns a list
7
+ * of `<weight, priority, port, target>` records. The first record
8
+ * (sorted by priority then weight) wins.
9
+ *
10
+ * 2. `.well-known/crewhaus.json` — `https://<deployment>/.well-known/crewhaus.json`
11
+ * returns a JSON object describing the peer's endpoint, supported
12
+ * shapes, and public-key fingerprint.
13
+ *
14
+ * The discovered record is cached for 60s. The TTL reported by the SRV
15
+ * resolver is not applied yet. Negative results are cached too (10s by
16
+ * default) so a misconfigured peer doesn't trigger DNS storms.
17
+ *
18
+ * Both lookups are pluggable via injected resolvers — production uses
19
+ * `node:dns/promises.resolveSrv` and `globalThis.fetch`, tests pass
20
+ * fake implementations.
21
+ */
22
+ import { CrewhausError } from "@crewhaus/errors";
23
+
24
/**
 * Error type thrown by this module for every discovery failure it reports
 * itself: invalid deployment ids, non-200 or unparseable `.well-known`
 * responses, invalid peer records, and cached negative results.
 *
 * The literal `"config"` is passed as the first argument of the
 * `CrewhausError` constructor (presumably the error category; confirm
 * against `@crewhaus/errors`).
 */
export class FederationDiscoveryError extends CrewhausError {
  /** Fixed error name, so instances can be told apart from other CrewhausErrors. */
  override readonly name = "FederationDiscoveryError";

  /**
   * @param message Human-readable description of what went wrong.
   * @param cause Optional underlying error or thrown value, forwarded to the base class.
   */
  constructor(message: string, cause?: unknown) {
    super("config", message, cause);
  }
}
30
+
31
/** A validated description of a federation peer, as returned by discovery. */
export type PeerRecord = {
  /** Discovered HTTPS endpoint, e.g. https://federation.deployment-b.example */
  readonly endpoint: string;
  /** Federation protocol version the peer speaks. */
  readonly version: string;
  /** Target shapes the peer's roles can serve. */
  readonly supportedShapes: ReadonlyArray<string>;
  /** SHA256 fingerprint (hex, no separators) of the peer's leaf cert. */
  readonly publicKeyFingerprint: string;
};

/** One DNS SRV answer. Lower `priority` is preferred; ties go to the higher `weight`. */
export type SrvRecord = {
  readonly priority: number;
  readonly weight: number;
  readonly port: number;
  /** Target host name; combined with `port` to form the peer's base URL. */
  readonly name: string;
};

/**
 * Resolves an SRV name such as `_crewhaus._tcp.<deployment>.<domain>`.
 *
 * `ttl` is part of the contract, but its unit is not fixed by this file
 * (presumably seconds, as in DNS; confirm with the resolver implementation).
 */
export type SrvResolver = (name: string) => Promise<{
  readonly records: ReadonlyArray<SrvRecord>;
  readonly ttl: number;
}>;

/** Fetches a `.well-known/crewhaus.json` URL and reports the HTTP status and raw body text. */
export type WellKnownFetcher = (url: string) => Promise<{
  readonly status: number;
  readonly body: string;
}>;

/** Options accepted by `createDiscovery` and `discoverDeployment`. All are optional. */
export type DiscoveryConfig = {
  /** Domain suffix for SRV names. SRV is attempted only when this and `srvResolver` are both set. */
  readonly srvDomain?: string;
  /** SRV lookup implementation. */
  readonly srvResolver?: SrvResolver;
  /** `.well-known` fetch implementation. Defaults to a fetcher built on the global `fetch`. */
  readonly wellKnownFetcher?: WellKnownFetcher;
  /** Clock returning the current time in milliseconds. Defaults to `Date.now()`. */
  readonly now?: () => number;
  /** Cache TTL for negative-result lookups. Default 10s. */
  readonly negativeTtlMs?: number;
  /**
   * Allow `http://localhost:*` / `http://127.0.0.1:*` endpoints. Default
   * false (production deployments must use HTTPS). Used by tests to
   * stand up in-process Bun.serve fixtures without TLS.
   */
  readonly allowInsecureLocalhost?: boolean;
};

/** A discovery client with its own cache. */
export type Discovery = {
  /** Resolve `deployment` to a peer record; rejects with `FederationDiscoveryError` on failure. */
  discover(deployment: string): Promise<PeerRecord>;
  /**
   * Inspect cache state — used by ops + tests. Counts positive and negative
   * entries alike; an expired entry stays listed until that deployment is
   * looked up again or the cache is reset.
   */
  cacheStats(): {
    entries: number;
    expirations: ReadonlyArray<{ deployment: string; expiresAt: number }>;
  };
  /** Drop every cached entry. */
  reset(): void;
};

/** Cached outcome of a lookup: a resolved record ("hit") or a remembered failure ("miss"). */
type CacheEntry =
  | { readonly kind: "hit"; readonly record: PeerRecord; readonly expiresAt: number }
  | { readonly kind: "miss"; readonly expiresAt: number };
87
+
88
/**
 * Create a {@link Discovery} backed by its own in-memory cache.
 *
 * Behaviour of `discover(deployment)`:
 * - The id is validated first; a validation failure is thrown immediately
 *   and is never cached.
 * - An unexpired cached hit is returned as-is. An unexpired cached miss
 *   rejects with a "cached negative" error without any network activity.
 * - Otherwise the peer is looked up. Success is cached for
 *   `recordTtl(record)` ms, failure for `negativeTtlMs` ms (default 10s).
 *
 * @param config Resolver/fetcher overrides, clock and TTL settings.
 * @returns A discovery client; its cache is not shared with other instances.
 * @throws FederationDiscoveryError (as a rejection of `discover`) for every
 *   failure. Errors of other types are wrapped, with the original value
 *   passed along as the cause.
 */
export function createDiscovery(config: DiscoveryConfig = {}): Discovery {
  const cache = new Map<string, CacheEntry>();
  const now = config.now ?? (() => Date.now());
  const negativeTtlMs = config.negativeTtlMs ?? 10_000;

  return {
    async discover(deployment: string): Promise<PeerRecord> {
      assertDeployment(deployment);

      const cached = cache.get(deployment);
      if (cached !== undefined) {
        if (cached.expiresAt > now()) {
          if (cached.kind === "hit") return cached.record;
          throw new FederationDiscoveryError(`peer ${deployment} unreachable (cached negative)`);
        }
        // Expired: evict and fall through to a fresh lookup.
        cache.delete(deployment);
      }

      try {
        const record = await lookup(deployment, config);
        cache.set(deployment, { kind: "hit", record, expiresAt: now() + recordTtl(record) });
        return record;
      } catch (cause) {
        // Remember the failure so repeated calls don't hammer DNS or the peer.
        cache.set(deployment, { kind: "miss", expiresAt: now() + negativeTtlMs });
        if (cause instanceof FederationDiscoveryError) throw cause;
        // Injected resolvers/fetchers may throw anything, not just Error
        // instances; avoid reporting "undefined" for non-Error values.
        const detail = cause instanceof Error ? cause.message : String(cause);
        throw new FederationDiscoveryError(
          `peer discovery failed for ${deployment}: ${detail}`,
          cause,
        );
      }
    },

    cacheStats() {
      const expirations = Array.from(cache.entries(), ([deployment, entry]) => ({
        deployment,
        expiresAt: entry.expiresAt,
      }));
      return { entries: cache.size, expirations };
    },

    reset() {
      cache.clear();
    },
  };
}
132
+
133
/** How long, in milliseconds, a successfully discovered record stays cached. */
const DEFAULT_RECORD_TTL_MS = 60_000;

/**
 * Cache lifetime (ms) for a positive discovery result.
 *
 * Every record currently gets the same fixed lifetime; the argument is
 * accepted but not inspected.
 */
function recordTtl(_record: PeerRecord): number {
  const ttlMs = DEFAULT_RECORD_TTL_MS;
  return ttlMs;
}
138
+
139
/** Longest host name (in characters) accepted as a deployment id. */
const MAX_DEPLOYMENT_ID_LENGTH = 253;

/** One host-name label: 1-63 letters/digits/hyphens, not starting or ending with a hyphen. */
const DEPLOYMENT_ID_LABEL = /^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$/;

/**
 * Validate a deployment id before it is interpolated into an SRV name or a
 * `.well-known` URL.
 *
 * The id must be a dot-separated host name. Each label is checked on its
 * own, so ids such as `.`, `..`, `a..b` or `-x` are rejected; a plain
 * character-class check would let them through and produce malformed DNS
 * names and URLs. A trailing dot (`example.com.`) is rejected too, because
 * it leaves an empty label once a domain suffix is appended.
 *
 * @param deployment Deployment id supplied by the caller.
 * @throws FederationDiscoveryError if the id is empty, not a string, or not
 *   a well-formed host name.
 */
function assertDeployment(deployment: string): void {
  if (typeof deployment !== "string" || deployment.length === 0) {
    throw new FederationDiscoveryError("deployment id must be a non-empty string");
  }
  const wellFormed =
    deployment.length <= MAX_DEPLOYMENT_ID_LENGTH &&
    deployment.split(".").every((label) => DEPLOYMENT_ID_LABEL.test(label));
  if (!wellFormed) {
    throw new FederationDiscoveryError(`invalid deployment id: ${deployment}`);
  }
}
148
+
149
/**
 * Resolve `deployment` to a peer record without touching the cache.
 *
 * When both `srvDomain` and `srvResolver` are configured, the SRV name
 * `_crewhaus._tcp.<deployment>.<srvDomain>` is resolved first. The preferred
 * record (lowest priority, then highest weight) gives the base URL that
 * `.well-known` is fetched from, and that URL replaces the endpoint
 * advertised in the fetched record.
 *
 * Any failure inside the SRV branch — resolver error, empty answer, or a
 * failed or invalid `.well-known` at the SRV target — is swallowed on purpose
 * and the lookup falls back to `https://<deployment>/.well-known/crewhaus.json`.
 */
async function lookup(deployment: string, config: DiscoveryConfig): Promise<PeerRecord> {
  // Lower priority first; among equal priorities the heavier record first.
  const byPreference = (a: SrvRecord, b: SrvRecord): number =>
    a.priority - b.priority || b.weight - a.weight;

  if (config.srvDomain && config.srvResolver) {
    const srvName = `_crewhaus._tcp.${deployment}.${config.srvDomain}`;
    try {
      const answer = await config.srvResolver(srvName);
      const [preferred] = [...answer.records].sort(byPreference);
      if (preferred !== undefined) {
        const endpoint = `https://${preferred.name}:${preferred.port}`;
        // SRV carries only host and port, so the rest of the record
        // (version, supportedShapes, fingerprint) still comes from .well-known.
        const advertised = await fetchWellKnown(endpoint, config);
        return { ...advertised, endpoint };
      }
    } catch {
      // Best effort only: fall through to the direct .well-known fetch.
    }
  }

  return fetchWellKnown(`https://${deployment}/.well-known/crewhaus.json`, config);
}
175
+
176
/**
 * Fetch and validate a peer's `.well-known/crewhaus.json`.
 *
 * @param baseOrUrl Either the full well-known URL or a base URL, to which
 *   `/.well-known/crewhaus.json` is appended.
 * @param config Supplies the fetcher (default: the module's fetch-based one)
 *   and the `allowInsecureLocalhost` flag used during validation.
 * @returns The validated peer record.
 * @throws FederationDiscoveryError on a non-200 status, a body that is not
 *   valid JSON, or a record that fails validation. Errors thrown by the
 *   fetcher itself propagate unchanged.
 */
async function fetchWellKnown(baseOrUrl: string, config: DiscoveryConfig): Promise<PeerRecord> {
  const wellKnownPath = "/.well-known/crewhaus.json";
  const url = baseOrUrl.endsWith(wellKnownPath) ? baseOrUrl : `${baseOrUrl}${wellKnownPath}`;

  const fetcher = config.wellKnownFetcher ?? defaultFetcher;
  const response = await fetcher(url);
  if (response.status !== 200) {
    throw new FederationDiscoveryError(`well-known fetch returned ${response.status} for ${url}`);
  }

  let payload: unknown;
  try {
    payload = JSON.parse(response.body);
  } catch (cause) {
    throw new FederationDiscoveryError(`well-known returned invalid JSON for ${url}`, cause);
  }

  const allowInsecureLocalhost = config.allowInsecureLocalhost === true;
  return parsePeerRecord(payload, url, { allowInsecureLocalhost });
}
195
+
196
/** Return the first of `keys` whose value in `source` is a string, or "" if none is. */
function firstStringField(source: Record<string, unknown>, keys: readonly string[]): string {
  for (const key of keys) {
    const value = source[key];
    if (typeof value === "string") return value;
  }
  return "";
}

/** True when `candidate` parses as an absolute URL (so it has a scheme and a host). */
function isParseableUrl(candidate: string): boolean {
  try {
    new URL(candidate);
    return true;
  } catch {
    return false;
  }
}

/**
 * Validate the decoded body of a `.well-known/crewhaus.json` response and
 * normalise it into a {@link PeerRecord}.
 *
 * Accepted fields: `endpoint`, `version`, `supportedShapes` (or
 * `supported_shapes`) and `publicKeyFingerprint` (or
 * `public_key_fingerprint`). Non-string entries of the shapes array are
 * dropped, and a missing or non-array value becomes `[]`. The fingerprint is
 * lower-cased in the result.
 *
 * @param raw Decoded JSON value from the peer (untrusted).
 * @param source URL the value came from; used only in error messages.
 * @param opts `allowInsecureLocalhost` additionally permits
 *   `http://localhost[:port]` and `http://127.0.0.1[:port]` endpoints.
 * @returns The validated, normalised record.
 * @throws FederationDiscoveryError if `raw` is not an object, if endpoint or
 *   version is missing, if the endpoint is not https (or permitted
 *   localhost http), if the endpoint is not a well-formed URL, or if the
 *   fingerprint is not 64 hex characters.
 */
function parsePeerRecord(
  raw: unknown,
  source: string,
  opts: { allowInsecureLocalhost: boolean } = { allowInsecureLocalhost: false },
): PeerRecord {
  if (typeof raw !== "object" || raw === null) {
    throw new FederationDiscoveryError(`peer record at ${source} is not an object`);
  }
  const fields = raw as Record<string, unknown>;

  const endpoint = firstStringField(fields, ["endpoint"]);
  const version = firstStringField(fields, ["version"]);
  const publicKeyFingerprint = firstStringField(fields, [
    "publicKeyFingerprint",
    "public_key_fingerprint",
  ]);
  const shapesRaw = fields["supportedShapes"] ?? fields["supported_shapes"];
  const supportedShapes: string[] = Array.isArray(shapesRaw)
    ? shapesRaw.filter((shape): shape is string => typeof shape === "string")
    : [];

  if (!endpoint || !version) {
    throw new FederationDiscoveryError(`peer record at ${source}: endpoint+version are required`);
  }

  const isHttps = /^https:\/\//.test(endpoint);
  const isPermittedLocalHttp =
    opts.allowInsecureLocalhost &&
    /^http:\/\/(localhost|127\.0\.0\.1)(:\d+)?(\/|$)/.test(endpoint);
  if (!isHttps && !isPermittedLocalHttp) {
    throw new FederationDiscoveryError(
      `peer record at ${source}: endpoint must be https:// (got ${endpoint})`,
    );
  }
  // The scheme check above only looks at the prefix, so a bare "https://"
  // or a host containing whitespace would pass it. Require a parseable URL
  // so callers never receive an endpoint they cannot connect to.
  if (!isParseableUrl(endpoint)) {
    throw new FederationDiscoveryError(
      `peer record at ${source}: endpoint is not a valid URL (got ${endpoint})`,
    );
  }

  if (!/^[0-9a-f]{64}$/i.test(publicKeyFingerprint)) {
    throw new FederationDiscoveryError(
      `peer record at ${source}: publicKeyFingerprint must be 64-char hex (got ${publicKeyFingerprint.length} chars)`,
    );
  }

  return {
    endpoint,
    version,
    supportedShapes,
    publicKeyFingerprint: publicKeyFingerprint.toLowerCase(),
  };
}
242
+
243
/**
 * Fetcher used when no `wellKnownFetcher` is configured: issues a GET via the
 * global `fetch` with `Accept: application/json` and returns the HTTP status
 * together with the response body read as text.
 */
const defaultFetcher: WellKnownFetcher = async (url) => {
  const response = await fetch(url, {
    method: "GET",
    headers: { Accept: "application/json" },
  });
  const body = await response.text();
  return { status: response.status, body };
};
247
+
248
/**
 * One-shot lookup behind `crewhaus federation discover <deployment>`.
 *
 * A fresh discovery instance is created for each call, so nothing is cached
 * between invocations.
 *
 * @param deployment Deployment id to resolve.
 * @param config Same options as `createDiscovery`.
 * @returns The resolved peer record as a plain object, for the CLI to
 *   serialise (e.g. as JSON for piping into jq).
 */
export async function discoverDeployment(
  deployment: string,
  config: DiscoveryConfig = {},
): Promise<PeerRecord> {
  return createDiscovery(config).discover(deployment);
}