@checkstack/satellite-backend 0.3.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,70 +1,141 @@
1
1
  import type { Logger } from "@checkstack/backend-api";
2
2
  import type { SignalService } from "@checkstack/signal-common";
3
3
  import type { SatelliteService } from "./service";
4
- import {
5
- SATELLITE_STATUS_CHANGED,
6
- OFFLINE_THRESHOLD_MS,
7
- } from "@checkstack/satellite-common";
4
+ import type { SatelliteConnectionEvent } from "./entity";
5
+ import { computeStatus } from "./status";
6
+ import { SATELLITE_STATUS_CHANGED } from "@checkstack/satellite-common";
8
7
 
9
8
  /**
10
- * Monitors satellite heartbeats and broadcasts status change signals.
11
- * Tracks previous state in-memory to detect transitions (online → offline, offline → online).
9
+ * Plug-point for driving the heartbeat-lost (`online` → `offline`) edge into
10
+ * the reactive `satellite-connection` entity (reactive automation engine
11
+ * §10.6). Bound from `afterPluginsReady`; when not provided, no entity state is
12
+ * mirrored.
13
+ *
14
+ * The monitor flips ONLY `lastConnectionEvent` to `"heartbeat_lost"` (leaving
15
+ * the already-aged `lastHeartbeatAt` untouched, since it is what made the
16
+ * computed status `offline` in the first place). The change-deriver re-fires
17
+ * `satellite.heartbeat_lost`. The opposite edge (offline→online) is mirrored as
18
+ * `connected` by the WS handler on reconnect, so the monitor leaves it alone.
19
+ */
20
+ export interface SatelliteHeartbeatEntitySink {
21
+ mirror: (satelliteId: string) => Promise<void>;
22
+ }
23
+
24
+ /**
25
+ * Monitors satellite heartbeats and detects the online→offline transition from
26
+ * DURABLE state alone — no pod-local baseline.
27
+ *
28
+ * ## Horizontal-scale correctness
29
+ *
30
+ * The heartbeat-check job runs under ONE consumer group claimed by a VARYING
31
+ * pod. A process-local "previous status" map is therefore wrong: a pod with an
32
+ * empty map never sees the online→offline edge, so `connectionStatus` could get
33
+ * stuck `online` forever after a pod crash. This monitor instead reads every
34
+ * satellite's durable `(lastHeartbeatAt, lastConnectionEvent)`, computes status
35
+ * via {@link computeStatus} (the same wall-clock liveness rule the entity read
36
+ * uses), and detects the heartbeat-lost edge purely from durable state:
37
+ *
38
+ * computed status is `offline` AND `lastConnectionEvent === "connected"`
39
+ * ⇒ this satellite just lost its heartbeat (it was last marked connected,
40
+ * but its heartbeat has now aged past the offline threshold).
41
+ *
42
+ * The mutate that flips `lastConnectionEvent` to `"heartbeat_lost"` is
43
+ * IDEMPOTENT across pods and redelivery: once it is `"heartbeat_lost"`, the
44
+ * predicate above is false, so re-runs (on any pod) are no-ops, and the entity
45
+ * handle's diff-on-unchanged suppresses any duplicate transition/event. Any pod
46
+ * can therefore drive the edge correctly, regardless of which pod (if any) ever
47
+ * observed the satellite online in memory.
12
48
  */
13
49
  export class HeartbeatMonitor {
14
50
  /**
15
- * In-memory tracking of each satellite's last known status.
16
- * Used to detect transitions and avoid redundant signal broadcasts.
51
+ * Pod-local broadcast-dedup ONLY (never the source of truth). The durable
52
+ * `lastConnectionEvent` flip is what makes detection idempotent; this set
53
+ * merely avoids re-broadcasting the same status-change signal from this pod on
54
+ * back-to-back checks. A fresh pod with an empty set still detects + mirrors
55
+ * the edge from durable state — it just also broadcasts once, which is benign.
17
56
  */
18
- private previousStatuses = new Map<string, "online" | "offline">();
57
+ private broadcastedOffline = new Set<string>();
19
58
 
20
59
  constructor(
21
60
  private service: SatelliteService,
22
61
  private signalService: SignalService,
23
62
  private logger: Logger,
63
+ private entitySink?: SatelliteHeartbeatEntitySink,
24
64
  ) {}
25
65
 
26
66
  /**
27
- * Check all satellites and broadcast status change signals for any transitions.
28
- * Called periodically by a recurring queue job.
67
+ * Check all satellites and drive the heartbeat-lost edge for any that have
68
+ * aged out while still marked connected. Called periodically by a recurring
69
+ * queue job; safe to run on any pod and to redeliver.
29
70
  */
30
71
  async checkHeartbeats(): Promise<void> {
31
- const allSatellites = await this.service.listSatellites();
72
+ const rows = await this.service.listConnectionLiveness();
73
+ const liveIds = new Set(rows.map((r) => r.id));
32
74
 
33
- for (const satellite of allSatellites) {
34
- const previousStatus = this.previousStatuses.get(satellite.id);
35
- const currentStatus = satellite.status;
75
+ for (const row of rows) {
76
+ const status = computeStatus(row.lastHeartbeatAt);
77
+ const lostHeartbeat = this.hasLostHeartbeat({
78
+ status,
79
+ lastConnectionEvent: row.lastConnectionEvent,
80
+ });
36
81
 
37
- // Detect transition
38
- if (previousStatus !== undefined && previousStatus !== currentStatus) {
39
- this.logger.info(
40
- `Satellite ${satellite.name} (${satellite.region}) status: ${previousStatus} ${currentStatus}`,
41
- );
82
+ if (!lostHeartbeat) {
83
+ // Still online (or already past the lost edge / never connected):
84
+ // nothing to detect. Clear the broadcast-dedup marker once a satellite
85
+ // is no longer in the lost state so a future lost edge re-broadcasts.
86
+ if (status === "online") this.broadcastedOffline.delete(row.id);
87
+ continue;
88
+ }
89
+
90
+ // Durable heartbeat-lost edge: computed offline while still marked
91
+ // `connected`. Detected from durable state, so this fires correctly from
92
+ // ANY pod with no prior in-memory knowledge of the satellite.
93
+ this.logger.info(
94
+ `Satellite ${row.name} (${row.region}) lost heartbeat (online → offline)`,
95
+ );
42
96
 
97
+ // Broadcast the status-change signal once per offline edge from this pod.
98
+ if (!this.broadcastedOffline.has(row.id)) {
99
+ this.broadcastedOffline.add(row.id);
43
100
  await this.signalService.broadcast(SATELLITE_STATUS_CHANGED, {
44
- satelliteId: satellite.id,
45
- status: currentStatus,
46
- name: satellite.name,
47
- region: satellite.region,
101
+ satelliteId: row.id,
102
+ status: "offline",
103
+ name: row.name,
104
+ region: row.region,
48
105
  });
49
106
  }
50
107
 
51
- this.previousStatuses.set(satellite.id, currentStatus);
108
+ // Drive the entity edge. The mutate is idempotent: it flips
109
+ // `lastConnectionEvent` to `"heartbeat_lost"`, after which this branch is
110
+ // never re-entered for the same satellite (re-runs are no-ops).
111
+ if (this.entitySink) {
112
+ try {
113
+ await this.entitySink.mirror(row.id);
114
+ } catch (error) {
115
+ this.logger.error(
116
+ `Failed to mirror satellite-connection (heartbeat_lost) for ${row.name}:`,
117
+ error,
118
+ );
119
+ }
120
+ }
52
121
  }
53
122
 
54
- // Clean up tracked satellites that no longer exist
55
- const currentIds = new Set(allSatellites.map((s) => s.id));
56
- for (const trackedId of this.previousStatuses.keys()) {
57
- if (!currentIds.has(trackedId)) {
58
- this.previousStatuses.delete(trackedId);
59
- }
123
+ // Drop broadcast-dedup markers for satellites that no longer exist.
124
+ for (const id of this.broadcastedOffline) {
125
+ if (!liveIds.has(id)) this.broadcastedOffline.delete(id);
60
126
  }
61
127
  }
62
128
 
63
129
  /**
64
- * Get the offline threshold in milliseconds.
65
- * Exposed for testing.
130
+ * Pure predicate: a satellite has just lost its heartbeat when its computed
131
+ * status is `offline` but its last recorded lifecycle edge still says it was
132
+ * `connected`. Once the edge is mirrored (`lastConnectionEvent` becomes
133
+ * `"heartbeat_lost"`), this returns false — the idempotency guarantee.
66
134
  */
67
- static get OFFLINE_THRESHOLD_MS(): number {
68
- return OFFLINE_THRESHOLD_MS;
135
+ private hasLostHeartbeat(props: {
136
+ status: "online" | "offline";
137
+ lastConnectionEvent: SatelliteConnectionEvent | null;
138
+ }): boolean {
139
+ return props.status === "offline" && props.lastConnectionEvent === "connected";
69
140
  }
70
141
  }
package/src/hooks.ts CHANGED
@@ -2,8 +2,15 @@ import { createHook } from "@checkstack/backend-api";
2
2
 
3
3
  /**
4
4
  * Satellite hooks for cross-plugin communication.
5
- * Other plugins (e.g., healthcheck-backend) can subscribe to clean up
6
- * when a satellite is removed.
5
+ *
6
+ * The connection-lifecycle hooks (`satellite.connected` / `.disconnected` /
7
+ * `.heartbeat_lost`) were removed in Phase 4 (reactive automation engine
8
+ * §10.6): satellite connection state is now the reactive
9
+ * `satellite-connection` entity (see `./entity.ts`), and the equivalent
10
+ * trigger events are derived from its changes.
11
+ *
12
+ * `satellite.removed` stays — it is a deletion/cleanup signal (consumed by
13
+ * healthcheck-backend to scrub the satellite's id), not entity state.
7
14
  */
8
15
  export const satelliteHooks = {
9
16
  /**
package/src/index.ts CHANGED
@@ -9,6 +9,10 @@ import {
9
9
  } from "@checkstack/satellite-common";
10
10
  import { HealthCheckApi } from "@checkstack/healthcheck-common";
11
11
  import { healthCheckHooks } from "@checkstack/healthcheck-backend";
12
+ import { ScriptPackagesApi } from "@checkstack/script-packages-common";
13
+ import { scriptPackagesChangedHook } from "@checkstack/script-packages-backend";
14
+ import { secretResolverRef } from "@checkstack/secrets-backend";
15
+ import { resolveSatelliteRunSecrets } from "./run-secret-resolver";
12
16
  import { SatelliteService } from "./service";
13
17
  import { createSatelliteRouter } from "./router";
14
18
  import { HeartbeatMonitor } from "./heartbeat-monitor";
@@ -16,6 +20,21 @@ import { SatelliteWsHandler } from "./satellite-ws-handler";
16
20
  import { ConfigRelay } from "./config-relay";
17
21
  import { entityKindExtensionPoint } from "@checkstack/gitops-backend";
18
22
  import { registerSatelliteGitOpsKinds } from "./satellite-gitops-kinds";
23
+ import {
24
+ automationTriggerExtensionPoint,
25
+ entityExtensionPoint,
26
+ withEntityWrite,
27
+ type EntityHandle,
28
+ } from "@checkstack/automation-backend";
29
+ import {
30
+ SATELLITE_CONNECTION_ENTITY_KIND,
31
+ createSatelliteConnectionRead,
32
+ deriveSatelliteConnectionEvents,
33
+ satelliteChangeToPayload,
34
+ satelliteConnectionStateSchema,
35
+ type SatelliteConnectionState,
36
+ } from "./entity";
37
+ import { satelliteTriggers } from "./automations";
19
38
 
20
39
  // Queue and job constants
21
40
  const HEARTBEAT_QUEUE = "satellite-heartbeat";
@@ -27,6 +46,50 @@ export default createBackendPlugin({
27
46
  register(env) {
28
47
  env.registerAccessRules(satelliteAccessRules);
29
48
 
49
+ // ─── Automation Platform: reactive connection entity ─────────────
50
+ // Satellite connection state is the `satellite-connection` entity
51
+ // (reactive automation engine §10.6, §9.1), PLUGIN-BACKED (Model B) and
52
+ // COMPUTE-ON-READ: its `status` is DERIVED on read from the DURABLE, shared
53
+ // `satellites.lastHeartbeatAt` column (the single liveness source of truth,
54
+ // same as the admin list), and `lastConnectionEvent` is the only extra
55
+ // durable column (the deriver's event discriminator). There is NO stored
56
+ // status copy and NO framework `entity_state` mirror, so EVERY pod computes
57
+ // the same state AND a stale row self-heals to offline once the heartbeat
58
+ // ages out (this fixes the horizontal-scaling bug twice: the old in-memory
59
+ // map made pod A's satellite invisible to pod B, and the prior fix's stored
60
+ // status got stuck `online` after a pod crash because the heartbeat-lost
61
+ // EDGE was detected pod-locally). The three lifecycle sites (connect /
62
+ // disconnect / heartbeat-lost) write the liveness inputs through
63
+ // `handle.mutate`, and the framework records full transition HISTORY in
64
+ // `entity_transitions`.
65
+ //
66
+ // The `satellite.connected` / `.disconnected` / `.heartbeat_lost` trigger
67
+ // events are DERIVED from its changes (no hook-backed triggers). The
68
+ // ENTITY-DRIVEN triggers below stay registered so they remain in the
69
+ // editor's trigger catalog + payload-introspectable, and a `toPayload`
70
+ // mapper makes the runtime `trigger.payload` match their `payloadSchema`
71
+ // (mirroring incident / catalog / dependency / healthcheck).
72
+ const automationTriggers = env.getExtensionPoint(
73
+ automationTriggerExtensionPoint,
74
+ );
75
+ for (const trigger of satelliteTriggers) {
76
+ automationTriggers.registerTrigger(trigger, pluginMetadata);
77
+ }
78
+
79
+ const entity = env.getExtensionPoint(entityExtensionPoint);
80
+ entity.registerChangeDeriver({
81
+ kind: SATELLITE_CONNECTION_ENTITY_KIND,
82
+ derive: deriveSatelliteConnectionEvents,
83
+ toPayload: satelliteChangeToPayload,
84
+ });
85
+ entity.declareNonReactiveState({
86
+ table: "satellites",
87
+ reason: "bookkeeping",
88
+ note: "lastHeartbeatAt is the raw liveness timestamp; the satellite-connection entity's reactive status is computed from it on read.",
89
+ });
90
+ // Created once in init; reused by the WS handler + heartbeat monitor.
91
+ let satelliteEntityHandle: EntityHandle<SatelliteConnectionState>;
92
+
30
93
  // ─── GitOps Entity Kind Registration ─────────────────────────────
31
94
  let gitopsService: SatelliteService | undefined;
32
95
  const kindRegistry = env.getExtensionPoint(entityKindExtensionPoint);
@@ -47,6 +110,7 @@ export default createBackendPlugin({
47
110
  signalService: coreServices.signalService,
48
111
  queueManager: coreServices.queueManager,
49
112
  wsRegistry: coreServices.wsRegistry,
113
+ secretResolver: secretResolverRef,
50
114
  },
51
115
  init: async ({ logger, database, rpc, signalService }) => {
52
116
  logger.debug("🛰️ Initializing Satellite Backend...");
@@ -56,6 +120,20 @@ export default createBackendPlugin({
56
120
  );
57
121
  gitopsService = service;
58
122
 
123
+ // Declare the reactive `satellite-connection` entity once. PLUGIN-
124
+ // BACKED, COMPUTE-ON-READ: `read` computes status from the durable
125
+ // `satellites.lastHeartbeatAt` (+ reads `lastConnectionEvent`) via the
126
+ // service (the source of truth — no stored status copy, no
127
+ // `entity_state` mirror, globally consistent from any pod). The handle
128
+ // is the only typed path that drives connection-state changes (reactive
129
+ // automation engine §4.2); it is reused by the WS handler + heartbeat
130
+ // monitor wired in afterPluginsReady.
131
+ satelliteEntityHandle = entity.defineEntity({
132
+ kind: SATELLITE_CONNECTION_ENTITY_KIND,
133
+ state: satelliteConnectionStateSchema,
134
+ read: createSatelliteConnectionRead(service),
135
+ });
136
+
59
137
  const router = createSatelliteRouter({
60
138
  service,
61
139
  signalService,
@@ -72,6 +150,7 @@ export default createBackendPlugin({
72
150
  signalService,
73
151
  wsRegistry,
74
152
  rpcClient,
153
+ secretResolver,
75
154
  onHook,
76
155
  }) => {
77
156
  const service = new SatelliteService(
@@ -112,6 +191,71 @@ export default createBackendPlugin({
112
191
  },
113
192
  },
114
193
  logger,
194
+ {
195
+ // Drive connect/disconnect through `handle.mutate` (Model B):
196
+ // `apply` UPDATEs the satellite row's durable liveness columns
197
+ // (`lastHeartbeatAt` + `lastConnectionEvent`) — the globally-
198
+ // readable source of truth — and returns the view (status COMPUTED
199
+ // from `lastHeartbeatAt`). The framework snapshots `prev` via
200
+ // `read`, records the transition (durable history), and emits the
201
+ // change; the deriver re-fires the equivalent trigger events.
202
+ mirror: async ({ satelliteId, lastEvent, lastHeartbeatAt }) => {
203
+ await withEntityWrite({
204
+ handle: satelliteEntityHandle,
205
+ id: satelliteId,
206
+ apply: () =>
207
+ service.applyConnectionState({
208
+ satelliteId,
209
+ lastEvent,
210
+ lastHeartbeatAt,
211
+ }),
212
+ });
213
+ },
214
+ },
215
+ {
216
+ // Script-package distribution: carry the desired lockfile hash in
217
+ // assignment payloads + persist per-satellite reconcile state.
218
+ // Satellites pull blobs from CORE (getManifest/downloadBlob),
219
+ // never the registry.
220
+ getDesiredLockfileHash: async () => {
221
+ const spClient = rpcClient.forPlugin(ScriptPackagesApi);
222
+ const state = await spClient.getInstallState();
223
+ return state.lockfileHash;
224
+ },
225
+ reportSyncState: async (input) => {
226
+ const spClient = rpcClient.forPlugin(ScriptPackagesApi);
227
+ await spClient.reportSatelliteSyncState(input);
228
+ },
229
+ getManifest: async ({ lockfileHash }) => {
230
+ const spClient = rpcClient.forPlugin(ScriptPackagesApi);
231
+ const res = await spClient.getManifest({ lockfileHash });
232
+ return res.entries;
233
+ },
234
+ getBlobBase64: async ({ integrity }) => {
235
+ const spClient = rpcClient.forPlugin(ScriptPackagesApi);
236
+ try {
237
+ const res = await spClient.downloadBlob({ integrity });
238
+ return res.data;
239
+ } catch {
240
+ return null;
241
+ }
242
+ },
243
+ },
244
+ {
245
+ // JIT secret delivery: resolve a collector's declared secretEnv
246
+ // (read from the satellite's own assignment) via the central
247
+ // resolver. Values are returned over the WS channel per-run and
248
+ // never persisted.
249
+ resolveRunSecrets: async ({ satelliteId, configId, collectorId }) =>
250
+ resolveSatelliteRunSecrets({
251
+ satelliteId,
252
+ configId,
253
+ collectorId,
254
+ getAssignmentsForSatellite: (id) =>
255
+ configRelay.getAssignmentsForSatellite(id),
256
+ resolver: secretResolver,
257
+ }),
258
+ },
115
259
  );
116
260
 
117
261
  // Register satellite WebSocket endpoint via the scoped WS registry
@@ -124,6 +268,30 @@ export default createBackendPlugin({
124
268
  service,
125
269
  signalService,
126
270
  logger,
271
+ {
272
+ // Drive the online → offline (heartbeat-lost) edge through
273
+ // `handle.mutate`. `apply` flips ONLY `lastConnectionEvent` to
274
+ // `"heartbeat_lost"` (the aged `lastHeartbeatAt` is left untouched —
275
+ // it is what made the computed status `offline`). The framework
276
+ // records the transition (durable history) and the deriver re-fires
277
+ // `satellite.heartbeat_lost`. The mutate is idempotent: once
278
+ // `lastConnectionEvent === "heartbeat_lost"`, the monitor's
279
+ // predicate is false and re-runs (on any pod) are no-ops. This is
280
+ // the durable, any-pod offline-on-timeout backstop: a pod that dies
281
+ // without flipping its satellites to offline leaves a stale state
282
+ // only until ANY pod's monitor observes the heartbeat timeout.
283
+ mirror: async (satelliteId) => {
284
+ await withEntityWrite({
285
+ handle: satelliteEntityHandle,
286
+ id: satelliteId,
287
+ apply: () =>
288
+ service.applyConnectionState({
289
+ satelliteId,
290
+ lastEvent: "heartbeat_lost",
291
+ }),
292
+ });
293
+ },
294
+ },
127
295
  );
128
296
 
129
297
  const queue = queueManager.getQueue<Record<string, never>>(
@@ -163,6 +331,18 @@ export default createBackendPlugin({
163
331
  },
164
332
  );
165
333
 
334
+ // Fan the script-packages.changed broadcast out to THIS instance's
335
+ // connected satellites. Every core instance subscribes in broadcast
336
+ // mode, so each pushes to its own satellites; offline satellites
337
+ // converge via the assignment-carried lockfile hash on reconnect.
338
+ onHook(
339
+ scriptPackagesChangedHook,
340
+ async ({ lockfileHash }) => {
341
+ wsHandler.pushRefreshScriptPackagesToAll(lockfileHash);
342
+ },
343
+ { mode: "broadcast" },
344
+ );
345
+
166
346
  logger.debug("✅ Satellite Backend afterPluginsReady complete.");
167
347
  },
168
348
  });
@@ -0,0 +1,121 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import type { SatelliteAssignment } from "@checkstack/satellite-common";
3
+ import type { SecretResolverService } from "@checkstack/secrets-backend";
4
+ import { resolveSatelliteRunSecrets } from "./run-secret-resolver";
5
+
6
+ // A resolver that resolves from a fixed name->value map (mirrors the real
7
+ // resolveForRun: substitute ${{ secrets.NAME }} per declared env entry).
8
+ function fakeResolver(values: Record<string, string>): SecretResolverService {
9
+ const TEMPLATE_RE = /\$\{\{\s*secrets\.([a-zA-Z0-9_-]+)\s*\}\}/g;
10
+ return {
11
+ resolveSecret: async ({ name }) => {
12
+ if (!(name in values)) throw new Error(`Secret not found: ${name}`);
13
+ return values[name];
14
+ },
15
+ resolveBySchema: async ({ value }) => ({ resolved: value, warnings: [] }),
16
+ resolveForRun: async ({ secretEnv }) => {
17
+ const env: Record<string, string> = {};
18
+ for (const [envName, template] of Object.entries(secretEnv)) {
19
+ TEMPLATE_RE.lastIndex = 0;
20
+ env[envName] = template.replaceAll(TEMPLATE_RE, (_m, name: string) => {
21
+ if (!(name in values)) throw new Error(`Secret not found: ${name}`);
22
+ return values[name];
23
+ });
24
+ }
25
+ return {
26
+ env,
27
+ masking: {
28
+ size: 0,
29
+ maskText: (t) => t,
30
+ maskDeep: (v) => v,
31
+ },
32
+ };
33
+ },
34
+ };
35
+ }
36
+
37
+ function assignment(
38
+ configId: string,
39
+ collectors: SatelliteAssignment["collectors"],
40
+ ): SatelliteAssignment {
41
+ return {
42
+ configId,
43
+ systemId: "sys-1",
44
+ strategyId: "script",
45
+ config: {},
46
+ collectors,
47
+ intervalSeconds: 60,
48
+ };
49
+ }
50
+
51
+ describe("resolveSatelliteRunSecrets", () => {
52
+ it("resolves ONLY the collector's declared secretEnv from the assignment", async () => {
53
+ const assignments = [
54
+ assignment("config-1", [
55
+ {
56
+ id: "col-1",
57
+ collectorId: "inline-script",
58
+ config: { secretEnv: { API_TOKEN: "${{ secrets.jira_token }}" } },
59
+ },
60
+ ]),
61
+ ];
62
+ const env = await resolveSatelliteRunSecrets({
63
+ satelliteId: "sat-1",
64
+ configId: "config-1",
65
+ collectorId: "col-1",
66
+ getAssignmentsForSatellite: async () => assignments,
67
+ resolver: fakeResolver({ jira_token: "real-value", other: "nope" }),
68
+ });
69
+ expect(env).toEqual({ API_TOKEN: "real-value" });
70
+ });
71
+
72
+ it("throws when the assignment is not assigned to this satellite", async () => {
73
+ await expect(
74
+ resolveSatelliteRunSecrets({
75
+ satelliteId: "sat-1",
76
+ configId: "missing",
77
+ collectorId: "col-1",
78
+ getAssignmentsForSatellite: async () => [],
79
+ resolver: fakeResolver({}),
80
+ }),
81
+ ).rejects.toThrow(/No assignment/);
82
+ });
83
+
84
+ it("throws when the collector declares no secretEnv (least-privilege)", async () => {
85
+ const assignments = [
86
+ assignment("config-1", [
87
+ { id: "col-1", collectorId: "inline-script", config: {} },
88
+ ]),
89
+ ];
90
+ await expect(
91
+ resolveSatelliteRunSecrets({
92
+ satelliteId: "sat-1",
93
+ configId: "config-1",
94
+ collectorId: "col-1",
95
+ getAssignmentsForSatellite: async () => assignments,
96
+ resolver: fakeResolver({}),
97
+ }),
98
+ ).rejects.toThrow(/no secretEnv/);
99
+ });
100
+
101
+ it("propagates a clear error when a required secret cannot resolve", async () => {
102
+ const assignments = [
103
+ assignment("config-1", [
104
+ {
105
+ id: "col-1",
106
+ collectorId: "inline-script",
107
+ config: { secretEnv: { TOKEN: "${{ secrets.absent }}" } },
108
+ },
109
+ ]),
110
+ ];
111
+ await expect(
112
+ resolveSatelliteRunSecrets({
113
+ satelliteId: "sat-1",
114
+ configId: "config-1",
115
+ collectorId: "col-1",
116
+ getAssignmentsForSatellite: async () => assignments,
117
+ resolver: fakeResolver({}),
118
+ }),
119
+ ).rejects.toThrow(/Secret not found: absent/);
120
+ });
121
+ });
@@ -0,0 +1,66 @@
1
+ import { secretEnvMappingSchema } from "@checkstack/secrets-common";
2
+ import type { SecretResolverService } from "@checkstack/secrets-backend";
3
+ import type { SatelliteAssignment } from "@checkstack/satellite-common";
4
+
5
+ /**
6
+ * Resolve a satellite collector run's secrets just-in-time.
7
+ *
8
+ * Security model (least-privilege, decision 5): the satellite asks by
9
+ * `configId` + `collectorId` only. Core reads the `secretEnv` mapping from
10
+ * the satellite's OWN persisted assignment for that collector — the
11
+ * satellite does not get to choose which secrets — and resolves ONLY those
12
+ * refs via the central resolver. So a compromised satellite cannot request
13
+ * arbitrary secrets; it can only obtain what its assignment already
14
+ * declares it needs.
15
+ *
16
+ * Returns the resolved env map. Throws a clear error when the collector
17
+ * isn't in the satellite's assignments, when the collector declares no
18
+ * `secretEnv` (nothing to resolve — caller should not have asked), or when
19
+ * a referenced secret can't be resolved. The values are never persisted.
20
+ */
21
+ export async function resolveSatelliteRunSecrets({
22
+ satelliteId,
23
+ configId,
24
+ collectorId,
25
+ getAssignmentsForSatellite,
26
+ resolver,
27
+ }: {
28
+ satelliteId: string;
29
+ configId: string;
30
+ collectorId: string;
31
+ getAssignmentsForSatellite: (
32
+ satelliteId: string,
33
+ ) => Promise<SatelliteAssignment[]>;
34
+ resolver: SecretResolverService;
35
+ }): Promise<Record<string, string>> {
36
+ const assignments = await getAssignmentsForSatellite(satelliteId);
37
+ const assignment = assignments.find((a) => a.configId === configId);
38
+ if (!assignment) {
39
+ throw new Error(
40
+ `No assignment "${configId}" for this satellite; cannot deliver secrets.`,
41
+ );
42
+ }
43
+
44
+ const collector = (assignment.collectors ?? []).find(
45
+ (c) => c.id === collectorId || c.collectorId === collectorId,
46
+ );
47
+ if (!collector) {
48
+ throw new Error(
49
+ `Collector "${collectorId}" not found in assignment "${configId}".`,
50
+ );
51
+ }
52
+
53
+ // The declared mapping lives inside the collector's config. Validate it so
54
+ // a malformed config can't smuggle non-template values through.
55
+ const parsed = secretEnvMappingSchema.safeParse(
56
+ (collector.config as { secretEnv?: unknown }).secretEnv,
57
+ );
58
+ if (!parsed.success || Object.keys(parsed.data).length === 0) {
59
+ throw new Error(
60
+ `Collector "${collectorId}" declares no secretEnv; nothing to resolve.`,
61
+ );
62
+ }
63
+
64
+ const { env } = await resolver.resolveForRun({ secretEnv: parsed.data });
65
+ return env;
66
+ }