backlot 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +106 -0
  3. package/dist/cli/client.d.ts +22 -0
  4. package/dist/cli/client.js +83 -0
  5. package/dist/cli/client.js.map +1 -0
  6. package/dist/cli/index.d.ts +2 -0
  7. package/dist/cli/index.js +308 -0
  8. package/dist/cli/index.js.map +1 -0
  9. package/dist/core/events.d.ts +10 -0
  10. package/dist/core/events.js +42 -0
  11. package/dist/core/events.js.map +1 -0
  12. package/dist/core/journal.d.ts +80 -0
  13. package/dist/core/journal.js +186 -0
  14. package/dist/core/journal.js.map +1 -0
  15. package/dist/core/manifest.d.ts +76 -0
  16. package/dist/core/manifest.js +46 -0
  17. package/dist/core/manifest.js.map +1 -0
  18. package/dist/core/paths.d.ts +17 -0
  19. package/dist/core/paths.js +32 -0
  20. package/dist/core/paths.js.map +1 -0
  21. package/dist/core/policy.d.ts +15 -0
  22. package/dist/core/policy.js +45 -0
  23. package/dist/core/policy.js.map +1 -0
  24. package/dist/core/ports.d.ts +3 -0
  25. package/dist/core/ports.js +22 -0
  26. package/dist/core/ports.js.map +1 -0
  27. package/dist/core/retention.d.ts +19 -0
  28. package/dist/core/retention.js +101 -0
  29. package/dist/core/retention.js.map +1 -0
  30. package/dist/core/sync.d.ts +15 -0
  31. package/dist/core/sync.js +150 -0
  32. package/dist/core/sync.js.map +1 -0
  33. package/dist/core/types.d.ts +82 -0
  34. package/dist/core/types.js +6 -0
  35. package/dist/core/types.js.map +1 -0
  36. package/dist/core/upkeep.d.ts +11 -0
  37. package/dist/core/upkeep.js +47 -0
  38. package/dist/core/upkeep.js.map +1 -0
  39. package/dist/core/util.d.ts +36 -0
  40. package/dist/core/util.js +100 -0
  41. package/dist/core/util.js.map +1 -0
  42. package/dist/daemon/engine.d.ts +284 -0
  43. package/dist/daemon/engine.js +858 -0
  44. package/dist/daemon/engine.js.map +1 -0
  45. package/dist/daemon/index.d.ts +2 -0
  46. package/dist/daemon/index.js +183 -0
  47. package/dist/daemon/index.js.map +1 -0
  48. package/dist/daemon/supervisor.d.ts +36 -0
  49. package/dist/daemon/supervisor.js +189 -0
  50. package/dist/daemon/supervisor.js.map +1 -0
  51. package/dist/drivers/datastore-sqlite.d.ts +27 -0
  52. package/dist/drivers/datastore-sqlite.js +83 -0
  53. package/dist/drivers/datastore-sqlite.js.map +1 -0
  54. package/dist/drivers/datastores.d.ts +22 -0
  55. package/dist/drivers/datastores.js +190 -0
  56. package/dist/drivers/datastores.js.map +1 -0
  57. package/dist/drivers/types.d.ts +71 -0
  58. package/dist/drivers/types.js +14 -0
  59. package/dist/drivers/types.js.map +1 -0
  60. package/dist/mcp/index.d.ts +2 -0
  61. package/dist/mcp/index.js +144 -0
  62. package/dist/mcp/index.js.map +1 -0
  63. package/package.json +57 -0
  64. package/schema/stack.schema.json +172 -0
@@ -0,0 +1,858 @@
1
+ /**
2
+ * The engine: pool + lease + bind + run orchestration, owning all policy
3
+ * (drivers own transport/storage; the manifest owns repo knowledge).
4
+ */
5
+ import { execFile, execFileSync, spawn } from 'node:child_process';
6
+ import { mkdtempSync } from 'node:fs';
7
+ import { tmpdir } from 'node:os';
8
+ import { mkdirSync, rmSync, copyFileSync, readdirSync, statSync, existsSync, readFileSync, watch as fsWatch, constants as fsConstants } from 'node:fs';
9
+ import { join, resolve } from 'node:path';
10
+ import { Journal } from '../core/journal.js';
11
+ import { loadStack, defaultPreset } from '../core/manifest.js';
12
+ import { syncIntoEnv, changedOutputs, pullOutputs } from '../core/sync.js';
13
+ import { runUpkeep } from '../core/upkeep.js';
14
+ import { freePort, probeFree } from '../core/ports.js';
15
+ import { envsRoot, artifactsRoot } from '../core/paths.js';
16
+ import { BrokerError, template, templateEnv, now, shortId, matchesAny } from '../core/util.js';
17
+ import { makeDatastore } from '../drivers/datastores.js';
18
+ import { EnvSupervisor, reapPids } from './supervisor.js';
19
+ import { policy } from '../core/policy.js';
20
+ import { retentionSweep } from '../core/retention.js';
21
+ import { logEvent, recentEvents } from '../core/events.js';
22
/** Maximum number of environments per stack; read from policy on every call. */
const POOL_MAX = () => {
    return policy().poolMax;
};
/** Lease lifetime in ms: the session TTL for 'session' leases, the run TTL otherwise. */
const LEASE_TTL = (kind) => {
    if (kind === 'session') {
        return policy().sessionTtlMs;
    }
    return policy().runTtlMs;
};
/** Idle TTL in ms, read from policy on every call. */
const IDLE_TTL = () => {
    return policy().idleTtlMs;
};
/** How long (ms) an acquire may queue at capacity before giving up. */
const WAIT_MS = () => {
    return policy().waitMs;
};
/** Timeout in seconds applied to a check that declares none of its own. */
const CHECK_TIMEOUT_S = 600;
27
/**
 * Run a check/exec command as a PROCESS GROUP with a hard timeout — killing
 * only the `sh` wrapper would orphan grandchildren (a hung Playwright would
 * hold the environment busy forever).
 *
 * stdout and stderr are merged into one rolling buffer (last 8000 characters
 * while running, last 4000 in the result). The returned promise never rejects.
 *
 * @param {string} cmd       Command line handed to `sh -c`.
 * @param {string} cwd       Working directory for the command.
 * @param {object} envVars   Complete environment for the child process.
 * @param {number} timeoutS  Hard timeout in seconds; on expiry the whole group gets SIGKILL.
 * @returns {Promise<{exitCode: number, output: string, timedOut: boolean}>}
 */
function runGroupCmd(cmd, cwd, envVars, timeoutS) {
    // 'exit' can fire while output is still buffered in the pipes, so the
    // result waits for 'close'. A background grandchild may keep the pipes
    // open long after `sh` exits, hence the wait is capped.
    const DRAIN_GRACE_MS = 250;
    return new Promise((resolvePromise) => {
        const proc = spawn('sh', ['-c', cmd], { cwd, env: envVars, stdio: ['ignore', 'pipe', 'pipe'], detached: true });
        let out = '';
        let settled = false;
        let timedOut = false;
        let drainTimer = null;
        const append = (text) => {
            out = (out + text).slice(-8000);
        };
        for (const stream of [proc.stdout, proc.stderr]) {
            // Decode through the stream so a multi-byte UTF-8 character split
            // across two chunks is reassembled instead of becoming U+FFFD.
            stream.setEncoding('utf8');
            stream.on('data', append);
        }
        const timer = setTimeout(() => {
            timedOut = true;
            try {
                process.kill(-proc.pid, 'SIGKILL'); // the whole group
            }
            catch {
                proc.kill('SIGKILL');
            }
        }, timeoutS * 1000);
        timer.unref();
        const done = (r) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            if (drainTimer)
                clearTimeout(drainTimer);
            resolvePromise(r);
        };
        // Without this, a spawn failure (EMFILE/EAGAIN) emits 'error' with no
        // 'exit' — the promise would never settle and the env lock would wedge
        // forever, starving that environment until a daemon restart.
        proc.on('error', (err) => done({ exitCode: 1, output: `${out}\nspawn error: ${err.message}`.slice(-4000), timedOut }));
        proc.on('exit', (code) => {
            const exitCode = code ?? 1;
            const finish = () => done({ exitCode, output: out.slice(-4000), timedOut });
            proc.once('close', finish);
            drainTimer = setTimeout(finish, DRAIN_GRACE_MS);
        });
    });
}
64
+ export class Engine {
65
+ journal = new Journal();
66
+ supervisors = new Map();
67
+ lastSweep = now();
68
+ lastRetention = now();
69
+ // -------- concurrency: a short pool lock for claim/release bookkeeping and
70
+ // one lock per environment for bind/exec/reset. Two environments (or two
71
+ // stacks) proceed in parallel; one environment is never mutated twice at once.
72
+ poolChain = Promise.resolve();
73
+ envChains = new Map();
74
+ /** Envs with an operation in flight — the sweeper must not expire/quiesce these. */
75
+ busy = new Set();
76
+ /** --watch: per-env worktree watchers ("verbs sync, watch streams", decision 0005). */
77
+ watchers = new Map();
78
+ poolLocked(fn) {
79
+ const next = this.poolChain.then(fn, fn);
80
+ this.poolChain = next.catch(() => undefined);
81
+ return next;
82
+ }
83
+ envLocked(envId, fn) {
84
+ const chain = this.envChains.get(envId) ?? Promise.resolve();
85
+ const next = chain.then(async () => {
86
+ this.busy.add(envId);
87
+ try {
88
+ return await fn();
89
+ }
90
+ finally {
91
+ this.busy.delete(envId);
92
+ }
93
+ }, async () => {
94
+ this.busy.add(envId);
95
+ try {
96
+ return await fn();
97
+ }
98
+ finally {
99
+ this.busy.delete(envId);
100
+ }
101
+ });
102
+ this.envChains.set(envId, next.catch(() => undefined));
103
+ return next;
104
+ }
105
+ /** Recovery (decision 0009): reap recorded PIDs from a previous daemon life; hot -> warm. */
106
+ recover() {
107
+ let envs = 0;
108
+ for (const env of this.journal.allEnvs()) {
109
+ if (Object.keys(env.servicePids).length > 0)
110
+ reapPids(env.servicePids);
111
+ // A 'recycling' env from a crashed daemon never finished teardown — finish it.
112
+ if (env.state === 'recycling') {
113
+ void this.teardownClaimed(env).catch(() => undefined);
114
+ continue;
115
+ }
116
+ if (env.state === 'hot' || env.state === 'degraded')
117
+ env.state = 'warm';
118
+ env.servicePids = {};
119
+ this.journal.saveEnv(env);
120
+ envs++;
121
+ }
122
+ const jobs = this.journal.failStaleJobs();
123
+ logEvent({ level: 'info', kind: 'recover', detail: `reconciled ${envs} env(s), ${jobs} stale job(s)` });
124
+ }
125
+ // ---------------------------------------------------------------- pool
126
+ envDirs(id) {
127
+ const root = join(envsRoot(), id);
128
+ return { root, tree: join(root, 'tree'), data: join(root, 'data'), logs: join(root, 'logs') };
129
+ }
130
+ async createEnv(stack) {
131
+ // Monotonic, never-reused sequence — a reaped env's id can never collide
132
+ // with a live one (the old length+1 scheme did, deterministically).
133
+ const n = this.journal.nextEnvSeq(stack.id);
134
+ const id = `${stack.id}-e${n}`;
135
+ const dirs = this.envDirs(id);
136
+ mkdirSync(dirs.tree, { recursive: true });
137
+ mkdirSync(dirs.data, { recursive: true });
138
+ const ports = {};
139
+ for (const [, spec] of Object.entries(stack.manifest.services)) {
140
+ if (spec.port && !(spec.port in ports))
141
+ ports[spec.port] = await freePort();
142
+ }
143
+ const env = {
144
+ id, stack: stack.id, stackRoot: stack.root, state: 'warm', root: dirs.root,
145
+ ports, datastoreNs: {}, fingerprints: {}, presets: {},
146
+ bindCount: 0, createdAt: now(), lastUsedAt: now(), servicePids: {}, failStreak: 0,
147
+ };
148
+ this.journal.saveEnv(env);
149
+ return env;
150
+ }
151
+ /** One atomic claim attempt — MUST run under the pool lock. */
152
+ async tryClaim(stack, holder, kind, hygiene, ttlMs) {
153
+ // A holder keeps its env: rebinding your own lease is the normal loop —
154
+ // unless that env is being torn down or has flapped, in which case drop the
155
+ // stale lease and fall through to a fresh claim.
156
+ const mine = this.journal.leaseForHolder(holder, stack.id);
157
+ if (mine) {
158
+ const env = this.journal.getEnv(mine.envId);
159
+ if (env && env.state !== 'recycling' && env.state !== 'degraded') {
160
+ this.journal.saveLease({ ...mine, hygiene, expiresAt: now() + ttlMs });
161
+ return env;
162
+ }
163
+ this.journal.deleteLease(mine.id);
164
+ }
165
+ const envs = this.journal.envsForStack(stack.id);
166
+ const free = envs
167
+ .filter((e) => !this.journal.leaseForEnv(e.id) && e.state !== 'degraded' && e.state !== 'recycling')
168
+ .sort((a, b) => (a.state === 'hot' ? -1 : 1) - (b.state === 'hot' ? -1 : 1));
169
+ let env = free[0];
170
+ if (!env && envs.length < POOL_MAX())
171
+ env = await this.createEnv(stack);
172
+ if (env) {
173
+ this.journal.saveLease({ id: `l-${shortId()}`, envId: env.id, kind, holder, hygiene, expiresAt: now() + ttlMs });
174
+ return env;
175
+ }
176
+ return null;
177
+ }
178
+ /** Queue at capacity WITHOUT holding the pool lock while sleeping. */
179
+ async acquireEnv(stack, holder, kind, hygiene, ttlMs) {
180
+ const start = now();
181
+ for (;;) {
182
+ const env = await this.poolLocked(() => this.tryClaim(stack, holder, kind, hygiene, ttlMs));
183
+ if (env)
184
+ return env;
185
+ if (now() - start > WAIT_MS()) {
186
+ throw new BrokerError('env-error', `pool at capacity (${POOL_MAX()}/${POOL_MAX()}) — waited ${Math.round(WAIT_MS() / 1000)}s; release a lease or raise BACKLOT_POOL_MAX`, 'pool');
187
+ }
188
+ await new Promise((r) => setTimeout(r, 500));
189
+ }
190
+ }
191
+ // ---------------------------------------------------------------- bind
192
+ templateCtx(stack, env) {
193
+ const services = {};
194
+ for (const [name, spec] of Object.entries(stack.manifest.services)) {
195
+ if (spec.port)
196
+ services[name] = { url: `http://localhost:${env.ports[spec.port]}` };
197
+ }
198
+ const datastores = {};
199
+ const dirs = this.envDirs(env.id);
200
+ const h = { envId: env.id, envTree: dirs.tree, dataDir: dirs.data };
201
+ for (const [name, spec] of Object.entries(stack.manifest.datastores ?? {})) {
202
+ const ds = makeDatastore(name, spec, stack.id);
203
+ datastores[name] = { url: ds.url(h), ns: ds.ns(h) };
204
+ }
205
+ return { ports: env.ports, services, datastores };
206
+ }
207
+ supervisor(env) {
208
+ let sup = this.supervisors.get(env.id);
209
+ if (!sup) {
210
+ const dirs = this.envDirs(env.id);
211
+ sup = new EnvSupervisor(env.id, dirs.tree, dirs.logs, () => {
212
+ // Flapping service -> the environment is degraded: skipped by acquire,
213
+ // auto-reaped by the sweeper (decision 0007).
214
+ const fresh = this.journal.getEnv(env.id);
215
+ if (fresh && fresh.state !== 'recycling') {
216
+ fresh.state = 'degraded';
217
+ this.journal.saveEnv(fresh);
218
+ logEvent({ level: 'warn', kind: 'degraded', envId: env.id, detail: 'service flapped past its restart budget' });
219
+ }
220
+ }, () => {
221
+ // A pid changed (start/restart/exit): keep the journal truthful so
222
+ // recovery reaps the right process, not a stale/innocent pid.
223
+ const s = this.supervisors.get(env.id);
224
+ if (s)
225
+ this.journal.updateServicePids(env.id, s.pids());
226
+ });
227
+ this.supervisors.set(env.id, sup);
228
+ }
229
+ return sup;
230
+ }
231
+ async bindAndStart(stack, envSnapshot, hygiene, kind, watch, sourceRoot, onProgress) {
232
+ const say = onProgress ?? (() => undefined);
233
+ // Re-read under the env lock: the snapshot captured during acquire may be
234
+ // stale (a concurrent degrade/pid update landed). Everything below mutates
235
+ // and saves THIS fresh row, so no epilogue can clobber another verb's write.
236
+ const env = this.journal.getEnv(envSnapshot.id) ?? envSnapshot;
237
+ if (env.state === 'recycling') {
238
+ throw new BrokerError('env-error', `environment ${env.id} is being recycled — retry`, 'pool');
239
+ }
240
+ const dirs = this.envDirs(env.id);
241
+ if (hygiene === 'pristine') {
242
+ say('preparing a pristine environment');
243
+ await this.supervisor(env).stopAll();
244
+ this.supervisors.delete(env.id);
245
+ rmSync(dirs.tree, { recursive: true, force: true });
246
+ rmSync(dirs.data, { recursive: true, force: true });
247
+ mkdirSync(dirs.tree, { recursive: true });
248
+ mkdirSync(dirs.data, { recursive: true });
249
+ env.fingerprints = {};
250
+ env.presets = {};
251
+ }
252
+ say('syncing worktree');
253
+ const sync = syncIntoEnv(sourceRoot ?? stack.root, dirs.tree, stack.manifest);
254
+ say(`synced ${sync.files.length} files (${sync.copied} changed, ${sync.deleted} removed)`);
255
+ const upkeep = await runUpkeep(dirs.tree, sync.files, stack.manifest, env.fingerprints);
256
+ for (const r of upkeep.ran)
257
+ say(`upkeep: ${r.run}`);
258
+ for (const dsName of upkeep.rebakeTemplates) {
259
+ const spec = stack.manifest.datastores?.[dsName];
260
+ if (spec)
261
+ makeDatastore(dsName, spec, stack.id).rebake();
262
+ }
263
+ // Fast path: identical source, services healthy, data untouched -> reuse as-is.
264
+ const unchanged = env.fingerprints['@source'] === sync.sourceHash &&
265
+ upkeep.ran.length === 0 &&
266
+ env.state === 'hot' &&
267
+ this.supervisor(env).allHealthyPids() &&
268
+ hygiene === 'reuse';
269
+ env.fingerprints = { ...upkeep.fingerprints };
270
+ if (unchanged) {
271
+ env.fingerprints['@source'] = sync.sourceHash;
272
+ env.lastUsedAt = now();
273
+ this.journal.saveEnv(env);
274
+ return env;
275
+ }
276
+ // Services must not hold open handles across a data restore or code change.
277
+ await this.supervisor(env).stopAll();
278
+ this.supervisors.delete(env.id);
279
+ // Data state: create-or-restore per hygiene (probe first — infra-error, not code blame).
280
+ const dsHandle = { envId: env.id, envTree: dirs.tree, dataDir: dirs.data };
281
+ for (const [name, spec] of Object.entries(stack.manifest.datastores ?? {})) {
282
+ const ds = makeDatastore(name, spec, stack.id);
283
+ await ds.probe();
284
+ const preset = defaultPreset(spec, kind);
285
+ const exists = Boolean(env.datastoreNs[name]);
286
+ const force = env.presets[name] !== preset || hygiene !== 'reuse' || upkeep.rebakeTemplates.includes(name);
287
+ if (force || !exists)
288
+ say(`preparing datastore '${name}' (${preset})`);
289
+ await ds.ensure(dsHandle, preset, force, exists);
290
+ env.datastoreNs[name] = ds.ns(dsHandle);
291
+ env.presets[name] = preset;
292
+ }
293
+ // Builds: only when the source actually changed (fingerprint '@source').
294
+ const ctx = this.templateCtx(stack, env);
295
+ if (env.fingerprints['@source'] !== sync.sourceHash) {
296
+ for (const [name, spec] of Object.entries(stack.manifest.services)) {
297
+ if (!spec.build)
298
+ continue;
299
+ say(`building '${name}'`);
300
+ const buildStart = now();
301
+ const beat = setInterval(() => say(`building '${name}' … ${Math.round((now() - buildStart) / 1000)}s`), 5000);
302
+ beat.unref();
303
+ try {
304
+ await new Promise((resolvePromise, reject) => {
305
+ execFile('sh', ['-c', template(spec.build, ctx)], { cwd: dirs.tree, maxBuffer: 32 * 1024 * 1024 }, (err, _o, stderr) => {
306
+ if (err)
307
+ reject(new BrokerError('work-error', `build failed for service '${name}'`, name, String(stderr).slice(0, 800)));
308
+ else
309
+ resolvePromise();
310
+ });
311
+ });
312
+ }
313
+ finally {
314
+ clearInterval(beat);
315
+ }
316
+ }
317
+ }
318
+ env.fingerprints['@source'] = sync.sourceHash;
319
+ // Start in dependency order, readiness-gated, fatal-log fast-fail.
320
+ const sup = this.supervisor(env);
321
+ const started = new Set();
322
+ const entries = Object.entries(stack.manifest.services);
323
+ while (started.size < entries.length) {
324
+ const ready = entries.filter(([n, s]) => !started.has(n) && (s.depends_on ?? []).every((d) => started.has(d)));
325
+ if (ready.length === 0)
326
+ throw new BrokerError('work-error', 'depends_on cycle in stack.yaml', 'manifest');
327
+ for (const [name, spec] of ready) {
328
+ if (spec.port) {
329
+ const port = env.ports[spec.port];
330
+ if (!(await probeFree(port))) {
331
+ throw new BrokerError('env-error', `port ${port} for service '${name}' is occupied by a foreign process — try 'backlot pool recycle'`, name);
332
+ }
333
+ }
334
+ // Template the COMMANDS too — ports/urls may ride in the run line itself
335
+ // (e.g. `ng serve --port {{ports.web}}`), not only in env:.
336
+ const resolved = {
337
+ ...spec,
338
+ run: template(spec.run, ctx),
339
+ ...(spec.watch_run ? { watch_run: template(spec.watch_run, ctx) } : {}),
340
+ };
341
+ sup.start(name, resolved, templateEnv(spec.env, ctx), watch);
342
+ const url = spec.port ? `http://localhost:${env.ports[spec.port]}` : undefined;
343
+ say(`starting '${name}', waiting until ready`);
344
+ const readyStart = now();
345
+ const beat = setInterval(() => say(`waiting for '${name}' … ${Math.round((now() - readyStart) / 1000)}s`), 3000);
346
+ beat.unref();
347
+ try {
348
+ await sup.waitReady(name, spec, url, templateEnv(spec.env, ctx));
349
+ clearInterval(beat);
350
+ say(`'${name}' ready`);
351
+ }
352
+ catch (err) {
353
+ clearInterval(beat);
354
+ await sup.stopAll();
355
+ this.supervisors.delete(env.id);
356
+ env.state = 'warm';
357
+ env.servicePids = {};
358
+ this.journal.saveEnv(env);
359
+ throw err;
360
+ }
361
+ started.add(name);
362
+ }
363
+ }
364
+ env.state = 'hot';
365
+ env.servicePids = sup.pids();
366
+ env.bindCount += 1;
367
+ env.lastUsedAt = now();
368
+ env.failStreak = 0; // a successful bind clears the escalation counter
369
+ this.journal.saveEnv(env);
370
+ return env;
371
+ }
372
+ // ---------------------------------------------------------------- watch
373
+ /**
374
+ * --watch: the daemon observes the CONSUMER's worktree (opt-in, per lease)
375
+ * and auto-syncs debounced. The environment's own dev servers then pick up
376
+ * the projected change — two-stage reload. Stopped on release/expiry/
377
+ * quiesce/recycle/shutdown.
378
+ */
379
+ startWatch(envId, stackRoot, cwd, holder) {
380
+ this.stopWatch(envId);
381
+ let timer = null;
382
+ let watcher;
383
+ try {
384
+ watcher = fsWatch(stackRoot, { recursive: true }, (_event, filename) => {
385
+ const f = String(filename ?? '');
386
+ if (f.startsWith('.git') || f.includes('/.git/') || f.startsWith('.backlot'))
387
+ return;
388
+ if (timer)
389
+ clearTimeout(timer);
390
+ timer = setTimeout(() => {
391
+ void this.up({ cwd, holder, kind: 'session', hygiene: 'reuse', watch: true }).catch(() => {
392
+ /* a broken edit is reported on the next explicit verb; keep watching */
393
+ });
394
+ }, 300);
395
+ timer.unref();
396
+ });
397
+ }
398
+ catch {
399
+ return; // recursive fs.watch unavailable — --watch degrades to verbs-only
400
+ }
401
+ this.watchers.set(envId, {
402
+ close: () => {
403
+ if (timer)
404
+ clearTimeout(timer);
405
+ watcher.close();
406
+ },
407
+ });
408
+ }
409
+ stopWatch(envId) {
410
+ this.watchers.get(envId)?.close();
411
+ this.watchers.delete(envId);
412
+ }
413
+ // ---------------------------------------------------------------- verbs
414
+ async up(opts) {
415
+ const stack = loadStack(opts.cwd);
416
+ const holder = opts.holder ?? resolve(opts.cwd);
417
+ const kind = opts.kind ?? 'session';
418
+ let hygiene = opts.hygiene ?? 'reuse';
419
+ opts.onProgress?.(`acquiring an environment (pool ${this.journal.envsForStack(stack.id).length}/${POOL_MAX()})`);
420
+ const env = await this.acquireEnv(stack, holder, kind, hygiene, opts.ttlMs ?? LEASE_TTL(kind));
421
+ // Auto-escalation (decision 0007): two consecutive bind failures on this
422
+ // warm environment -> the next bind is pristine, whatever was asked.
423
+ if (hygiene !== 'pristine' && env.failStreak >= 2)
424
+ hygiene = 'pristine';
425
+ try {
426
+ const bound = await this.envLocked(env.id, () => this.bindAndStart(stack, env, hygiene, kind, opts.watch ?? false, opts.sourceRoot, opts.onProgress));
427
+ if (opts.watch && kind === 'session' && !this.watchers.has(bound.id)) {
428
+ this.startWatch(bound.id, stack.root, opts.cwd, holder);
429
+ }
430
+ return this.ctx(opts.cwd, holder, bound.id);
431
+ }
432
+ catch (err) {
433
+ const fresh = this.journal.getEnv(env.id);
434
+ if (fresh) {
435
+ fresh.failStreak += 1;
436
+ this.journal.saveEnv(fresh);
437
+ }
438
+ // A failed bind must not strand the lease for a run; sessions keep theirs to iterate.
439
+ if (kind === 'run') {
440
+ const lease = this.journal.leaseForHolder(holder, stack.id);
441
+ if (lease)
442
+ this.journal.deleteLease(lease.id);
443
+ }
444
+ throw err;
445
+ }
446
+ }
447
+ ctx(cwd, holder, envId) {
448
+ const stack = loadStack(cwd);
449
+ const h = holder ?? resolve(cwd);
450
+ const lease = this.journal.leaseForHolder(h, stack.id);
451
+ if (!lease && !envId) {
452
+ throw new BrokerError('env-error', `no active lease for this worktree — run 'backlot up' first`, 'lease');
453
+ }
454
+ const env = this.journal.getEnv(envId ?? lease.envId);
455
+ const ctx = this.templateCtx(stack, env);
456
+ const urls = {};
457
+ for (const [name, s] of Object.entries(ctx.services))
458
+ urls[name] = s.url;
459
+ return {
460
+ stack: stack.manifest.name,
461
+ envId: env.id,
462
+ state: env.state,
463
+ lease: lease ? { id: lease.id, kind: lease.kind, hygiene: lease.hygiene, expiresAt: lease.expiresAt } : null,
464
+ urls,
465
+ logins: stack.manifest.auth?.logins ?? null,
466
+ tokenCommand: stack.manifest.auth?.token ?? null,
467
+ datastores: Object.fromEntries(Object.entries(ctx.datastores).map(([n, d]) => [n, { url: d.url, ns: d.ns }])),
468
+ artifactsDir: join(artifactsRoot(), env.id),
469
+ events: this.supervisors.get(env.id)?.events.slice(-20) ?? [],
470
+ };
471
+ }
472
+ async run(opts) {
473
+ const stack = loadStack(opts.cwd);
474
+ const check = stack.manifest.checks?.[opts.check];
475
+ if (!check) {
476
+ throw new BrokerError('work-error', `no check '${opts.check}' in stack.yaml (have: ${Object.keys(stack.manifest.checks ?? {}).join(', ') || 'none'})`, 'manifest');
477
+ }
478
+ // A run ALWAYS gets its own ephemeral holder — never the caller's session
479
+ // holder — so `run` can't reset-data-wipe or delete a live `up` session
480
+ // that happens to share a --holder. Its lease is uniquely ours to delete.
481
+ const holder = `run-${shortId()}`;
482
+ const startedAt = now();
483
+ const context = await this.up({ ...opts, holder, kind: 'run', hygiene: opts.hygiene ?? 'reset-data' });
484
+ const env = this.journal.getEnv(context.envId);
485
+ const dirs = this.envDirs(env.id);
486
+ const ctx = this.templateCtx(stack, env);
487
+ try {
488
+ // envLocked: marks the env busy for the whole check so the sweeper can't
489
+ // expire the run lease mid-check and hand the env to someone else. The
490
+ // process-group timeout bounds how long that hold can last.
491
+ const timeoutS = check.timeout ?? CHECK_TIMEOUT_S;
492
+ opts.onProgress?.(`running check '${opts.check}'`);
493
+ const runStart = now();
494
+ const beat = setInterval(() => opts.onProgress?.(`running check '${opts.check}' … ${Math.round((now() - runStart) / 1000)}s`), 5000);
495
+ beat.unref();
496
+ const res = await this.envLocked(env.id, () => runGroupCmd(template(check.run, ctx), check.cwd ? join(dirs.tree, check.cwd) : dirs.tree, { ...process.env, ...templateEnv(check.env, ctx) }, timeoutS)).finally(() => clearInterval(beat));
497
+ const artifactsDir = this.collectArtifacts(env.id, dirs.tree, check.artifacts ?? []);
498
+ return {
499
+ check: opts.check,
500
+ ok: res.exitCode === 0 && !res.timedOut,
501
+ exitCode: res.timedOut ? -1 : res.exitCode,
502
+ failure: res.exitCode === 0 && !res.timedOut
503
+ ? null
504
+ : res.timedOut
505
+ ? { class: 'work-error', message: `check '${opts.check}' timed out after ${timeoutS}s (process group killed; raise checks.${opts.check}.timeout if legitimate)`, logExcerpt: res.output.slice(-800) }
506
+ : { class: 'work-error', message: `check '${opts.check}' failed (exit ${res.exitCode})`, logExcerpt: res.output.slice(-800) },
507
+ output: res.output,
508
+ artifactsDir,
509
+ outputsChanged: changedOutputs(stack.root, dirs.tree, stack.manifest),
510
+ envId: env.id,
511
+ durationMs: now() - startedAt,
512
+ };
513
+ }
514
+ finally {
515
+ // Only our own ephemeral run lease — guaranteed kind 'run' — is deleted.
516
+ const lease = this.journal.leaseForHolder(holder, stack.id);
517
+ if (lease && lease.kind === 'run')
518
+ this.journal.deleteLease(lease.id); // env stays hot in the pool
519
+ }
520
+ }
521
+ /**
522
+ * Detached submit-and-poll runs (decision 0015): the verdict outlives the
523
+ * client. Returns immediately with a jobId; the caller polls jobStatus.
524
+ * Execution is handed back to the daemon's serialized queue by the server.
525
+ */
526
+ createJob(cwd, check) {
527
+ const id = `job-${shortId()}`;
528
+ this.journal.saveJob({ id, stackCwd: cwd, check, state: 'pending' });
529
+ return id;
530
+ }
531
+ async executeJob(id, opts) {
532
+ this.journal.saveJob({ id, stackCwd: opts.cwd, check: opts.check, state: 'running' });
533
+ try {
534
+ const verdict = await this.run(opts);
535
+ this.journal.saveJob({ id, stackCwd: opts.cwd, check: opts.check, state: 'done', verdict, finishedAt: now() });
536
+ }
537
+ catch (err) {
538
+ const failure = err instanceof BrokerError ? err.toJSON() : { class: 'env-error', message: String(err.message ?? err) };
539
+ this.journal.saveJob({ id, stackCwd: opts.cwd, check: opts.check, state: 'done', verdict: { check: opts.check, ok: false, exitCode: -1, failure }, finishedAt: now() });
540
+ }
541
+ }
542
+ jobStatus(id) {
543
+ const job = this.journal.getJob(id);
544
+ if (!job)
545
+ throw new BrokerError('env-error', `no such job '${id}'`, 'job');
546
+ return job;
547
+ }
548
+ collectArtifacts(envId, tree, patterns) {
549
+ if (patterns.length === 0)
550
+ return null;
551
+ const dest = join(artifactsRoot(), envId, `${now()}`);
552
+ const walk = (dir, prefix = '') => {
553
+ const out = [];
554
+ for (const name of readdirSync(dir)) {
555
+ if (name === 'node_modules' || name === '.git')
556
+ continue;
557
+ const rel = prefix ? `${prefix}/${name}` : name;
558
+ const full = join(dir, name);
559
+ if (statSync(full).isDirectory())
560
+ out.push(...walk(full, rel));
561
+ else
562
+ out.push(rel);
563
+ }
564
+ return out;
565
+ };
566
+ const matched = walk(tree).filter((f) => matchesAny(f, patterns));
567
+ if (matched.length === 0)
568
+ return null;
569
+ for (const rel of matched) {
570
+ const dst = join(dest, rel);
571
+ mkdirSync(join(dst, '..'), { recursive: true });
572
+ copyFileSync(join(tree, rel), dst, fsConstants.COPYFILE_FICLONE);
573
+ }
574
+ return dest;
575
+ }
576
+ async syncLease(cwd, holder, onProgress) {
577
+ // Rebind the existing lease with current hygiene = reuse semantics.
578
+ return this.up({ cwd, holder: holder ?? resolve(cwd), kind: 'session', hygiene: 'reuse', onProgress });
579
+ }
580
+ /** bind --ref: project a COMMITTED ref (not the worktree state) into the env. */
581
+ async bindRef(cwd, ref, holder) {
582
+ const stack = loadStack(cwd);
583
+ let sha;
584
+ try {
585
+ sha = execFileSync('git', ['-C', stack.root, 'rev-parse', '--verify', `${ref}^{commit}`], { encoding: 'utf8' }).trim();
586
+ }
587
+ catch {
588
+ throw new BrokerError('work-error', `'${ref}' is not a commit in this repository`, 'bind');
589
+ }
590
+ const tmp = mkdtempSync(join(tmpdir(), 'backlot-ref-'));
591
+ try {
592
+ execFileSync('sh', ['-c', `git -C "${stack.root}" archive ${sha} | tar -x -C "${tmp}"`]);
593
+ return await this.up({ cwd, holder: holder ?? resolve(cwd), kind: 'session', hygiene: 'reuse', sourceRoot: tmp });
594
+ }
595
+ finally {
596
+ rmSync(tmp, { recursive: true, force: true });
597
+ }
598
+ }
599
+ jobList() {
600
+ return { jobs: this.journal.listJobs(20) };
601
+ }
602
+ async resetData(cwd, holder, onProgress) {
603
+ const stack = loadStack(cwd);
604
+ const h = holder ?? resolve(cwd);
605
+ const lease = this.journal.leaseForHolder(h, stack.id);
606
+ if (!lease)
607
+ throw new BrokerError('env-error', `no active lease — run 'backlot up' first`, 'lease');
608
+ this.journal.saveLease({ ...lease, hygiene: 'reset-data', expiresAt: now() + LEASE_TTL(lease.kind) });
609
+ const env = this.journal.getEnv(lease.envId);
610
+ await this.envLocked(env.id, () => this.bindAndStart(stack, env, 'reset-data', lease.kind, false, undefined, onProgress));
611
+ return this.ctx(cwd, h);
612
+ }
613
+ async exec(cwd, cmd, holder) {
614
+ const stack = loadStack(cwd);
615
+ const h = holder ?? resolve(cwd);
616
+ const lease = this.journal.leaseForHolder(h, stack.id);
617
+ if (!lease)
618
+ throw new BrokerError('env-error', `no active lease — run 'backlot up' first`, 'lease');
619
+ const env = this.journal.getEnv(lease.envId);
620
+ const dirs = this.envDirs(env.id);
621
+ const ctx = this.templateCtx(stack, env);
622
+ const extra = { BACKLOT_ENV_ID: env.id };
623
+ for (const [name, port] of Object.entries(env.ports))
624
+ extra[`BACKLOT_PORT_${name.toUpperCase()}`] = String(port);
625
+ for (const [name, s] of Object.entries(ctx.services))
626
+ extra[`BACKLOT_URL_${name.toUpperCase()}`] = s.url;
627
+ for (const [name, d] of Object.entries(ctx.datastores))
628
+ extra[`BACKLOT_DS_${name.toUpperCase()}`] = d.url;
629
+ return this.envLocked(env.id, () => new Promise((resolvePromise) => {
630
+ execFile('sh', ['-c', cmd], { cwd: dirs.tree, env: { ...process.env, ...extra }, maxBuffer: 32 * 1024 * 1024 }, (err, stdout, stderr) => resolvePromise({ exitCode: err ? (err.code ?? 1) : 0, stdout: String(stdout).slice(-8000), stderr: String(stderr).slice(-8000) }));
631
+ }));
632
+ }
633
+ /** Resolve auth.token with {{role}} and run it in the env tree. */
634
+ async token(cwd, role, holder) {
635
+ const stack = loadStack(cwd);
636
+ const spec = stack.manifest.auth?.token;
637
+ if (!spec)
638
+ throw new BrokerError('work-error', `stack.yaml declares no auth.token command`, 'manifest');
639
+ const lease = this.journal.leaseForHolder(holder ?? resolve(cwd), stack.id);
640
+ if (!lease)
641
+ throw new BrokerError('env-error', `no active lease — run 'backlot up' first`, 'lease');
642
+ const env = this.journal.getEnv(lease.envId);
643
+ const dirs = this.envDirs(env.id);
644
+ const ctx = { ...this.templateCtx(stack, env), role };
645
+ return this.envLocked(env.id, () => new Promise((resolvePromise, reject) => {
646
+ execFile('sh', ['-c', template(spec, ctx)], { cwd: dirs.tree, maxBuffer: 1024 * 1024 }, (err, stdout, stderr) => {
647
+ if (err)
648
+ reject(new BrokerError('work-error', `auth.token command failed`, 'auth', String(stderr).slice(0, 400)));
649
+ else
650
+ resolvePromise({ token: String(stdout).trim(), role });
651
+ });
652
+ }));
653
+ }
654
+ logs(cwd, service, lines, holder) {
655
+ const stack = loadStack(cwd);
656
+ const lease = this.journal.leaseForHolder(holder ?? resolve(cwd), stack.id);
657
+ if (!lease)
658
+ throw new BrokerError('env-error', `no active lease — run 'backlot up' first`, 'lease');
659
+ const env = this.journal.getEnv(lease.envId);
660
+ const logFile = join(this.envDirs(env.id).logs, `${service}.log`);
661
+ if (!existsSync(logFile))
662
+ throw new BrokerError('env-error', `no logs for service '${service}'`, service);
663
+ const content = readFileSync(logFile, 'utf8');
664
+ return { service, lines: content.split('\n').slice(-lines).join('\n') };
665
+ }
666
+ pull(cwd, holder) {
667
+ const stack = loadStack(cwd);
668
+ const lease = this.journal.leaseForHolder(holder ?? resolve(cwd), stack.id);
669
+ if (!lease)
670
+ throw new BrokerError('env-error', `no active lease — run 'backlot up' first`, 'lease');
671
+ const env = this.journal.getEnv(lease.envId);
672
+ return { pulled: pullOutputs(stack.root, this.envDirs(env.id).tree, stack.manifest) };
673
+ }
674
+ async release(cwd, holder) {
675
+ const stack = loadStack(cwd);
676
+ const lease = this.journal.leaseForHolder(holder ?? resolve(cwd), stack.id);
677
+ if (!lease)
678
+ return { released: false };
679
+ this.journal.deleteLease(lease.id);
680
+ this.stopWatch(lease.envId);
681
+ return { released: true, envId: lease.envId };
682
+ }
683
+ status() {
684
+ const envs = this.journal.allEnvs().map((e) => ({
685
+ id: e.id, stack: e.stack, state: e.state, ports: e.ports, bindCount: e.bindCount,
686
+ lease: this.journal.leaseForEnv(e.id) ?? null,
687
+ idleMs: now() - e.lastUsedAt,
688
+ }));
689
+ return { pid: process.pid, envs, poolMax: POOL_MAX(), events: recentEvents(15) };
690
+ }
691
+ /**
692
+ * doctor: actively check for the failure shapes the review surfaced —
693
+ * orphaned ports, journal/reality pid divergence, envs stuck recycling.
694
+ */
695
+ async doctor() {
696
+ const issues = [];
697
+ for (const env of this.journal.allEnvs()) {
698
+ if (env.state === 'recycling')
699
+ issues.push({ level: 'warn', envId: env.id, issue: 'stuck in recycling (a daemon likely died mid-teardown; restart reconciles)' });
700
+ // Journal says these pids run — are they actually alive?
701
+ for (const [svc, pid] of Object.entries(env.servicePids)) {
702
+ let alive = true;
703
+ try {
704
+ process.kill(pid, 0);
705
+ }
706
+ catch {
707
+ alive = false;
708
+ }
709
+ if (!alive)
710
+ issues.push({ level: 'error', envId: env.id, issue: `journal records pid ${pid} for service '${svc}' but it is not running (recovery drift)` });
711
+ }
712
+ // (Port liveness is intentionally NOT probed here: a service bound to ::
713
+ // vs a 127.0.0.1 probe gives false positives across IPv4/IPv6 dual-stack.
714
+ // The pid-divergence check above is the reliable "is it alive" signal.)
715
+ }
716
+ logEvent({ level: issues.length ? 'warn' : 'info', kind: 'doctor', detail: `${issues.length} issue(s)` });
717
+ return { ok: issues.length === 0, issues, events: recentEvents(20) };
718
+ }
719
+ /**
720
+ * Atomically claim an env for teardown UNDER THE POOL LOCK: re-read it, and
721
+ * (unless force) refuse if it's leased or busy, then flip it to the
722
+ * 'recycling' guard state so tryClaim/sweep skip it. Returns the row to tear
723
+ * down, or null if it slipped away. The slow teardown then runs OUTSIDE the
724
+ * lock, but no claim can touch a 'recycling' env.
725
+ */
726
+ claimForTeardown(envId, force) {
727
+ return this.poolLocked(() => {
728
+ const env = this.journal.getEnv(envId);
729
+ if (!env || env.state === 'recycling')
730
+ return null;
731
+ // An in-flight operation (busy) is NEVER interrupted — not even by
732
+ // --force; force only bypasses the LEASE (the clean-slate button).
733
+ if (this.busy.has(envId))
734
+ return null;
735
+ if (!force && this.journal.leaseForEnv(envId))
736
+ return null;
737
+ env.state = 'recycling';
738
+ this.journal.saveEnv(env);
739
+ return env;
740
+ });
741
+ }
742
+ /** Slow teardown of an already-claimed ('recycling') env. */
743
+ async teardownClaimed(env) {
744
+ this.stopWatch(env.id);
745
+ await this.supervisor(env).stopAll();
746
+ this.supervisors.delete(env.id);
747
+ // Drop server-side namespaces too (best effort — the manifest may be gone).
748
+ try {
749
+ const stack = loadStack(env.stackRoot);
750
+ const dirs = this.envDirs(env.id);
751
+ const h = { envId: env.id, envTree: dirs.tree, dataDir: dirs.data };
752
+ for (const [name, spec] of Object.entries(stack.manifest.datastores ?? {})) {
753
+ if (env.datastoreNs[name])
754
+ await makeDatastore(name, spec, stack.id).drop(h);
755
+ }
756
+ }
757
+ catch {
758
+ /* stack unloadable — local files still go */
759
+ }
760
+ rmSync(env.root, { recursive: true, force: true });
761
+ this.journal.deleteEnv(env.id);
762
+ this.envChains.delete(env.id); // don't leak a settled chain for a dead id
763
+ }
764
+ async recycleOne(envId, force) {
765
+ const claimed = await this.claimForTeardown(envId, force);
766
+ if (!claimed)
767
+ return false;
768
+ await this.teardownClaimed(claimed);
769
+ return true;
770
+ }
771
+ async poolRecycle(all) {
772
+ const recycled = [];
773
+ for (const env of this.journal.allEnvs()) {
774
+ if (await this.recycleOne(env.id, all))
775
+ recycled.push(env.id);
776
+ }
777
+ logEvent({ level: 'info', kind: 'pool-recycle', detail: `recycled ${recycled.length} env(s)${all ? ' (--all)' : ''}` });
778
+ return { recycled };
779
+ }
780
+ /** Reap the provably-dead (degraded) envs now, instead of waiting for the sweep. */
781
+ async poolReconcile() {
782
+ const reaped = [];
783
+ for (const env of this.journal.allEnvs()) {
784
+ if (env.state === 'degraded' && (await this.recycleOne(env.id, true)))
785
+ reaped.push(env.id);
786
+ }
787
+ logEvent({ level: 'info', kind: 'pool-reconcile', detail: `reaped ${reaped.length} degraded env(s)` });
788
+ return { reaped };
789
+ }
790
+ // ---------------------------------------------------------------- sweeper
791
+ async sweep() {
792
+ const t = now();
793
+ const gap = t - this.lastSweep;
794
+ const interval = Number(process.env.BACKLOT_SWEEP_MS ?? 15_000);
795
+ if (gap > 3 * interval)
796
+ this.journal.pardon(gap - interval); // sleep pardon (decision 0009)
797
+ this.lastSweep = t;
798
+ // Disk retention (~10 min cadence): nothing backlot writes grows forever.
799
+ if (t - this.lastRetention > Number(process.env.BACKLOT_RETENTION_MS ?? 10 * 60_000)) {
800
+ this.lastRetention = t;
801
+ try {
802
+ retentionSweep(this.journal, policy());
803
+ }
804
+ catch {
805
+ /* best-effort */
806
+ }
807
+ }
808
+ for (const lease of this.journal.allLeases()) {
809
+ // Never expire a lease whose env has an operation in flight (a long bind
810
+ // under a tiny TTL must not lose its env mid-bind).
811
+ if (lease.expiresAt < now() && !this.busy.has(lease.envId)) {
812
+ this.journal.deleteLease(lease.id);
813
+ this.stopWatch(lease.envId);
814
+ }
815
+ }
816
+ for (const env of this.journal.allEnvs()) {
817
+ if (this.busy.has(env.id))
818
+ continue;
819
+ if (env.state === 'degraded') {
820
+ // Dead env — reap regardless of a stale lease (force), but never while an
821
+ // op is in flight (claimForTeardown always respects busy). The holder's
822
+ // stale lease is dropped with the env; its next `up` gets a fresh one.
823
+ await this.recycleOne(env.id, true);
824
+ continue;
825
+ }
826
+ if (env.state === 'hot' && !this.journal.leaseForEnv(env.id) && now() - env.lastUsedAt > IDLE_TTL()) {
827
+ // Claim under the pool lock so a concurrent bind can't lease this env
828
+ // between the idle check and the service kill (dead-URL race).
829
+ const claimed = await this.claimForTeardown(env.id, false);
830
+ if (!claimed)
831
+ continue;
832
+ this.stopWatch(env.id);
833
+ await this.supervisor(claimed).stopAll();
834
+ this.supervisors.delete(env.id);
835
+ const fresh = this.journal.getEnv(env.id);
836
+ if (fresh) {
837
+ fresh.state = 'warm';
838
+ fresh.servicePids = {};
839
+ this.journal.saveEnv(fresh);
840
+ }
841
+ }
842
+ }
843
+ }
844
+ async shutdown() {
845
+ for (const id of [...this.watchers.keys()])
846
+ this.stopWatch(id);
847
+ for (const sup of this.supervisors.values())
848
+ await sup.stopAll();
849
+ for (const env of this.journal.allEnvs()) {
850
+ if (env.state === 'hot') {
851
+ env.state = 'warm';
852
+ env.servicePids = {};
853
+ this.journal.saveEnv(env);
854
+ }
855
+ }
856
+ }
857
+ }
858
+ //# sourceMappingURL=engine.js.map