@celilo/cli 0.4.0-alpha.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/drizzle/0008_aspect_consent.sql +1 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +5 -6
- package/src/cli/command-registry.ts +38 -0
- package/src/cli/commands/backup-pull.test.ts +48 -0
- package/src/cli/commands/backup-pull.ts +116 -0
- package/src/cli/commands/events.test.ts +108 -0
- package/src/cli/commands/events.ts +243 -0
- package/src/cli/commands/module-generate.ts +5 -4
- package/src/cli/commands/module-import-aspect.test.ts +116 -0
- package/src/cli/commands/module-import.ts +12 -1
- package/src/cli/commands/storage-add-s3.ts +91 -46
- package/src/cli/completion.ts +2 -1
- package/src/cli/index.ts +11 -0
- package/src/db/client.ts +4 -0
- package/src/db/schema.ts +9 -1
- package/src/hooks/capability-loader.test.ts +31 -1
- package/src/hooks/capability-loader.ts +65 -16
- package/src/manifest/contracts/v1.ts +12 -0
- package/src/manifest/schema.ts +13 -1
- package/src/manifest/template-validator.ts +1 -0
- package/src/module/packaging/build.test.ts +75 -0
- package/src/module/packaging/build.ts +9 -20
- package/src/module/packaging/package-rules.ts +44 -0
- package/src/secrets/generators.test.ts +14 -1
- package/src/secrets/generators.ts +63 -1
- package/src/services/aspect-approvals.test.ts +30 -10
- package/src/services/aspect-approvals.ts +61 -31
- package/src/services/aspect-runner.test.ts +161 -8
- package/src/services/aspect-runner.ts +156 -34
- package/src/services/backup-create.ts +11 -2
- package/src/services/bus-ensure-flow.test.ts +19 -1
- package/src/services/bus-interview.ts +56 -0
- package/src/services/bus-secret-flow.test.ts +19 -1
- package/src/services/celilo-events.test.ts +122 -0
- package/src/services/celilo-events.ts +144 -0
- package/src/services/celilo-mgmt-hooks.test.ts +9 -1
- package/src/services/config-interview.ts +38 -19
- package/src/services/deploy-planner.test.ts +66 -0
- package/src/services/deploy-planner.ts +16 -2
- package/src/services/deploy-preflight.ts +18 -1
- package/src/services/deployed-systems.ts +30 -1
- package/src/services/dns-provider-backfill.test.ts +150 -0
- package/src/services/dns-provider-backfill.ts +72 -2
- package/src/services/e2e-guard.test.ts +38 -0
- package/src/services/e2e-guard.ts +43 -0
- package/src/services/module-deploy.ts +12 -26
- package/src/services/responder-probe.test.ts +87 -0
- package/src/services/responder-probe.ts +29 -0
- package/src/services/restore-from-file.ts +16 -6
- package/src/services/storage-providers/s3.test.ts +101 -0
- package/src/templates/generator.test.ts +77 -0
- package/src/templates/generator.ts +69 -2
- package/src/variables/context.ts +34 -0
- package/src/variables/lxc-nameserver.test.ts +86 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
|
2
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
3
|
+
import { tmpdir } from 'node:os';
|
|
4
|
+
import { join } from 'node:path';
|
|
5
|
+
import type { DnsRecordRequest, HookLogger } from '@celilo/capabilities';
|
|
6
|
+
import { closeDb, getDb } from '../db/client';
|
|
7
|
+
import { runMigrations } from '../db/migrate';
|
|
8
|
+
import { capabilities, moduleConfigs, modules, webRoutes } from '../db/schema';
|
|
9
|
+
import { backfillWebRouteDns } from './dns-provider-backfill';
|
|
10
|
+
|
|
11
|
+
/** Logger stand-in whose four level methods do nothing, keeping test output quiet. */
const silentLogger: HookLogger = {
  info: () => {},
  warn: () => {},
  error: () => {},
  debug: () => {},
} as unknown as HookLogger;
|
|
17
|
+
|
|
18
|
+
describe('backfillWebRouteDns (ISS-0029)', () => {
  let dir: string;

  // Every test runs against its own database file inside a throwaway temp
  // directory, addressed through CELILO_DB_PATH and migrated before use.
  beforeEach(async () => {
    dir = mkdtempSync(join(tmpdir(), 'celilo-webroute-backfill-'));
    const dbPath = join(dir, 'celilo.db');
    process.env.CELILO_DB_PATH = dbPath;
    await runMigrations(dbPath);
  });

  afterEach(() => {
    closeDb();
    // Unset the variable instead of assigning `undefined`: Node documents that
    // values assigned to process.env are coerced to strings, which could leave
    // the literal string "undefined" behind as the DB path.
    Reflect.deleteProperty(process.env, 'CELILO_DB_PATH');
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  });

  /** Insert a minimal module row; does nothing when the id is already present. */
  function seedModule(id: string) {
    getDb()
      .insert(modules)
      .values({
        id,
        name: id,
        version: '1.0.0',
        manifestData: { id, name: id, version: '1.0.0', celilo_contract: '1.0' },
        sourcePath: `/tmp/${id}`,
      })
      .onConflictDoNothing()
      .run();
  }

  /**
   * Seed an `iptables` module that provides the `firewall` capability and
   * stores `natIp` under its `nat_ip` module-config key.
   */
  function seedFirewall(natIp: string) {
    seedModule('iptables');
    getDb()
      .insert(capabilities)
      .values({ moduleId: 'iptables', capabilityName: 'firewall', version: '1.0.0', data: {} })
      .run();
    getDb()
      .insert(moduleConfigs)
      .values({
        moduleId: 'iptables',
        key: 'nat_ip',
        value: natIp,
        valueJson: JSON.stringify(natIp),
      })
      .run();
  }

  /** Insert one reverse-proxy web route owned by the `caddy` module. */
  function seedRoute(hostname: string, path = '/') {
    // web_routes.module_id is a FK → modules; ensure the owner exists.
    seedModule('caddy');
    getDb()
      .insert(webRoutes)
      .values({
        slug: `${hostname}${path}`,
        moduleId: 'caddy',
        type: 'reverse_proxy',
        path,
        hostname,
      })
      .run();
  }

  /** Stub capability loader returning a dns_internal that records its calls. */
  function stubLoader(calls: DnsRecordRequest[]) {
    return async () => ({
      dns_internal: {
        async registerRecord(req: DnsRecordRequest) {
          calls.push(req);
        },
        async deleteRecord() {},
      },
    });
  }

  it('registers each DISTINCT web-route hostname at the firewall nat_ip', async () => {
    seedFirewall('100.64.0.1');
    seedRoute('apt.celilo.computer', '/');
    seedRoute('apt.celilo.computer', '/-/publish'); // same host, different path → deduped
    seedRoute('registry.lunacycle.net', '/');

    const calls: DnsRecordRequest[] = [];
    await backfillWebRouteDns('technitium', getDb(), silentLogger, stubLoader(calls));

    expect(calls.map((c) => c.host).sort()).toEqual([
      'apt.celilo.computer',
      'registry.lunacycle.net',
    ]);
    expect(calls.every((c) => c.value === '100.64.0.1' && c.type === 'A')).toBe(true);
  });

  it('skips without throwing when no firewall nat_ip is available', async () => {
    seedRoute('apt.celilo.computer');
    const calls: DnsRecordRequest[] = [];
    await backfillWebRouteDns('technitium', getDb(), silentLogger, stubLoader(calls));
    expect(calls).toHaveLength(0);
  });

  it('is a no-op when there are no web routes', async () => {
    seedFirewall('100.64.0.1');
    const calls: DnsRecordRequest[] = [];
    await backfillWebRouteDns('technitium', getDb(), silentLogger, stubLoader(calls));
    expect(calls).toHaveLength(0);
  });

  it('aggregates per-host failures into one error', async () => {
    seedFirewall('100.64.0.1');
    seedRoute('a.celilo.computer');
    seedRoute('b.celilo.computer');
    const attempted: string[] = [];
    const failing = async () => ({
      dns_internal: {
        async registerRecord(req: DnsRecordRequest) {
          attempted.push(req.host);
          if (req.host === 'a.celilo.computer') throw new Error('boom');
        },
        async deleteRecord() {},
      },
    });
    await expect(backfillWebRouteDns('technitium', getDb(), silentLogger, failing)).rejects.toThrow(
      /failed for 1 host/,
    );
    // One host failing must not stop the remaining hosts from being attempted.
    expect(attempted.sort()).toEqual(['a.celilo.computer', 'b.celilo.computer']);
  });
});
|
|
@@ -14,10 +14,11 @@
|
|
|
14
14
|
* D5 division of labour: celilo owns host inventory, the module owns DNS.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
import type { HookLogger } from '@celilo/capabilities';
|
|
17
|
+
import type { DnsInternalCapability, HookLogger } from '@celilo/capabilities';
|
|
18
18
|
import { and, eq } from 'drizzle-orm';
|
|
19
19
|
import type { DbClient } from '../db/client';
|
|
20
|
-
import { capabilities as capabilitiesTable, modules } from '../db/schema';
|
|
20
|
+
import { capabilities as capabilitiesTable, modules, webRoutes } from '../db/schema';
|
|
21
|
+
import { loadCapabilityFunctions, resolveFirewallNatIp } from '../hooks/capability-loader';
|
|
21
22
|
import { runNamedHook } from '../hooks/run-named-hook';
|
|
22
23
|
import type { HookName } from '../hooks/types';
|
|
23
24
|
import { getModuleSystems } from './deployed-systems';
|
|
@@ -73,3 +74,72 @@ export async function backfillProviderDns(
|
|
|
73
74
|
throw new Error(`DNS backfill failed for ${failures.length} host(s): ${failures.join('; ')}`);
|
|
74
75
|
}
|
|
75
76
|
}
|
|
77
|
+
|
|
78
|
+
/**
 * Register an `A` record for every distinct hostname in the web-routes table
 * with the given dns_internal provider module (ISS-0029).
 *
 * The per-system backfill above handles deployed hosts by bare hostname; this
 * one handles the FQDNs modules publish via public_web (e.g.
 * `apt.celilo.computer`), which a provider deployed late would otherwise never
 * learn about. All records point at the firewall NAT IP — the same value
 * public_web uses for the live registration — so both paths agree.
 *
 * This deliberately bypasses `on_system_event`, which concatenates
 * `<hostname>.<zone>` and would corrupt a host that is already an FQDN. The
 * provider's `registerRecord` is called directly with the FQDN; the provider
 * creates the split-horizon zone on demand (Phase 1).
 *
 * Returns quietly (with a warning where relevant) when there are no web
 * routes, no firewall NAT IP, or no loadable `dns_internal` capability.
 *
 * @param moduleId id of the DNS provider module to backfill into
 * @param db database client used to read web routes and resolve the NAT IP
 * @param logger receives the progress and skip messages
 * @param loadCaps capability loader; defaults to `loadCapabilityFunctions`
 * @throws Error after every hostname has been attempted, if any registration
 *   failed; the message lists each failing host and its reason
 */
export async function backfillWebRouteDns(
  moduleId: string,
  db: DbClient,
  logger: HookLogger,
  // Injectable so unit tests don't dynamically import a real provider module.
  loadCaps: typeof loadCapabilityFunctions = loadCapabilityFunctions,
): Promise<void> {
  // Several routes can share one hostname (different paths); register each once.
  const routeRows = db.select({ hostname: webRoutes.hostname }).from(webRoutes).all();
  const hostnames = Array.from(new Set(routeRows.map((row) => row.hostname)));
  if (hostnames.length === 0) return;

  const natIp = await resolveFirewallNatIp(db);
  if (!natIp) {
    // No firewall NAT IP to point at — the live public_web path falls back to
    // Caddy's IP per route; we don't replicate that here. Records land when the
    // route is (re)published. Surface it rather than silently doing nothing.
    logger.warn(
      `web-route DNS backfill for '${moduleId}' skipped: no firewall nat_ip available (${hostnames.length} hostname(s) deferred to live registration)`,
    );
    return;
  }

  const loaded = await loadCaps(moduleId, db, logger);
  const dnsInternal = loaded.dns_internal as DnsInternalCapability | undefined;
  if (!dnsInternal) {
    logger.warn(`web-route DNS backfill: dns_internal capability not loadable for '${moduleId}'`);
    return;
  }

  logger.info(
    `Backfilling ${hostnames.length} web-route hostname(s) into '${moduleId}' at ${natIp}`,
  );

  // Registrations run one at a time; a failure is recorded and the loop moves on.
  const failures: string[] = [];
  for (const host of hostnames) {
    try {
      await dnsInternal.registerRecord({ host, type: 'A', value: natIp });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      failures.push(`${host}: ${reason}`);
    }
  }

  if (failures.length === 0) return;
  throw new Error(
    `web-route DNS backfill failed for ${failures.length} host(s): ${failures.join('; ')}`,
  );
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
|
2
|
+
import { tmpdir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { E2E_CONFLICT_FIX, E2E_CONFLICT_MESSAGE, runningE2eContainers } from './e2e-guard';
|
|
5
|
+
|
|
6
|
+
describe('e2e-guard', () => {
  // CELILO_DB_PATH as it was before each test, so the suite leaves no trace.
  let prev: string | undefined;

  beforeEach(() => {
    prev = process.env.CELILO_DB_PATH;
  });

  afterEach(() => {
    // When the variable was originally absent it has to be removed, not
    // assigned `undefined`: Node documents that values assigned to process.env
    // are coerced to strings, so the assignment could leave "undefined" behind.
    if (prev === undefined) Reflect.deleteProperty(process.env, 'CELILO_DB_PATH');
    else process.env.CELILO_DB_PATH = prev;
  });

  it('skips the docker check when running against a temp test DB', () => {
    // Integration tests point CELILO_DB_PATH at os.tmpdir() and must not trip
    // over a developer's unrelated e2e stack.
    process.env.CELILO_DB_PATH = join(tmpdir(), 'celilo-test.db');
    expect(runningE2eContainers()).toEqual([]);
  });

  it('returns an array and never throws when docker is checked', () => {
    // A non-temp path defeats the short-circuit, exercising the real docker
    // path. Result depends on the host's docker state; the contract is "an
    // array of celilo-e2e-* names, never an exception".
    process.env.CELILO_DB_PATH = '/opt/celilo/celilo.db';
    const result = runningE2eContainers();
    expect(Array.isArray(result)).toBe(true);
    for (const name of result) {
      expect(name.startsWith('celilo-e2e-')).toBe(true);
    }
  });

  it('exposes a deploy message that includes the remediation hint', () => {
    expect(E2E_CONFLICT_MESSAGE).toContain(E2E_CONFLICT_FIX);
    expect(E2E_CONFLICT_MESSAGE).toContain('mutually exclusive');
  });
});
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live/e2e mutual-exclusion guard.
|
|
3
|
+
*
|
|
4
|
+
* A live deploy and the e2e simulator can't run at the same time: the e2e
|
|
5
|
+
* stack binds the same docker networks and hostnames a live deploy touches,
|
|
6
|
+
* so a deploy fired while an e2e network is up (e.g. a leftover `--keep` run)
|
|
7
|
+
* would collide. Both the deploy itself and the fast pre-flight check use this
|
|
8
|
+
* helper to refuse before touching any infrastructure.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { execSync } from 'node:child_process';
|
|
12
|
+
import { tmpdir } from 'node:os';
|
|
13
|
+
|
|
14
|
+
/** Remediation hint, kept to one line and reused by the deploy and preflight errors. */
export const E2E_CONFLICT_FIX = 'Stop e2e tests first: cele2e down';

/** Three-line error text the deploy returns when an e2e stack is up; ends with the fix hint. */
export const E2E_CONFLICT_MESSAGE = [
  'Cannot deploy: e2e test containers are running.',
  'Live and e2e environments are mutually exclusive.',
  E2E_CONFLICT_FIX,
].join('\n');
|
|
21
|
+
|
|
22
|
+
/**
 * True when `candidate` is the OS temp directory itself or a path beneath it.
 *
 * A bare prefix match is not enough: with a temp dir of `/tmp`, the unrelated
 * path `/tmpfoo/celilo.db` also starts with `/tmp`. The character following
 * the prefix must therefore be a path separator (or the end of the string),
 * unless the temp dir already ends in a separator.
 */
function isInsideTmpdir(candidate: string): boolean {
  const root = tmpdir();
  if (!candidate.startsWith(root)) return false;
  if (/[\\/]$/.test(root)) return true;
  const next = candidate.charAt(root.length);
  return next === '' || next === '/' || next === '\\';
}

/**
 * Names of running `celilo-e2e-*` containers (empty = clear to deploy).
 *
 * Skipped (returns `[]`) when CELILO_DB_PATH points inside the OS temp dir,
 * since integration tests run against throwaway DBs and never touch docker —
 * they must not trip over a developer's unrelated e2e stack. A docker failure
 * (not installed / not running / timed out after 5 s) is treated as "clear":
 * there's nothing to conflict with.
 *
 * @returns the matching container names, one per running e2e container
 */
export function runningE2eContainers(): string[] {
  const dbPath = process.env.CELILO_DB_PATH;
  if (dbPath !== undefined && isInsideTmpdir(dbPath)) return [];
  try {
    const running = execSync('docker ps --format "{{.Names}}" 2>/dev/null', {
      encoding: 'utf-8',
      timeout: 5000,
    });
    return running.split('\n').filter((name) => name.startsWith('celilo-e2e-'));
  } catch {
    // docker not installed / not running — nothing to conflict with.
    return [];
  }
}
|
|
@@ -35,6 +35,7 @@ import { waitForSSH } from './deploy-ssh';
|
|
|
35
35
|
import { executeTerraform, parseTerraformOutputs } from './deploy-terraform';
|
|
36
36
|
import { validateAndPrepareDeployment } from './deploy-validation';
|
|
37
37
|
import { getModuleSystems } from './deployed-systems';
|
|
38
|
+
import { E2E_CONFLICT_MESSAGE, runningE2eContainers } from './e2e-guard';
|
|
38
39
|
import { resolveInfrastructureVariables } from './infrastructure-variable-resolver';
|
|
39
40
|
import { findMachineForModule } from './machine-pool';
|
|
40
41
|
import { checkProxmoxReachable, formatProxmoxUnreachableError } from './proxmox-preflight';
|
|
@@ -341,31 +342,12 @@ async function deployModuleImpl(
|
|
|
341
342
|
: null;
|
|
342
343
|
|
|
343
344
|
try {
|
|
344
|
-
//
|
|
345
|
-
//
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
if (
|
|
349
|
-
|
|
350
|
-
const { execSync } = await import('node:child_process');
|
|
351
|
-
const running = execSync('docker ps --format "{{.Names}}" 2>/dev/null', {
|
|
352
|
-
encoding: 'utf-8',
|
|
353
|
-
timeout: 5000,
|
|
354
|
-
});
|
|
355
|
-
const e2eContainers = running.split('\n').filter((n) => n.startsWith('celilo-e2e-'));
|
|
356
|
-
if (e2eContainers.length > 0) {
|
|
357
|
-
return {
|
|
358
|
-
success: false,
|
|
359
|
-
error:
|
|
360
|
-
'Cannot deploy: e2e test containers are running.\n' +
|
|
361
|
-
'Live and e2e environments are mutually exclusive.\n' +
|
|
362
|
-
'Stop e2e tests first: cele2e down',
|
|
363
|
-
phases,
|
|
364
|
-
};
|
|
365
|
-
}
|
|
366
|
-
} catch {
|
|
367
|
-
// docker ps failed — Docker may not be installed or running, that's fine
|
|
368
|
-
}
|
|
345
|
+
// Live and e2e environments are mutually exclusive — refuse if an e2e
|
|
346
|
+
// stack is up (shared docker networks/hostnames). See e2e-guard.ts.
|
|
347
|
+
// Pre-flight checks the same condition; this is the defense-in-depth
|
|
348
|
+
// guard for callers that skip pre-flight.
|
|
349
|
+
if (runningE2eContainers().length > 0) {
|
|
350
|
+
return { success: false, error: E2E_CONFLICT_MESSAGE, phases };
|
|
369
351
|
}
|
|
370
352
|
|
|
371
353
|
const validation = await validateAndPrepareDeployment(moduleId, db);
|
|
@@ -1292,7 +1274,7 @@ async function deployModuleImpl(
|
|
|
1292
1274
|
// provider (deliveries bind at emit time). Non-providers skip this
|
|
1293
1275
|
// entirely; their registration rides the system.created event below.
|
|
1294
1276
|
// v2/EVENT_DRIVEN_HOOK_SUBSCRIPTIONS.md.
|
|
1295
|
-
const { isDnsInternalProvider, backfillProviderDns } = await import(
|
|
1277
|
+
const { isDnsInternalProvider, backfillProviderDns, backfillWebRouteDns } = await import(
|
|
1296
1278
|
'./dns-provider-backfill'
|
|
1297
1279
|
);
|
|
1298
1280
|
if (isDnsInternalProvider(moduleId, db)) {
|
|
@@ -1304,7 +1286,11 @@ async function deployModuleImpl(
|
|
|
1304
1286
|
dnsGauge.start();
|
|
1305
1287
|
try {
|
|
1306
1288
|
const dnsLogger = createGaugeLogger(dnsGauge, moduleId, 'dns_backfill');
|
|
1289
|
+
// Per-system records (bare hostnames) ...
|
|
1307
1290
|
await backfillProviderDns(moduleId, db, dnsLogger);
|
|
1291
|
+
// ... and published web-route FQDNs (apt.celilo.computer), which the
|
|
1292
|
+
// per-system path doesn't cover (ISS-0029).
|
|
1293
|
+
await backfillWebRouteDns(moduleId, db, dnsLogger);
|
|
1308
1294
|
dnsGauge.stop(true);
|
|
1309
1295
|
} catch (error) {
|
|
1310
1296
|
dnsGauge.stop(false);
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recurrence gate for ISS-0025: a headless deploy interview must fail fast
|
|
3
|
+
* instead of hanging forever when no responder is listening. The deploy path
|
|
4
|
+
* routes every interview query through `busInterviewGuarded`, which calls
|
|
5
|
+
* `ensureResponderForInterview` — so this guard is the choke point to protect.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
|
|
9
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
10
|
+
import { tmpdir } from 'node:os';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
import { defineEvents, openBus } from '@celilo/event-bus';
|
|
13
|
+
import { ensureResponderForInterview } from './responder-probe';
|
|
14
|
+
|
|
15
|
+
const NO_SCHEMAS = defineEvents({});
|
|
16
|
+
|
|
17
|
+
/** Redefine `process.stdin.isTTY` so a test can simulate a terminal or a headless run. */
function setTTY(value: boolean | undefined): void {
  // `configurable: true` keeps the property redefinable for the next call.
  const descriptor: PropertyDescriptor = { value, configurable: true };
  Object.defineProperty(process.stdin, 'isTTY', descriptor);
}
|
|
20
|
+
|
|
21
|
+
describe('ensureResponderForInterview (ISS-0025)', () => {
  let dir: string;
  let dbPath: string;
  let origEnv: string | undefined;
  let origTTY: boolean | undefined;

  // Point EVENT_BUS_DB at a bus database inside a fresh temp dir, remembering
  // the previous env value and TTY flag so afterEach can put them back.
  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'responder-guard-'));
    dbPath = join(dir, 'events.db');
    origEnv = process.env.EVENT_BUS_DB;
    process.env.EVENT_BUS_DB = dbPath;
    origTTY = process.stdin.isTTY;
  });

  afterEach(() => {
    // Remove the variable when it was originally absent. Assigning `undefined`
    // is not equivalent: Node documents that values assigned to process.env are
    // coerced to strings, so it could leave the string "undefined" behind.
    if (origEnv === undefined) Reflect.deleteProperty(process.env, 'EVENT_BUS_DB');
    else process.env.EVENT_BUS_DB = origEnv;
    setTTY(origTTY);
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  });

  it('throws an actionable error when non-TTY and no responder is listening', async () => {
    setTTY(false);
    // No responder on the bus → the probe times out → fail fast, not a hang.
    await expect(ensureResponderForInterview('config.required.foo.bar')).rejects.toThrow(
      /No responder is listening/,
    );
  });

  it('names the blocked prompt and the remediations in the error', async () => {
    setTTY(false);
    let message = '';
    try {
      await ensureResponderForInterview('secret.required.caddy.api_token');
    } catch (err) {
      message = err instanceof Error ? err.message : String(err);
    }
    expect(message).toContain('secret.required.caddy.api_token');
    expect(message).toContain('celilo events respond');
  });

  it('resolves when a responder answers the probe', async () => {
    setTTY(false);
    const responder = openBus({ dbPath, events: NO_SCHEMAS });
    try {
      // Answer every probe with a reply event tied to the probe's id.
      responder.watch('responder.probe', (event) => {
        responder.emitRaw(
          `${event.type}.reply`,
          { kind: 'programmatic', emittedBy: 'test' },
          { replyFor: event.id, emittedBy: 'test' },
        );
      });

      await expect(
        ensureResponderForInterview('config.required.foo.bar'),
      ).resolves.toBeUndefined();
    } finally {
      // Close the bus even when the assertion fails, so afterEach does not
      // remove the temp directory while this handle is still open.
      responder.close();
    }
  });

  it('skips the probe entirely on a TTY (the terminal-responder will answer)', async () => {
    setTTY(true);
    // No responder running, but a TTY short-circuits before probing — the
    // built-in terminal-responder is the responder.
    await expect(ensureResponderForInterview('config.required.foo.bar')).resolves.toBeUndefined();
  });
});
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
15
|
import { defineEvents, openBus } from '@celilo/event-bus';
|
|
16
|
+
import { getEventBusPath } from '../config/paths';
|
|
16
17
|
|
|
17
18
|
const NO_SCHEMAS = defineEvents({});
|
|
18
19
|
|
|
@@ -43,3 +44,31 @@ export async function probeForResponder(busDbPath: string, timeoutMs = 1500): Pr
|
|
|
43
44
|
bus.close();
|
|
44
45
|
}
|
|
45
46
|
}
|
|
47
|
+
|
|
48
|
+
/**
 * Guard against a deploy interview that would wait forever (ISS-0025).
 *
 * `busInterview` waits indefinitely (`timeoutMs: 0`) for a responder's reply.
 * On a TTY the deploy registers a terminal-responder, so the prompt will be
 * answered and no probe is made. Without a TTY, the bus is probed once; if
 * nothing answers, an actionable error is thrown so deploys fail fast the way
 * `module generate` does. Call this immediately before emitting an interview
 * query (see `busInterviewGuarded`).
 *
 * @param queryType the interview event type about to be emitted (e.g.
 *   `config.required.<m>.<k>`), echoed in the error so the operator sees which
 *   prompt is blocked.
 * @throws Error when stdin is not a TTY and the probe finds no responder
 */
export async function ensureResponderForInterview(queryType: string): Promise<void> {
  const headless = !process.stdin.isTTY;
  // The probe only runs when headless; on a TTY the `&&` short-circuits.
  if (headless && !(await probeForResponder(getEventBusPath()))) {
    throw new Error(
      `No responder is listening and stdin isn't a TTY, so this interview prompt can't be answered (${queryType}).

Either:
1. Run it in a terminal — the built-in prompt will ask, or
2. Start a responder in another shell: celilo events respond
(or pre-stage answers: celilo events respond --values <file>)`,
    );
  }
}
|
|
@@ -233,15 +233,25 @@ export async function restoreFromArtifactFile(
|
|
|
233
233
|
}
|
|
234
234
|
}
|
|
235
235
|
|
|
236
|
-
// 7.
|
|
237
|
-
//
|
|
238
|
-
//
|
|
239
|
-
//
|
|
236
|
+
// 7. Complete the operation BEFORE swapping the DB. The swap
|
|
237
|
+
// (applyStagedSystemFiles) is the final, irreversible step: it closes
|
|
238
|
+
// the live DB and overwrites celilo.db with the artifact's. Marking the
|
|
239
|
+
// operation complete writes to module_operations — if done AFTER the
|
|
240
|
+
// swap it reopens the freshly-staged artifact DB in THIS process and
|
|
241
|
+
// throws SQLITE_IOERR (a fresh process opens the same file fine; it's a
|
|
242
|
+
// same-process post-swap reopen artifact). The operation row lives in
|
|
243
|
+
// the pre-swap DB the swap discards anyway, so completing it here —
|
|
244
|
+
// while that DB is still open — is correct and avoids the reopen.
|
|
245
|
+
// Nothing in-process must touch the DB after the swap; migrateRestoredDb
|
|
246
|
+
// (the CLI's next step) is best-effort and tolerates a reopen failure.
|
|
247
|
+
completeOperation(opId);
|
|
248
|
+
|
|
249
|
+
// 8. Apply staged system files (celilo.db + master.key + fleet SSH key).
|
|
250
|
+
// Staged by celilo-mgmt's on_restore at restore_dir/system/. The live
|
|
251
|
+
// process had the DB open, so this swap happens AFTER the hook returns.
|
|
240
252
|
const systemStaging = join(restoreDataDir, 'system');
|
|
241
253
|
const apply = applyStagedSystemFiles(systemStaging);
|
|
242
254
|
|
|
243
|
-
completeOperation(opId);
|
|
244
|
-
|
|
245
255
|
return {
|
|
246
256
|
success: true,
|
|
247
257
|
crossModuleApplied,
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recurrence gate for ISS-0016: the S3 provider's upload() must send a
|
|
3
|
+
* self-describing, replayable Buffer body — NOT a one-shot Node read stream.
|
|
4
|
+
*
|
|
5
|
+
* The original bug passed `createReadStream(localPath)` as PutObject Body with
|
|
6
|
+
* no ContentLength; AWS SDK v3 fell back to aws-chunked streaming and failed
|
|
7
|
+
* with "The request body terminated unexpectedly" (and couldn't replay the
|
|
8
|
+
* stream across S3's retries/redirects). The verify path used a string body
|
|
9
|
+
* (length known) and so never exercised the broken path — every real S3 backup
|
|
10
|
+
* silently failed. These tests upload a REAL on-disk file and assert the body
|
|
11
|
+
* is a Buffer, so a regression back to a stream is caught here.
|
|
12
|
+
*
|
|
13
|
+
* No live S3 / MinIO harness exists, so we intercept S3Client.prototype.send
|
|
14
|
+
* and inspect the command the provider builds.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { type Mock, afterEach, describe, expect, it, spyOn } from 'bun:test';
|
|
18
|
+
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
|
|
19
|
+
import { tmpdir } from 'node:os';
|
|
20
|
+
import { join } from 'node:path';
|
|
21
|
+
import { Readable } from 'node:stream';
|
|
22
|
+
import { GetObjectCommand, PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
|
|
23
|
+
import { type S3StorageConfig, createS3StorageProvider } from './s3';
|
|
24
|
+
|
|
25
|
+
/** Provider settings shared by every test; the endpoint is a localhost URL and `send` is stubbed. */
const CONFIG: S3StorageConfig = {
  endpoint: 'http://localhost:9000',
  region: 'us-east-1',
  bucket: 'test-bucket',
  accessKeyId: 'test-key',
  secretAccessKey: 'test-secret',
};
|
|
32
|
+
|
|
33
|
+
describe('s3 storage provider (ISS-0016)', () => {
  // Per-test scratch directory and the active spy on S3Client.prototype.send;
  // both are undone in afterEach when set.
  let dir: string | undefined;
  // biome-ignore lint/suspicious/noExplicitAny: send() is heavily overloaded; the test only reads .input.
  let sendSpy: Mock<(command: any) => Promise<unknown>> | undefined;

  afterEach(() => {
    sendSpy?.mockRestore();
    sendSpy = undefined;
    if (dir) {
      rmSync(dir, { recursive: true, force: true });
      dir = undefined;
    }
  });

  it('uploads a real on-disk file as a self-describing Buffer body, not a stream', async () => {
    dir = mkdtempSync(join(tmpdir(), 's3-upload-'));
    const localPath = join(dir, 'envelope.tar.gz');
    // A multi-KB real file — the case that broke against live S3.
    const payload = Buffer.from('celilo backup envelope payload — '.repeat(2000));
    writeFileSync(localPath, payload);

    // biome-ignore lint/suspicious/noExplicitAny: capturing the built command.
    const captured: any[] = [];
    sendSpy = spyOn(S3Client.prototype, 'send').mockImplementation(async (command: unknown) => {
      captured.push(command);
      return {};
    });

    await createS3StorageProvider(CONFIG).upload(localPath, 'celilo-mgmt/2026/envelope.tar.gz');

    expect(captured).toHaveLength(1);
    const [putCommand] = captured;
    expect(putCommand).toBeInstanceOf(PutObjectCommand);

    const { Body: body, Key: key, Bucket: bucket } = putCommand.input;
    // The regression gate: a Node read stream (the original bug) is not a Buffer.
    expect(Buffer.isBuffer(body)).toBe(true);
    expect(body instanceof Readable).toBe(false);
    // Self-describing length == the whole file: the SDK derives ContentLength
    // and can replay the body across retries/redirects.
    expect(body.length).toBe(payload.length);
    expect(Buffer.compare(body, payload)).toBe(0);
    // Key is prefixed with the backup namespace.
    expect(key).toBe('celilo-backups/celilo-mgmt/2026/envelope.tar.gz');
    expect(bucket).toBe('test-bucket');
  });

  it('downloads a multi-chunk response body to disk intact (short-read safe)', async () => {
    dir = mkdtempSync(join(tmpdir(), 's3-download-'));
    const destination = join(dir, 'restored.bin');
    const parts = ['chunk-1-', 'chunk-2-', 'chunk-3'].map((text) => Buffer.from(text));
    const wholeBody = Buffer.concat(parts);

    sendSpy = spyOn(S3Client.prototype, 'send').mockImplementation(async (command: unknown) => {
      expect(command).toBeInstanceOf(GetObjectCommand);
      const stream = new Readable({ read() {} });
      // Push several chunks separately to model short reads over the wire.
      for (const part of parts) {
        stream.push(part);
      }
      stream.push(null);
      return { Body: stream };
    });

    await createS3StorageProvider(CONFIG).download('celilo-mgmt/2026/envelope.tar.gz', destination);

    expect(readFileSync(destination)).toEqual(wholeBody);
  });
});
|