@pleri/olam-cli 0.1.170 → 0.1.174
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-stream/driver-runner.js +13 -0
- package/dist/commands/auth.d.ts +22 -7
- package/dist/commands/auth.d.ts.map +1 -1
- package/dist/commands/auth.js +414 -46
- package/dist/commands/auth.js.map +1 -1
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +45 -1
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/services.d.ts +39 -0
- package/dist/commands/services.d.ts.map +1 -1
- package/dist/commands/services.js +64 -9
- package/dist/commands/services.js.map +1 -1
- package/dist/from-manifest.d.ts +53 -0
- package/dist/from-manifest.d.ts.map +1 -0
- package/dist/from-manifest.js +95 -0
- package/dist/from-manifest.js.map +1 -0
- package/dist/image-digests.json +8 -8
- package/dist/index.js +911 -137
- package/dist/lib/auth-remote.d.ts +130 -0
- package/dist/lib/auth-remote.d.ts.map +1 -0
- package/dist/lib/auth-remote.js +307 -0
- package/dist/lib/auth-remote.js.map +1 -0
- package/dist/mcp-server.js +1487 -435
- package/hermes-bundle/version.json +1 -1
- package/host-cp/k8s/manifests/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
- package/host-cp/observability/ndjson-span-sink.mjs +52 -0
- package/host-cp/src/boot-reconciler.mjs +238 -0
- package/host-cp/src/linear-sync.mjs +43 -0
- package/host-cp/src/plan-chat-service.mjs +129 -1
- package/host-cp/src/port-bridge-manager.mjs +116 -10
- package/host-cp/src/server.mjs +121 -1
- package/host-cp/src/world-activity-tracker.mjs +392 -0
- package/package.json +1 -1
|
@@ -10,6 +10,7 @@ import path from 'node:path';
|
|
|
10
10
|
|
|
11
11
|
const DOCKER_HOST = process.env.DOCKER_HOST ?? 'docker-cli';
|
|
12
12
|
const SOCAT_IMAGE = 'alpine/socat';
|
|
13
|
+
const SOCAT_IMAGE_TAGGED = 'alpine/socat:latest';
|
|
13
14
|
const HOST_PORT_MIN = 25000;
|
|
14
15
|
const HOST_PORT_MAX = 25999;
|
|
15
16
|
const INFRA_PORTS = new Set([8080, 7681, 7682]);
|
|
@@ -83,11 +84,73 @@ async function dockerApiBase() {
|
|
|
83
84
|
}
|
|
84
85
|
|
|
85
86
|
/**
 * Report whether a docker error message says the image is absent, in which
 * case a `docker pull` followed by one retry is worth attempting. The CLI and
 * the HTTP API word this differently, so several phrasings are matched
 * (case-insensitively).
 *
 * @param {string} message — error text (CLI stderr or HTTP response body)
 * @returns {boolean} true when a known "image missing" phrasing is present
 */
function isImageMissingError(message) {
  // Empty or absent text can never describe a missing image.
  if (!message) {
    return false;
  }
  const missingImagePattern =
    /Unable to find image|pull access denied|manifest unknown|No such image|not found in (the )?(repository|registry)/i;
  return missingImagePattern.test(message);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Pull alpine/socat:latest via docker CLI. Used by the bare-node bridge
 * create path's fallback retry. 60s budget — image is ~5MB; real pull
 * is typically <2s.
 *
 * `stderr` carries the first non-empty of: the process's stderr, its stdout,
 * the spawn error message (docker could not be launched, or the timeout
 * fired), or a note naming the signal that terminated the process.
 *
 * @returns {{ok: boolean, stderr: string}} `ok` is true only on exit code 0
 */
function pullSocatViaCli() {
  const r = spawnSync('docker', ['pull', SOCAT_IMAGE_TAGGED], {
    encoding: 'utf-8',
    timeout: 60_000,
  });
  const output = (r.stderr ?? '').trim() || (r.stdout ?? '').trim();
  // When the binary cannot be launched or the timeout fires, spawnSync leaves
  // stderr/stdout null or empty and reports the cause on `r.error` instead.
  // Surface it so callers do not end up with an empty, undiagnosable message.
  const spawnFailure =
    r.error?.message ?? (r.signal ? `docker pull terminated by ${r.signal}` : '');
  return {
    ok: r.status === 0,
    stderr: output || spawnFailure,
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Pull alpine/socat:latest via Docker HTTP API. Used by the container-mode
 * bridge create path's fallback retry. Streams the pull progress body so
 * Docker actually performs the pull (it's a streaming endpoint).
 *
 * The endpoint answers 200 as soon as the stream starts; a pull that fails
 * afterwards is reported as a JSON record with an `error` field inside the
 * body. The drained stream is therefore scanned for such records instead of
 * trusting the HTTP status alone.
 *
 * Never throws: network errors and the 60s timeout are returned as
 * `{ ok: false, stderr }`.
 *
 * @param {string} apiBase — Docker HTTP API base URL
 * @returns {Promise<{ok: boolean, stderr: string}>}
 */
async function pullSocatViaHttpApi(apiBase) {
  try {
    const resp = await fetch(
      `${apiBase}/images/create?fromImage=${encodeURIComponent(SOCAT_IMAGE)}&tag=latest`,
      { method: 'POST', signal: AbortSignal.timeout(60_000) },
    );
    if (!resp.ok) {
      const body = await resp.text().catch(() => '');
      return { ok: false, stderr: `pull failed: ${resp.status} ${body}` };
    }
    // Drain the streaming progress body — Docker only completes the pull
    // when the response is consumed.
    const progress = await resp.text();
    for (const line of progress.split('\n')) {
      const record = line.trim();
      if (!record) continue;
      let event;
      try {
        event = JSON.parse(record);
      } catch {
        // Not a JSON progress record; it cannot carry an error field.
        continue;
      }
      const failure = event?.error ?? event?.errorDetail?.message;
      if (failure) {
        return { ok: false, stderr: `pull failed: ${String(failure)}` };
      }
    }
    return { ok: true, stderr: '' };
  } catch (err) {
    return { ok: false, stderr: err?.message ?? String(err) };
  }
}
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Create and start a socat bridge container.
|
|
145
|
+
*
|
|
146
|
+
* Returns `{ containerId, pulledImage }` — `pulledImage: true` indicates the
|
|
147
|
+
* function had to fall back to `docker pull alpine/socat:latest` (issue #964
|
|
148
|
+
* — preflight in `olam services up` should normally have already pulled it).
|
|
149
|
+
*
|
|
87
150
|
* @param {string} worldId
|
|
88
151
|
* @param {number} containerPort
|
|
89
152
|
* @param {number} hostPort
|
|
90
|
-
* @returns {Promise<string>}
|
|
153
|
+
* @returns {Promise<{containerId: string, pulledImage: boolean}>}
|
|
91
154
|
*/
|
|
92
155
|
async function createBridgeContainer(worldId, containerPort, hostPort) {
|
|
93
156
|
const name = bridgeContainerName(worldId, containerPort);
|
|
@@ -111,11 +174,28 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
|
|
|
111
174
|
'TCP-LISTEN:' + containerPort + ',fork,reuseaddr',
|
|
112
175
|
'TCP:' + devboxName + ':' + containerPort,
|
|
113
176
|
];
|
|
114
|
-
|
|
177
|
+
let result = spawnSync('docker', args, { encoding: 'utf-8', timeout: 10000 });
|
|
178
|
+
let pulledImage = false;
|
|
179
|
+
|
|
180
|
+
// Issue #964 fallback: if docker run failed because the image is missing,
|
|
181
|
+
// pull it and retry once. This covers hosts where `olam services up`
|
|
182
|
+
// didn't run the preflight (e.g. fresh Hazel install, docker restart
|
|
183
|
+
// pruned the image, etc.).
|
|
184
|
+
if (result.status !== 0 && isImageMissingError(result.stderr ?? '')) {
|
|
185
|
+
const pull = pullSocatViaCli();
|
|
186
|
+
if (!pull.ok) {
|
|
187
|
+
throw new Error(
|
|
188
|
+
`alpine/socat image missing and pull failed: ${pull.stderr || 'unknown error'}`,
|
|
189
|
+
);
|
|
190
|
+
}
|
|
191
|
+
pulledImage = true;
|
|
192
|
+
result = spawnSync('docker', args, { encoding: 'utf-8', timeout: 10000 });
|
|
193
|
+
}
|
|
194
|
+
|
|
115
195
|
if (result.status !== 0) {
|
|
116
196
|
throw new Error(result.stderr?.trim() || 'docker run failed');
|
|
117
197
|
}
|
|
118
|
-
return result.stdout.trim()
|
|
198
|
+
return { containerId: result.stdout.trim(), pulledImage };
|
|
119
199
|
}
|
|
120
200
|
|
|
121
201
|
// container mode: Docker HTTP API
|
|
@@ -135,7 +215,7 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
|
|
|
135
215
|
},
|
|
136
216
|
};
|
|
137
217
|
|
|
138
|
-
const
|
|
218
|
+
const doCreate = () => fetch(
|
|
139
219
|
`${apiBase}/containers/create?name=${encodeURIComponent(name)}`,
|
|
140
220
|
{
|
|
141
221
|
method: 'POST',
|
|
@@ -145,6 +225,28 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
|
|
|
145
225
|
},
|
|
146
226
|
);
|
|
147
227
|
|
|
228
|
+
let createResp = await doCreate();
|
|
229
|
+
let pulledImage = false;
|
|
230
|
+
|
|
231
|
+
// Issue #964 fallback for HTTP API path. Docker returns 404 with a body
|
|
232
|
+
// like {"message":"No such image: alpine/socat:latest"} when the image
|
|
233
|
+
// is missing.
|
|
234
|
+
if (!createResp.ok && createResp.status === 404) {
|
|
235
|
+
const body = await createResp.text().catch(() => '');
|
|
236
|
+
if (isImageMissingError(body)) {
|
|
237
|
+
const pull = await pullSocatViaHttpApi(apiBase);
|
|
238
|
+
if (!pull.ok) {
|
|
239
|
+
throw new Error(
|
|
240
|
+
`alpine/socat image missing and pull failed: ${pull.stderr || 'unknown error'}`,
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
pulledImage = true;
|
|
244
|
+
createResp = await doCreate();
|
|
245
|
+
} else {
|
|
246
|
+
throw new Error(`container create failed: 404 ${body}`);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
148
250
|
if (!createResp.ok) {
|
|
149
251
|
const body = await createResp.text().catch(() => '');
|
|
150
252
|
// If container already exists (409), try to get its ID
|
|
@@ -155,7 +257,7 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
|
|
|
155
257
|
);
|
|
156
258
|
if (inspectResp.ok) {
|
|
157
259
|
const info = await inspectResp.json();
|
|
158
|
-
return info.Id;
|
|
260
|
+
return { containerId: info.Id, pulledImage };
|
|
159
261
|
}
|
|
160
262
|
}
|
|
161
263
|
throw new Error(`container create failed: ${createResp.status} ${body}`);
|
|
@@ -171,7 +273,7 @@ async function createBridgeContainer(worldId, containerPort, hostPort) {
|
|
|
171
273
|
throw new Error(`container start failed: ${startResp.status}`);
|
|
172
274
|
}
|
|
173
275
|
|
|
174
|
-
return containerId;
|
|
276
|
+
return { containerId, pulledImage };
|
|
175
277
|
}
|
|
176
278
|
|
|
177
279
|
async function removeBridgeContainer(containerName, containerId) {
|
|
@@ -196,7 +298,7 @@ async function removeBridgeContainer(containerName, containerId) {
|
|
|
196
298
|
*
|
|
197
299
|
* @param {string} worldId
|
|
198
300
|
* @param {number} containerPort
|
|
199
|
-
* @returns {Promise<{hostPort: number, containerPort: number, url: string, containerId: string}>}
|
|
301
|
+
* @returns {Promise<{hostPort: number, containerPort: number, url: string, containerId: string, pulledImage?: boolean}>}
|
|
200
302
|
*/
|
|
201
303
|
export async function exposePort(worldId, containerPort) {
|
|
202
304
|
if (INFRA_PORTS.has(containerPort)) {
|
|
@@ -220,18 +322,22 @@ export async function exposePort(worldId, containerPort) {
|
|
|
220
322
|
}
|
|
221
323
|
|
|
222
324
|
const containerName = bridgeContainerName(worldId, containerPort);
|
|
223
|
-
const containerId = await createBridgeContainer(worldId, containerPort, hostPort);
|
|
325
|
+
const { containerId, pulledImage } = await createBridgeContainer(worldId, containerPort, hostPort);
|
|
224
326
|
|
|
225
327
|
const entry = { worldId, containerPort, hostPort, containerId, containerName };
|
|
226
328
|
registry.set(key, entry);
|
|
227
329
|
saveState();
|
|
228
330
|
|
|
229
|
-
|
|
331
|
+
const result = {
|
|
230
332
|
hostPort,
|
|
231
333
|
containerPort,
|
|
232
334
|
url: `http://${HOST_IP}:${hostPort}`,
|
|
233
335
|
containerId,
|
|
234
336
|
};
|
|
337
|
+
// Only attach pulledImage when true so existing callers/tests don't see
|
|
338
|
+
// an unexpected key when the preflight succeeded.
|
|
339
|
+
if (pulledImage) result.pulledImage = true;
|
|
340
|
+
return result;
|
|
235
341
|
}
|
|
236
342
|
|
|
237
343
|
/**
|
package/host-cp/src/server.mjs
CHANGED
|
@@ -41,7 +41,11 @@ import {
|
|
|
41
41
|
WorldStartupFailureKind,
|
|
42
42
|
} from '../lifecycle/index.mjs';
|
|
43
43
|
import { createHostStream, newStreamId } from './host-stream.mjs';
|
|
44
|
-
import {
|
|
44
|
+
import {
|
|
45
|
+
createNdjsonSpanSink,
|
|
46
|
+
attachBetaResponseEvents,
|
|
47
|
+
} from '../observability/ndjson-span-sink.mjs';
|
|
48
|
+
import { betaResponseEmitter } from '@olam/auth-client';
|
|
45
49
|
import { attemptRecovery, findScenarioForKind } from '../recovery/index.mjs';
|
|
46
50
|
import { detectHaltChunk } from './halt-detect.mjs';
|
|
47
51
|
import { spawnUpgraderContainer } from './upgrade-spawner.mjs';
|
|
@@ -72,6 +76,11 @@ import { readSecret as readPlanChatSecret, SECRET_PATH as PLAN_CHAT_SECRET_PATH
|
|
|
72
76
|
import { createPrMergePoller } from './pr-merge-poller.mjs';
|
|
73
77
|
import { parse as parseYaml } from 'yaml';
|
|
74
78
|
import { startWorldsDbReconciler } from './worlds-db-source.mjs';
|
|
79
|
+
import {
|
|
80
|
+
reconcileWorldsWithDocker,
|
|
81
|
+
defaultListContainerNames,
|
|
82
|
+
} from './boot-reconciler.mjs';
|
|
83
|
+
import { startWorldActivityTracker } from './world-activity-tracker.mjs';
|
|
75
84
|
import { authSecretHint } from './auth-secret-hint.mjs';
|
|
76
85
|
import * as tunnelManager from './world-tunnel-manager.mjs';
|
|
77
86
|
import * as bridgeManager from './port-bridge-manager.mjs';
|
|
@@ -83,6 +92,7 @@ import {
|
|
|
83
92
|
} from './routes/process-port.mjs';
|
|
84
93
|
import { instrumentHandler, renderMetrics } from './metrics.mjs';
|
|
85
94
|
import { handleDispatchFromEmail } from './lib/email-dispatch.mjs';
|
|
95
|
+
import { emitTierSuggestion } from '../dispatch/auto-tier-scheduler.mjs';
|
|
86
96
|
|
|
87
97
|
// ── Deployment-mode detection ─────────────────────────────────────
|
|
88
98
|
//
|
|
@@ -490,6 +500,20 @@ const ndjsonSpanSink = await createNdjsonSpanSink({ hostStream }).catch((err) =>
|
|
|
490
500
|
return null;
|
|
491
501
|
});
|
|
492
502
|
|
|
503
|
+
// Wire @olam/auth-client `beta-response` events (Anthropic SDK 0.96+ beta
|
|
504
|
+
// flags — thinking-token-count, cache-diagnostics, future passthrough) into
|
|
505
|
+
// the NDJSON trace as `withCredential.beta-response` spans. Opt-in via the
|
|
506
|
+
// caller's `withCredential('claude', fn, { betas: [...] })` options; when
|
|
507
|
+
// no caller opts in, the emitter never fires and this subscription is a
|
|
508
|
+
// no-op. See docs/decisions/047-anthropic-sdk-beta-flags.md.
|
|
509
|
+
if (ndjsonSpanSink) {
|
|
510
|
+
try {
|
|
511
|
+
attachBetaResponseEvents({ sink: ndjsonSpanSink, emitter: betaResponseEmitter });
|
|
512
|
+
} catch (err) {
|
|
513
|
+
console.warn(`[trace] beta-response wire unavailable: ${err?.message ?? err}`);
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
|
|
493
517
|
// A4: coalesce docker-event bursts into a single servers.snapshot. World
|
|
494
518
|
// boot fires `create` + `start` + healthcheck transitions in <100ms; we
|
|
495
519
|
// don't want a broadcast storm. Window matches plan-source.md P3 target.
|
|
@@ -922,6 +946,58 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
|
|
|
922
946
|
if (handled) return;
|
|
923
947
|
}
|
|
924
948
|
|
|
949
|
+
// /api/telemetry/planning-sessions — B9: aggregate planning_sessions by
|
|
950
|
+
// session_source for the canonical-surface bet's adoption signal. Per
|
|
951
|
+
// plan-chat-spa-canonical-surface plan § Operator workflow seam falsification
|
|
952
|
+
// trigger: if plan-chat-spa weekly-active sessions < 60% of control-plane/app
|
|
953
|
+
// by 2026-Q3, freeze plan-chat-spa feature work. This endpoint is the
|
|
954
|
+
// data source for that measurement.
|
|
955
|
+
//
|
|
956
|
+
// Query param: ?since=YYYY-MM-DD (required; rejects with 400 otherwise).
|
|
957
|
+
// Response: { plan_chat_spa: N, control_plane_app: M, unknown: K, ratio: pct }
|
|
958
|
+
// where ratio = plan_chat_spa / (plan_chat_spa + control_plane_app) * 100,
|
|
959
|
+
// null if denominator is 0.
|
|
960
|
+
if (url.pathname === '/api/telemetry/planning-sessions' && req.method === 'GET') {
|
|
961
|
+
const since = url.searchParams.get('since');
|
|
962
|
+
if (!since || !/^\d{4}-\d{2}-\d{2}$/.test(since)) {
|
|
963
|
+
res.writeHead(400, { 'Content-Type': 'application/json' });
|
|
964
|
+
return res.end(JSON.stringify({
|
|
965
|
+
error: 'bad_request',
|
|
966
|
+
message: 'Missing or malformed `since` query param. Expected YYYY-MM-DD.',
|
|
967
|
+
}));
|
|
968
|
+
}
|
|
969
|
+
// B9 ships the endpoint CONTRACT + the session_source schema column.
|
|
970
|
+
// The query implementation goes through plan-chat-service.mjs (which
|
|
971
|
+
// owns the pg pool); this host-cp handler currently emits a 503 with
|
|
972
|
+
// a structured "not_implemented" marker so callers can verify the
|
|
973
|
+
// endpoint shape + auth + query-param parsing without the data path.
|
|
974
|
+
//
|
|
975
|
+
// Phase G of this epic adds the plan-chat-service handler that this
|
|
976
|
+
// endpoint will proxy to. Until then operators can run the SQL
|
|
977
|
+
// directly:
|
|
978
|
+
// SELECT COALESCE(session_source, 'unknown'), COUNT(*)
|
|
979
|
+
// FROM planning_sessions
|
|
980
|
+
// WHERE created_at >= $since
|
|
981
|
+
// GROUP BY 1;
|
|
982
|
+
//
|
|
983
|
+
// Notify-C: ship contract + schema; defer data path to Phase G.
|
|
984
|
+
res.writeHead(503, { 'Content-Type': 'application/json' });
|
|
985
|
+
return res.end(JSON.stringify({
|
|
986
|
+
error: 'not_implemented',
|
|
987
|
+
message: 'B9 ships the endpoint contract + session_source schema column. ' +
|
|
988
|
+
'Aggregation handler scaffolded in plan-chat-service.mjs lands in Phase G.',
|
|
989
|
+
since,
|
|
990
|
+
contractShape: {
|
|
991
|
+
plan_chat_spa: 0,
|
|
992
|
+
control_plane_app: 0,
|
|
993
|
+
unknown: 0,
|
|
994
|
+
ratio: null,
|
|
995
|
+
since: '<YYYY-MM-DD>',
|
|
996
|
+
asOf: '<ISO 8601>',
|
|
997
|
+
},
|
|
998
|
+
}));
|
|
999
|
+
}
|
|
1000
|
+
|
|
925
1001
|
// /api/version/status: returns the current version snapshot (baked SHA
|
|
926
1002
|
// vs operator's local HEAD). No auth required beyond the existing gate
|
|
927
1003
|
// (already applied above). Phase 1 only — detection, no auto-upgrade.
|
|
@@ -2224,6 +2300,23 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
|
|
|
2224
2300
|
return jsonReply(res, 400, { error: 'invalid_json', message: err.message });
|
|
2225
2301
|
}
|
|
2226
2302
|
try {
|
|
2303
|
+
// Auto-tier-scheduler v1 (ADR 042): emit an informational
|
|
2304
|
+
// `dispatch.tier-suggestion` event BEFORE handing off to the
|
|
2305
|
+
// dispatch handler. Pure-informational — never changes which
|
|
2306
|
+
// provider actually runs. The dispatch payload's optional
|
|
2307
|
+
// `tierSpec` ({ kind?, expectedDurationMs?, explicitTier? })
|
|
2308
|
+
// carries the shape; absent it, the heuristic falls through to
|
|
2309
|
+
// its default (`cloudflare-sandbox`).
|
|
2310
|
+
if (dispatch && typeof dispatch.worldId === 'string') {
|
|
2311
|
+
try {
|
|
2312
|
+
emitTierSuggestion({
|
|
2313
|
+
worldId: dispatch.worldId,
|
|
2314
|
+
dispatchSpec: dispatch.tierSpec ?? {},
|
|
2315
|
+
currentTier: null,
|
|
2316
|
+
hostStream,
|
|
2317
|
+
});
|
|
2318
|
+
} catch { /* never let a hint surface break dispatch */ }
|
|
2319
|
+
}
|
|
2227
2320
|
const result = await handleDispatchFromEmail({
|
|
2228
2321
|
dispatch,
|
|
2229
2322
|
worlds: WORLDS,
|
|
@@ -3182,6 +3275,16 @@ startWorldsSnapshotLoop();
|
|
|
3182
3275
|
startTunnelsSnapshotLoop();
|
|
3183
3276
|
startListeningSnapshotLoop();
|
|
3184
3277
|
|
|
3278
|
+
// Closes #965: live thought_count + total_cost_usd updates from each
|
|
3279
|
+
// active world's Claude session JSONL. Periodic (60s default) so Rico's
|
|
3280
|
+
// scheduling loop can read fresh values from the `worlds` table and
|
|
3281
|
+
// SPAs can subscribe to the `world.activity.tick` event. Fail-soft per
|
|
3282
|
+
// world: missing/malformed JSONL never crashes the loop.
|
|
3283
|
+
const worldActivityTracker = startWorldActivityTracker({
|
|
3284
|
+
dbPath: WORLDS_DB_PATH,
|
|
3285
|
+
broadcaster: hostStream,
|
|
3286
|
+
});
|
|
3287
|
+
|
|
3185
3288
|
// ── Phase 1a / B1 (PR3): engine-select + await-before-listen ─────
|
|
3186
3289
|
//
|
|
3187
3290
|
// Decision 15: the async KubernetesEngine factory MUST be fully awaited
|
|
@@ -3208,6 +3311,22 @@ const hostCpEngine = await (async () => {
|
|
|
3208
3311
|
return createDockerEngine({ dockerHost: DOCKER_HOST });
|
|
3209
3312
|
})();
|
|
3210
3313
|
|
|
3314
|
+
// ── Boot-time worlds.db ↔ docker reconciler (issue #963) ─────────────
|
|
3315
|
+
//
|
|
3316
|
+
// One-shot pass: if a container is alive but worlds.db has no row, insert
|
|
3317
|
+
// a status='reconciled' row so host-cp can see it. If worlds.db says a
|
|
3318
|
+
// world is running/active but the container is gone, mark it 'orphaned'.
|
|
3319
|
+
// Fail-soft: docker unreachable or DB unavailable → log + continue boot.
|
|
3320
|
+
// Runs BEFORE server.listen() so the first request sees reconciled state.
|
|
3321
|
+
try {
|
|
3322
|
+
await reconcileWorldsWithDocker({
|
|
3323
|
+
dbPath: WORLDS_DB_PATH,
|
|
3324
|
+
listContainerNames: () => defaultListContainerNames(DOCKER_API_BASE, console.log),
|
|
3325
|
+
});
|
|
3326
|
+
} catch (err) {
|
|
3327
|
+
console.error(`[boot-reconciler] unexpected error (continuing boot): ${err.message}`);
|
|
3328
|
+
}
|
|
3329
|
+
|
|
3211
3330
|
server.listen(PORT, '0.0.0.0', () => {
|
|
3212
3331
|
console.log(`olam-host-cp B3 listening on :${PORT}`);
|
|
3213
3332
|
console.log(` DOCKER_HOST=${DOCKER_HOST}`);
|
|
@@ -3248,6 +3367,7 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
|
|
|
3248
3367
|
stopWorldsSnapshotLoop();
|
|
3249
3368
|
stopTunnelsSnapshotLoop();
|
|
3250
3369
|
stopListeningSnapshotLoop();
|
|
3370
|
+
worldActivityTracker.stop();
|
|
3251
3371
|
if (serversSnapshotTimer) { clearTimeout(serversSnapshotTimer); serversSnapshotTimer = null; }
|
|
3252
3372
|
hostStream.close();
|
|
3253
3373
|
if (ndjsonSpanSink) ndjsonSpanSink.close().catch(() => {});
|