@pleri/olam-cli 0.1.169 → 0.1.170
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/commands/auth-status.d.ts +1 -0
- package/dist/commands/auth-status.d.ts.map +1 -1
- package/dist/commands/auth-status.js +45 -4
- package/dist/commands/auth-status.js.map +1 -1
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +26 -0
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/enter.d.ts.map +1 -1
- package/dist/commands/enter.js +5 -0
- package/dist/commands/enter.js.map +1 -1
- package/dist/commands/resume.d.ts +63 -0
- package/dist/commands/resume.d.ts.map +1 -0
- package/dist/commands/resume.js +174 -0
- package/dist/commands/resume.js.map +1 -0
- package/dist/commands/setup.d.ts +19 -0
- package/dist/commands/setup.d.ts.map +1 -1
- package/dist/commands/setup.js +157 -19
- package/dist/commands/setup.js.map +1 -1
- package/dist/image-digests.json +8 -8
- package/dist/index.js +1021 -576
- package/dist/index.js.map +1 -1
- package/dist/lib/health-probes.d.ts +28 -0
- package/dist/lib/health-probes.d.ts.map +1 -1
- package/dist/lib/health-probes.js +75 -0
- package/dist/lib/health-probes.js.map +1 -1
- package/dist/lib/k8s-context-discovery.d.ts +80 -0
- package/dist/lib/k8s-context-discovery.d.ts.map +1 -0
- package/dist/lib/k8s-context-discovery.js +102 -0
- package/dist/lib/k8s-context-discovery.js.map +1 -0
- package/dist/mcp-server.js +1273 -771
- package/dist/spawn/home-override.d.ts +82 -0
- package/dist/spawn/home-override.d.ts.map +1 -0
- package/dist/spawn/home-override.js +107 -0
- package/dist/spawn/home-override.js.map +1 -0
- package/hermes-bundle/version.json +1 -1
- package/host-cp/k8s/manifests/30-configmap.yaml +5 -0
- package/host-cp/k8s/manifests/50-deployment.yaml +9 -2
- package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
- package/host-cp/lifecycle/classify.mjs +110 -0
- package/host-cp/lifecycle/emit.mjs +119 -0
- package/host-cp/lifecycle/evidence.mjs +45 -0
- package/host-cp/lifecycle/failure-kinds.mjs +56 -0
- package/host-cp/lifecycle/index.mjs +22 -0
- package/host-cp/lifecycle/phases.mjs +52 -0
- package/host-cp/observability/grafana-port-forward.sh +1 -1
- package/host-cp/observability/kyverno-cardinality-mutate.sh +2 -2
- package/host-cp/observability/loki-ingest.sh +1 -1
- package/host-cp/observability/ndjson-span-sink.mjs +131 -0
- package/host-cp/observability/prom-no-double-grafana.sh +4 -4
- package/host-cp/observability/redactor.mjs +72 -0
- package/host-cp/recovery/engine.mjs +148 -0
- package/host-cp/recovery/index.mjs +16 -0
- package/host-cp/recovery/ledger.mjs +105 -0
- package/host-cp/recovery/recipes.mjs +46 -0
- package/host-cp/recovery/scenarios.mjs +124 -0
- package/host-cp/recovery/step-runners.mjs +263 -0
- package/host-cp/src/docker-events.mjs +30 -6
- package/host-cp/src/pr-nanny.mjs +55 -3
- package/host-cp/src/server.mjs +173 -0
- package/package.json +1 -1
package/host-cp/src/server.mjs
CHANGED
|
@@ -34,7 +34,15 @@ import { computeProgress } from './world-progress.mjs';
|
|
|
34
34
|
import { createPrCache } from './pr-cache.mjs';
|
|
35
35
|
import { fetchContainerSecret } from './container-secret-fetcher.mjs';
|
|
36
36
|
import { subscribeDockerEvents } from './docker-events.mjs';
|
|
37
|
+
import {
|
|
38
|
+
recordWorldLifecycle,
|
|
39
|
+
emptyEvidence,
|
|
40
|
+
WorldLifecyclePhase,
|
|
41
|
+
WorldStartupFailureKind,
|
|
42
|
+
} from '../lifecycle/index.mjs';
|
|
37
43
|
import { createHostStream, newStreamId } from './host-stream.mjs';
|
|
44
|
+
import { createNdjsonSpanSink } from '../observability/ndjson-span-sink.mjs';
|
|
45
|
+
import { attemptRecovery, findScenarioForKind } from '../recovery/index.mjs';
|
|
38
46
|
import { detectHaltChunk } from './halt-detect.mjs';
|
|
39
47
|
import { spawnUpgraderContainer } from './upgrade-spawner.mjs';
|
|
40
48
|
import { parseProxyPath, perWorldBase, proxyToWorld } from './proxy.mjs';
|
|
@@ -74,6 +82,7 @@ import {
|
|
|
74
82
|
handleServerBridges,
|
|
75
83
|
} from './routes/process-port.mjs';
|
|
76
84
|
import { instrumentHandler, renderMetrics } from './metrics.mjs';
|
|
85
|
+
import { handleDispatchFromEmail } from './lib/email-dispatch.mjs';
|
|
77
86
|
|
|
78
87
|
// ── Deployment-mode detection ─────────────────────────────────────
|
|
79
88
|
//
|
|
@@ -142,6 +151,20 @@ const OLAM_REPO_HOST_PATH = process.env.OLAM_REPO_HOST_PATH ?? '';
|
|
|
142
151
|
const OLAM_GH_CONFIG_HOST_PATH = process.env.OLAM_GH_CONFIG_HOST_PATH ?? '';
|
|
143
152
|
const OLAM_UPGRADER_IMAGE = process.env.OLAM_UPGRADER_IMAGE ?? 'ghcr.io/pleri/olam-host-cp:latest';
|
|
144
153
|
const WORKSPACES_DIR = process.env.OLAM_WORKSPACES_DIR ?? '/data/workspaces';
|
|
154
|
+
// Email-trigger surface (PR feat/email-as-world-trigger). The signing
|
|
155
|
+
// secret is the operator-shared key with the CF Email Worker — see
|
|
156
|
+
// docs/architecture/email-as-trigger.md. The allowlist is enforced
|
|
157
|
+
// defense-in-depth: the worker rejects at SMTP-time so bounces reach
|
|
158
|
+
// senders; we re-check at HTTP-time so a misrouted direct POST cannot
|
|
159
|
+
// bypass it. Both empty → endpoint stays mis-configured and returns
|
|
160
|
+
// 500/403 (fail-closed).
|
|
161
|
+
// Email-trigger settings, read once from the environment at module load.
// An unset signing secret or allowlist becomes the empty string.
const {
  OLAM_EMAIL_SIGNING_SECRET = '',
  OLAM_EMAIL_ALLOWED_SENDERS = '',
} = process.env;

// Attachments root: an explicit env override wins; otherwise the default
// depends on the deployment mode (fixed /data path in container mode, a
// directory under the user's home otherwise).
const OLAM_EMAIL_ATTACHMENTS_ROOT = (() => {
  const override = process.env.OLAM_EMAIL_ATTACHMENTS_ROOT;
  if (override !== undefined) {
    return override;
  }
  if (HOST_CP_MODE === 'container') {
    return '/data/email-attachments';
  }
  return path.join(os.homedir(), '.olam', 'email-attachments');
})();
|
|
145
168
|
const WORLD_NAMES_PATH =
|
|
146
169
|
process.env.OLAM_WORLD_NAMES_PATH ??
|
|
147
170
|
(HOST_CP_MODE === 'container'
|
|
@@ -458,6 +481,15 @@ const sseGate = new SseGate({ maxConcurrent: SSE_CAP });
|
|
|
458
481
|
// poll-every-2s `useListeningServers` loop.
|
|
459
482
|
const hostStream = createHostStream({ log: (m) => console.log(`[host-stream] ${m}`) });
|
|
460
483
|
|
|
484
|
+
// Zero-config NDJSON span sink. Subscribes to host-stream `event: span`
|
|
485
|
+
// broadcasts and appends to ~/.olam/logs/host.trace.ndjson (override via
|
|
486
|
+
// OLAM_TRACE_LOG_PATH). Fail-open: a sink-bootstrap error logs a warning
|
|
487
|
+
// and proceeds without tracing rather than blocking host-cp boot.
|
|
488
|
+
// Fail-open bootstrap of the NDJSON span sink. The factory call sits inside
// try/await so that a synchronous throw, a rejected promise, and a plain
// (non-promise) return value are all handled the same way: on any error we
// warn and continue with `null` instead of aborting module evaluation.
// Consumers must null-check `ndjsonSpanSink` before using it.
const ndjsonSpanSink = await (async () => {
  try {
    return await createNdjsonSpanSink({ hostStream });
  } catch (err) {
    console.warn(`[trace] NDJSON span sink unavailable: ${err?.message ?? err}`);
    return null;
  }
})();
|
|
492
|
+
|
|
461
493
|
// A4: coalesce docker-event bursts into a single servers.snapshot. World
|
|
462
494
|
// boot fires `create` + `start` + healthcheck transitions in <100ms; we
|
|
463
495
|
// don't want a broadcast storm. Window matches plan-source.md P3 target.
|
|
@@ -485,6 +517,93 @@ const stopEvents = subscribeDockerEvents({
|
|
|
485
517
|
// this callback is by construction an olam world.
|
|
486
518
|
scheduleServersSnapshot();
|
|
487
519
|
},
|
|
520
|
+
// Killshot #2 — emit typed world.lifecycle events alongside the cache
|
|
521
|
+
// invalidate. Docker actions map onto phases as follows:
|
|
522
|
+
// start | restart → Spawning (container boot kicked off)
|
|
523
|
+
// stop → Finished (clean operator-initiated stop)
|
|
524
|
+
// die | kill → Failed (involuntary exit; carries exit code +
|
|
525
|
+
// classifier-derived failureKind)
|
|
526
|
+
// The lifecycle module's classifier runs against a synthetic evidence
|
|
527
|
+
// bundle so the trace records *why* the bucket was chosen. TrustRequired,
|
|
528
|
+
// ReadyForPrompt, and Running emissions are not observable from
|
|
529
|
+
// host-cp's docker-events surface — those transitions happen inside
|
|
530
|
+
// container-cp and are wired in a follow-up (see ADR 033 § Open
|
|
531
|
+
// questions for the planned container-cp → host-cp emission seam).
|
|
532
|
+
onWorldLifecycleEvent: ({ worldId, action, exitCode }) => {
|
|
533
|
+
const now = Date.now();
|
|
534
|
+
if (action === 'start' || action === 'restart') {
|
|
535
|
+
recordWorldLifecycle(hostStream, {
|
|
536
|
+
worldId,
|
|
537
|
+
phase: WorldLifecyclePhase.Spawning,
|
|
538
|
+
at: now,
|
|
539
|
+
});
|
|
540
|
+
return;
|
|
541
|
+
}
|
|
542
|
+
if (action === 'stop') {
|
|
543
|
+
recordWorldLifecycle(hostStream, {
|
|
544
|
+
worldId,
|
|
545
|
+
phase: WorldLifecyclePhase.Finished,
|
|
546
|
+
at: now,
|
|
547
|
+
});
|
|
548
|
+
return;
|
|
549
|
+
}
|
|
550
|
+
if (action === 'die' || action === 'kill') {
|
|
551
|
+
const ev = emptyEvidence(worldId, now);
|
|
552
|
+
ev.lastPhase = WorldLifecyclePhase.Running;
|
|
553
|
+
ev.lastPhaseAt = now;
|
|
554
|
+
if (exitCode !== undefined) ev.processExitCode = exitCode;
|
|
555
|
+
// For involuntary exit with a code we know the bucket up front;
|
|
556
|
+
// skip the classifier inference and pass it through explicitly so
|
|
557
|
+
// the trace records the exact docker-derived signal.
|
|
558
|
+
const failureKind =
|
|
559
|
+
exitCode !== undefined ? WorldStartupFailureKind.ProviderProcessGone : undefined;
|
|
560
|
+
const lifecycleEvent = recordWorldLifecycle(hostStream, {
|
|
561
|
+
worldId,
|
|
562
|
+
phase: WorldLifecyclePhase.Failed,
|
|
563
|
+
at: now,
|
|
564
|
+
evidence: ev,
|
|
565
|
+
failureKind,
|
|
566
|
+
});
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
// Killshot #3 — bounded auto-recovery. Attempt once per
|
|
570
|
+
// (worldId, failureKind) pair; the engine enforces idempotency.
|
|
571
|
+
// Emit recovery.* events on the host-stream so the NDJSON trace
|
|
572
|
+
// sink captures the full attempt trail.
|
|
573
|
+
const resolvedKind = lifecycleEvent.failureKind ?? null;
|
|
574
|
+
const scenario = findScenarioForKind(resolvedKind);
|
|
575
|
+
if (scenario !== undefined) {
|
|
576
|
+
hostStream.broadcast('recovery.attempt-started', {
|
|
577
|
+
worldId,
|
|
578
|
+
scenario: scenario?.name ?? 'unmatched',
|
|
579
|
+
recipe: scenario?.recipe ?? null,
|
|
580
|
+
});
|
|
581
|
+
attemptRecovery(worldId, ev, resolvedKind)
|
|
582
|
+
.then((entry) => {
|
|
583
|
+
if (entry.outcome === 'escalated') {
|
|
584
|
+
hostStream.broadcast('recovery.escalated', {
|
|
585
|
+
worldId,
|
|
586
|
+
ledgerEntry: entry,
|
|
587
|
+
});
|
|
588
|
+
} else if (entry.outcome === 'success') {
|
|
589
|
+
hostStream.broadcast('recovery.attempt-succeeded', {
|
|
590
|
+
worldId,
|
|
591
|
+
ledgerEntry: entry,
|
|
592
|
+
});
|
|
593
|
+
} else {
|
|
594
|
+
hostStream.broadcast('recovery.attempt-failed', {
|
|
595
|
+
worldId,
|
|
596
|
+
ledgerEntry: entry,
|
|
597
|
+
});
|
|
598
|
+
}
|
|
599
|
+
})
|
|
600
|
+
.catch((err) => {
|
|
601
|
+
// Recovery engine always resolves — this path is a safety net.
|
|
602
|
+
console.error(`[recovery] unexpected engine rejection for ${worldId}: ${err?.message}`);
|
|
603
|
+
});
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
},
|
|
488
607
|
});
|
|
489
608
|
|
|
490
609
|
// Initial servers.snapshot so subscribers connecting before any docker
|
|
@@ -2070,6 +2189,59 @@ const server = http.createServer(instrumentHandler('host-cp', async (req, res) =
|
|
|
2070
2189
|
// B5's CLI uses).
|
|
2071
2190
|
// When unset, returns 503 with a clear setup hint instead of failing
|
|
2072
2191
|
// silently — operators wire when they're ready for cloud-mode dogfood.
|
|
2192
|
+
// POST /v1/dispatch-from-email — see docs/architecture/email-as-trigger.md.
|
|
2193
|
+
//
|
|
2194
|
+
// The CF Email Worker (packages/email-worker-cloudflare) HMAC-signs the
|
|
2195
|
+
// canonical payload (Decision 022) and POSTs it here. The host re-validates
|
|
2196
|
+
// the signature, re-checks the sender allowlist (defense in depth), persists
|
|
2197
|
+
// attachments under OLAM_EMAIL_ATTACHMENTS_ROOT/<worldId>/<timestampMs>/,
|
|
2198
|
+
// and either routes the dispatch to a known world or persists a
|
|
2199
|
+
// spawn-pending request for the MCP/CLI layer to drain.
|
|
2200
|
+
//
|
|
2201
|
+
// The body cap here is 30 MiB — 25 MiB attachment ceiling + 5 MiB margin
|
|
2202
|
+
// for the JSON envelope. Larger payloads are rejected at 413.
|
|
2203
|
+
if (url.pathname === '/v1/dispatch-from-email' && req.method === 'POST') {
|
|
2204
|
+
const chunks = [];
|
|
2205
|
+
let size = 0;
|
|
2206
|
+
const MAX_BODY = 30 * 1024 * 1024;
|
|
2207
|
+
let aborted = false;
|
|
2208
|
+
req.on('data', (chunk) => {
|
|
2209
|
+
size += chunk.length;
|
|
2210
|
+
if (size > MAX_BODY) {
|
|
2211
|
+
aborted = true;
|
|
2212
|
+
jsonReply(res, 413, { error: 'body_too_large', maxBytes: MAX_BODY });
|
|
2213
|
+
req.destroy();
|
|
2214
|
+
return;
|
|
2215
|
+
}
|
|
2216
|
+
chunks.push(chunk);
|
|
2217
|
+
});
|
|
2218
|
+
req.on('end', async () => {
|
|
2219
|
+
if (aborted) return;
|
|
2220
|
+
let dispatch;
|
|
2221
|
+
try {
|
|
2222
|
+
dispatch = JSON.parse(Buffer.concat(chunks).toString('utf8') || '{}');
|
|
2223
|
+
} catch (err) {
|
|
2224
|
+
return jsonReply(res, 400, { error: 'invalid_json', message: err.message });
|
|
2225
|
+
}
|
|
2226
|
+
try {
|
|
2227
|
+
const result = await handleDispatchFromEmail({
|
|
2228
|
+
dispatch,
|
|
2229
|
+
worlds: WORLDS,
|
|
2230
|
+
secret: OLAM_EMAIL_SIGNING_SECRET,
|
|
2231
|
+
attachmentsRoot: OLAM_EMAIL_ATTACHMENTS_ROOT,
|
|
2232
|
+
allowlist: OLAM_EMAIL_ALLOWED_SENDERS,
|
|
2233
|
+
});
|
|
2234
|
+
return jsonReply(res, result.status, result.body);
|
|
2235
|
+
} catch (err) {
|
|
2236
|
+
return jsonReply(res, 500, {
|
|
2237
|
+
error: 'dispatch_failed',
|
|
2238
|
+
message: err instanceof Error ? err.message : String(err),
|
|
2239
|
+
});
|
|
2240
|
+
}
|
|
2241
|
+
});
|
|
2242
|
+
return;
|
|
2243
|
+
}
|
|
2244
|
+
|
|
2073
2245
|
if (url.pathname === '/api/cloud-dispatch' && req.method === 'POST') {
|
|
2074
2246
|
const cloudUrl = process.env.OLAM_CLOUD_URL;
|
|
2075
2247
|
const showcasePw = process.env.OLAM_SHOWCASE_PASSWORD;
|
|
@@ -3078,6 +3250,7 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
|
|
|
3078
3250
|
stopListeningSnapshotLoop();
|
|
3079
3251
|
if (serversSnapshotTimer) { clearTimeout(serversSnapshotTimer); serversSnapshotTimer = null; }
|
|
3080
3252
|
hostStream.close();
|
|
3253
|
+
if (ndjsonSpanSink) ndjsonSpanSink.close().catch(() => {});
|
|
3081
3254
|
clearInterval(versionPollTimer);
|
|
3082
3255
|
cache.clear();
|
|
3083
3256
|
server.close(() => process.exit(0));
|