@pleri/olam-cli 0.1.196 → 0.1.199

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/README.md +52 -0
  2. package/dist/ask/knowledge-pack.generated.d.ts.map +1 -1
  3. package/dist/ask/knowledge-pack.generated.js +10 -8
  4. package/dist/ask/knowledge-pack.generated.js.map +1 -1
  5. package/dist/commands/auth-list-json.d.ts +34 -0
  6. package/dist/commands/auth-list-json.d.ts.map +1 -1
  7. package/dist/commands/auth-list-json.js +24 -0
  8. package/dist/commands/auth-list-json.js.map +1 -1
  9. package/dist/commands/auth-migrate.d.ts +212 -0
  10. package/dist/commands/auth-migrate.d.ts.map +1 -0
  11. package/dist/commands/auth-migrate.js +465 -0
  12. package/dist/commands/auth-migrate.js.map +1 -0
  13. package/dist/commands/auth.d.ts.map +1 -1
  14. package/dist/commands/auth.js +239 -184
  15. package/dist/commands/auth.js.map +1 -1
  16. package/dist/commands/bootstrap.d.ts +4 -0
  17. package/dist/commands/bootstrap.d.ts.map +1 -1
  18. package/dist/commands/bootstrap.js +10 -0
  19. package/dist/commands/bootstrap.js.map +1 -1
  20. package/dist/commands/dispatch.d.ts.map +1 -1
  21. package/dist/commands/dispatch.js +11 -1
  22. package/dist/commands/dispatch.js.map +1 -1
  23. package/dist/commands/doctor.d.ts +33 -0
  24. package/dist/commands/doctor.d.ts.map +1 -1
  25. package/dist/commands/doctor.js +299 -12
  26. package/dist/commands/doctor.js.map +1 -1
  27. package/dist/commands/kg-mirror.d.ts +18 -2
  28. package/dist/commands/kg-mirror.d.ts.map +1 -1
  29. package/dist/commands/kg-mirror.js +78 -3
  30. package/dist/commands/kg-mirror.js.map +1 -1
  31. package/dist/commands/mcp/complete.d.ts +36 -0
  32. package/dist/commands/mcp/complete.d.ts.map +1 -0
  33. package/dist/commands/mcp/complete.js +66 -0
  34. package/dist/commands/mcp/complete.js.map +1 -0
  35. package/dist/commands/mcp/index.d.ts +1 -1
  36. package/dist/commands/mcp/index.d.ts.map +1 -1
  37. package/dist/commands/mcp/index.js +3 -1
  38. package/dist/commands/mcp/index.js.map +1 -1
  39. package/dist/commands/memory/bridge.d.ts +1 -1
  40. package/dist/commands/memory/bridge.d.ts.map +1 -1
  41. package/dist/commands/memory/bridge.js +2 -6
  42. package/dist/commands/memory/bridge.js.map +1 -1
  43. package/dist/commands/memory/secret.d.ts.map +1 -1
  44. package/dist/commands/memory/secret.js +4 -3
  45. package/dist/commands/memory/secret.js.map +1 -1
  46. package/dist/commands/observe.d.ts +3 -3
  47. package/dist/commands/observe.d.ts.map +1 -1
  48. package/dist/commands/observe.js +11 -8
  49. package/dist/commands/observe.js.map +1 -1
  50. package/dist/commands/runbooks.d.ts.map +1 -1
  51. package/dist/commands/runbooks.js +77 -10
  52. package/dist/commands/runbooks.js.map +1 -1
  53. package/dist/commands/services-tls.d.ts.map +1 -1
  54. package/dist/commands/services-tls.js +41 -0
  55. package/dist/commands/services-tls.js.map +1 -1
  56. package/dist/commands/services.d.ts +45 -3
  57. package/dist/commands/services.d.ts.map +1 -1
  58. package/dist/commands/services.js +198 -71
  59. package/dist/commands/services.js.map +1 -1
  60. package/dist/commands/setup-phase-8-kg-hook.d.ts +48 -0
  61. package/dist/commands/setup-phase-8-kg-hook.d.ts.map +1 -0
  62. package/dist/commands/setup-phase-8-kg-hook.js +93 -0
  63. package/dist/commands/setup-phase-8-kg-hook.js.map +1 -0
  64. package/dist/commands/setup-phase-9-memory-bridge.d.ts +36 -0
  65. package/dist/commands/setup-phase-9-memory-bridge.d.ts.map +1 -0
  66. package/dist/commands/setup-phase-9-memory-bridge.js +59 -0
  67. package/dist/commands/setup-phase-9-memory-bridge.js.map +1 -0
  68. package/dist/commands/setup.d.ts +34 -1
  69. package/dist/commands/setup.d.ts.map +1 -1
  70. package/dist/commands/setup.js +328 -23
  71. package/dist/commands/setup.js.map +1 -1
  72. package/dist/commands/update.d.ts +24 -0
  73. package/dist/commands/update.d.ts.map +1 -1
  74. package/dist/commands/update.js +53 -0
  75. package/dist/commands/update.js.map +1 -1
  76. package/dist/commands/upgrade.d.ts +5 -0
  77. package/dist/commands/upgrade.d.ts.map +1 -1
  78. package/dist/commands/upgrade.js +31 -8
  79. package/dist/commands/upgrade.js.map +1 -1
  80. package/dist/image-digests.json +8 -8
  81. package/dist/index.js +4302 -2466
  82. package/dist/lib/auth-backend.d.ts +168 -0
  83. package/dist/lib/auth-backend.d.ts.map +1 -0
  84. package/dist/lib/auth-backend.js +172 -0
  85. package/dist/lib/auth-backend.js.map +1 -0
  86. package/dist/lib/auth-list-cache.d.ts +67 -0
  87. package/dist/lib/auth-list-cache.d.ts.map +1 -0
  88. package/dist/lib/auth-list-cache.js +84 -0
  89. package/dist/lib/auth-list-cache.js.map +1 -0
  90. package/dist/lib/auth-list.d.ts +107 -0
  91. package/dist/lib/auth-list.d.ts.map +1 -0
  92. package/dist/lib/auth-list.js +123 -0
  93. package/dist/lib/auth-list.js.map +1 -0
  94. package/dist/lib/auth-login.d.ts +92 -0
  95. package/dist/lib/auth-login.d.ts.map +1 -0
  96. package/dist/lib/auth-login.js +124 -0
  97. package/dist/lib/auth-login.js.map +1 -0
  98. package/dist/lib/auth-mutator-backend.d.ts +54 -0
  99. package/dist/lib/auth-mutator-backend.d.ts.map +1 -0
  100. package/dist/lib/auth-mutator-backend.js +62 -0
  101. package/dist/lib/auth-mutator-backend.js.map +1 -0
  102. package/dist/lib/auth-remote.d.ts +50 -0
  103. package/dist/lib/auth-remote.d.ts.map +1 -1
  104. package/dist/lib/auth-remote.js +84 -2
  105. package/dist/lib/auth-remote.js.map +1 -1
  106. package/dist/lib/bootstrap-kubernetes.d.ts +93 -12
  107. package/dist/lib/bootstrap-kubernetes.d.ts.map +1 -1
  108. package/dist/lib/bootstrap-kubernetes.js +364 -53
  109. package/dist/lib/bootstrap-kubernetes.js.map +1 -1
  110. package/dist/lib/config.d.ts +7 -0
  111. package/dist/lib/config.d.ts.map +1 -1
  112. package/dist/lib/config.js.map +1 -1
  113. package/dist/lib/health-probes.d.ts +0 -22
  114. package/dist/lib/health-probes.d.ts.map +1 -1
  115. package/dist/lib/health-probes.js +23 -2
  116. package/dist/lib/health-probes.js.map +1 -1
  117. package/dist/lib/peripheral-registry.d.ts +11 -0
  118. package/dist/lib/peripheral-registry.d.ts.map +1 -1
  119. package/dist/lib/peripheral-registry.js +5 -0
  120. package/dist/lib/peripheral-registry.js.map +1 -1
  121. package/dist/lib/plans-client.d.ts.map +1 -1
  122. package/dist/lib/plans-client.js +6 -3
  123. package/dist/lib/plans-client.js.map +1 -1
  124. package/dist/mcp-server.js +14 -3
  125. package/hermes-bundle/version.json +1 -1
  126. package/host-cp/k8s/manifests/30-configmap.yaml +4 -0
  127. package/host-cp/k8s/manifests/50-deployment.yaml +13 -1
  128. package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
  129. package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
  130. package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
  131. package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
  132. package/host-cp/src/dispatch-persister.mjs +157 -0
  133. package/host-cp/src/pr-nanny.mjs +7 -0
  134. package/host-cp/src/server.mjs +175 -3
  135. package/host-cp/src/world-watchdog-pid-lookup.mjs +119 -0
  136. package/host-cp/src/world-watchdog-probes.mjs +271 -0
  137. package/host-cp/src/world-watchdog-recovery.mjs +192 -0
  138. package/host-cp/src/world-watchdog.mjs +313 -0
  139. package/package.json +1 -1
@@ -0,0 +1,313 @@
1
+ /**
2
+ * world-watchdog.mjs — periodic watchdog that probes each active world's
3
+ * `claude` PID for the three wedge signals (wchan + CLOSE_WAIT + CPU) and
4
+ * emits `world.watchdog.tick` events on the host-stream broadcaster.
5
+ *
6
+ * Design:
7
+ * - Mirrors `world-activity-tracker.mjs` shape exactly: `startWorldWatchdog(deps)`
8
+ * returns `{ stop, tickNow }`, plus a `getVerdict(worldId)` accessor.
9
+ * - Per-world 2-tick confirm: a `'wedged'` classification is only emitted
10
+ * after TWO consecutive ticks with the wedge signature. A single-tick
11
+ * wedge emits `'suspect'`. A healthy tick resets the streak.
12
+ * - Per-world fail-soft: a probe error for one world never skips other worlds.
13
+ * - `OLAM_WORLD_WATCHDOG_DISABLED=1` → `start()` is a no-op (returns stub).
14
+ * - Cadence: `OLAM_WORLD_WATCHDOG_TICK_MS` env or `intervalMs` dep (default 30_000).
15
+ *
16
+ * v1 stub: `getClaudePidForWorld(worldId)` returns null for all worlds in
17
+ * Phase A. When null, the tick still fires but all probe signals are null,
18
+ * producing `verdict: 'unknown'`. Real PID lookup (docker inspect →
19
+ * /proc/<hostPid>/status NSpid field) is wired in a follow-up.
20
+ * This is documented here and in docs/architecture/world-watchdog.md.
21
+ *
22
+ * Wire-in: `server.mjs` constructs once after broadcaster is ready and calls
23
+ * `.stop()` from the SIGTERM/SIGINT handler. Gated on `!SERVE_ONLY`.
24
+ *
25
+ * @see docs/architecture/world-watchdog.md
26
+ * @see packages/host-cp/src/world-watchdog-probes.mjs
27
+ * @see packages/host-cp/src/world-activity-tracker.mjs (shape reference)
28
+ */
29
+
30
+ import {
31
+ readWchan,
32
+ readCloseWaitSockets,
33
+ readCpuPercent,
34
+ classify,
35
+ } from './world-watchdog-probes.mjs';
36
+ // Recovery hook (B5). Optional dep — when absent (recovery is null/undefined),
37
+ // the watchdog behaves exactly as Phase A: detection-only, no kill, no replay.
38
+ // Wire via startWorldWatchdog({ recovery: createRecovery({...}) }) in server.mjs.
39
+
40
+ const DEFAULT_TICK_MS = 30_000;
41
+ // CPU measurement window in ms, passed to readCpuPercent; shorter than the tick cadence so we don't overlap.
42
+ const CPU_WINDOW_MS = 500;
43
+
44
+ /**
45
+ * @typedef {object} WorldWatchdogDeps
46
+ * @property {object} [broadcaster] Object with `.broadcast(type, payload)`.
47
+ * Optional — when absent events are skipped but state tracking still works.
48
+ * @property {number} [intervalMs] Tick cadence in ms. Defaults to
49
+ * `OLAM_WORLD_WATCHDOG_TICK_MS` env or 30_000.
50
+ * @property {() => Promise<string[]>} [listActiveWorlds]
51
+ * Returns an array of active world IDs to probe each tick.
52
+ * Defaults to returning [].
53
+ * @property {(worldId: string) => Promise<number|null>} [getClaudePidForWorld]
54
+ * Returns the host-side PID of the claude process for a world, or null.
55
+ * v1 default: always returns null (all worlds → verdict 'unknown').
56
+ * @property {{ procRoot?: string }} [probes]
57
+ * Injectable probe options (procRoot for tests).
58
+ * @property {{ onWedgedVerdict(opts: { worldId: string, pid: number|null }): Promise<void> }} [recovery]
59
+ * Optional recovery handle (from world-watchdog-recovery.mjs). When present,
60
+ * called once on verdict-transition to 'wedged' (suspect → wedged), NOT on
61
+ * steady-state re-wedge. When absent, detection-only (Phase A behaviour).
62
+ * @property {(msg: string) => void} [log] Defaults to `console.log` with a `[world-watchdog] ` prefix.
63
+ * @property {(msg: string) => void} [debug] Defaults to no-op.
64
+ * @property {(cb: () => void, ms: number) => any} [setTimer]
65
+ * Injectable `setInterval` for tests.
66
+ * @property {(handle: any) => void} [clearTimer]
67
+ * Injectable `clearInterval` for tests.
68
+ * @property {() => Date} [now] Clock injection for tests.
69
+ */
70
+
71
+ /**
72
+ * @typedef {object} WorldWatchdogHandle
73
+ * @property {() => void} stop
74
+ * @property {() => Promise<number>} tickNow Run one tick immediately (returns
75
+ * the count of worlds processed). Exposed for tests.
76
+ * @property {(worldId: string) => object|null} getVerdict
77
+ * Returns the latest in-memory verdict entry for a world, or null if no tick
78
+ * has fired yet. Used by the HTTP endpoint (A5).
79
+ */
80
+
81
+ /**
82
+ * Per-world state tracked between ticks for the 2-tick confirm.
83
+ *
84
+ * @typedef {object} WorldWatchdogState
85
+ * @property {'healthy'|'suspect'|'wedged'|'unknown'} lastClassification
86
+ * The raw classification from the previous tick (before 2-tick confirm).
87
+ * @property {'healthy'|'suspect'|'wedged'|'unknown'} lastVerdict
88
+ * The emitted verdict (post-confirm).
89
+ * @property {string} lastTickAt ISO-8601 timestamp of last tick.
90
+ * @property {object|null} lastSignals The signals from the last tick.
91
+ * @property {number|null} lastPid The PID probed last tick.
92
+ */
93
+
94
/**
 * Resolve the tick cadence in milliseconds.
 *
 * Precedence: a usable explicit value, then a usable
 * `OLAM_WORLD_WATCHDOG_TICK_MS` env value, then `DEFAULT_TICK_MS`.
 * "Usable" means a finite number in (0, 2^31 - 1]. Anything else (NaN from a
 * non-numeric env value, zero, negatives, oversized delays) is rejected here
 * because Node's `setInterval` silently coerces such delays to 1 ms, which
 * would turn a typo into a near-busy-loop of ticks.
 *
 * @param {number|undefined|null} explicitMs Caller-supplied `intervalMs` dep.
 * @returns {number} A positive, finite delay in milliseconds.
 */
function resolveTickIntervalMs(explicitMs) {
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const isUsable = (ms) =>
    typeof ms === 'number' &&
    Number.isFinite(ms) &&
    ms > 0 &&
    ms <= MAX_TIMER_DELAY_MS;

  if (isUsable(explicitMs)) return explicitMs;

  const fromEnv = Number.parseInt(process.env.OLAM_WORLD_WATCHDOG_TICK_MS ?? '', 10);
  if (isUsable(fromEnv)) return fromEnv;

  return DEFAULT_TICK_MS;
}

/**
 * Start the world watchdog. Returns a `{ stop, tickNow, getVerdict }` handle.
 *
 * Honoring `OLAM_WORLD_WATCHDOG_DISABLED=1`: if the env var is set, returns
 * a no-op stub immediately without starting the interval or making any probe
 * calls.
 *
 * Otherwise an initial tick is scheduled via `setImmediate` and subsequent
 * ticks run on the (injectable) interval timer. Ticks never overlap: a tick
 * that fires while the previous one is still running is skipped.
 *
 * @param {WorldWatchdogDeps} [deps]
 * @returns {WorldWatchdogHandle}
 */
export function startWorldWatchdog(deps = {}) {
  // Honour kill switch — return a no-op stub.
  if (process.env.OLAM_WORLD_WATCHDOG_DISABLED === '1') {
    return {
      stop() {},
      tickNow: async () => 0,
      getVerdict: () => null,
    };
  }

  const log = deps.log ?? ((m) => console.log(`[world-watchdog] ${m}`));
  const debug = deps.debug ?? (() => {});
  const setTimer = deps.setTimer ?? ((cb, ms) => setInterval(cb, ms));
  const clearTimer = deps.clearTimer ?? ((h) => clearInterval(h));
  const now = deps.now ?? (() => new Date());

  // Invalid explicit/env values fall through to the next source instead of
  // reaching the timer (where they would be coerced to a 1 ms interval).
  const intervalMs = resolveTickIntervalMs(deps.intervalMs);

  const broadcaster = deps.broadcaster ?? null;
  const listActiveWorlds = deps.listActiveWorlds ?? (async () => []);
  const getClaudePidForWorld = deps.getClaudePidForWorld ?? (async (_id) => null);
  const probeOpts = deps.probes ?? {};
  // Recovery hook — null when not configured (Phase A / default-off behaviour).
  const recovery = deps.recovery ?? null;

  // Per-world state map: worldId → WorldWatchdogState.
  // NOTE(review): entries are never removed, so a world that leaves the active
  // list keeps its last entry (and getVerdict keeps returning it).
  /** @type {Map<string, WorldWatchdogState>} */
  const worldState = new Map();

  let stopped = false;
  let inFlight = false;
  let intervalHandle = null;

  /**
   * Probe a single world and update its state. Returns the verdict emitted.
   *
   * @param {string} worldId
   * @returns {Promise<'healthy'|'suspect'|'wedged'|'unknown'>}
   */
  async function probeWorld(worldId) {
    // Normalise a missing PID (null OR undefined) to null so a lookup that
    // returns nothing is reported as 'unknown' rather than probed.
    const pid = (await getClaudePidForWorld(worldId)) ?? null;

    let signals = null;
    let rawClassification = 'unknown';

    if (pid !== null) {
      // All probes are fail-soft — they return null/[] on I/O error.
      const [wchan, closeWaitSockets, cpuPercent] = await Promise.all([
        readWchan(pid, probeOpts),
        readCloseWaitSockets(pid, probeOpts),
        readCpuPercent(pid, CPU_WINDOW_MS, probeOpts),
      ]);
      // Tolerate a null socket list the same way as an empty one.
      const closeWaitCount = Array.isArray(closeWaitSockets)
        ? closeWaitSockets.length
        : 0;

      signals = { wchan, closeWaitCount, cpuPercent };
      // Classify a copy so the stored/broadcast signals object stays ours.
      rawClassification = classify({ ...signals });
    }

    // 2-tick confirm: only emit 'wedged' if BOTH this tick AND the previous tick
    // classified as 'wedged'. Otherwise emit the raw classification.
    const prev = worldState.get(worldId);
    let verdict = rawClassification;
    if (rawClassification === 'wedged' && prev?.lastClassification !== 'wedged') {
      // First 'wedged' tick — emit 'suspect' (2-tick confirm pending).
      verdict = 'suspect';
    }

    const tickAt = now().toISOString();

    // Update per-world state.
    worldState.set(worldId, {
      lastClassification: rawClassification,
      lastVerdict: verdict,
      lastTickAt: tickAt,
      lastSignals: signals,
      lastPid: pid,
    });

    // Recovery hook — fire ONCE on verdict-transition to 'wedged' (not on
    // steady-state re-wedge). Guard: prev?.lastVerdict !== 'wedged' ensures
    // only the suspect→wedged transition triggers, not wedged→wedged.
    if (
      verdict === 'wedged' &&
      recovery !== null &&
      prev?.lastVerdict !== 'wedged'
    ) {
      const onRecoveryError = (err) => {
        log(`recovery.onWedgedVerdict ${worldId} failed: ${err?.message ?? err}`);
      };
      // Fire-and-forget; fail-soft so a recovery error never skips other worlds.
      // The try/catch covers a hook that throws synchronously, and
      // Promise.resolve covers a hook that returns a non-promise — either would
      // otherwise abort this probe before the broadcast below.
      try {
        void Promise.resolve(recovery.onWedgedVerdict({ worldId, pid })).catch(onRecoveryError);
      } catch (err) {
        onRecoveryError(err);
      }
    }

    // Emit broadcaster event.
    if (broadcaster && typeof broadcaster.broadcast === 'function') {
      try {
        broadcaster.broadcast('world.watchdog.tick', {
          worldId,
          verdict,
          signals,
          pid,
          lastTickAt: tickAt,
        });
      } catch (err) {
        log(`broadcast ${worldId} failed: ${err?.message ?? err}`);
      }
    }

    return verdict;
  }

  /**
   * One tick: get active worlds, probe each, return count processed.
   *
   * Returns 0 without doing any work when the watchdog is stopped or a
   * previous tick is still running.
   *
   * @returns {Promise<number>}
   */
  async function tick() {
    if (stopped) return 0;
    if (inFlight) {
      debug('tick skipped: previous tick still in flight');
      return 0;
    }
    inFlight = true;

    let processed = 0;
    try {
      let worlds;
      try {
        // A nullish result is treated as "no active worlds" rather than
        // letting the for...of below throw.
        worlds = (await listActiveWorlds()) ?? [];
      } catch (err) {
        log(`listActiveWorlds failed: ${err?.message ?? err}`);
        return 0;
      }

      for (const worldId of worlds) {
        if (stopped) break;
        if (typeof worldId !== 'string') continue;

        try {
          await probeWorld(worldId);
          processed += 1;
        } catch (err) {
          // Per-world fail-soft: one bad world doesn't crash the loop.
          debug(`probe ${worldId} failed: ${err?.message ?? err}`);
        }
      }
    } finally {
      // Always release the guard, including on the early return above.
      inFlight = false;
    }

    return processed;
  }

  // Kick off an initial tick on next event-loop turn so callers can
  // attach test spies before any probe work happens.
  setImmediate(() => {
    if (stopped) return;
    void tick().catch((err) => {
      log(`initial tick crashed: ${err?.message ?? err}`);
    });
  });

  intervalHandle = setTimer(() => {
    void tick().catch((err) => {
      log(`tick crashed: ${err?.message ?? err}`);
    });
  }, intervalMs);
  // Don't pin the event loop on shutdown.
  if (intervalHandle && typeof intervalHandle.unref === 'function') {
    intervalHandle.unref();
  }

  log(`started: interval=${intervalMs}ms`);

  return {
    /** Stop ticking. Idempotent; a tick already in flight exits at its next world boundary. */
    stop() {
      if (stopped) return;
      stopped = true;
      if (intervalHandle !== null) {
        try { clearTimer(intervalHandle); } catch { /* ignore */ }
        intervalHandle = null;
      }
    },

    tickNow: tick,

    /**
     * Return the latest in-memory verdict entry for a world.
     * Returns null if no tick has fired for this world yet.
     *
     * @param {string} worldId
     * @returns {WorldWatchdogState|null}
     */
    getVerdict(worldId) {
      return worldState.get(worldId) ?? null;
    },
  };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pleri/olam-cli",
3
- "version": "0.1.196",
3
+ "version": "0.1.199",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "olam": "./bin/olam.cjs"