nemoris 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +49 -0
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/SECURITY.md +119 -0
- package/bin/nemoris +46 -0
- package/config/agents/agent.toml.example +28 -0
- package/config/agents/default.toml +22 -0
- package/config/agents/orchestrator.toml +18 -0
- package/config/delivery.toml +73 -0
- package/config/embeddings.toml +5 -0
- package/config/identity/default-purpose.md +1 -0
- package/config/identity/default-soul.md +3 -0
- package/config/identity/orchestrator-purpose.md +1 -0
- package/config/identity/orchestrator-soul.md +1 -0
- package/config/improvement-targets.toml +15 -0
- package/config/jobs/heartbeat-check.toml +30 -0
- package/config/jobs/memory-rollup.toml +46 -0
- package/config/jobs/workspace-health.toml +63 -0
- package/config/mcp.toml +16 -0
- package/config/output-contracts.toml +17 -0
- package/config/peers.toml +32 -0
- package/config/peers.toml.example +32 -0
- package/config/policies/memory-default.toml +10 -0
- package/config/policies/memory-heartbeat.toml +5 -0
- package/config/policies/memory-ops.toml +10 -0
- package/config/policies/tools-heartbeat-minimal.toml +8 -0
- package/config/policies/tools-interactive-safe.toml +8 -0
- package/config/policies/tools-ops-bounded.toml +8 -0
- package/config/policies/tools-orchestrator.toml +7 -0
- package/config/providers/anthropic.toml +15 -0
- package/config/providers/ollama.toml +5 -0
- package/config/providers/openai-codex.toml +9 -0
- package/config/providers/openrouter.toml +5 -0
- package/config/router.toml +22 -0
- package/config/runtime.toml +114 -0
- package/config/skills/self-improvement.toml +15 -0
- package/config/skills/telegram-onboarding-spec.md +240 -0
- package/config/skills/workspace-monitor.toml +15 -0
- package/config/task-router.toml +42 -0
- package/install.sh +50 -0
- package/package.json +90 -0
- package/src/auth/auth-profiles.js +169 -0
- package/src/auth/openai-codex-oauth.js +285 -0
- package/src/battle.js +449 -0
- package/src/cli/help.js +265 -0
- package/src/cli/output-filter.js +49 -0
- package/src/cli/runtime-control.js +704 -0
- package/src/cli-main.js +2763 -0
- package/src/cli.js +78 -0
- package/src/config/loader.js +332 -0
- package/src/config/schema-validator.js +214 -0
- package/src/config/toml-lite.js +8 -0
- package/src/daemon/action-handlers.js +71 -0
- package/src/daemon/healing-tick.js +87 -0
- package/src/daemon/health-probes.js +90 -0
- package/src/daemon/notifier.js +57 -0
- package/src/daemon/nurse.js +218 -0
- package/src/daemon/repair-log.js +106 -0
- package/src/daemon/rule-staging.js +90 -0
- package/src/daemon/rules.js +29 -0
- package/src/daemon/telegram-commands.js +54 -0
- package/src/daemon/updater.js +85 -0
- package/src/jobs/job-runner.js +78 -0
- package/src/mcp/consumer.js +129 -0
- package/src/memory/active-recall.js +171 -0
- package/src/memory/backend-manager.js +97 -0
- package/src/memory/backends/file-backend.js +38 -0
- package/src/memory/backends/qmd-backend.js +219 -0
- package/src/memory/embedding-guards.js +24 -0
- package/src/memory/embedding-index.js +118 -0
- package/src/memory/embedding-service.js +179 -0
- package/src/memory/file-index.js +177 -0
- package/src/memory/memory-signature.js +5 -0
- package/src/memory/memory-store.js +648 -0
- package/src/memory/retrieval-planner.js +66 -0
- package/src/memory/scoring.js +145 -0
- package/src/memory/simhash.js +78 -0
- package/src/memory/sqlite-active-store.js +824 -0
- package/src/memory/write-policy.js +36 -0
- package/src/onboarding/aliases.js +33 -0
- package/src/onboarding/auth/api-key.js +224 -0
- package/src/onboarding/auth/ollama-detect.js +42 -0
- package/src/onboarding/clack-prompter.js +77 -0
- package/src/onboarding/doctor.js +530 -0
- package/src/onboarding/lock.js +42 -0
- package/src/onboarding/model-catalog.js +344 -0
- package/src/onboarding/phases/auth.js +589 -0
- package/src/onboarding/phases/build.js +130 -0
- package/src/onboarding/phases/choose.js +82 -0
- package/src/onboarding/phases/detect.js +98 -0
- package/src/onboarding/phases/hatch.js +216 -0
- package/src/onboarding/phases/identity.js +79 -0
- package/src/onboarding/phases/ollama.js +345 -0
- package/src/onboarding/phases/scaffold.js +99 -0
- package/src/onboarding/phases/telegram.js +377 -0
- package/src/onboarding/phases/validate.js +204 -0
- package/src/onboarding/phases/verify.js +206 -0
- package/src/onboarding/platform.js +482 -0
- package/src/onboarding/status-bar.js +95 -0
- package/src/onboarding/templates.js +794 -0
- package/src/onboarding/toml-writer.js +38 -0
- package/src/onboarding/tui.js +250 -0
- package/src/onboarding/uninstall.js +153 -0
- package/src/onboarding/wizard.js +499 -0
- package/src/providers/anthropic.js +168 -0
- package/src/providers/base.js +247 -0
- package/src/providers/circuit-breaker.js +136 -0
- package/src/providers/ollama.js +163 -0
- package/src/providers/openai-codex.js +149 -0
- package/src/providers/openrouter.js +136 -0
- package/src/providers/registry.js +36 -0
- package/src/providers/router.js +16 -0
- package/src/runtime/bootstrap-cache.js +47 -0
- package/src/runtime/capabilities-prompt.js +25 -0
- package/src/runtime/completion-ping.js +99 -0
- package/src/runtime/config-validator.js +121 -0
- package/src/runtime/context-ledger.js +360 -0
- package/src/runtime/cutover-readiness.js +42 -0
- package/src/runtime/daemon.js +729 -0
- package/src/runtime/delivery-ack.js +195 -0
- package/src/runtime/delivery-adapters/local-file.js +41 -0
- package/src/runtime/delivery-adapters/openclaw-cli.js +94 -0
- package/src/runtime/delivery-adapters/openclaw-peer.js +98 -0
- package/src/runtime/delivery-adapters/shadow.js +13 -0
- package/src/runtime/delivery-adapters/standalone-http.js +98 -0
- package/src/runtime/delivery-adapters/telegram.js +104 -0
- package/src/runtime/delivery-adapters/tui.js +128 -0
- package/src/runtime/delivery-manager.js +807 -0
- package/src/runtime/delivery-store.js +168 -0
- package/src/runtime/dependency-health.js +118 -0
- package/src/runtime/envelope.js +114 -0
- package/src/runtime/evaluation.js +1089 -0
- package/src/runtime/exec-approvals.js +216 -0
- package/src/runtime/executor.js +500 -0
- package/src/runtime/failure-ping.js +67 -0
- package/src/runtime/flows.js +83 -0
- package/src/runtime/guards.js +45 -0
- package/src/runtime/handoff.js +51 -0
- package/src/runtime/identity-cache.js +28 -0
- package/src/runtime/improvement-engine.js +109 -0
- package/src/runtime/improvement-harness.js +581 -0
- package/src/runtime/input-sanitiser.js +72 -0
- package/src/runtime/interaction-contract.js +347 -0
- package/src/runtime/lane-readiness.js +226 -0
- package/src/runtime/migration.js +323 -0
- package/src/runtime/model-resolution.js +78 -0
- package/src/runtime/network.js +64 -0
- package/src/runtime/notification-store.js +97 -0
- package/src/runtime/notifier.js +256 -0
- package/src/runtime/orchestrator.js +53 -0
- package/src/runtime/orphan-reaper.js +41 -0
- package/src/runtime/output-contract-schema.js +139 -0
- package/src/runtime/output-contract-validator.js +439 -0
- package/src/runtime/peer-readiness.js +69 -0
- package/src/runtime/peer-registry.js +133 -0
- package/src/runtime/pilot-status.js +108 -0
- package/src/runtime/prompt-builder.js +261 -0
- package/src/runtime/provider-attempt.js +582 -0
- package/src/runtime/report-fallback.js +71 -0
- package/src/runtime/result-normalizer.js +183 -0
- package/src/runtime/retention.js +74 -0
- package/src/runtime/review.js +244 -0
- package/src/runtime/route-job.js +15 -0
- package/src/runtime/run-store.js +38 -0
- package/src/runtime/schedule.js +88 -0
- package/src/runtime/scheduler-state.js +434 -0
- package/src/runtime/scheduler.js +656 -0
- package/src/runtime/session-compactor.js +182 -0
- package/src/runtime/session-search.js +155 -0
- package/src/runtime/slack-inbound.js +249 -0
- package/src/runtime/ssrf.js +102 -0
- package/src/runtime/status-aggregator.js +330 -0
- package/src/runtime/task-contract.js +140 -0
- package/src/runtime/task-packet.js +107 -0
- package/src/runtime/task-router.js +140 -0
- package/src/runtime/telegram-inbound.js +1565 -0
- package/src/runtime/token-counter.js +134 -0
- package/src/runtime/token-estimator.js +59 -0
- package/src/runtime/tool-loop.js +200 -0
- package/src/runtime/transport-server.js +311 -0
- package/src/runtime/tui-server.js +411 -0
- package/src/runtime/ulid.js +44 -0
- package/src/security/ssrf-check.js +197 -0
- package/src/setup.js +369 -0
- package/src/shadow/bridge.js +303 -0
- package/src/skills/loader.js +84 -0
- package/src/tools/catalog.json +49 -0
- package/src/tools/cli-delegate.js +44 -0
- package/src/tools/mcp-client.js +106 -0
- package/src/tools/micro/cancel-task.js +6 -0
- package/src/tools/micro/complete-task.js +6 -0
- package/src/tools/micro/fail-task.js +6 -0
- package/src/tools/micro/http-fetch.js +74 -0
- package/src/tools/micro/index.js +36 -0
- package/src/tools/micro/lcm-recall.js +60 -0
- package/src/tools/micro/list-dir.js +17 -0
- package/src/tools/micro/list-skills.js +46 -0
- package/src/tools/micro/load-skill.js +38 -0
- package/src/tools/micro/memory-search.js +45 -0
- package/src/tools/micro/read-file.js +11 -0
- package/src/tools/micro/session-search.js +54 -0
- package/src/tools/micro/shell-exec.js +43 -0
- package/src/tools/micro/trigger-job.js +79 -0
- package/src/tools/micro/web-search.js +58 -0
- package/src/tools/micro/workspace-paths.js +39 -0
- package/src/tools/micro/write-file.js +14 -0
- package/src/tools/micro/write-memory.js +41 -0
- package/src/tools/registry.js +348 -0
- package/src/tools/tool-result-contract.js +36 -0
- package/src/tui/chat.js +835 -0
- package/src/tui/renderer.js +175 -0
- package/src/tui/socket-client.js +217 -0
- package/src/utils/canonical-json.js +29 -0
- package/src/utils/compaction.js +30 -0
- package/src/utils/env-loader.js +5 -0
- package/src/utils/errors.js +80 -0
- package/src/utils/fs.js +101 -0
- package/src/utils/ids.js +5 -0
- package/src/utils/model-context-limits.js +30 -0
- package/src/utils/token-budget.js +74 -0
- package/src/utils/usage-cost.js +25 -0
- package/src/utils/usage-metrics.js +14 -0
- package/vendor/smol-toml-1.5.2.tgz +0 -0
|
@@ -0,0 +1,729 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { OpenClawShadowBridge } from "../shadow/bridge.js";
|
|
4
|
+
import { Scheduler } from "./scheduler.js";
|
|
5
|
+
import { Executor } from "./executor.js";
|
|
6
|
+
import { SchedulerStateStore } from "./scheduler-state.js";
|
|
7
|
+
import { computeNextRun, computeRunSlot, formatRunState, isDue, iso } from "./schedule.js";
|
|
8
|
+
import { DeliveryManager } from "./delivery-manager.js";
|
|
9
|
+
import { createRuntimeId } from "../utils/ids.js";
|
|
10
|
+
import { routeJob } from "./route-job.js";
|
|
11
|
+
import { HealingTick } from "../daemon/healing-tick.js";
|
|
12
|
+
import { RepairLog } from "../daemon/repair-log.js";
|
|
13
|
+
import { RuleStaging } from "../daemon/rule-staging.js";
|
|
14
|
+
|
|
15
|
+
// Name of the lock file created inside the state directory to enforce a
// single running daemon instance (see acquireDaemonLifecycleLock below).
export const DAEMON_LOCK_FILE = "daemon.lock";
|
|
16
|
+
|
|
17
|
+
/**
 * Derive the per-slot idempotency instance for a job.
 *
 * @param {object} job - Job plan; only `idempotencyKey` is read here.
 * @param {Date} slotTime - The run slot this execution belongs to.
 * @returns {string|null} `"<key>:<iso slot>"`, or null when the job has no
 *   idempotency key configured (falsy key disables deduplication).
 */
function buildIdempotencyInstance(job, slotTime) {
  const key = job.idempotencyKey;
  if (!key) {
    return null;
  }
  return `${key}:${iso(slotTime)}`;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Build a unique lease identifier for a job claim.
 *
 * Combines the job id, the claim timestamp (ms), and a fresh runtime id so
 * concurrent claim attempts for the same job can be told apart.
 *
 * @param {string} jobId - Identifier of the job being claimed.
 * @param {Date} now - Claim time.
 * @returns {string} Colon-joined lease id.
 */
function buildLeaseId(jobId, now) {
  const parts = [jobId, now.getTime(), createRuntimeId("lease")];
  return parts.join(":");
}
|
|
25
|
+
|
|
26
|
+
/**
 * Compute the retry backoff for a failed job attempt.
 *
 * Linear backoff of one minute per attempt, clamped to the range
 * [1, 5] minutes.
 *
 * @param {number} attempt - 1-based attempt counter (values < 1 clamp to 1).
 * @returns {number} Delay in milliseconds.
 */
function buildRetryDelay(attempt) {
  const minutes = attempt < 1 ? 1 : attempt > 5 ? 5 : attempt;
  return minutes * 60 * 1000;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Decide whether an in-progress claim on a job is stale and may be re-claimed.
 *
 * The lease window is twice the job's max runtime (seconds * 2000 ms), with a
 * 60-second floor. A claim with no recorded start time is always stale.
 *
 * @param {object} entry - Persisted job state; reads `inProgressStartedAt`.
 * @param {Date} now - Current time.
 * @param {number|undefined} maxRuntimeSeconds - Job budget; falsy falls back to 30.
 * @returns {boolean} True when the claim's lease has expired.
 */
function isStaleInProgress(entry, now, maxRuntimeSeconds) {
  if (!entry.inProgressStartedAt) return true;
  const startedAt = new Date(entry.inProgressStartedAt).getTime();
  // Bug fix: an unparsable timestamp yields NaN, and every comparison with
  // NaN is false, so the claim would never be considered stale and the job
  // would stay blocked forever. Treat a garbled timestamp as stale instead.
  if (!Number.isFinite(startedAt)) return true;
  const leaseMs = Math.max((maxRuntimeSeconds || 30) * 2000, 60_000);
  return startedAt + leaseMs <= now.getTime();
}
|
|
36
|
+
|
|
37
|
+
/**
 * Render an arbitrary thrown value as a single string for logging.
 *
 * @param {*} error - Whatever was thrown or rejected with.
 * @returns {string} Stack trace for Error instances (message when no stack),
 *   the string itself for strings, a JSON dump otherwise, and a coerced
 *   String() as the last resort for unserializable values.
 */
function normalizeFatalError(error) {
  // Errors carry the most context in their stack trace.
  if (error instanceof Error) return error.stack || error.message;
  if (typeof error === "string") return error;
  // Anything else: prefer a structured dump, falling back to plain coercion
  // for values JSON cannot serialize (cycles, BigInt, ...).
  try {
    return JSON.stringify(error);
  } catch {
    return String(error);
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Classify an error from `process.kill` (or similar) as "process not found".
 *
 * Matches the ESRCH/ENOENT error codes plus several message patterns used by
 * different platforms/runtimes.
 *
 * @param {*} error - Error-like value; may be null/undefined.
 * @returns {boolean} True when the error indicates the target process is gone.
 */
function isMissingProcessError(error) {
  const code = error?.code;
  if (code === "ESRCH" || code === "ENOENT") {
    return true;
  }
  const message = String(error?.message || "");
  const patterns = [/no such process/i, /process does not exist/i, /could not find/i];
  return patterns.some((pattern) => pattern.test(message));
}
|
|
57
|
+
|
|
58
|
+
/**
 * Read a pid from a file, tolerating every failure mode.
 *
 * @param {string} filePath - File expected to contain a decimal pid.
 * @param {object} [fsImpl=fs] - Injectable fs for testing.
 * @returns {number|null} The pid when the file holds a positive integer;
 *   null on read errors or non-positive / non-numeric content.
 */
function readPidFromFile(filePath, fsImpl = fs) {
  try {
    const raw = fsImpl.readFileSync(filePath, "utf8");
    const pid = Number.parseInt(raw.trim(), 10);
    if (Number.isInteger(pid) && pid > 0) {
      return pid;
    }
    return null;
  } catch {
    return null;
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Delete a file, swallowing any error — removal here is strictly best-effort
 * (a missing file or permission problem must not abort shutdown/cleanup).
 *
 * @param {string} filePath - File to remove.
 * @param {object} [fsImpl=fs] - Injectable fs for testing.
 */
function safeUnlink(filePath, fsImpl = fs) {
  try {
    fsImpl.unlinkSync(filePath);
  } catch {
    // Intentionally ignored: best-effort cleanup only.
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Delete a pid-bearing file only when it is still owned by `pid`.
 *
 * Guards against racing with a newer daemon instance: if another process has
 * rewritten the file with its own pid, the file is left untouched. All
 * filesystem errors are swallowed (best-effort cleanup).
 *
 * @param {string} filePath - File containing a pid.
 * @param {number} pid - Pid that must match for deletion to proceed.
 * @param {object} [fsImpl=fs] - Injectable fs for testing.
 */
function removeOwnedFile(filePath, pid, fsImpl = fs) {
  try {
    const owner = readPidFromFile(filePath, fsImpl);
    if (owner === pid) {
      fsImpl.unlinkSync(filePath);
    }
  } catch {
    // Intentionally ignored: best-effort cleanup only.
  }
}
|
|
84
|
+
|
|
85
|
+
/**
 * Check whether a process with the given pid exists.
 *
 * Uses `kill(pid, 0)`, which performs existence/permission checks without
 * delivering a signal. A permission error (e.g. EPERM) still proves the
 * process exists, so only "missing process" errors map to false.
 *
 * @param {number} pid - Candidate pid; non-positive or non-integer → false.
 * @param {object} [options]
 * @param {object} [options.processImpl=process] - Injectable process for testing.
 * @returns {boolean} True when the process appears to be alive.
 */
export function isProcessRunning(pid, { processImpl = process } = {}) {
  const validPid = Number.isInteger(pid) && pid > 0;
  if (!validPid) {
    return false;
  }

  try {
    processImpl.kill(pid, 0);
    return true;
  } catch (error) {
    return !isMissingProcessError(error);
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Report that another daemon holds the lifecycle lock, then exit.
 *
 * Looks up the holder's pid (pid file first, lock file as fallback) to make
 * the message actionable, writes the error to stderr, and terminates via the
 * injected process. Returns null only when `processImpl.exit` is a stub that
 * does not actually terminate (tests).
 *
 * @returns {null}
 */
function emitLockHeldError({ lockFile, pidFile, fsImpl = fs, processImpl = process, stderr = process.stderr }) {
  const ownerPid = readPidFromFile(pidFile, fsImpl) ?? readPidFromFile(lockFile, fsImpl);
  const suffix = ownerPid ? ` (pid ${ownerPid})` : "";
  stderr.write(`Another daemon is already running${suffix}. Stop it first with: nemoris stop\n`);
  processImpl.exit(1);
  return null;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Acquire the single-instance lifecycle lock for the daemon.
 *
 * Two strategies, chosen by capability of the supplied fs implementation:
 *  1. flock-based — when `fsImpl.flockSync` exists (e.g. fs-ext style shim;
 *     TODO confirm which fsImpl provides this in production), take an
 *     exclusive non-blocking flock on the lock file and keep the fd open.
 *  2. O_EXCL fallback — atomically create the lock file with the "wx" flag;
 *     if it already exists, reclaim it only when the recorded pid is dead.
 *
 * Side effects: creates `stateDir`, writes this pid into both the lock file
 * and the pid file, and EXITS THE PROCESS (via emitLockHeldError) when the
 * lock is held by a live daemon.
 *
 * @param {object} [options]
 * @param {string} options.stateDir - Directory holding lock/pid files.
 * @param {string} [options.pidFile] - Defaults to `<stateDir>/daemon.pid`.
 * @param {object} [options.fsImpl=fs] - Injectable fs for testing.
 * @param {object} [options.processImpl=process] - Injectable process for testing.
 * @param {object} [options.stderr=process.stderr] - Injectable stream for testing.
 * @returns {{lockFile: string, pidFile: string, release: Function}|null}
 *   Lock handle with an idempotent `release()`, or null when emitLockHeldError
 *   was reached with a non-terminating processImpl stub.
 */
export function acquireDaemonLifecycleLock({
  stateDir,
  pidFile = path.join(stateDir, "daemon.pid"),
  fsImpl = fs,
  processImpl = process,
  stderr = process.stderr,
} = {}) {
  const lockFile = path.join(stateDir, DAEMON_LOCK_FILE);
  const pid = processImpl.pid;
  fsImpl.mkdirSync(stateDir, { recursive: true });

  let released = false;
  let lockFd = null;
  // Captured by the release closure; reset to false if flock acquisition fails
  // so release() cleans up the fallback lock file instead of the fd.
  let usesFlock = typeof fsImpl.flockSync === "function";

  // Idempotent teardown: removes the pid file (only if we still own it), then
  // either unlocks/closes the flock fd or removes the fallback lock file.
  const release = () => {
    if (released) return;
    released = true;

    removeOwnedFile(pidFile, pid, fsImpl);

    if (usesFlock) {
      if (lockFd !== null) {
        try {
          fsImpl.flockSync(lockFd, "un");
        } catch {
          // Ignore lock release failures during shutdown.
        }
        try {
          fsImpl.closeSync(lockFd);
        } catch {
          // Ignore fd close failures during shutdown.
        }
        lockFd = null;
      }
      return;
    }

    removeOwnedFile(lockFile, pid, fsImpl);
  };

  // Strategy 1: exclusive non-blocking flock held for the daemon's lifetime.
  if (usesFlock) {
    try {
      lockFd = fsImpl.openSync(lockFile, "w");
      fsImpl.flockSync(lockFd, "ex_nb");
      fsImpl.writeFileSync(lockFd, `${pid}\n`, "utf8");
      fsImpl.writeFileSync(pidFile, `${pid}\n`, "utf8");
      return { lockFile, pidFile, release };
    } catch (error) {
      // Close the fd before deciding how to surface the failure.
      if (lockFd !== null) {
        try {
          fsImpl.closeSync(lockFd);
        } catch {
          // Ignore close failures on failed lock acquisition.
        }
        lockFd = null;
      }
      // EWOULDBLOCK/EAGAIN mean another live daemon holds the flock.
      if (error?.code === "EWOULDBLOCK" || error?.code === "EAGAIN") {
        return emitLockHeldError({ lockFile, pidFile, fsImpl, processImpl, stderr });
      }
      throw error;
    }
  }

  // Strategy 2: O_EXCL create. Two attempts: the second runs after a stale
  // lock file (dead owner) has been unlinked on the first pass.
  usesFlock = false;
  for (let attempt = 0; attempt < 2; attempt += 1) {
    try {
      // "wx" fails with EEXIST when the lock file already exists (atomic).
      const fd = fsImpl.openSync(lockFile, "wx");
      try {
        fsImpl.writeFileSync(fd, `${pid}\n`, "utf8");
      } finally {
        fsImpl.closeSync(fd);
      }
      fsImpl.writeFileSync(pidFile, `${pid}\n`, "utf8");
      return { lockFile, pidFile, release };
    } catch (error) {
      if (error?.code !== "EEXIST") {
        throw error;
      }

      // Lock file exists: if its recorded owner is still alive, give up.
      const lockPid = readPidFromFile(lockFile, fsImpl) ?? readPidFromFile(pidFile, fsImpl);
      if (isProcessRunning(lockPid, { processImpl })) {
        return emitLockHeldError({ lockFile, pidFile, fsImpl, processImpl, stderr });
      }

      // Owner is dead — reclaim by unlinking and retrying once.
      safeUnlink(lockFile, fsImpl);
    }
  }

  // Both attempts lost the race to another process claiming the lock.
  return emitLockHeldError({ lockFile, pidFile, fsImpl, processImpl, stderr });
}
|
|
200
|
+
|
|
201
|
+
export class SchedulerDaemon {
|
|
202
|
+
constructor({ projectRoot, liveRoot, stateRoot, fetchImpl, deliveryAck, taskContract, telegramSendFn, envelopeStore } = {}) {
|
|
203
|
+
this.projectRoot = projectRoot;
|
|
204
|
+
this.liveRoot = liveRoot;
|
|
205
|
+
this.stateRoot = stateRoot;
|
|
206
|
+
this.scheduler = new Scheduler({ projectRoot, liveRoot, stateRoot });
|
|
207
|
+
this.executor = new Executor({ projectRoot, liveRoot, stateRoot, fetchImpl });
|
|
208
|
+
this.stateStore = new SchedulerStateStore({ rootDir: path.join(stateRoot, "scheduler") });
|
|
209
|
+
this.deliveryManager = new DeliveryManager({ projectRoot, liveRoot, stateRoot, fetchImpl, stateStore: this.stateStore });
|
|
210
|
+
this.deliveryAck = deliveryAck || null;
|
|
211
|
+
this.taskContract = taskContract || null;
|
|
212
|
+
this.telegramSendFn = telegramSendFn || null;
|
|
213
|
+
this.envelopeStore = envelopeStore || null;
|
|
214
|
+
this.maintenanceCycleCount = 0;
|
|
215
|
+
this.shuttingDown = false;
|
|
216
|
+
this.healingTick = null; // Initialized on first maintenance run
|
|
217
|
+
this.shutdownRequestedAt = null;
|
|
218
|
+
this.activeRuns = 0;
|
|
219
|
+
this.lastTickResult = null;
|
|
220
|
+
this.orchestrator = null;
|
|
221
|
+
this.mcpConsumer = null;
|
|
222
|
+
this.repairLog = null;
|
|
223
|
+
this.ruleStaging = null;
|
|
224
|
+
this._globalHandlersInstalled = false;
|
|
225
|
+
this._fatalExitInProgress = false;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
 * Inject the orchestrator that tick() passes into routeJob() when
 * dispatching due jobs.
 * @param {object} orchestrator - Orchestrator instance.
 */
setOrchestrator(orchestrator) {
  this.orchestrator = orchestrator;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Inject the MCP consumer. Stored for use by other daemon code paths;
 * not referenced within this portion of the file.
 * @param {object} consumer - MCP consumer instance.
 */
setMcpConsumer(consumer) {
  this.mcpConsumer = consumer;
}
|
|
235
|
+
|
|
236
|
+
_collectOpenSqliteHandles() {
|
|
237
|
+
const handles = [];
|
|
238
|
+
const seen = new Set();
|
|
239
|
+
const addHandle = (db, label) => {
|
|
240
|
+
if (!db || typeof db.exec !== "function" || seen.has(db)) return;
|
|
241
|
+
seen.add(db);
|
|
242
|
+
handles.push({ db, label });
|
|
243
|
+
};
|
|
244
|
+
|
|
245
|
+
addHandle(this.stateStore?.db, "stateStore");
|
|
246
|
+
addHandle(this.taskContract?.db, "taskContract");
|
|
247
|
+
addHandle(this.deliveryAck?.db, "deliveryAck");
|
|
248
|
+
addHandle(this.envelopeStore?.db, "envelopeStore");
|
|
249
|
+
addHandle(this.repairLog?.db, "repairLog");
|
|
250
|
+
addHandle(this.ruleStaging?.db, "ruleStaging");
|
|
251
|
+
|
|
252
|
+
const sqliteStores = this.scheduler?.memoryStore?.sqliteStores;
|
|
253
|
+
if (sqliteStores instanceof Map) {
|
|
254
|
+
for (const [dbPath, store] of sqliteStores.entries()) {
|
|
255
|
+
addHandle(store?.db, `memoryStore:${dbPath}`);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
return handles;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
checkpointOpenDbs() {
|
|
263
|
+
const results = [];
|
|
264
|
+
for (const { db, label } of this._collectOpenSqliteHandles()) {
|
|
265
|
+
try {
|
|
266
|
+
db.exec("PRAGMA wal_checkpoint(TRUNCATE);");
|
|
267
|
+
results.push({ label, ok: true });
|
|
268
|
+
} catch (error) {
|
|
269
|
+
results.push({ label, ok: false, error: error.message });
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
return results;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
 * Last-resort handler for uncaught exceptions / unhandled rejections.
 *
 * Logs a structured line to stderr, best-effort records the crash in the
 * repair log, checkpoints open databases, and terminates with exit code 1.
 * Guarded against re-entry so a second failure during teardown cannot loop.
 *
 * @param {string} event - "uncaughtException" or "unhandledRejection".
 * @param {*} error - The thrown value or rejection reason.
 */
_handleGlobalFailure(event, error) {
  // Re-entrancy guard: a crash while handling a crash must not recurse.
  if (this._fatalExitInProgress) return;
  this._fatalExitInProgress = true;

  const details = normalizeFatalError(error);
  console.error(JSON.stringify({ service: "scheduler_daemon", event, error: details }));

  // Record the crash if a repair log exists; never let this block exit.
  try {
    this.repairLog?.write?.({
      source: "scheduler_daemon",
      type: "unhandled_crash",
      action: "restart",
      result: "escalated",
      severity: "critical",
      diagnosis: details,
      context: JSON.stringify({ event }),
      escalated: 1,
    });
  } catch {
    // Best-effort only on fatal path.
  }

  // Flush SQLite WALs so on-disk state is as current as possible, then die.
  this.checkpointOpenDbs();
  process.exit(1);
}
|
|
300
|
+
|
|
301
|
+
installGlobalHandlers() {
|
|
302
|
+
if (this._globalHandlersInstalled) return;
|
|
303
|
+
|
|
304
|
+
if (!this.repairLog && this.stateStore?.db) {
|
|
305
|
+
this.repairLog = new RepairLog(this.stateStore.db);
|
|
306
|
+
this.repairLog.ensureSchema();
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
this._globalHandlersInstalled = true;
|
|
310
|
+
this._onUnhandledRejection = (reason) => this._handleGlobalFailure("unhandledRejection", reason);
|
|
311
|
+
this._onUncaughtException = (error) => this._handleGlobalFailure("uncaughtException", error);
|
|
312
|
+
process.on("unhandledRejection", this._onUnhandledRejection);
|
|
313
|
+
process.on("uncaughtException", this._onUncaughtException);
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
async checkMigrationBanner() {
|
|
317
|
+
if (this._bannerChecked || !this.liveRoot) return;
|
|
318
|
+
this._bannerChecked = true;
|
|
319
|
+
try {
|
|
320
|
+
const bridge = new OpenClawShadowBridge({ liveRoot: this.liveRoot });
|
|
321
|
+
const config = await bridge.loadConfig();
|
|
322
|
+
if (config.agents?.list?.length > 0) {
|
|
323
|
+
console.log(`[nemoris] OpenClaw detected at ${this.liveRoot} — /transfer to migrate`);
|
|
324
|
+
}
|
|
325
|
+
} catch { /* non-blocking */ }
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
async listDueJobs(now = new Date()) {
|
|
329
|
+
const [plans, state] = await Promise.all([this.scheduler.listJobPlans(), this.stateStore.load()]);
|
|
330
|
+
return plans.filter((job) => {
|
|
331
|
+
const current = state.jobs[job.id] || {};
|
|
332
|
+
const slotTime = computeRunSlot(job.trigger, now);
|
|
333
|
+
const idempotencyInstance = buildIdempotencyInstance(job, slotTime);
|
|
334
|
+
if (current.inProgress && !isStaleInProgress(current, now, job.budget?.maxRuntimeSeconds)) {
|
|
335
|
+
return false;
|
|
336
|
+
}
|
|
337
|
+
if (idempotencyInstance && current.lastIdempotencyKey === idempotencyInstance && current.lastStatus === "ok") {
|
|
338
|
+
return false;
|
|
339
|
+
}
|
|
340
|
+
return isDue(job.trigger, current.lastRunAt, current.nextRunAt, now);
|
|
341
|
+
});
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
/**
 * Run one scheduler tick: select due jobs, claim them with a lease, execute
 * each sequentially via routeJob, and persist success/failure state
 * (including retry bookkeeping) back to the state store.
 *
 * @param {object} [options]
 * @param {Date} [options.now=new Date()] - Tick time, injectable for tests.
 * @param {string} [options.mode="dry-run"] - Execution mode forwarded to routeJob.
 * @returns {Promise<object>} Tick summary (also cached as lastTickResult).
 */
async tick({ now = new Date(), mode = "dry-run" } = {}) {
  await this.checkMigrationBanner();
  // During shutdown, return an empty result instead of starting new work.
  if (this.shuttingDown) {
    const earlyResult = {
      timestamp: iso(now),
      maxConcurrentJobs: 0,
      maxJobsPerTick: 0,
      dueCount: 0,
      results: []
    };
    this.lastTickResult = earlyResult;
    return earlyResult;
  }

  const runtime = await this.scheduler.loadRuntime();
  // Per-tick cap; falls back to the legacy maxConcurrentJobs key, then 2.
  const maxJobsPerTick = runtime.runtime?.concurrency?.maxJobsPerTick ?? runtime.runtime?.concurrency?.maxConcurrentJobs ?? 2;
  const dueJobs = (await this.listDueJobs(now)).slice(0, maxJobsPerTick);
  const results = [];
  const maintenance = await this.runMaintenance(runtime, now);
  // NOTE(review): due jobs are listed a second time AFTER maintenance to
  // compute how many were deferred by the per-tick cap — presumably because
  // maintenance can change which jobs count as due; confirm intent.
  const allDueJobs = await this.listDueJobs(now);
  const deferredCount = Math.max(allDueJobs.length - dueJobs.length, 0);

  // Jobs run sequentially; each is claimed via an atomic state mutation.
  for (const job of dueJobs) {
    const slotTime = computeRunSlot(job.trigger, now);
    const idempotencyInstance = buildIdempotencyInstance(job, slotTime);
    const leaseId = buildLeaseId(job.id, now);
    // Claim attempt: the mutator re-checks eligibility under the store's
    // mutation guarantees and returns the (possibly unchanged) state.
    const startedState = await this.stateStore.mutateJob(job.id, (current) => {
      // Someone else holds a live claim — leave state untouched.
      if (current.inProgress && !isStaleInProgress(current, now, job.budget?.maxRuntimeSeconds)) {
        return current;
      }
      // This slot already succeeded — idempotency suppresses a re-run.
      if (idempotencyInstance && current.lastIdempotencyKey === idempotencyInstance && current.lastStatus === "ok") {
        return current;
      }
      if (!isDue(job.trigger, current.lastRunAt, current.nextRunAt, now)) {
        return current;
      }
      // Carry the retry counter forward only when it belongs to this slot.
      const retrySlotKey = current.retrySlotKey || null;
      const retryAttempt = retrySlotKey === idempotencyInstance ? current.retryAttempt || 0 : 0;
      return {
        ...current,
        inProgress: true,
        inProgressStartedAt: iso(now),
        currentIdempotencyKey: idempotencyInstance,
        currentLeaseId: leaseId,
        retrySlotKey: retrySlotKey === idempotencyInstance ? retrySlotKey : current.retrySlotKey || null,
        retryAttempt
      };
    });

    // Claim failed: either another lease won, or the job turned out not due.
    if (!startedState.inProgress || startedState.currentLeaseId !== leaseId) {
      results.push({
        jobId: job.id,
        status: "skipped",
        reason: startedState.inProgress ? "already_claimed" : "not_due",
        state: formatRunState(startedState)
      });
      continue;
    }

    // Shutdown arrived between claim and execution — release the claim.
    if (this.shuttingDown) {
      const jobState = await this.stateStore.updateJob(job.id, {
        inProgress: false,
        inProgressStartedAt: null,
        currentIdempotencyKey: null,
        currentLeaseId: null,
        abandonedAt: iso(now),
        abandonedReason: "shutdown_requested_before_execution"
      });
      results.push({
        jobId: job.id,
        status: "abandoned",
        reason: "shutdown_requested_before_execution",
        state: formatRunState(jobState)
      });
      continue;
    }

    try {
      this.activeRuns += 1;
      const run = await routeJob({
        jobId: job.id,
        mode,
        orchestrator: this.orchestrator,
        executor: this.executor,
        skipOrchestrator: job.skipOrchestrator || false,
        shadowImport: true,
        idempotencyKey: idempotencyInstance,
      });
      // Success: record the run, clear the claim and retry bookkeeping.
      const nextRunAt = computeNextRun(job.trigger, now);
      const jobState = await this.stateStore.updateJob(job.id, {
        lastRunAt: iso(now),
        nextRunAt: iso(nextRunAt),
        lastStatus: "ok",
        lastRunFile: run.filePath,
        lastError: null,
        consecutiveFailures: 0,
        inProgress: false,
        inProgressStartedAt: null,
        currentIdempotencyKey: null,
        currentLeaseId: null,
        lastIdempotencyKey: idempotencyInstance,
        retryAttempt: 0,
        retrySlotKey: null
      });
      results.push({
        jobId: job.id,
        status: "ok",
        runFile: run.filePath,
        notificationFiles: run.notificationFiles || [],
        state: formatRunState(jobState)
      });
    } catch (error) {
      // Failure: bump the attempt counter for this slot and decide whether
      // to schedule a backoff retry or fall back to the regular next run.
      const existing = (await this.stateStore.load()).jobs[job.id] || {};
      const currentAttempt =
        existing.retrySlotKey === idempotencyInstance ? (existing.retryAttempt || 0) + 1 : 1;
      const maxAttempts = job.retry?.maxAttempts ?? 1;
      const retryScheduled = currentAttempt < maxAttempts;
      const nextRunAt = retryScheduled
        ? new Date(now.getTime() + buildRetryDelay(currentAttempt))
        : computeNextRun(job.trigger, now);
      const jobState = await this.stateStore.updateJob(job.id, {
        lastRunAt: iso(now),
        nextRunAt: iso(nextRunAt),
        lastStatus: "error",
        lastError: error.message,
        consecutiveFailures: (existing.consecutiveFailures || 0) + 1,
        inProgress: false,
        inProgressStartedAt: null,
        currentIdempotencyKey: null,
        currentLeaseId: null,
        retryAttempt: retryScheduled ? currentAttempt : 0,
        retrySlotKey: retryScheduled ? idempotencyInstance : null
      });
      results.push({
        jobId: job.id,
        status: "error",
        error: error.message,
        retryScheduled,
        retryAttempt: currentAttempt,
        runFile: error.runFile || null,
        state: formatRunState(jobState)
      });
    } finally {
      // Never let the counter go negative even if bookkeeping drifts.
      this.activeRuns = Math.max(0, this.activeRuns - 1);
    }
  }

  const tickResult = {
    timestamp: iso(now),
    maxConcurrentJobs: maxJobsPerTick,
    maxJobsPerTick,
    executionMode: "sequential",
    selectedJobCount: dueJobs.length,
    executedJobCount: results.filter((item) => item.status !== "skipped").length,
    deferredJobCount: deferredCount,
    sequentialExecution: true,
    dueCount: dueJobs.length,
    maintenance,
    results
  };
  this.lastTickResult = tickResult;
  return tickResult;
}
|
|
507
|
+
|
|
508
|
+
/**
 * Return the summary produced by the most recent tick(), or null when no
 * tick has completed yet (field initialized to null in the constructor).
 */
getLastTickResult() {
  return this.lastTickResult;
}
|
|
511
|
+
|
|
512
|
+
async runMaintenance(runtime, now = new Date()) {
|
|
513
|
+
const config = runtime.runtime?.maintenance || {};
|
|
514
|
+
const walThresholdBytes = Number(config.walCheckpointThresholdBytes ?? 64 * 1024 * 1024);
|
|
515
|
+
const pruneOnTick = config.pruneOnTick ?? true;
|
|
516
|
+
const sweepHandoffsOnTick = config.sweepPendingHandoffsOnTick ?? true;
|
|
517
|
+
const sweepFollowUpsOnTick = config.sweepPendingFollowUpsOnTick ?? true;
|
|
518
|
+
const agents = Object.keys(runtime.agents || {});
|
|
519
|
+
const wal = [];
|
|
520
|
+
for (const agentId of agents) {
|
|
521
|
+
try {
|
|
522
|
+
const result = await this.scheduler.memoryStore.manageSqlite(agentId, {
|
|
523
|
+
thresholdBytes: walThresholdBytes
|
|
524
|
+
});
|
|
525
|
+
wal.push({ agentId, ...result });
|
|
526
|
+
} catch (error) {
|
|
527
|
+
wal.push({ agentId, action: "error", error: error.message });
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
const handoffs = sweepHandoffsOnTick
|
|
532
|
+
? await this.deliveryManager.sweepPendingHandoffs({
|
|
533
|
+
now,
|
|
534
|
+
timeoutMinutes: runtime.runtime?.handoffs?.pendingTimeoutMinutes,
|
|
535
|
+
escalateOnExpiry: runtime.runtime?.handoffs?.escalateOnExpiry,
|
|
536
|
+
escalationDeliveryProfile: runtime.runtime?.handoffs?.escalationDeliveryProfile
|
|
537
|
+
})
|
|
538
|
+
: { timestamp: iso(now), pendingCount: 0, expiredCount: 0, results: [] };
|
|
539
|
+
|
|
540
|
+
const followUps = sweepFollowUpsOnTick
|
|
541
|
+
? await this.deliveryManager.sweepPendingFollowUps({
|
|
542
|
+
now,
|
|
543
|
+
timeoutMinutes: runtime.runtime?.followUps?.pendingTimeoutMinutes,
|
|
544
|
+
maxFollowUpDepth: runtime.runtime?.followUps?.maxFollowUpDepth,
|
|
545
|
+
escalateOnExpiry: runtime.runtime?.followUps?.escalateOnExpiry,
|
|
546
|
+
escalationDeliveryProfile: runtime.runtime?.followUps?.escalationDeliveryProfile
|
|
547
|
+
})
|
|
548
|
+
: { timestamp: iso(now), pendingCount: 0, expiredCount: 0, results: [] };
|
|
549
|
+
|
|
550
|
+
// Purge expired follow-up completion records
|
|
551
|
+
try {
|
|
552
|
+
await this.stateStore.ensureReady();
|
|
553
|
+
this.stateStore.purgeExpiredFollowUps(now.toISOString());
|
|
554
|
+
} catch (_err) {
|
|
555
|
+
// Non-fatal — log and continue
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
// Self-healing maintenance
|
|
559
|
+
let healing = { expiredRules: 0, prunedEntries: 0, promoted: 0 };
|
|
560
|
+
try {
|
|
561
|
+
if (!this.healingTick) {
|
|
562
|
+
await this.stateStore.ensureReady();
|
|
563
|
+
const repairLog = new RepairLog(this.stateStore.db);
|
|
564
|
+
repairLog.ensureSchema();
|
|
565
|
+
const ruleStaging = new RuleStaging(this.stateStore.db);
|
|
566
|
+
ruleStaging.ensureSchema();
|
|
567
|
+
this.repairLog = repairLog;
|
|
568
|
+
this.ruleStaging = ruleStaging;
|
|
569
|
+
this.healingTick = new HealingTick({
|
|
570
|
+
repairLog,
|
|
571
|
+
ruleStaging,
|
|
572
|
+
executeAction: async (action, context) => {
|
|
573
|
+
// Action execution is wired in Chunk 8 (action handlers)
|
|
574
|
+
console.log(JSON.stringify({ service: "healing_daemon", event: "action_executed", action, context, timestamp: new Date().toISOString() }));
|
|
575
|
+
return true;
|
|
576
|
+
},
|
|
577
|
+
notify: this.telegramSendFn
|
|
578
|
+
? async (severity, message) => { if (severity !== "silent") await this.telegramSendFn(message); }
|
|
579
|
+
: async () => {}
|
|
580
|
+
});
|
|
581
|
+
}
|
|
582
|
+
healing = this.healingTick.runMaintenanceSweep();
|
|
583
|
+
} catch (err) {
|
|
584
|
+
console.log(JSON.stringify({ service: "healing_daemon", event: "maintenance_error", error: err.message, timestamp: new Date().toISOString() }));
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
const pruning = pruneOnTick
|
|
588
|
+
? await Promise.all([
|
|
589
|
+
this.executor.runStore.prune(),
|
|
590
|
+
this.deliveryManager.notificationStore.prune(),
|
|
591
|
+
this.deliveryManager.deliveryStore.prune()
|
|
592
|
+
])
|
|
593
|
+
: [];
|
|
594
|
+
|
|
595
|
+
// Sweep pending decisions (Pillar 2)
|
|
596
|
+
let pendingDecisions = { rePinged: 0, parked: 0 };
|
|
597
|
+
if (this.deliveryAck) {
|
|
598
|
+
try {
|
|
599
|
+
pendingDecisions = await this.deliveryAck.sweepPendingDecisions({
|
|
600
|
+
now,
|
|
601
|
+
sendFn: this.telegramSendFn || (() => {}),
|
|
602
|
+
});
|
|
603
|
+
} catch (_err) { /* non-fatal */ }
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
// Sweep timed-out tasks (Pillar 3)
|
|
607
|
+
let taskSweep = { timedOut: 0 };
|
|
608
|
+
if (this.taskContract) {
|
|
609
|
+
try {
|
|
610
|
+
const overdue = this.taskContract.sweepTimedOutTasks();
|
|
611
|
+
taskSweep = { timedOut: overdue.length };
|
|
612
|
+
} catch (_err) { /* non-fatal */ }
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// Prune old envelopes (30-day retention)
|
|
616
|
+
let envelopePruned = 0;
|
|
617
|
+
if (this.envelopeStore) {
|
|
618
|
+
try {
|
|
619
|
+
envelopePruned = this.envelopeStore.prune(30);
|
|
620
|
+
} catch (_err) { /* non-fatal */ }
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
// Embedding maintenance — run every 5th cycle to avoid overhead
|
|
624
|
+
this.maintenanceCycleCount += 1;
|
|
625
|
+
let embeddingMaintenance = { ran: false, cycle: this.maintenanceCycleCount };
|
|
626
|
+
if (this.maintenanceCycleCount % 5 === 0 && this.scheduler.embeddingIndex) {
|
|
627
|
+
const staleAgents = [];
|
|
628
|
+
for (const agentId of agents) {
|
|
629
|
+
try {
|
|
630
|
+
const health = await this.scheduler.memoryStore.getEmbeddingHealth(agentId);
|
|
631
|
+
const staleCount = health.embeddingHealth?.staleCount || 0;
|
|
632
|
+
const missingCount = health.embeddingHealth?.missingCount || 0;
|
|
633
|
+
if (staleCount > 0 || missingCount > 0) {
|
|
634
|
+
staleAgents.push({ agentId, staleCount, missingCount });
|
|
635
|
+
}
|
|
636
|
+
} catch { /* non-fatal */ }
|
|
637
|
+
}
|
|
638
|
+
if (staleAgents.length > 0) {
|
|
639
|
+
// Fire-and-forget background rebuilds
|
|
640
|
+
for (const { agentId } of staleAgents) {
|
|
641
|
+
this.scheduler.memoryStore
|
|
642
|
+
.rebuildEmbeddings(agentId, { embeddingIndex: this.scheduler.embeddingIndex })
|
|
643
|
+
.catch((err) => {
|
|
644
|
+
console.warn(`[nemoris] embedding rebuild failed for ${agentId}: ${err?.message || err}`);
|
|
645
|
+
});
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
embeddingMaintenance = { ran: true, cycle: this.maintenanceCycleCount, staleAgents };
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
const state = {
|
|
652
|
+
timestamp: iso(now),
|
|
653
|
+
wal,
|
|
654
|
+
handoffs,
|
|
655
|
+
followUps,
|
|
656
|
+
pruning,
|
|
657
|
+
pendingDecisions,
|
|
658
|
+
taskSweep,
|
|
659
|
+
envelopePruned,
|
|
660
|
+
embeddingMaintenance,
|
|
661
|
+
healing
|
|
662
|
+
};
|
|
663
|
+
await this.stateStore.setMeta("maintenance", state);
|
|
664
|
+
return state;
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
requestShutdown(now = new Date()) {
|
|
668
|
+
this.shuttingDown = true;
|
|
669
|
+
this.shutdownRequestedAt = iso(now);
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
async waitForIdle(timeoutMs = 15000) {
|
|
673
|
+
const deadline = Date.now() + timeoutMs;
|
|
674
|
+
while (this.activeRuns > 0 && Date.now() < deadline) {
|
|
675
|
+
await new Promise((resolve) => setTimeout(resolve, 25));
|
|
676
|
+
}
|
|
677
|
+
return {
|
|
678
|
+
idle: this.activeRuns === 0,
|
|
679
|
+
activeRuns: this.activeRuns
|
|
680
|
+
};
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
async close(options = {}) {
|
|
684
|
+
this.requestShutdown();
|
|
685
|
+
await this.waitForIdle(options.drainTimeoutMs || 15000);
|
|
686
|
+
if (this.mcpConsumer) {
|
|
687
|
+
try { await this.mcpConsumer.shutdown(); } catch { /* best-effort */ }
|
|
688
|
+
}
|
|
689
|
+
this.stateStore.close();
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
/**
 * Perform one pass of the interactive drain loop: pull every queued
 * interactive job from the state store, run each through the supplied
 * callback, and record the terminal status ("succeeded", "failed", or
 * re-"queued" when the callback asks for a requeue).
 *
 * Exported for testability. The daemon's interactiveDrainLoop() calls this
 * in a loop with a 100ms sleep between passes.
 *
 * @param {import('./scheduler-state.js').SchedulerStateStore} stateStore
 * @param {(job: object) => Promise<object>} processJob - Executes one job;
 *   may return `{ requeue: true, reason }` to put the job back on the queue.
 * @param {object} [options]
 * @param {(job: object, error: Error) => Promise<void>} [options.onJobFailure]
 *   - Best-effort hook invoked after a job fails; its own errors are ignored.
 * @returns {Promise<Array<{jobId: string, status: string, error?: string}>>}
 */
export async function drainInteractiveOnce(stateStore, processJob, options = {}) {
  const results = [];

  for (const job of stateStore.drainQueuedInteractiveJobs()) {
    const jobId = job.job_id;
    try {
      const outcome = await processJob(job);
      if (outcome?.requeue) {
        // Callback asked for another attempt later: put it back on the queue.
        stateStore.updateInteractiveJobStatus(jobId, "queued");
        results.push({ jobId, status: "requeued", reason: outcome.reason || "requeued" });
      } else {
        stateStore.updateInteractiveJobStatus(jobId, "succeeded");
        results.push({ jobId, status: "succeeded" });
      }
    } catch (error) {
      console.error(JSON.stringify({ service: "interactive_drain", jobId: job.job_id, error: error.message, stack: error.stack?.split("\n").slice(0, 5) }));
      stateStore.updateInteractiveJobStatus(jobId, "failed");
      results.push({ jobId, status: "failed", error: error.message });
      if (options.onJobFailure) {
        try { await options.onJobFailure(job, error); } catch { /* hook is best-effort */ }
      }
    }
  }

  return results;
}
|