mcp-coordinator 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/server/backup.d.ts +7 -0
- package/dist/cli/server/backup.js +162 -0
- package/dist/cli/server/index.js +5 -0
- package/dist/cli/server/restore.d.ts +2 -0
- package/dist/cli/server/restore.js +117 -0
- package/dist/src/consultation.d.ts +8 -0
- package/dist/src/consultation.js +8 -0
- package/dist/src/db-adapter.d.ts +30 -0
- package/dist/src/db-adapter.js +32 -1
- package/dist/src/dependency-map.js +2 -2
- package/dist/src/file-tracker.d.ts +10 -0
- package/dist/src/file-tracker.js +32 -0
- package/dist/src/http/handle-health.d.ts +23 -0
- package/dist/src/http/handle-health.js +86 -0
- package/dist/src/impact-scorer.js +87 -50
- package/dist/src/metrics.d.ts +83 -0
- package/dist/src/metrics.js +162 -0
- package/dist/src/mqtt-bridge.d.ts +19 -0
- package/dist/src/mqtt-bridge.js +53 -5
- package/dist/src/serve-http.js +35 -1
- package/dist/src/server-setup.d.ts +2 -0
- package/dist/src/server-setup.js +10 -2
- package/dist/src/sse-emitter.d.ts +6 -0
- package/dist/src/sse-emitter.js +50 -2
- package/package.json +3 -1
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
/**
|
|
3
|
+
* Check whether the coordinator daemon appears to be running.
|
|
4
|
+
* Returns the pid if alive, or null otherwise.
|
|
5
|
+
*/
|
|
6
|
+
export declare function getRunningCoordinatorPid(configDir: string): number | null;
|
|
7
|
+
export declare function createServerBackupCommand(): Command;
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { existsSync, readFileSync, statSync } from "fs";
|
|
3
|
+
import { join, resolve, basename } from "path";
|
|
4
|
+
import { create as tarCreate } from "tar";
|
|
5
|
+
import { getConfigDir, loadConfig } from "../config.js";
|
|
6
|
+
/**
 * Build a filename-safe UTC timestamp: YYYY-MM-DD-HHMMSS.
 */
function timestampSlug(date = new Date()) {
    const two = (value) => String(value).padStart(2, "0");
    const datePart = [
        date.getUTCFullYear(),
        two(date.getUTCMonth() + 1),
        two(date.getUTCDate()),
    ].join("-");
    const timePart = `${two(date.getUTCHours())}${two(date.getUTCMinutes())}${two(date.getUTCSeconds())}`;
    return `${datePart}-${timePart}`;
}
|
|
14
|
+
/**
 * Liveness probe for a pid. Signal 0 performs the existence/permission
 * check without actually delivering a signal: ESRCH means the process is
 * gone; EPERM means it exists but belongs to another user, which still
 * counts as alive.
 */
function isProcessAlive(pid) {
    try {
        process.kill(pid, 0);
    }
    catch (err) {
        // Only EPERM implies a live process we cannot signal.
        return err.code === "EPERM";
    }
    return true;
}
|
|
31
|
+
/**
 * Check whether the coordinator daemon appears to be running by reading
 * <configDir>/server.pid and probing the recorded pid.
 *
 * @param configDir directory that holds the server.pid file
 * @returns the pid when that process is alive, otherwise null
 */
export function getRunningCoordinatorPid(configDir) {
    const pidPath = join(configDir, "server.pid");
    let raw;
    try {
        // Read directly and treat any failure (ENOENT, EACCES, …) as "not
        // running". The previous existsSync-then-read sequence was racy: the
        // daemon can delete its pid file between the check and the read,
        // which turned a clean "null" into an unhandled exception.
        raw = readFileSync(pidPath, "utf-8").trim();
    }
    catch {
        return null;
    }
    const pid = parseInt(raw, 10);
    if (Number.isNaN(pid))
        return null;
    return isProcessAlive(pid) ? pid : null;
}
|
|
45
|
+
/**
 * Count regular files under `root`, recursively. Used purely for the
 * post-backup report — tar handles the actual packing. A missing root
 * yields zero.
 */
async function countFiles(root) {
    if (!existsSync(root))
        return 0;
    const { readdir } = await import("fs/promises");
    let total = 0;
    // Explicit work stack instead of recursion; visit order does not
    // matter for a count.
    const pending = [root];
    while (pending.length > 0) {
        const dir = pending.pop();
        for (const entry of await readdir(dir, { withFileTypes: true })) {
            if (entry.isDirectory()) {
                pending.push(join(dir, entry.name));
            }
            else if (entry.isFile()) {
                total += 1;
            }
        }
    }
    return total;
}
|
|
68
|
+
/**
 * Collect tarball entries as paths relative to configDir.
 *
 * The archive layout mirrors the runtime layout so that extracting it
 * straight into ~/.mcp-coordinator/ is a valid restore. A data directory
 * living outside configDir (custom --data-dir) is intentionally omitted
 * here — the caller packs it into a separate sibling archive, since tar's
 * `cwd` option can only anchor one directory.
 */
function buildEntries(configDir, dataDirAbsolute) {
    const picked = [];
    if (existsSync(join(configDir, "config.json"))) {
        picked.push("config.json");
    }
    const defaultDataDir = join(configDir, "data");
    const usesDefaultData = resolve(dataDirAbsolute) === resolve(defaultDataDir);
    if (usesDefaultData && existsSync(defaultDataDir)) {
        picked.push("data");
    }
    return picked;
}
|
|
93
|
+
/**
 * `mcp-coordinator server backup` — snapshot config.json plus the SQLite
 * data directory into a gzipped tarball.
 *
 * Refuses to run while the daemon is up (unless --force): better-sqlite3's
 * WAL journal may hold uncommitted writes a plain file copy would miss.
 * For online backups, switch to SQLite's Online Backup API
 * (`db.backup(path)` from better-sqlite3) and snapshot config.json
 * separately.
 */
export function createServerBackupCommand() {
    // Derive the sibling archive name used for a custom data dir. The old
    // `replace(/\.tar\.gz$/, ".data.tar.gz")` silently no-ops when the user
    // picks an output name with a different extension (e.g. --output
    // backup.tgz), which made the data archive OVERWRITE the primary one.
    const deriveDataArchivePath = (outputPath) => outputPath.endsWith(".tar.gz")
        ? outputPath.replace(/\.tar\.gz$/, ".data.tar.gz")
        : `${outputPath}.data.tar.gz`;
    return new Command("backup")
        .description("Snapshot the coordinator config + SQLite database to a tar.gz archive")
        .option("--output <path>", "Output tarball path (default ./mcp-coordinator-backup-<ts>.tar.gz)")
        .option("--data-dir <path>", "Data directory to back up (overrides config.server.data_dir)")
        .option("--force", "Skip the running-coordinator safety check")
        .action(async (opts) => {
        const configDir = getConfigDir();
        const config = loadConfig(configDir);
        // CLI flag wins, then the env override, then the configured default.
        const dataDir = resolve(opts.dataDir ?? process.env.COORDINATOR_DATA_DIR ?? config.server.data_dir);
        // Safety: refuse when the daemon is up. WAL writes might be in flight.
        const runningPid = getRunningCoordinatorPid(configDir);
        if (runningPid !== null && !opts.force) {
            console.error(`Coordinator is running (PID ${runningPid}).`);
            console.error("Refusing to back up: live SQLite WAL writes may be in flight.");
            console.error("Either stop it first ('mcp-coordinator server stop') or pass --force.");
            process.exit(1);
        }
        if (!existsSync(configDir)) {
            console.error(`No coordinator config directory at ${configDir} — nothing to back up.`);
            process.exit(1);
        }
        const ts = timestampSlug();
        const outputPath = resolve(opts.output ?? join(process.cwd(), `mcp-coordinator-backup-${ts}.tar.gz`));
        const defaultDataDir = join(configDir, "data");
        const dataIsCustom = resolve(dataDir) !== resolve(defaultDataDir);
        // Pack ~/.mcp-coordinator entries from configDir as cwd.
        const entries = buildEntries(configDir, dataDir);
        // A custom data dir is packed with its parent as cwd so the archive
        // contains just the directory name — restore can reproduce the
        // original location or be redirected with --data-dir.
        const customDataEntries = [];
        if (dataIsCustom && existsSync(dataDir)) {
            customDataEntries.push({ cwd: resolve(dataDir, ".."), entry: basename(dataDir) });
        }
        if (entries.length === 0 && customDataEntries.length === 0) {
            console.error("Nothing to back up: no config.json and no data directory found.");
            process.exit(1);
        }
        // First archive pass: config + default data (if any).
        if (entries.length > 0) {
            await tarCreate({ gzip: true, file: outputPath, cwd: configDir, portable: true }, entries);
        }
        // tar's gzip mode cannot append to an existing archive, so a custom
        // data dir is emitted as a documented sibling archive instead.
        if (customDataEntries.length > 0) {
            const dataArchive = deriveDataArchivePath(outputPath);
            for (const { cwd, entry } of customDataEntries) {
                await tarCreate({ gzip: true, file: dataArchive, cwd, portable: true }, [entry]);
            }
            console.log(`Custom data dir packed separately: ${dataArchive}`);
        }
        // outputPath only exists if entries.length > 0; report on whichever
        // archive(s) we actually produced.
        const reportPath = entries.length > 0
            ? outputPath
            : deriveDataArchivePath(outputPath);
        const sizeBytes = existsSync(reportPath) ? statSync(reportPath).size : 0;
        const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(2);
        const fileCount = (existsSync(join(configDir, "config.json")) ? 1 : 0) +
            (await countFiles(dataIsCustom ? dataDir : defaultDataDir));
        console.log("Backup complete.");
        console.log(` Archive: ${reportPath}`);
        console.log(` Size: ${sizeMB} MB (${sizeBytes} bytes)`);
        console.log(` Files: ${fileCount}`);
        console.log(` ConfigDir: ${configDir}`);
        console.log(` DataDir: ${dataDir}${dataIsCustom ? " (custom)" : ""}`);
    });
}
|
package/dist/cli/server/index.js
CHANGED
|
@@ -3,11 +3,16 @@ import { createServerStartCommand } from "./start.js";
|
|
|
3
3
|
import { createServerStopCommand } from "./stop.js";
|
|
4
4
|
import { createServerStatusCommand } from "./status.js";
|
|
5
5
|
import { createServerLogsCommand } from "./logs.js";
|
|
6
|
+
import { createServerBackupCommand } from "./backup.js";
|
|
7
|
+
import { createServerRestoreCommand } from "./restore.js";
|
|
6
8
|
/**
 * Assemble the `server` subcommand group. Registration order is the order
 * shown in --help output, so it is kept stable.
 */
export function createServerProgram() {
    const server = new Command("server").description("Manage the coordination server");
    const subcommands = [
        createServerStartCommand(),
        createServerStopCommand(),
        createServerStatusCommand(),
        createServerLogsCommand(),
        // v0.4 Operability
        createServerBackupCommand(),
        createServerRestoreCommand(),
    ];
    for (const sub of subcommands) {
        server.addCommand(sub);
    }
    return server;
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { existsSync, mkdirSync, renameSync, statSync } from "fs";
|
|
3
|
+
import { resolve } from "path";
|
|
4
|
+
import { extract as tarExtract, list as tarList } from "tar";
|
|
5
|
+
import { getConfigDir } from "../config.js";
|
|
6
|
+
import { getRunningCoordinatorPid } from "./backup.js";
|
|
7
|
+
// Filename-safe UTC timestamp: YYYY-MM-DD-HHMMSS.
function timestampSlug(date = new Date()) {
    const padded = [
        date.getUTCMonth() + 1,
        date.getUTCDate(),
        date.getUTCHours(),
        date.getUTCMinutes(),
        date.getUTCSeconds(),
    ].map((n) => String(n).padStart(2, "0"));
    return `${date.getUTCFullYear()}-${padded[0]}-${padded[1]}-${padded[2]}${padded[3]}${padded[4]}`;
}
|
|
12
|
+
/**
 * List the distinct top-level entry names in a tarball without extracting
 * anything, so the caller can validate archive structure before touching
 * the user's existing config dir.
 */
async function listTarballEntries(tarPath) {
    const seen = new Set();
    await tarList({
        file: tarPath,
        onReadEntry: (entry) => {
            // Normalize separators, then keep just the first path segment.
            // Directory entries carry a trailing slash, which split handles.
            const [head] = entry.path.replace(/\\/g, "/").split("/");
            if (head)
                seen.add(head);
        },
    });
    // Set preserves insertion order, matching the original array behavior.
    return [...seen];
}
|
|
30
|
+
/**
 * `mcp-coordinator server restore` — replace the live config dir with the
 * contents of a backup tarball produced by `server backup`.
 *
 * Order of operations is deliberate and load-bearing:
 *   1. validate the tarball (existence, regular file, expected entries);
 *   2. refuse while the daemon runs (unless --force);
 *   3. move the current config dir aside as a snapshot (unless --no-backup);
 *   4. recreate the dir and extract;
 *   5. on extraction failure, attempt to swap the snapshot back in.
 */
export function createServerRestoreCommand() {
    return new Command("restore")
        .description("Restore a coordinator config + database snapshot from a tar.gz archive")
        .argument("<tarball>", "Path to the backup .tar.gz produced by 'mcp-coordinator server backup'")
        .option("--force", "Skip the running-coordinator safety check")
        .option("--no-backup", "Do not snapshot the existing config dir before overwriting")
        .option("--data-dir <path>", "Override data directory (rarely needed)")
        .action(async (tarballArg, opts) => {
        const tarPath = resolve(tarballArg);
        if (!existsSync(tarPath)) {
            console.error(`Tarball not found: ${tarPath}`);
            process.exit(1);
        }
        // Reject directories / devices up front; tar would fail later with a
        // less actionable message.
        const tarStat = statSync(tarPath);
        if (!tarStat.isFile()) {
            console.error(`Not a regular file: ${tarPath}`);
            process.exit(1);
        }
        const configDir = getConfigDir();
        // Safety: refuse when the daemon is running so we don't clobber an
        // open SQLite handle (would corrupt the WAL on the daemon side).
        const runningPid = getRunningCoordinatorPid(configDir);
        if (runningPid !== null && !opts.force) {
            console.error(`Coordinator is running (PID ${runningPid}).`);
            console.error("Refusing to restore: stop the coordinator first or pass --force.");
            process.exit(1);
        }
        // Validate tarball contents BEFORE moving anything aside.
        let entries;
        try {
            entries = await listTarballEntries(tarPath);
        }
        catch (err) {
            console.error(`Failed to read tarball: ${err.message}`);
            process.exit(1);
        }
        const hasConfig = entries.includes("config.json");
        const hasData = entries.includes("data");
        if (!hasConfig && !hasData) {
            console.error("Tarball does not contain expected entries (config.json or data/).");
            console.error(`Top-level entries found: ${entries.join(", ") || "(none)"}`);
            process.exit(1);
        }
        // Snapshot the existing config dir before overwriting. Commander
        // auto-negates `--no-backup` into opts.backup (seeded true), so
        // opts.backup === false only when the user explicitly opted out.
        const shouldSnapshot = opts.backup !== false;
        let snapshotPath = null;
        if (shouldSnapshot && existsSync(configDir)) {
            snapshotPath = `${configDir}.bak-${timestampSlug()}`;
            renameSync(configDir, snapshotPath);
            console.log(`Existing config moved aside: ${snapshotPath}`);
        }
        // Recreate the target dir and extract.
        mkdirSync(configDir, { recursive: true });
        try {
            await tarExtract({ file: tarPath, cwd: configDir });
        }
        catch (err) {
            console.error(`Extraction failed: ${err.message}`);
            // Best-effort rollback: move the (possibly partially extracted)
            // dir out of the way, then rename the snapshot back into place.
            // No emptiness check is performed — the .failed-* rename keeps
            // whatever tar wrote for post-mortem inspection.
            if (snapshotPath !== null && existsSync(snapshotPath)) {
                try {
                    renameSync(configDir, `${configDir}.failed-${timestampSlug()}`);
                    renameSync(snapshotPath, configDir);
                    console.error("Rolled back to previous config dir.");
                }
                catch {
                    // Either rename failed; leave both dirs for the operator.
                    console.error(`Manual recovery required — snapshot at: ${snapshotPath}`);
                }
            }
            process.exit(1);
        }
        console.log("Restore complete.");
        console.log(` Source: ${tarPath}`);
        console.log(` ConfigDir: ${configDir}`);
        console.log(` Restored: ${entries.filter((e) => e === "config.json" || e === "data").join(", ")}`);
        if (snapshotPath !== null) {
            console.log(` Previous: ${snapshotPath} (delete once verified)`);
        }
        // --data-dir is accepted but not acted on here; extraction always
        // targets the default layout, so tell the user what to adjust.
        if (opts.dataDir !== undefined) {
            console.log(` Note: --data-dir was provided but restore extracts to default location.\n` +
                ` Update config.json or COORDINATOR_DATA_DIR if you need a non-default path.`);
        }
    });
}
|
|
@@ -74,6 +74,14 @@ export declare class Consultation {
|
|
|
74
74
|
* parsing the thread list themselves.
|
|
75
75
|
*/
|
|
76
76
|
assigned_to_me?: string;
|
|
77
|
+
/**
|
|
78
|
+
* P2 perf: bound resolved-thread queries to a recency window. Without
|
|
79
|
+
* this, the impact scorer would scan all-time resolved threads on every
|
|
80
|
+
* announce_work call (O(historical-threads) per scoring pass). The window
|
|
81
|
+
* applies to resolved_at when status='resolved', otherwise to created_at,
|
|
82
|
+
* so the filter is meaningful for both states.
|
|
83
|
+
*/
|
|
84
|
+
since_minutes?: number;
|
|
77
85
|
}): Thread[];
|
|
78
86
|
getThreadUpdates(agentId: string, since?: string): ThreadMessage[];
|
|
79
87
|
logActionSummary(params: {
|
package/dist/src/consultation.js
CHANGED
|
@@ -320,6 +320,14 @@ export class Consultation {
|
|
|
320
320
|
sql += " AND (assigned_to IS NULL OR assigned_to = ?)";
|
|
321
321
|
params.push(filters.assigned_to_me);
|
|
322
322
|
}
|
|
323
|
+
if (typeof filters.since_minutes === "number") {
|
|
324
|
+
// For resolved threads, gate on resolved_at (the moment that matters
|
|
325
|
+
// for "recent enough to still influence scoring"). For open/resolving
|
|
326
|
+
// threads, gate on created_at since they have no resolved_at yet.
|
|
327
|
+
// COALESCE picks the right column per row.
|
|
328
|
+
sql += " AND COALESCE(resolved_at, created_at) > datetime('now', '-' || ? || ' minutes')";
|
|
329
|
+
params.push(filters.since_minutes);
|
|
330
|
+
}
|
|
323
331
|
sql += " ORDER BY created_at DESC";
|
|
324
332
|
return db.prepare(sql).all(...params);
|
|
325
333
|
}
|
package/dist/src/db-adapter.d.ts
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Database adapter surface.
|
|
3
|
+
*
|
|
4
|
+
* Design intent: this file is the *contract* both `createBetterSqlite3` and
|
|
5
|
+
* `createBunSqlite` (in `database.ts`) implement. The interfaces are a strict
|
|
6
|
+
* subset of better-sqlite3's API that Bun:sqlite also satisfies, so callers
|
|
7
|
+
* stay portable across both runtimes.
|
|
8
|
+
*
|
|
9
|
+
* Helpers (e.g. `withTransaction`) live here so portable code paths can use
|
|
10
|
+
* one canonical entry point without each call site re-deriving the
|
|
11
|
+
* `db.transaction(fn)()` two-step pattern.
|
|
12
|
+
*/
|
|
1
13
|
export interface RunResult {
|
|
2
14
|
changes: number;
|
|
3
15
|
lastInsertRowid: number;
|
|
@@ -13,3 +25,21 @@ export interface DatabaseAdapter {
|
|
|
13
25
|
close(): void;
|
|
14
26
|
transaction<T>(fn: () => T): () => T;
|
|
15
27
|
}
|
|
28
|
+
/**
|
|
29
|
+
* Run `fn` inside a single SQLite transaction and return its result.
|
|
30
|
+
*
|
|
31
|
+
* Replaces the verbose two-step pattern:
|
|
32
|
+
*
|
|
33
|
+
* const tx = db.transaction(() => { ...; return value; });
|
|
34
|
+
* const value = tx();
|
|
35
|
+
*
|
|
36
|
+
* with:
|
|
37
|
+
*
|
|
38
|
+
* const value = withTransaction(db, () => { ...; return value; });
|
|
39
|
+
*
|
|
40
|
+
* Errors thrown inside `fn` propagate to the caller and the transaction is
|
|
41
|
+
* rolled back by the underlying driver (better-sqlite3 / bun:sqlite both do
|
|
42
|
+
* this). Use this for any read-modify-write block where multiple statements
|
|
43
|
+
* must be atomic.
|
|
44
|
+
*/
|
|
45
|
+
export declare function withTransaction<T>(db: DatabaseAdapter, fn: () => T): T;
|
package/dist/src/db-adapter.js
CHANGED
|
@@ -1 +1,32 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Database adapter surface.
|
|
3
|
+
*
|
|
4
|
+
* Design intent: this file is the *contract* both `createBetterSqlite3` and
|
|
5
|
+
* `createBunSqlite` (in `database.ts`) implement. The interfaces are a strict
|
|
6
|
+
* subset of better-sqlite3's API that Bun:sqlite also satisfies, so callers
|
|
7
|
+
* stay portable across both runtimes.
|
|
8
|
+
*
|
|
9
|
+
* Helpers (e.g. `withTransaction`) live here so portable code paths can use
|
|
10
|
+
* one canonical entry point without each call site re-deriving the
|
|
11
|
+
* `db.transaction(fn)()` two-step pattern.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Execute `fn` atomically inside one SQLite transaction and return its
 * result — the canonical shorthand for the `db.transaction(fn)()` two-step.
 * Errors thrown by `fn` propagate to the caller; the underlying driver
 * (better-sqlite3 / bun:sqlite) rolls the transaction back in that case.
 */
export function withTransaction(db, fn) {
    const runInTransaction = db.transaction(fn);
    return runInTransaction();
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { getDb } from "./database.js";
|
|
2
|
+
import { withTransaction } from "./db-adapter.js";
|
|
2
3
|
export class DependencyMapper {
|
|
3
4
|
getMap() {
|
|
4
5
|
const db = getDb();
|
|
@@ -20,12 +21,11 @@ export class DependencyMapper {
|
|
|
20
21
|
VALUES (?, ?, ?, ?)
|
|
21
22
|
ON CONFLICT(module_id) DO UPDATE SET
|
|
22
23
|
depends_on = excluded.depends_on, exports = excluded.exports, owners = excluded.owners`);
|
|
23
|
-
|
|
24
|
+
withTransaction(db, () => {
|
|
24
25
|
for (const [id, info] of Object.entries(map)) {
|
|
25
26
|
stmt.run(id, JSON.stringify(info.depends_on), JSON.stringify(info.exports), JSON.stringify(info.owners));
|
|
26
27
|
}
|
|
27
28
|
});
|
|
28
|
-
tx();
|
|
29
29
|
}
|
|
30
30
|
getModuleInfo(moduleId) {
|
|
31
31
|
const db = getDb();
|
|
@@ -17,5 +17,15 @@ export declare class FileTracker {
|
|
|
17
17
|
conflict: boolean;
|
|
18
18
|
agents: string[];
|
|
19
19
|
};
|
|
20
|
+
/**
|
|
21
|
+
* P2 perf: batch lookup of recent file→agents activity. Replaces N
|
|
22
|
+
* `checkFileConflict` calls (one per file) with a single SQL query, then
|
|
23
|
+
* builds an in-memory reverse index. The impact scorer uses this so its
|
|
24
|
+
* per-file inner loop is O(1) Map.get() rather than O(F) SQL round-trips.
|
|
25
|
+
*
|
|
26
|
+
* Excludes the calling agent (so the scorer doesn't flag the announcer
|
|
27
|
+
* against themselves). Returns Map<file_path, Set<agent_id>>.
|
|
28
|
+
*/
|
|
29
|
+
getFileToAgentsIndex(filePaths: string[], excludeAgentId: string, withinMinutes?: number): Map<string, Set<string>>;
|
|
20
30
|
fileToModule(filePath: string): string;
|
|
21
31
|
}
|
package/dist/src/file-tracker.js
CHANGED
|
@@ -31,6 +31,38 @@ export class FileTracker {
|
|
|
31
31
|
AND created_at > datetime('now', '-' || ? || ' minutes')`).all(filePath, agentId, withinMinutes);
|
|
32
32
|
return { conflict: rows.length > 0, agents: rows.map((r) => r.agent_id) };
|
|
33
33
|
}
|
|
34
|
+
/**
|
|
35
|
+
* P2 perf: batch lookup of recent file→agents activity. Replaces N
|
|
36
|
+
* `checkFileConflict` calls (one per file) with a single SQL query, then
|
|
37
|
+
* builds an in-memory reverse index. The impact scorer uses this so its
|
|
38
|
+
* per-file inner loop is O(1) Map.get() rather than O(F) SQL round-trips.
|
|
39
|
+
*
|
|
40
|
+
* Excludes the calling agent (so the scorer doesn't flag the announcer
|
|
41
|
+
* against themselves). Returns Map<file_path, Set<agent_id>>.
|
|
42
|
+
*/
|
|
43
|
+
getFileToAgentsIndex(filePaths, excludeAgentId, withinMinutes = 30) {
|
|
44
|
+
const index = new Map();
|
|
45
|
+
if (filePaths.length === 0)
|
|
46
|
+
return index;
|
|
47
|
+
const db = getDb();
|
|
48
|
+
// Dynamic IN-list — better-sqlite3 binds each ? positionally. Cheap because
|
|
49
|
+
// the impact scorer only passes target_files + depends_on_files (typically
|
|
50
|
+
// a handful of files per announce_work call).
|
|
51
|
+
const placeholders = filePaths.map(() => "?").join(",");
|
|
52
|
+
const rows = db.prepare(`SELECT DISTINCT file_path, agent_id FROM file_activity
|
|
53
|
+
WHERE file_path IN (${placeholders})
|
|
54
|
+
AND agent_id != ?
|
|
55
|
+
AND created_at > datetime('now', '-' || ? || ' minutes')`).all(...filePaths, excludeAgentId, withinMinutes);
|
|
56
|
+
for (const r of rows) {
|
|
57
|
+
let set = index.get(r.file_path);
|
|
58
|
+
if (!set) {
|
|
59
|
+
set = new Set();
|
|
60
|
+
index.set(r.file_path, set);
|
|
61
|
+
}
|
|
62
|
+
set.add(r.agent_id);
|
|
63
|
+
}
|
|
64
|
+
return index;
|
|
65
|
+
}
|
|
34
66
|
fileToModule(filePath) {
|
|
35
67
|
// Strip leading / so "/server/src/x.ts" and "server/src/x.ts" produce the
|
|
36
68
|
// same module name. Without this, split("/") on an absolute path yields
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { IncomingMessage, ServerResponse } from "http";
|
|
2
|
+
import type { CoordinatorServices } from "../server-setup.js";
|
|
3
|
+
/**
|
|
4
|
+
* Liveness probe — process is alive. Always returns 200 with no dep checks
|
|
5
|
+
* so orchestrators don't restart the pod over transient downstream failures.
|
|
6
|
+
*/
|
|
7
|
+
export declare function handleLivez(_req: IncomingMessage, res: ServerResponse): void;
|
|
8
|
+
/**
|
|
9
|
+
* Readiness probe — downstream deps must all be green for the LB to route
|
|
10
|
+
* traffic here. 503 when any check fails so the pod is drained until ready.
|
|
11
|
+
*
|
|
12
|
+
* Each check is wrapped in try/catch so a thrown DB/MQTT error becomes a
|
|
13
|
+
* structured `{ok:false,error:"…"}` instead of a 500. The response shape is
|
|
14
|
+
* identical between 200 and 503 so consumers can parse uniformly.
|
|
15
|
+
*/
|
|
16
|
+
export declare function handleReadyz(_req: IncomingMessage, res: ServerResponse, services: Pick<CoordinatorServices, "mqttBridge">): void;
|
|
17
|
+
/**
|
|
18
|
+
* Backwards-compatible alias. The original /health route returned a fixed
|
|
19
|
+
* {status:"ok",version} payload with no dep checks; semantically that is a
|
|
20
|
+
* liveness probe, so we delegate. Anything that polled /health for "is the
|
|
21
|
+
* process up" continues to work without changes.
|
|
22
|
+
*/
|
|
23
|
+
export declare function handleHealth(req: IncomingMessage, res: ServerResponse): void;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { getDb } from "../database.js";
|
|
2
|
+
import { json } from "./utils.js";
|
|
3
|
+
import { getVersion } from "../../cli/version.js";
|
|
4
|
+
/**
|
|
5
|
+
* v0.4 Operability: Kubernetes-style health probes.
|
|
6
|
+
*
|
|
7
|
+
* - /livez → is the process alive? Used by an orchestrator (k8s, systemd,
|
|
8
|
+
* docker swarm) to decide whether to restart the pod. MUST NOT
|
|
9
|
+
* check downstream deps; an unreachable DB does not mean the
|
|
10
|
+
* coordinator process should be killed and restarted.
|
|
11
|
+
*
|
|
12
|
+
* - /readyz → are downstream deps ready? Used by a load balancer / service
|
|
13
|
+
* mesh to decide whether to add this pod to rotation. Returns 503
|
|
14
|
+
* when the DB or MQTT broker is not reachable so the LB drains
|
|
15
|
+
* traffic until the coordinator can actually serve it.
|
|
16
|
+
*
|
|
17
|
+
* - /health → backwards-compat alias for /livez. The original stub returned
|
|
18
|
+
* {status:"ok",version} unconditionally; preserving alive-only
|
|
19
|
+
* semantics keeps existing dashboards and uptime probes green
|
|
20
|
+
* without forcing them to migrate.
|
|
21
|
+
*/
|
|
22
|
+
// Captured once at module load so /livez can report process uptime.
const STARTED_AT_MS = Date.now();
// Resolved once; the version string never changes within a process.
const VERSION = getVersion();
/** Whole seconds elapsed since this module was first loaded. */
function uptimeSeconds() {
    const elapsedMs = Date.now() - STARTED_AT_MS;
    return Math.floor(elapsedMs / 1000);
}
|
|
27
|
+
/**
 * Liveness probe (/livez). Replies 200 unconditionally — no dependency
 * checks — so orchestrators never restart the process over a transient
 * downstream failure.
 */
export function handleLivez(_req, res) {
    const payload = {
        status: "alive",
        uptime_seconds: uptimeSeconds(),
        version: VERSION,
    };
    json(res, payload);
}
|
|
38
|
+
/**
 * Readiness probe (/readyz). Every downstream dependency must pass for a
 * 200; any failure yields 503 so the load balancer drains this instance
 * until it can actually serve traffic. Failures are captured per-check as
 * {ok:false, error} rather than thrown, and the body shape is identical
 * for 200 and 503 so consumers can parse uniformly.
 */
export function handleReadyz(_req, res, services) {
    const dbCheck = { ok: false };
    try {
        // Minimal round-trip proving the SQLite handle is usable without
        // touching application tables; throws when the handle is closed or
        // the file stays locked beyond busy_timeout.
        getDb().prepare("SELECT 1").get();
        dbCheck.ok = true;
    }
    catch (err) {
        dbCheck.error = err.message;
    }
    const mqttCheck = { ok: false };
    try {
        if (services.mqttBridge.isConnected()) {
            mqttCheck.ok = true;
        }
        else {
            mqttCheck.error = "not connected";
        }
    }
    catch (err) {
        mqttCheck.error = err.message;
    }
    const checks = { db: dbCheck, mqtt: mqttCheck };
    const allOk = dbCheck.ok && mqttCheck.ok;
    json(res, { status: allOk ? "ready" : "not_ready", checks }, allOk ? 200 : 503);
}
|
|
78
|
+
/**
 * Backwards-compatible /health endpoint. The original route returned a
 * fixed {status,version} payload with no dependency checks — liveness
 * semantics — so it delegates to handleLivez; existing dashboards and
 * uptime probes keep working unchanged.
 */
export function handleHealth(req, res) {
    handleLivez(req, res);
}
|