iriai-build 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/iriai-build.js +78 -0
- package/bridge-v3.js +98 -0
- package/cli/bootstrap.js +83 -0
- package/cli/commands/implementation.js +64 -0
- package/cli/commands/index.js +46 -0
- package/cli/commands/launch.js +153 -0
- package/cli/commands/plan.js +117 -0
- package/cli/commands/setup.js +80 -0
- package/cli/commands/slack.js +97 -0
- package/cli/commands/transfer.js +111 -0
- package/cli/config.js +92 -0
- package/cli/display.js +121 -0
- package/cli/terminal-input.js +666 -0
- package/cli/wait.js +82 -0
- package/index.js +1488 -0
- package/lib/agent-process.js +170 -0
- package/lib/bridge-state.js +126 -0
- package/lib/constants.js +137 -0
- package/lib/health-monitor.js +113 -0
- package/lib/prompt-builder.js +565 -0
- package/lib/signal-watcher.js +215 -0
- package/lib/slack-helpers.js +224 -0
- package/lib/state-machines/feature-lead.js +408 -0
- package/lib/state-machines/operator-agent.js +173 -0
- package/lib/state-machines/planning-role.js +161 -0
- package/lib/state-machines/role-agent.js +186 -0
- package/lib/state-machines/team-orchestrator.js +160 -0
- package/package.json +31 -0
- package/v3/.handover-html-evidence.md +35 -0
- package/v3/KICKOFF-HTML-EVIDENCE.md +98 -0
- package/v3/PLAN-HTML-EVIDENCE-HARDENING.md +603 -0
- package/v3/adapters/desktop-adapter.js +78 -0
- package/v3/adapters/interface.js +146 -0
- package/v3/adapters/slack-adapter.js +608 -0
- package/v3/adapters/slack-helpers.js +179 -0
- package/v3/adapters/terminal-adapter.js +249 -0
- package/v3/agent-supervisor.js +320 -0
- package/v3/artifact-portal.js +1184 -0
- package/v3/bridge.db +0 -0
- package/v3/constants.js +170 -0
- package/v3/db.js +76 -0
- package/v3/file-io.js +216 -0
- package/v3/helpers.js +174 -0
- package/v3/operator.js +364 -0
- package/v3/orchestrator.js +2886 -0
- package/v3/plan-compiler.js +440 -0
- package/v3/prompt-builder.js +849 -0
- package/v3/queries.js +461 -0
- package/v3/recovery.js +508 -0
- package/v3/review-sessions.js +360 -0
- package/v3/roles/accessibility-auditor/CLAUDE.md +50 -0
- package/v3/roles/analytics-engineer/CLAUDE.md +40 -0
- package/v3/roles/architect/CLAUDE.md +809 -0
- package/v3/roles/backend-implementer/CLAUDE.md +97 -0
- package/v3/roles/code-reviewer/CLAUDE.md +89 -0
- package/v3/roles/database-implementer/CLAUDE.md +97 -0
- package/v3/roles/deployer/CLAUDE.md +42 -0
- package/v3/roles/designer/CLAUDE.md +386 -0
- package/v3/roles/documentation/CLAUDE.md +40 -0
- package/v3/roles/feature-lead/CLAUDE.md +233 -0
- package/v3/roles/frontend-implementer/CLAUDE.md +97 -0
- package/v3/roles/implementer/CLAUDE.md +97 -0
- package/v3/roles/integration-tester/CLAUDE.md +174 -0
- package/v3/roles/observability-engineer/CLAUDE.md +40 -0
- package/v3/roles/operator/CLAUDE.md +322 -0
- package/v3/roles/orchestrator/CLAUDE.md +288 -0
- package/v3/roles/package-implementer/CLAUDE.md +47 -0
- package/v3/roles/performance-analyst/CLAUDE.md +49 -0
- package/v3/roles/plan-compiler/CLAUDE.md +163 -0
- package/v3/roles/planning-lead/CLAUDE.md +41 -0
- package/v3/roles/pm/CLAUDE.md +806 -0
- package/v3/roles/regression-tester/CLAUDE.md +135 -0
- package/v3/roles/release-manager/CLAUDE.md +43 -0
- package/v3/roles/security-auditor/CLAUDE.md +90 -0
- package/v3/roles/smoke-tester/CLAUDE.md +97 -0
- package/v3/roles/test-author/CLAUDE.md +42 -0
- package/v3/roles/verifier/CLAUDE.md +90 -0
- package/v3/schema.sql +134 -0
- package/v3/slack-adapter.js +510 -0
- package/v3/slack-helpers.js +346 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
// agent-process.js — Process wrapper for claude agent sessions.
|
|
2
|
+
//
|
|
3
|
+
// Two modes:
|
|
4
|
+
// spawn() — bash script (legacy, used by index.js)
|
|
5
|
+
// spawnClaude() — claude CLI directly (v3 bridge)
|
|
6
|
+
//
|
|
7
|
+
// Uses child_process.spawn instead of node-pty to avoid blocking
|
|
8
|
+
// the parent's event loop (node-pty interferes with WebSocket I/O).
|
|
9
|
+
|
|
10
|
+
import { EventEmitter } from "node:events";
|
|
11
|
+
import { spawn as cpSpawn } from "node:child_process";
|
|
12
|
+
import fs from "node:fs";
|
|
13
|
+
import path from "node:path";
|
|
14
|
+
import { getClaudeBin } from "../cli/config.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Wraps a single child process (a bash script or the claude CLI) and mirrors
 * its stdout/stderr into `<signalDir>/.runner.log`.
 *
 * Emits one `"exit"` event per spawn with `{ exitCode, signal, key, elapsed }`.
 * A spawn failure is reported through the same event with `exitCode: 1`.
 */
class AgentProcess extends EventEmitter {
  /**
   * @param {object} opts
   * @param {string} opts.key - Unique identifier (e.g. "fl-slug", "role-slug-0-backend")
   * @param {string} [opts.script] - Path to bash script (for spawn())
   * @param {string[]} [opts.args] - Script arguments (for spawn())
   * @param {string} opts.cwd - Working directory
   * @param {object} [opts.extraEnv] - Extra env vars (merged with process.env)
   * @param {string} opts.signalDir - Signal directory (for log file)
   */
  constructor({ key, script, args, cwd, extraEnv, signalDir }) {
    super();
    this.key = key;
    this.script = script;
    this.args = args || [];
    this.cwd = cwd;
    this.extraEnv = extraEnv || {};
    this.signalDir = signalDir;
    this.logFile = path.join(signalDir, ".runner.log");

    this._process = null;
    this.pid = null;
    this.startTime = null;
    this.exitCode = null;
    this.exited = false;
    this._killed = false;
    this._logStream = null;
    this._killTimer = null;
  }

  /**
   * Spawn claude CLI directly. No bash middleware.
   * @param {string} prompt - The prompt to pass with -p flag
   * @param {object} opts
   * @param {string} [opts.model] - Model to use (default: "opus")
   * @param {boolean} [opts.continue] - When true, adds `--continue` to the CLI arguments
   * @returns {AgentProcess} this
   */
  spawnClaude(prompt, { model = "opus", continue: cont = false } = {}) {
    const claudeArgs = [
      "--dangerously-skip-permissions",
      "--verbose",
      "--model", model,
    ];
    if (cont) claudeArgs.push("--continue");
    claudeArgs.push("-p", prompt);
    const bin = getClaudeBin();
    // Log every flag except the trailing "-p <prompt>" pair, so "--continue"
    // is visible in the log while the prompt text itself is not written out.
    const shownArgs = claudeArgs.slice(0, -2).join(" ");
    return this._spawn(bin, claudeArgs, `${bin} ${shownArgs} -p <prompt>`);
  }

  /**
   * Spawn a bash script (legacy mode for index.js compatibility).
   * @returns {AgentProcess} this
   */
  spawn() {
    return this._spawn("bash", [this.script, ...this.args], `bash ${this.script} ${this.args.join(" ")}`);
  }

  /**
   * Append a chunk to the runner log unless the stream is missing, ended or destroyed.
   * @param {string|Buffer} chunk
   */
  _writeLog(chunk) {
    const stream = this._logStream;
    if (stream && !stream.destroyed && !stream.writableEnded) {
      stream.write(chunk);
    }
  }

  /** End the runner log stream. Safe to call more than once. */
  _closeLog() {
    const stream = this._logStream;
    if (stream && !stream.destroyed && !stream.writableEnded) {
      stream.end();
    }
  }

  /** Cancel a pending SIGKILL escalation, if any. */
  _clearKillTimer() {
    if (this._killTimer) {
      clearTimeout(this._killTimer);
      this._killTimer = null;
    }
  }

  /**
   * Start the child process and wire up logging and lifecycle events.
   * @param {string} command - Executable to run
   * @param {string[]} args - Arguments for the executable
   * @param {string} logLabel - Human-readable command line written to the log
   * @returns {AgentProcess} this
   */
  _spawn(command, args, logLabel) {
    const env = { ...process.env, ...this.extraEnv };
    // NOTE(review): presumably removed so the child does not see itself as
    // running inside another Claude Code session — confirm.
    delete env.CLAUDECODE;

    this._logStream = fs.createWriteStream(this.logFile, { flags: "a" });
    // Without a listener, a log-file failure (missing directory, disk full,
    // late write) is an unhandled 'error' event and takes down the parent.
    this._logStream.on("error", (err) => {
      console.warn(`[agent-process] ${this.key}: log file error: ${err.message}`);
    });
    this._writeLog(
      `\n[agent-process] Spawning: ${logLabel}\n` +
        `[agent-process] cwd: ${this.cwd} key: ${this.key} time: ${new Date().toISOString()}\n`
    );

    try {
      this._process = cpSpawn(command, args, {
        cwd: this.cwd,
        env,
        stdio: ["ignore", "pipe", "pipe"],
        detached: false,
      });
    } catch (err) {
      // Synchronous failures (e.g. invalid arguments) would otherwise leak the log stream.
      this._writeLog(`\n[agent-process] Spawn error: ${err.message}\n`);
      this._closeLog();
      throw err;
    }

    this.pid = this._process.pid;
    this.startTime = Date.now();

    // 'error' and 'exit' can both fire for one child; report the exit only once per spawn.
    let exitReported = false;

    this._process.stdout.on("data", (data) => this._writeLog(data));
    this._process.stderr.on("data", (data) => this._writeLog(data));

    this._process.on("exit", (exitCode, signal) => {
      this.exitCode = exitCode;
      this.exited = true;
      this._clearKillTimer();
      // The log is closed on 'close', not here: stdio may still deliver data after 'exit'.
      this._writeLog(
        `\n[agent-process] Exited: code=${exitCode} signal=${signal} elapsed=${this.getElapsedMs()}ms\n`
      );
      if (exitReported) return;
      exitReported = true;
      this.emit("exit", {
        // exitCode is null when the child was terminated by a signal.
        exitCode: exitCode ?? 1,
        signal,
        key: this.key,
        elapsed: this.getElapsedMs(),
      });
    });

    this._process.on("close", () => {
      // All stdio streams are closed now, so nothing more will be logged.
      this._closeLog();
    });

    this._process.on("error", (err) => {
      this._writeLog(`\n[agent-process] Spawn error: ${err.message}\n`);
      if (this.pid == null) {
        // The child never started, so no 'exit' will follow to clean up.
        this._clearKillTimer();
        this._closeLog();
      }
      if (!this.exited) {
        this.exited = true;
        this.exitCode = 1;
        exitReported = true;
        this.emit("exit", {
          exitCode: 1,
          signal: null,
          key: this.key,
          elapsed: this.getElapsedMs(),
        });
      }
    });

    return this;
  }

  /**
   * Send a signal to the child. Only the first call has any effect.
   * Any signal other than SIGKILL is followed by SIGKILL after 5 seconds
   * if the child has not exited by then.
   * @param {string} [signal="SIGTERM"]
   */
  kill(signal = "SIGTERM") {
    if (!this._process || this._killed || this.exited) return;
    this._killed = true;

    try {
      process.kill(this.pid, signal);
    } catch {
      /* already gone */
    }

    // Escalate to SIGKILL after 5s grace period
    if (signal !== "SIGKILL") {
      this._killTimer = setTimeout(() => {
        try {
          if (!this.exited) process.kill(this.pid, "SIGKILL");
        } catch {
          /* already gone */
        }
      }, 5000);
    }
  }

  /**
   * @returns {number} Milliseconds since the child was spawned, or 0 if it never was.
   */
  getElapsedMs() {
    return this.startTime ? Date.now() - this.startTime : 0;
  }
}
|
|
169
|
+
|
|
170
|
+
export default AgentProcess;
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// bridge-state.js — State persistence and startup recovery.
|
|
2
|
+
// Same JSON format as index.js for compatibility.
|
|
3
|
+
|
|
4
|
+
import fs from "node:fs";
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
import { STATE_FILE, IMPL_BASE } from "./constants.js";
|
|
7
|
+
import { ensureDir } from "./slack-helpers.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Save features state to disk.
 *
 * The in-memory entries are merged over whatever is already in the state
 * file, so entries written by another bridge process are kept. The result is
 * written to a temporary file and renamed over the state file, so a reader
 * never sees a half-written file.
 *
 * @param {object} features - thread_ts -> feature state
 */
export function save(features) {
  ensureDir(path.dirname(STATE_FILE));
  // Merge with on-disk state so concurrent bridge processes (e.g. during
  // graceful shutdown of a replaced instance) don't overwrite entries they
  // never knew about.
  let diskState = {};
  try {
    if (fs.existsSync(STATE_FILE)) {
      const parsed = JSON.parse(fs.readFileSync(STATE_FILE, "utf-8"));
      // Only a plain object can be merged; ignore anything else.
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        diskState = parsed;
      }
    }
  } catch { /* ok — start from empty */ }
  const merged = { ...diskState, ...features };

  // The pid suffix keeps concurrent writers from sharing a temp file.
  const tmpFile = `${STATE_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(merged, null, 2));
    fs.renameSync(tmpFile, STATE_FILE);
  } catch (err) {
    try {
      fs.unlinkSync(tmpFile);
    } catch { /* temp file was never created */ }
    throw err;
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Load features state from disk.
 *
 * Returns an empty object when the state file is missing, and also (with a
 * warning) when it cannot be read or parsed.
 *
 * @returns {object} features
 */
export function load() {
  try {
    if (!fs.existsSync(STATE_FILE)) return {};

    const raw = fs.readFileSync(STATE_FILE, "utf-8");
    const restored = JSON.parse(raw);
    const count = Object.keys(restored).length;
    console.log(`[state] Restored: ${count} active features`);
    return restored;
  } catch {
    console.warn("[state] Could not load bridge state, starting fresh");
    return {};
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Discover the implementation signal tree for a feature.
 * Same structure as index.js discoverImplSignalTree().
 *
 * @param {string} slug - Feature slug; the tree is rooted at IMPL_BASE/features/<slug>
 * @returns {{featureDir: string, featureLead: ?string, operator: ?string,
 *   featureReview: object, teams: object}} Paths found on disk. `teams` is keyed
 *   by the part of the directory name after "team-".
 */
export function discoverImplSignalTree(slug) {
  const featureDir = path.join(IMPL_BASE, "features", slug);

  // The directory path if it exists on disk, otherwise null.
  const existingOrNull = (dir) => (fs.existsSync(dir) ? dir : null);

  // Record each immediate subdirectory of `parent` in `sink` (name -> path).
  // A filesystem error ends the scan and keeps whatever was recorded so far.
  const collectSubdirs = (parent, sink) => {
    try {
      for (const name of fs.readdirSync(parent)) {
        const full = path.join(parent, name);
        if (fs.statSync(full).isDirectory()) {
          sink[name] = full;
        }
      }
    } catch {
      /* directory missing or unreadable */
    }
  };

  const tree = {
    featureDir,
    featureLead: existingOrNull(path.join(featureDir, "feature-lead")),
    operator: existingOrNull(path.join(featureDir, "operator")),
    featureReview: {},
    teams: {},
  };

  collectSubdirs(path.join(featureDir, "feature-review"), tree.featureReview);

  const teamsDir = path.join(featureDir, "teams");
  try {
    const teamNames = fs.readdirSync(teamsDir).sort();
    for (const teamName of teamNames) {
      if (!teamName.startsWith("team-")) continue;

      const teamDir = path.join(teamsDir, teamName);
      if (!fs.statSync(teamDir).isDirectory()) continue;

      const teamEntry = {
        dir: teamDir,
        orchestrator: existingOrNull(path.join(teamDir, "orchestrator")),
        roles: {},
      };
      tree.teams[teamName.replace("team-", "")] = teamEntry;

      collectSubdirs(path.join(teamDir, "roles"), teamEntry.roles);
    }
  } catch {
    /* no teams dir */
  }

  return tree;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Identify features that were in impl phase when bridge stopped.
 *
 * @param {object} features - thread_ts -> feature state
 * @returns {Array<{thread_ts: string, featureState: object, slug: string}>}
 *   One entry per feature whose `active_role` is "impl", in key order.
 */
export function getActiveImplFeatures(features) {
  const active = [];
  for (const [thread_ts, featureState] of Object.entries(features)) {
    if (featureState.active_role !== "impl") continue;
    active.push({
      thread_ts,
      featureState,
      slug: featureState.feature_slug,
    });
  }
  return active;
}
|
package/lib/constants.js
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// constants.js — Centralized paths, timeouts, signal file names, role mappings.
|
|
2
|
+
// Extracted from index.js config section and bash scripts.
|
|
3
|
+
|
|
4
|
+
import os from "node:os";
import path from "node:path";
|
|
5
|
+
|
|
6
|
+
// Home directory used for the default locations below. $HOME wins when set;
// otherwise fall back to os.homedir() so that importing this module does not
// throw from path.join(undefined, …) when HOME is absent from the environment.
const HOME = process.env.HOME || os.homedir();

// ─── Base Directories ────────────────────────────────────────────────────────

// Each base directory can be overridden through the environment variable named on its first line.
export const PLANNING_BASE =
  process.env.PLANNING_SIGNAL_BASE ||
  path.join(HOME, "src/iriai/.planning");

export const IMPL_BASE =
  process.env.IMPL_SIGNAL_BASE ||
  path.join(HOME, "src/iriai/.implementation");

export const IRIAI_TEAM_DIR =
  process.env.IRIAI_TEAM_DIR ||
  path.join(HOME, "src/iriai/iriai-team");

export const SCRIPTS_DIR = path.join(IRIAI_TEAM_DIR, "scripts");

export const STATE_FILE = path.join(PLANNING_BASE, "lead", ".bridge-state.json");

export const DASHBOARD_LOG = path.join(IRIAI_TEAM_DIR, ".dashboard-log");
export const DASHBOARD_FILE = path.join(IRIAI_TEAM_DIR, "DASHBOARD.md");
export const FEATURE_STATUS_FILE = path.join(IRIAI_TEAM_DIR, "FEATURE-STATUS.md");
|
|
29
|
+
|
|
30
|
+
// ─── Planning Role Directories ───────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
// Signal directory of each planning role, under PLANNING_BASE.
// Note that the "designer" role lives in the "design" directory.
// Frozen: this is a shared lookup table and must not be modified by importers.
export const ROLE_DIRS = Object.freeze({
  pm: path.join(PLANNING_BASE, "pm"),
  designer: path.join(PLANNING_BASE, "design"),
  architect: path.join(PLANNING_BASE, "architect"),
  "plan-compiler": path.join(PLANNING_BASE, "plan-compiler"),
  lead: path.join(PLANNING_BASE, "lead"),
});
|
|
39
|
+
|
|
40
|
+
// Display label for each planning role key.
// Frozen: this is a shared lookup table and must not be modified by importers.
export const ROLE_LABELS = Object.freeze({
  pm: "PM",
  designer: "Designer",
  architect: "Architect",
  "plan-compiler": "Plan Compiler",
  lead: "Feature Lead",
});

// Ordered list of planning role keys ("lead" is not part of it). Frozen for the same reason.
export const PIPELINE_ORDER = Object.freeze(["pm", "designer", "architect", "plan-compiler"]);
|
|
49
|
+
|
|
50
|
+
// ─── Timeouts (milliseconds unless noted) ────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
// Unit helpers so the durations below read as "N minutes" / "N seconds".
const SECOND_MS = 1000;
const MINUTE_MS = 60 * SECOND_MS;

export const ROLE_HARD_TIMEOUT_MS = 75 * MINUTE_MS;
export const ROLE_SOFT_TIMEOUT_MS = 15 * MINUTE_MS;
export const ORCH_SOFT_TIMEOUT_MS = 45 * MINUTE_MS;
export const PLANNING_TIMEOUT_MS = 30 * MINUTE_MS;
export const OPERATOR_TIMEOUT_MS = 2 * MINUTE_MS;
export const FL_SOFT_TIMEOUT_MS = 45 * MINUTE_MS;
export const FL_CONTEXT_EXHAUST_MS = 5 * MINUTE_MS; // auto-refresh threshold

export const RSS_CEILING_KB = 8 * 1024 * 1024; // 8 GB, expressed in KB
export const STUCK_THRESHOLD_MS = 10 * MINUTE_MS; // silence before an agent counts as stuck
export const HEALTH_CHECK_INTERVAL_MS = 30 * SECOND_MS;

// ─── Retry Limits ────────────────────────────────────────────────────────────

export const MAX_ROLE_RETRIES = 2;
export const MAX_ORCH_RETRIES = 1;
export const MAX_FL_RETRIES = 3;
export const MAX_FL_INIT_RETRIES = 2;
export const MAX_PLANNING_RETRIES = 2;

// ─── Backoff (seconds, except the threshold which is in milliseconds) ───────

export const FAST_EXIT_THRESHOLD_MS = 15 * SECOND_MS; // shorter runs are a "fast exit" (possible usage limit)
export const FAST_EXIT_BACKOFF_S = 30; // multiplied by retry count
export const NORMAL_BACKOFF_S = 5; // multiplied by retry count
export const FL_NORMAL_BACKOFF_S = 10; // FL uses longer normal backoff
|
|
78
|
+
|
|
79
|
+
// ─── Signal File Names ──────────────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
// File names used as signals inside a signal directory.
// Frozen: this is a shared lookup table and must not be modified by importers.
export const SIGNAL = Object.freeze({
  TASK: ".task",
  ACTIVE_TASK: ".active-task",
  DONE: ".done",
  OUTPUT: ".output",
  AGENT_RESPONSE: ".agent-response",
  USER_MESSAGE: ".user-message",
  QUESTION: ".question",
  ANSWER: ".answer",
  NEEDS_RESTART: ".needs-restart",
  HANDOVER: ".handover",
  KILL: ".kill",
  STUCK: ".stuck",
  CRASHED: ".crashed",
  STARTED: ".started",
  RUNNING: ".running",
  GATE_READY: ".gate-ready",
  GATE_APPROVED: ".gate-approved",
  PHASE_DONE: ".phase-done",
  FEATURE_COMPLETE: ".feature-complete",
  CONTEXT_REFRESH: ".context-refresh",
  CONVERSATION_HISTORY: ".conversation-history",
  CLAUDE_SESSION_LOG: ".claude-session.log",
  RUNNER_LOG: ".runner.log",
  DASHBOARD_LOG: ".dashboard-log",
  BRIDGE_STATE: ".bridge-state.json",
  TEAM_CONFIG: ".team-config",
});
|
|
109
|
+
|
|
110
|
+
// ─── Known Repos ─────────────────────────────────────────────────────────────
|
|
111
|
+
|
|
112
|
+
// Relative paths of the known repositories.
// Frozen: importers that need a modified list should copy it first ([...KNOWN_REPOS]).
export const KNOWN_REPOS = Object.freeze([
  "first-party-apps/directory/directory-backend",
  "first-party-apps/directory/directory-frontend",
  "first-party-apps/events/events-backend",
  "first-party-apps/events/events-frontend",
  "first-party-apps/subdomain-home/subdomain-home-frontend",
  "first-party-apps/subdomain-home/subdomain-home-server",
  "frontend-apps/iriai-app/iriai-app-bff",
  "frontend-apps/iriai-app/iriai-app-frontend",
  "packages/auth-python",
  "packages/auth-react",
  "platform/auth/auth-frontend",
  "platform/auth/auth-service",
  "platform/deploy-console/deploy-console-frontend",
  "platform/deploy-console/deploy-console-service",
  "platform/integration-engine/integration-engine-service",
]);
|
|
129
|
+
|
|
130
|
+
// ─── Feature Review Roles ────────────────────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
// Role names that make up the feature review.
// Frozen: importers that need a modified list should copy it first.
export const FEATURE_REVIEW_ROLES = Object.freeze([
  "integration-tester",
  "code-reviewer",
  "security-auditor",
  "health-monitor",
]);
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// health-monitor.js — Periodic health checks for registered AgentProcesses.
|
|
2
|
+
// Single setInterval(check, 30_000) replacing per-script timeout/RSS checks.
|
|
3
|
+
|
|
4
|
+
import { EventEmitter } from "node:events";
|
|
5
|
+
import { execSync } from "node:child_process";
|
|
6
|
+
import {
|
|
7
|
+
HEALTH_CHECK_INTERVAL_MS,
|
|
8
|
+
RSS_CEILING_KB,
|
|
9
|
+
STUCK_THRESHOLD_MS,
|
|
10
|
+
} from "./constants.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Periodically checks every registered agent and emits:
 *   - "timeout" { key, agent, elapsed }  — running longer than its hard timeout
 *   - "oom"     { key, agent, rssKb }    — resident memory above RSS_CEILING_KB
 *   - "stuck"   { key, agent, silentMs } — no output for longer than STUCK_THRESHOLD_MS
 *
 * The monitor only reports; it does not kill anything itself.
 */
export default class HealthMonitor extends EventEmitter {
  constructor() {
    super();
    this._agents = new Map(); // key -> { agent, hardTimeoutMs, lastOutput }
    this._timer = null;
  }

  /**
   * Register an AgentProcess for monitoring.
   *
   * Output is tracked through `agent.ptyProcess.onData` when the agent has a
   * pty, otherwise through the stdout/stderr streams of `agent._process`.
   * An agent that has not been spawned yet has neither, so register after spawning.
   *
   * @param {AgentProcess} agent
   * @param {object} opts
   * @param {number} opts.hardTimeoutMs - Kill after this many ms
   */
  register(agent, { hardTimeoutMs } = {}) {
    this._agents.set(agent.key, {
      agent,
      hardTimeoutMs: hardTimeoutMs || Infinity,
      lastOutput: Date.now(),
    });

    // True while this exact agent (not a later one reusing the key) is registered.
    const currentEntry = () => {
      const entry = this._agents.get(agent.key);
      return entry && entry.agent === agent ? entry : null;
    };

    // Track output for stuck detection
    const markOutput = () => {
      const entry = currentEntry();
      if (entry) entry.lastOutput = Date.now();
    };
    if (agent.ptyProcess) {
      agent.ptyProcess.onData(markOutput);
    } else if (agent._process) {
      agent._process.stdout?.on("data", markOutput);
      agent._process.stderr?.on("data", markOutput);
    }

    // Auto-unregister on exit, unless the key has since been taken over by
    // another agent — its entry must survive the old agent's exit.
    agent.once("exit", () => {
      if (currentEntry()) this.unregister(agent.key);
    });

    if (!this._timer) this._start();
  }

  /**
   * Stop monitoring the agent registered under `key`. Stops the timer when
   * no agents remain.
   * @param {string} key
   */
  unregister(key) {
    this._agents.delete(key);
    if (this._agents.size === 0 && this._timer) {
      clearInterval(this._timer);
      this._timer = null;
    }
  }

  /** Start the periodic check. */
  _start() {
    this._timer = setInterval(() => this._check(), HEALTH_CHECK_INTERVAL_MS);
    // Don't prevent Node from exiting
    this._timer.unref();
  }

  /**
   * Run one pass over all registered agents. At most one event is emitted
   * per agent per pass, in the order timeout, oom, stuck.
   */
  _check() {
    const now = Date.now();

    for (const [key, entry] of this._agents) {
      const { agent, hardTimeoutMs, lastOutput } = entry;
      if (agent.exited) continue;

      const elapsed = agent.getElapsedMs();

      // Hard timeout
      if (elapsed > hardTimeoutMs) {
        console.log(`[health] ${key}: hard timeout (${Math.round(elapsed / 1000)}s)`);
        this.emit("timeout", { key, agent, elapsed });
        continue;
      }

      // RSS check (only if we have a PID)
      if (agent.pid) {
        const rss = this._getRssKb(agent.pid);
        if (rss > RSS_CEILING_KB) {
          console.log(`[health] ${key}: OOM (${rss}KB > ${RSS_CEILING_KB}KB)`);
          this.emit("oom", { key, agent, rssKb: rss });
          continue;
        }
      }

      // Stuck detection: no output for longer than the threshold
      const silent = now - lastOutput;
      if (silent > STUCK_THRESHOLD_MS) {
        console.log(`[health] ${key}: stuck (no output for ${Math.round(silent / 1000)}s)`);
        this.emit("stuck", { key, agent, silentMs: silent });
        // Reset lastOutput so we don't re-fire every 30s
        entry.lastOutput = now;
      }
    }
  }

  /**
   * Resident set size of a process as reported by `ps`.
   * @param {number} pid
   * @returns {number} RSS in KB, or 0 when it cannot be determined
   */
  _getRssKb(pid) {
    // The pid is interpolated into a shell command, so accept integers only.
    if (!Number.isInteger(pid) || pid <= 0) return 0;
    try {
      const out = execSync(`ps -o rss= -p ${pid}`, { encoding: "utf-8", timeout: 3000 });
      return Number.parseInt(out.trim(), 10) || 0;
    } catch {
      return 0;
    }
  }

  /** Stop the periodic check and forget all registered agents. */
  stop() {
    if (this._timer) {
      clearInterval(this._timer);
      this._timer = null;
    }
    this._agents.clear();
  }
}
|