onkol 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/discord-api.d.ts +20 -0
- package/dist/cli/discord-api.js +102 -0
- package/dist/cli/index.js +47 -9
- package/dist/cli/systemd.js +21 -3
- package/package.json +7 -5
- package/scripts/spawn-worker.sh +10 -4
- package/scripts/start-orchestrator.sh +60 -13
- package/scripts/worker-watchdog.sh +192 -0
- package/src/plugin/index.ts +47 -1
|
@@ -17,3 +17,23 @@ export declare function createChannel(token: string, guildId: string, name: stri
|
|
|
17
17
|
}>;
|
|
18
18
|
export declare function deleteChannel(token: string, channelId: string): Promise<void>;
|
|
19
19
|
export declare function sendMessage(token: string, channelId: string, content: string): Promise<void>;
|
|
20
|
+
/**
|
|
21
|
+
* Validates the bot token over REST: checks that it authenticates as a bot account
|
|
22
|
+
* and can fetch gateway info. Intents are not verified here; see checkGatewayIntents.
|
|
23
|
+
* Returns { ok: true } or { ok: false, error: string }.
|
|
24
|
+
*/
|
|
25
|
+
export declare function validateBotToken(token: string): Promise<{
|
|
26
|
+
ok: true;
|
|
27
|
+
} | {
|
|
28
|
+
ok: false;
|
|
29
|
+
error: string;
|
|
30
|
+
}>;
|
|
31
|
+
/**
|
|
32
|
+
* Performs a lightweight check for MessageContent intent by attempting a
|
|
33
|
+
* test gateway connection. Returns a warning message if the intent appears
|
|
34
|
+
* to be disabled, or null if everything looks good.
|
|
35
|
+
*
|
|
36
|
+
* Note: The Discord REST API doesn't expose which intents are enabled.
|
|
37
|
+
* We do a quick WebSocket handshake to the gateway to detect DisallowedIntents.
|
|
38
|
+
*/
|
|
39
|
+
export declare function checkGatewayIntents(token: string): Promise<string | null>;
|
package/dist/cli/discord-api.js
CHANGED
|
@@ -51,3 +51,105 @@ export async function sendMessage(token, channelId, content) {
|
|
|
51
51
|
if (!res.ok)
|
|
52
52
|
throw new Error(`Failed to send message: ${res.status} ${await res.text()}`);
|
|
53
53
|
}
|
|
54
|
+
/**
 * Validates a Discord bot token using REST calls only.
 *
 * Steps:
 *  1. GET /users/@me   - the token must authenticate (HTTP 401 means invalid token).
 *  2. The authenticated account must be flagged as a bot.
 *  3. GET /gateway/bot - the token must be able to fetch gateway connection info.
 *
 * No gateway connection is opened here, so privileged intents are NOT verified;
 * use checkGatewayIntents for that.
 *
 * Never throws: network and response-parsing failures are reported through the
 * `{ ok: false, error }` result like every other failure.
 *
 * @param token Discord bot token (without the "Bot " prefix).
 * @returns `{ ok: true }` or `{ ok: false, error: string }`.
 */
export async function validateBotToken(token) {
    const headers = { Authorization: `Bot ${token}` };
    try {
        // Step 1: check the token is valid via /users/@me
        const meRes = await fetch(`${DISCORD_API}/users/@me`, { headers });
        if (!meRes.ok) {
            const body = await meRes.text();
            if (meRes.status === 401)
                return { ok: false, error: 'Invalid bot token.' };
            return { ok: false, error: `Discord API error (${meRes.status}): ${body}` };
        }
        // Step 2: the /users/@me payload tells us whether this is a bot account
        const me = await meRes.json();
        if (!me.bot)
            return { ok: false, error: 'This token belongs to a user account, not a bot.' };
        // Step 3: make sure the token can obtain gateway connection info
        const gatewayRes = await fetch(`${DISCORD_API}/gateway/bot`, { headers });
        if (!gatewayRes.ok) {
            const body = await gatewayRes.text();
            return { ok: false, error: `Cannot fetch gateway info (${gatewayRes.status}): ${body}` };
        }
        return { ok: true };
    }
    catch (err) {
        // fetch() rejects on DNS/TLS/connection errors and json() on malformed bodies;
        // surface those through the result object instead of crashing the caller.
        const reason = err instanceof Error ? err.message : String(err);
        return { ok: false, error: `Could not reach the Discord API: ${reason}` };
    }
}
|
|
85
|
+
/**
 * Checks whether the gateway accepts the intents the bot needs
 * (Guilds, GuildMessages, MessageContent) by performing a real IDENTIFY.
 *
 * The Discord REST API doesn't expose which intents are enabled, so this
 * fetches the gateway URL, opens a WebSocket, answers HELLO (op 10) with
 * IDENTIFY (op 2) and waits for READY or for the server to close the socket.
 *
 * The check is best-effort and never rejects. Network errors, unexpected close
 * codes and a 10 second timeout all resolve to null so setup is not blocked.
 *
 * @param token Discord bot token.
 * @returns A human-readable problem description when the gateway URL cannot be
 *          fetched, the token is rejected (close code 4004) or the intents are
 *          disallowed (close code 4014); otherwise null.
 */
export function checkGatewayIntents(token) {
    return new Promise((resolve) => {
        let ws = null;
        let settled = false;
        // Resolve exactly once and stop the timer, whichever path gets here first.
        const finish = (result) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            resolve(result);
        };
        const timer = setTimeout(() => {
            // Assume OK if no response in 10s, but do not leave the socket open:
            // an open socket would keep the process alive after setup finishes.
            if (ws) {
                try {
                    ws.terminate();
                }
                catch { /* socket already gone */ }
            }
            finish(null);
        }, 10000);
        const probe = async () => {
            const gatewayRes = await fetch(`${DISCORD_API}/gateway/bot`, {
                headers: { Authorization: `Bot ${token}` },
            });
            if (!gatewayRes.ok) {
                finish('Could not fetch gateway URL. Check your bot token.');
                return;
            }
            const { url } = await gatewayRes.json();
            // Dynamic import for WebSocket (works in both Node and Bun)
            const WebSocket = (await import('ws')).default;
            // The timeout may have fired while we were still fetching; do not
            // open a socket that nothing would ever close.
            if (settled)
                return;
            ws = new WebSocket(`${url}?v=10&encoding=json`);
            ws.on('message', (data) => {
                try {
                    const payload = JSON.parse(data.toString());
                    if (payload.op === 10) {
                        // HELLO received: send IDENTIFY with the intents we need
                        // Guilds=1, GuildMessages=512, MessageContent=32768
                        ws.send(JSON.stringify({
                            op: 2,
                            d: {
                                token,
                                intents: 1 | 512 | 32768,
                                properties: { os: 'linux', browser: 'onkol-setup', device: 'onkol-setup' },
                            },
                        }));
                    }
                    else if (payload.op === 0 && payload.t === 'READY') {
                        // All good: intents accepted
                        ws.close();
                        finish(null);
                    }
                }
                catch { /* ignore parse errors */ }
            });
            ws.on('close', (code) => {
                if (code === 4014) {
                    finish('MessageContent intent is not enabled for this bot.\n' +
                        '  Go to https://discord.com/developers/applications → your bot → Bot settings\n' +
                        '  → Privileged Gateway Intents → enable "Message Content Intent" → Save');
                }
                else if (code === 4004) {
                    finish('Invalid bot token (gateway rejected authentication).');
                }
                else {
                    // Any other close code is not something this check can diagnose.
                    // Resolve anyway; otherwise the caller would wait forever, because
                    // nothing else is left to settle the promise once the socket is gone.
                    finish(null);
                }
            });
            ws.on('error', () => {
                finish(null); // network error, don't block setup
            });
        };
        probe().catch(() => finish(null));
    });
}
|
package/dist/cli/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { mkdirSync, writeFileSync, readFileSync, copyFileSync, existsSync } from
|
|
|
8
8
|
import { resolve } from 'path';
|
|
9
9
|
import { execSync } from 'child_process';
|
|
10
10
|
import { runSetupPrompts } from './prompts.js';
|
|
11
|
-
import { createCategory, createChannel } from './discord-api.js';
|
|
11
|
+
import { createCategory, createChannel, validateBotToken, checkGatewayIntents } from './discord-api.js';
|
|
12
12
|
import { discoverServices, formatServicesMarkdown } from './auto-discover.js';
|
|
13
13
|
import { renderOrchestratorClaude, renderSettings } from './templates.js';
|
|
14
14
|
import { generateSystemdUnit, generateCrontab } from './systemd.js';
|
|
@@ -152,6 +152,25 @@ program
|
|
|
152
152
|
if (answers.discordUserId.trim()) {
|
|
153
153
|
allowedUsers.push(answers.discordUserId.trim());
|
|
154
154
|
}
|
|
155
|
+
// --- Validate Discord bot token and intents ---
|
|
156
|
+
if (!skip('discord')) {
|
|
157
|
+
console.log(chalk.gray('Validating Discord bot token...'));
|
|
158
|
+
const tokenCheck = await validateBotToken(answers.botToken);
|
|
159
|
+
if (!tokenCheck.ok) {
|
|
160
|
+
console.error(chalk.red(`\nFATAL: ${tokenCheck.error}`));
|
|
161
|
+
console.error(chalk.yellow('\nYour answers have been saved. Fix the issue and run `npx onkol setup` again to resume.'));
|
|
162
|
+
process.exit(1);
|
|
163
|
+
}
|
|
164
|
+
console.log(chalk.green('✓ Bot token is valid'));
|
|
165
|
+
console.log(chalk.gray('Checking gateway intents...'));
|
|
166
|
+
const intentWarning = await checkGatewayIntents(answers.botToken);
|
|
167
|
+
if (intentWarning) {
|
|
168
|
+
console.error(chalk.red(`\nFATAL: ${intentWarning}`));
|
|
169
|
+
console.error(chalk.yellow('\nEnable the required intent and run `npx onkol setup` again to resume.'));
|
|
170
|
+
process.exit(1);
|
|
171
|
+
}
|
|
172
|
+
console.log(chalk.green('✓ Message Content intent is enabled'));
|
|
173
|
+
}
|
|
155
174
|
// --- CRITICAL: Create Discord category and orchestrator channel ---
|
|
156
175
|
let categoryId = checkpoint.categoryId || '';
|
|
157
176
|
let orchChannelId = checkpoint.orchChannelId || '';
|
|
@@ -441,30 +460,49 @@ program
|
|
|
441
460
|
console.log(chalk.gray('\nStarting orchestrator...'));
|
|
442
461
|
let started = false;
|
|
443
462
|
try {
|
|
444
|
-
execSync(`sudo systemctl start onkol-${answers.nodeName}`, { stdio: 'pipe' });
|
|
445
|
-
// Wait for tmux session to appear
|
|
446
|
-
for (let i = 0; i <
|
|
463
|
+
execSync(`sudo systemctl start onkol-${answers.nodeName}`, { stdio: 'pipe', timeout: 60000 });
|
|
464
|
+
// Wait for tmux session to appear (the start script itself verifies, but double-check)
|
|
465
|
+
for (let i = 0; i < 5; i++) {
|
|
447
466
|
try {
|
|
448
467
|
execSync(`tmux has-session -t onkol-${answers.nodeName}`, { stdio: 'pipe' });
|
|
449
468
|
started = true;
|
|
450
469
|
break;
|
|
451
470
|
}
|
|
452
471
|
catch { /* not ready yet */ }
|
|
453
|
-
execSync('sleep
|
|
472
|
+
execSync('sleep 2', { stdio: 'pipe' });
|
|
454
473
|
}
|
|
455
474
|
if (started) {
|
|
456
475
|
console.log(chalk.green(`✓ Orchestrator started via systemd (tmux session "onkol-${answers.nodeName}")`));
|
|
457
476
|
}
|
|
477
|
+
else {
|
|
478
|
+
// systemctl succeeded but tmux session not visible — likely PATH or env issue
|
|
479
|
+
console.log(chalk.yellow(`⚠ systemctl started but tmux session not found. Trying direct start...`));
|
|
480
|
+
try {
|
|
481
|
+
const logs = execSync(`sudo journalctl -u onkol-${answers.nodeName} --no-pager -n 10 2>&1`, { encoding: 'utf-8' });
|
|
482
|
+
if (logs.trim())
|
|
483
|
+
console.log(chalk.gray(` Journal: ${logs.trim().split('\n').slice(-3).join('\n ')}`));
|
|
484
|
+
}
|
|
485
|
+
catch { /* ignore */ }
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
catch (err) {
|
|
489
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
490
|
+
console.log(chalk.yellow(`⚠ systemctl start failed: ${msg.split('\n')[0]}`));
|
|
458
491
|
}
|
|
459
|
-
catch { /* systemctl start failed, try direct */ }
|
|
460
492
|
if (!started) {
|
|
461
493
|
try {
|
|
462
|
-
execSync(`bash "${resolve(dir, 'scripts/start-orchestrator.sh')}"`, { stdio: 'pipe' });
|
|
494
|
+
execSync(`bash "${resolve(dir, 'scripts/start-orchestrator.sh')}"`, { stdio: 'pipe', timeout: 60000 });
|
|
495
|
+
// Verify the session is actually running
|
|
496
|
+
execSync(`tmux has-session -t onkol-${answers.nodeName}`, { stdio: 'pipe' });
|
|
497
|
+
started = true;
|
|
463
498
|
console.log(chalk.green(`✓ Orchestrator started in tmux session "onkol-${answers.nodeName}"`));
|
|
464
499
|
}
|
|
465
500
|
catch {
|
|
466
|
-
console.log(chalk.
|
|
467
|
-
console.log(chalk.yellow(`
|
|
501
|
+
console.log(chalk.red(`✗ Could not start orchestrator. The tmux session failed to stay alive.`));
|
|
502
|
+
console.log(chalk.yellow(` Debug steps:`));
|
|
503
|
+
console.log(chalk.yellow(` 1. Run manually: bash ${dir}/scripts/start-orchestrator.sh`));
|
|
504
|
+
console.log(chalk.yellow(` 2. Check: tmux attach -t onkol-${answers.nodeName}`));
|
|
505
|
+
console.log(chalk.yellow(` 3. Verify claude works: claude --version`));
|
|
468
506
|
}
|
|
469
507
|
}
|
|
470
508
|
// Setup complete — clear checkpoint
|
package/dist/cli/systemd.js
CHANGED
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
export function generateSystemdUnit(nodeName, user, onkolDir) {
|
|
2
|
+
// Resolve PATH additions for claude and bun at generation time
|
|
3
|
+
const homeDir = process.env.HOME || `/home/${user}`;
|
|
4
|
+
const extraPaths = [
|
|
5
|
+
`${homeDir}/.local/bin`,
|
|
6
|
+
`${homeDir}/.bun/bin`,
|
|
7
|
+
].filter(p => {
|
|
8
|
+
try {
|
|
9
|
+
return require('fs').existsSync(p);
|
|
10
|
+
}
|
|
11
|
+
catch {
|
|
12
|
+
return false;
|
|
13
|
+
}
|
|
14
|
+
});
|
|
15
|
+
const pathEnv = extraPaths.length > 0
|
|
16
|
+
? `Environment=PATH=${extraPaths.join(':')}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin`
|
|
17
|
+
: '';
|
|
2
18
|
return `[Unit]
|
|
3
19
|
Description=Onkol Node: ${nodeName}
|
|
4
20
|
After=network.target
|
|
5
21
|
|
|
6
22
|
[Service]
|
|
7
|
-
Type=
|
|
23
|
+
Type=oneshot
|
|
24
|
+
RemainAfterExit=yes
|
|
8
25
|
User=${user}
|
|
26
|
+
${pathEnv}
|
|
27
|
+
Environment=HOME=${homeDir}
|
|
9
28
|
ExecStart=${onkolDir}/scripts/start-orchestrator.sh
|
|
10
29
|
ExecStop=/usr/bin/tmux kill-session -t onkol-${nodeName}
|
|
11
|
-
|
|
12
|
-
RestartSec=10
|
|
30
|
+
TimeoutStartSec=60
|
|
13
31
|
|
|
14
32
|
[Install]
|
|
15
33
|
WantedBy=multi-user.target
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "onkol",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Decentralized on-call agent system powered by Claude Code",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -19,16 +19,18 @@
|
|
|
19
19
|
},
|
|
20
20
|
"dependencies": {
|
|
21
21
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
22
|
+
"chalk": "^5.0.0",
|
|
23
|
+
"commander": "^13.0.0",
|
|
22
24
|
"discord.js": "^14.0.0",
|
|
23
25
|
"handlebars": "^4.7.0",
|
|
24
26
|
"inquirer": "^12.0.0",
|
|
25
|
-
"
|
|
26
|
-
"commander": "^13.0.0"
|
|
27
|
+
"ws": "^8.20.0"
|
|
27
28
|
},
|
|
28
29
|
"devDependencies": {
|
|
29
30
|
"@types/node": "^22.0.0",
|
|
30
|
-
"
|
|
31
|
-
"bun-types": "^1.2.0"
|
|
31
|
+
"@types/ws": "^8.18.1",
|
|
32
|
+
"bun-types": "^1.2.0",
|
|
33
|
+
"typescript": "^5.7.0"
|
|
32
34
|
},
|
|
33
35
|
"engines": {
|
|
34
36
|
"node": ">=18.0.0"
|
package/scripts/spawn-worker.sh
CHANGED
|
@@ -82,7 +82,8 @@ cat > "$WORKER_DIR/.mcp.json" << MCPEOF
|
|
|
82
82
|
"env": {
|
|
83
83
|
"DISCORD_BOT_TOKEN": "$BOT_TOKEN",
|
|
84
84
|
"DISCORD_CHANNEL_ID": "$CHANNEL_ID",
|
|
85
|
-
"DISCORD_ALLOWED_USERS": "$ALLOWED_USERS_ESCAPED"
|
|
85
|
+
"DISCORD_ALLOWED_USERS": "$ALLOWED_USERS_ESCAPED",
|
|
86
|
+
"TMUX_TARGET": "${TMUX_SESSION}:${WORKER_NAME}"
|
|
86
87
|
}
|
|
87
88
|
}
|
|
88
89
|
}
|
|
@@ -177,8 +178,13 @@ cat >> "$WORKER_DIR/CLAUDE.md" << STARTEOF
|
|
|
177
178
|
Immediately when you start:
|
|
178
179
|
1. Read $WORKER_DIR/task.md for your task
|
|
179
180
|
2. Read $WORKER_DIR/context.md for context
|
|
180
|
-
3.
|
|
181
|
-
4.
|
|
181
|
+
3. Use the \`reply\` tool to send "Starting work on: <brief task summary>" to Discord
|
|
182
|
+
4. Begin work — send progress updates via \`reply\` every few steps
|
|
183
|
+
5. When done, send your full results/summary via \`reply\` (split into <2000 char messages)
|
|
184
|
+
6. For file deliverables, use \`replyWithFile\` to attach them
|
|
185
|
+
|
|
186
|
+
IMPORTANT: The user CANNOT see your terminal. The ONLY way to communicate is the reply tool.
|
|
187
|
+
If you complete work without sending results via reply, the user will never see your output.
|
|
182
188
|
Do NOT wait for a message. Start working as soon as you boot.
|
|
183
189
|
STARTEOF
|
|
184
190
|
|
|
@@ -196,7 +202,7 @@ TMUX_TARGET="${TMUX_SESSION}:${WORKER_NAME}"
|
|
|
196
202
|
if echo "\$PANE_CONTENT" | grep -q "^❯"; then
|
|
197
203
|
# Claude is ready — send the initial prompt via tmux keys
|
|
198
204
|
sleep 1
|
|
199
|
-
tmux send-keys -t "\$TMUX_TARGET" "Read $WORKER_DIR/task.md and $WORKER_DIR/context.md, then begin work
|
|
205
|
+
tmux send-keys -t "\$TMUX_TARGET" "Read $WORKER_DIR/task.md and $WORKER_DIR/context.md, then begin work. IMPORTANT: You MUST use the reply tool from the discord-filtered MCP server for ALL communication — send a starting message now, progress updates as you work, and final results when done. The user cannot see your terminal." Enter
|
|
200
206
|
break
|
|
201
207
|
fi
|
|
202
208
|
tmux send-keys -t "\$TMUX_TARGET" Enter 2>/dev/null || true
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
2
4
|
ONKOL_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
3
5
|
CONFIG="$ONKOL_DIR/config.json"
|
|
4
6
|
NODE_NAME=$(jq -r '.nodeName' "$CONFIG")
|
|
@@ -9,24 +11,69 @@ if tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then
|
|
|
9
11
|
exit 0
|
|
10
12
|
fi
|
|
11
13
|
|
|
14
|
+
# Resolve full paths to binaries — critical for systemd which uses a minimal PATH
|
|
15
|
+
CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "")
|
|
16
|
+
if [ -z "$CLAUDE_BIN" ]; then
|
|
17
|
+
# Check common install locations
|
|
18
|
+
for candidate in "$HOME/.local/bin/claude" /usr/local/bin/claude /usr/bin/claude; do
|
|
19
|
+
if [ -x "$candidate" ]; then
|
|
20
|
+
CLAUDE_BIN="$candidate"
|
|
21
|
+
break
|
|
22
|
+
fi
|
|
23
|
+
done
|
|
24
|
+
fi
|
|
25
|
+
if [ -z "$CLAUDE_BIN" ]; then
|
|
26
|
+
echo "ERROR: claude not found in PATH or common locations." >&2
|
|
27
|
+
exit 1
|
|
28
|
+
fi
|
|
29
|
+
|
|
30
|
+
BUN_BIN=$(command -v bun 2>/dev/null || echo "")
|
|
31
|
+
if [ -z "$BUN_BIN" ]; then
|
|
32
|
+
for candidate in "$HOME/.bun/bin/bun" /usr/local/bin/bun /usr/bin/bun; do
|
|
33
|
+
if [ -x "$candidate" ]; then
|
|
34
|
+
BUN_BIN="$candidate"
|
|
35
|
+
break
|
|
36
|
+
fi
|
|
37
|
+
done
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Build PATH that includes directories for claude and bun so MCP plugins can find them
|
|
41
|
+
EXTRA_PATH=""
|
|
42
|
+
[ -n "$CLAUDE_BIN" ] && EXTRA_PATH="$(dirname "$CLAUDE_BIN")"
|
|
43
|
+
if [ -n "$BUN_BIN" ]; then
|
|
44
|
+
BUN_DIR="$(dirname "$BUN_BIN")"
|
|
45
|
+
if [ -n "$EXTRA_PATH" ]; then
|
|
46
|
+
EXTRA_PATH="$BUN_DIR:$EXTRA_PATH"
|
|
47
|
+
else
|
|
48
|
+
EXTRA_PATH="$BUN_DIR"
|
|
49
|
+
fi
|
|
50
|
+
fi
|
|
51
|
+
FULL_PATH="${EXTRA_PATH:+$EXTRA_PATH:}${PATH}"
|
|
52
|
+
|
|
12
53
|
tmux new-session -d -s "$TMUX_SESSION" \
|
|
13
|
-
"cd '$ONKOL_DIR' &&
|
|
54
|
+
"export PATH='$FULL_PATH'; cd '$ONKOL_DIR' && '$CLAUDE_BIN' \
|
|
14
55
|
--dangerously-skip-permissions \
|
|
15
56
|
--dangerously-load-development-channels server:discord-filtered \
|
|
16
|
-
--mcp-config '$ONKOL_DIR/.mcp.json'"
|
|
57
|
+
--mcp-config '$ONKOL_DIR/.mcp.json'; echo 'Claude exited with code '\$?'. Press Enter to close.'; read"
|
|
58
|
+
|
|
59
|
+
# Verify the session actually started and stayed alive
|
|
60
|
+
sleep 2
|
|
61
|
+
if ! tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then
|
|
62
|
+
echo "ERROR: tmux session '$TMUX_SESSION' died immediately after creation." >&2
|
|
63
|
+
echo "Check that claude is working: $CLAUDE_BIN --version" >&2
|
|
64
|
+
exit 1
|
|
65
|
+
fi
|
|
17
66
|
|
|
18
67
|
# Auto-accept interactive prompts (trust dialog + dev channels warning)
|
|
19
|
-
#
|
|
20
|
-
(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
done
|
|
29
|
-
) &
|
|
68
|
+
# Sends Enter every 2 seconds until claude reaches the prompt
|
|
69
|
+
for i in $(seq 1 10); do
|
|
70
|
+
sleep 2
|
|
71
|
+
PANE_CONTENT=$(tmux capture-pane -t "$TMUX_SESSION" -p 2>/dev/null || echo "")
|
|
72
|
+
if echo "$PANE_CONTENT" | grep -q "^❯"; then
|
|
73
|
+
break
|
|
74
|
+
fi
|
|
75
|
+
tmux send-keys -t "$TMUX_SESSION" Enter 2>/dev/null || true
|
|
76
|
+
done
|
|
30
77
|
|
|
31
78
|
echo "Orchestrator started in tmux session '$TMUX_SESSION'."
|
|
32
79
|
echo "Attach with: tmux attach -t $TMUX_SESSION"
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Worker watchdog — runs periodically to check on active workers.
|
|
3
|
+
# Uses an LLM to analyze tmux pane content instead of brittle regex.
|
|
4
|
+
# Falls back to basic checks if no LLM is configured.
|
|
5
|
+
|
|
6
|
+
set -uo pipefail
|
|
7
|
+
|
|
8
|
+
ONKOL_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
9
|
+
CONFIG="$ONKOL_DIR/config.json"
|
|
10
|
+
TRACKING="$ONKOL_DIR/workers/tracking.json"
|
|
11
|
+
|
|
12
|
+
if [ ! -f "$TRACKING" ] || [ "$(jq length "$TRACKING")" -eq 0 ]; then
|
|
13
|
+
exit 0
|
|
14
|
+
fi
|
|
15
|
+
|
|
16
|
+
BOT_TOKEN=$(jq -r '.botToken' "$CONFIG")
|
|
17
|
+
ORCHESTRATOR_CHANNEL=$(jq -r '.orchestratorChannelId' "$CONFIG")
|
|
18
|
+
NODE_NAME=$(jq -r '.nodeName' "$CONFIG")
|
|
19
|
+
TMUX_SESSION="onkol-${NODE_NAME}"
|
|
20
|
+
|
|
21
|
+
# Watchdog LLM config
|
|
22
|
+
WATCHDOG_PROVIDER=$(jq -r '.watchdog.provider // empty' "$CONFIG")
|
|
23
|
+
WATCHDOG_MODEL=$(jq -r '.watchdog.model // empty' "$CONFIG")
|
|
24
|
+
WATCHDOG_API_KEY=$(jq -r '.watchdog.apiKey // empty' "$CONFIG")
|
|
25
|
+
|
|
26
|
+
WINDOWS=$(tmux list-windows -t "$TMUX_SESSION" -F '#{window_name}' 2>/dev/null || echo "")
|
|
27
|
+
|
|
28
|
+
# Post a plain-text message to a Discord channel as the bot.
#
# Arguments:
#   $1  channel ID
#   $2  message text
#
# Reads the global BOT_TOKEN. All curl output is discarded; the function's exit
# status is curl's exit status.
discord_msg() {
  local channel="$1" text="$2"
  local payload

  # Let jq build the JSON body from an argument: this escapes the text correctly
  # and, unlike piping through echo, neither appends a trailing newline nor
  # misreads text that starts with an echo option such as -n or -e.
  payload=$(jq -n --arg content "$text" '{content: $content}')

  # -m 10 bounds the request so a stalled connection cannot hang the watchdog run.
  curl -s -m 10 -X POST \
    "https://discord.com/api/v10/channels/${channel}/messages" \
    -H "Authorization: Bot ${BOT_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "$payload" \
    > /dev/null 2>&1
}
|
|
37
|
+
|
|
38
|
+
# Ask the configured LLM to classify a worker's state from its tmux pane text.
#
# Arguments:
#   $1  captured pane content
#   $2  worker name (included in the prompt for context)
#
# Reads the globals WATCHDOG_PROVIDER, WATCHDOG_MODEL, WATCHDOG_API_KEY and CONFIG.
#
# Prints one JSON object on stdout:
#   {"status": "...", "action": "...", "reason": "..."}
#   status: working | done_replied | done_silent | error | idle | unknown
#   action: none | nudge_reply | nudge_error | nudge_idle
#
# If no LLM is configured, the API call fails, or the reply is not a JSON
# object, a fallback with status "unknown" and action "none" is printed. The
# explanation is carried in "reason" (the key the watchdog loop reads) and also
# in "message" (the key earlier fallbacks used).
llm_analyze() {
  local pane_content="$1"
  local worker_name="$2"

  # Determine API endpoint and headers based on provider
  local api_url=""
  local auth_header=""
  local model="$WATCHDOG_MODEL"

  case "$WATCHDOG_PROVIDER" in
    openrouter)
      api_url="https://openrouter.ai/api/v1/chat/completions"
      auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
      ;;
    gemini)
      api_url="https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"
      auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
      ;;
    custom)
      api_url=$(jq -r '.watchdog.apiUrl // empty' "$CONFIG")
      auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
      ;;
    *)
      echo '{"status":"unknown","action":"none","reason":"no llm configured","message":"no llm configured"}'
      return
      ;;
  esac

  if [ -z "$api_url" ] || [ -z "$WATCHDOG_API_KEY" ]; then
    echo '{"status":"unknown","action":"none","reason":"missing api config","message":"missing api config"}'
    return
  fi

  local sys_prompt="You analyze Claude Code terminal output to determine a worker's state. Respond with ONLY a JSON object, no markdown fences.

Keys:
- status: one of: working, done_replied, done_silent, error, idle
- action: one of: none, nudge_reply, nudge_error, nudge_idle
- reason: one short sentence explaining your assessment

Rules:
- working: Claude is actively executing tools, thinking, or generating output. Action: none
- done_replied: Worker finished AND used the discord-filtered reply MCP tool (you'll see 'discord-filtered - reply (MCP)' with result 'sent'). Action: none
- done_silent: Worker finished work (wrote files, completed analysis, etc.) but NEVER used the reply MCP tool to send results to Discord. Action: nudge_reply
- error: Worker hit a fatal error and stopped (Traceback, FATAL, crash at the prompt). Action: nudge_error. Note: errors from EARLIER that the worker recovered from do NOT count.
- idle: Worker is sitting at the prompt with no clear completion or error. Action: nudge_idle"

  # Use jq to build the payload — handles all JSON escaping correctly
  local payload
  payload=$(jq -n \
    --arg model "$model" \
    --arg sys "$sys_prompt" \
    --arg user "Worker name: ${worker_name}

Terminal output (last 100 lines):
${pane_content}" \
    '{
      model: $model,
      messages: [
        {role: "system", content: $sys},
        {role: "user", content: $user}
      ],
      temperature: 0,
      max_tokens: 150
    }')

  # -m 15 caps the whole request at 15 seconds.
  local response
  response=$(curl -s -m 15 "$api_url" \
    -H "$auth_header" \
    -H "Content-Type: application/json" \
    -d "$payload" 2>/dev/null)

  # Extract the assistant message text from the chat-completions response
  local content
  content=$(printf '%s' "$response" | jq -r '.choices[0].message.content // empty' 2>/dev/null)

  if [ -z "$content" ]; then
    echo '{"status":"unknown","action":"none","reason":"llm call failed","message":"llm call failed"}'
    return
  fi

  # Strip markdown fences if present, then join everything onto one line
  content=$(printf '%s\n' "$content" | sed 's/^```json//; s/^```//; s/```$//' | tr -d '\n')

  # Accept the reply only if it is a JSON object: the caller reads .action and
  # .reason from it, which would fail on a bare string, number or array.
  if printf '%s' "$content" | jq -e 'type == "object"' >/dev/null 2>&1; then
    printf '%s\n' "$content"
  else
    echo '{"status":"unknown","action":"none","reason":"invalid llm response","message":"invalid llm response"}'
  fi
}
|
|
133
|
+
|
|
134
|
+
# Walk every worker marked "active" in the tracking file. For each one, report
# a missing tmux window, or ask the LLM whether the worker needs a nudge.
jq -r '.[] | select(.status == "active") | .name' "$TRACKING" | while IFS= read -r WORKER; do
  WORKER_DIR="$ONKOL_DIR/workers/$WORKER"
  TMUX_TARGET="${TMUX_SESSION}:${WORKER}"

  # Case 1: tmux window is gone — worker crashed (no LLM needed).
  # -F -x compares the whole line literally, so characters such as '.' or '['
  # in a worker name are not treated as a regular expression.
  if ! printf '%s\n' "$WINDOWS" | grep -Fxq -- "$WORKER"; then
    discord_msg "$ORCHESTRATOR_CHANNEL" \
      "[watchdog] Worker **${WORKER}** has crashed — its tmux window is gone. Please check and decide: respawn or dissolve."
    continue
  fi

  # Without an LLM there is nothing further to check (no regex fallback — too brittle)
  if [ -z "$WATCHDOG_PROVIDER" ]; then
    continue
  fi

  # Nudge cooldown: the flag file is touched whenever a nudge is sent, and the
  # worker is skipped while that file is less than 10 minutes old.
  NUDGE_FLAG="$WORKER_DIR/.watchdog-last-nudge"
  if [ -f "$NUDGE_FLAG" ] && [ -z "$(find "$NUDGE_FLAG" -mmin +10 2>/dev/null)" ]; then
    continue
  fi

  # Capture the pane (including 100 lines of scrollback) and ask the LLM to analyze it
  PANE_FULL=$(tmux capture-pane -t "$TMUX_TARGET" -p -S -100 2>/dev/null || echo "")
  ANALYSIS=$(llm_analyze "$PANE_FULL" "$WORKER")
  ACTION=$(printf '%s' "$ANALYSIS" | jq -r '.action // "none"')
  REASON=$(printf '%s' "$ANALYSIS" | jq -r '.reason // ""')

  # Pick the text typed into the worker's pane and the note sent to the orchestrator.
  case "$ACTION" in
    nudge_reply)
      NUDGE_TEXT="You appear to have finished your work but haven't sent results to Discord. Use the reply tool from the discord-filtered MCP server to send a summary of what you did and your findings. Use replyWithFile for any file deliverables. The user CANNOT see your terminal output."
      NUDGE_NOTE="Nudged it to send results via Discord."
      ;;
    nudge_error)
      NUDGE_TEXT="You encountered an error. Use the reply tool to report this error to the user on Discord, then try to recover or ask for help."
      NUDGE_NOTE="Nudged it to report via Discord."
      ;;
    nudge_idle)
      NUDGE_TEXT="You've been idle for a while. If you're done, use the reply tool to send your results to Discord. If you're stuck, use the reply tool to ask for help. The user cannot see your terminal."
      NUDGE_NOTE="Nudged it to respond."
      ;;
    *)
      # "none" or anything unrecognized: worker is fine (working or already replied)
      continue
      ;;
  esac

  touch "$NUDGE_FLAG"
  tmux send-keys -t "$TMUX_TARGET" "$NUDGE_TEXT" Enter
  discord_msg "$ORCHESTRATOR_CHANNEL" \
    "[watchdog] Worker **${WORKER}** — $REASON. $NUDGE_NOTE"
done
|
package/src/plugin/index.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
3
|
+
import { execFileSync, execSync } from 'child_process'
|
|
3
4
|
import { createMcpServer } from './mcp-server.js'
|
|
4
5
|
import { createDiscordClient } from './discord-client.js'
|
|
5
6
|
import { MessageBatcher } from './message-batcher.js'
|
|
@@ -7,6 +8,7 @@ import { MessageBatcher } from './message-batcher.js'
|
|
|
7
8
|
const BOT_TOKEN = process.env.DISCORD_BOT_TOKEN
|
|
8
9
|
const CHANNEL_ID = process.env.DISCORD_CHANNEL_ID
|
|
9
10
|
const ALLOWED_USERS: string[] = JSON.parse(process.env.DISCORD_ALLOWED_USERS || '[]')
|
|
11
|
+
const TMUX_TARGET = process.env.TMUX_TARGET || ''
|
|
10
12
|
|
|
11
13
|
if (!BOT_TOKEN) {
|
|
12
14
|
console.error('[discord-filtered] DISCORD_BOT_TOKEN is required')
|
|
@@ -17,13 +19,57 @@ if (!CHANNEL_ID) {
|
|
|
17
19
|
process.exit(1)
|
|
18
20
|
}
|
|
19
21
|
|
|
22
|
+
/**
 * Interrupts the Claude Code session running in this plugin's tmux pane by
 * sending it the Escape key.
 *
 * The pane is identified by the TMUX_TARGET environment variable; when it is
 * not set, nothing is sent.
 *
 * @returns true if tmux accepted the key, false if TMUX_TARGET is unset or the
 *          tmux call failed. Failures are logged to stderr, never thrown.
 */
function sendInterrupt(): boolean {
  if (!TMUX_TARGET) {
    console.error('[discord-filtered] !stop received but TMUX_TARGET not set — cannot interrupt')
    return false
  }
  try {
    // Escape is Claude Code's interrupt key.
    // Arguments are passed as an array so no shell ever parses TMUX_TARGET:
    // JSON.stringify-style double quoting would still let $(...) or backticks
    // in the value be expanded by the shell.
    execFileSync('tmux', ['send-keys', '-t', TMUX_TARGET, 'Escape'], { stdio: 'pipe' })
    console.error(`[discord-filtered] Sent interrupt (Escape) to ${TMUX_TARGET}`)
    return true
  } catch (err) {
    console.error(`[discord-filtered] Failed to send interrupt: ${err}`)
    return false
  }
}
|
|
37
|
+
|
|
20
38
|
const discord = createDiscordClient(
|
|
21
39
|
{ botToken: BOT_TOKEN, channelId: CHANNEL_ID, allowedUsers: ALLOWED_USERS },
|
|
22
40
|
async (message) => {
|
|
41
|
+
const content = message.content
|
|
42
|
+
const isInterrupt = /^!stop\b/i.test(content)
|
|
43
|
+
|
|
44
|
+
if (isInterrupt) {
|
|
45
|
+
sendInterrupt()
|
|
46
|
+
// Strip the !stop prefix and forward the rest as a normal message
|
|
47
|
+
const rest = content.replace(/^!stop\s*/i, '').trim()
|
|
48
|
+
// React to confirm the interrupt was received
|
|
49
|
+
try { await message.react('🛑') } catch { /* ignore */ }
|
|
50
|
+
// Small delay to let Claude Code process the Escape before the new message arrives
|
|
51
|
+
await new Promise(r => setTimeout(r, 1500))
|
|
52
|
+
// Forward the message (with or without remaining text)
|
|
53
|
+
await mcpServer.notification({
|
|
54
|
+
method: 'notifications/claude/channel',
|
|
55
|
+
params: {
|
|
56
|
+
content: rest || '[interrupted by user]',
|
|
57
|
+
meta: {
|
|
58
|
+
channel_id: message.channel.id,
|
|
59
|
+
sender: message.author.username,
|
|
60
|
+
sender_id: message.author.id,
|
|
61
|
+
message_id: message.id,
|
|
62
|
+
interrupt: true,
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
})
|
|
66
|
+
return
|
|
67
|
+
}
|
|
68
|
+
|
|
23
69
|
await mcpServer.notification({
|
|
24
70
|
method: 'notifications/claude/channel',
|
|
25
71
|
params: {
|
|
26
|
-
content:
|
|
72
|
+
content: content,
|
|
27
73
|
meta: {
|
|
28
74
|
channel_id: message.channel.id,
|
|
29
75
|
sender: message.author.username,
|