onkol 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,3 +17,23 @@ export declare function createChannel(token: string, guildId: string, name: stri
17
17
  }>;
18
18
  export declare function deleteChannel(token: string, channelId: string): Promise<void>;
19
19
  export declare function sendMessage(token: string, channelId: string, content: string): Promise<void>;
20
+ /**
21
+ * Validates the bot token through the Discord REST API: the token must
22
+ * authenticate, belong to a bot account, and be able to fetch gateway info.
23
+ * Returns { ok: true } or { ok: false, error: string }. Intents are not probed here.
24
+ */
25
+ export declare function validateBotToken(token: string): Promise<{
26
+ ok: true;
27
+ } | {
28
+ ok: false;
29
+ error: string;
30
+ }>;
31
+ /**
32
+ * Probes the Discord gateway by identifying with the Guilds, GuildMessages and
33
+ * MessageContent intents. Resolves to a warning message when the gateway URL
34
+ * cannot be fetched or the socket closes with code 4014 or 4004, and to null on READY.
35
+ *
36
+ * Note: The Discord REST API doesn't expose which intents are enabled.
37
+ * A timeout (10s) or a network error also resolves to null so setup is not blocked.
38
+ */
39
+ export declare function checkGatewayIntents(token: string): Promise<string | null>;
@@ -51,3 +51,105 @@ export async function sendMessage(token, channelId, content) {
51
51
  if (!res.ok)
52
52
  throw new Error(`Failed to send message: ${res.status} ${await res.text()}`);
53
53
  }
54
/**
 * Validates a Discord bot token using only the REST API.
 *
 * Checks, in order, that the token authenticates on /users/@me, that the
 * account it belongs to is a bot, and that /gateway/bot can be fetched.
 * Privileged intents are NOT verified here (REST cannot report them); that
 * is the job of checkGatewayIntents.
 *
 * @param token Raw bot token (without the "Bot " prefix).
 * @returns { ok: true } on success, otherwise { ok: false, error } with a
 *          human-readable reason. Network failures are reported the same
 *          way instead of rejecting, so callers never need a try/catch.
 */
export async function validateBotToken(token) {
    const headers = { Authorization: `Bot ${token}` };
    try {
        // Step 1: the token must authenticate at all.
        const meRes = await fetch(`${DISCORD_API}/users/@me`, { headers });
        if (!meRes.ok) {
            if (meRes.status === 401)
                return { ok: false, error: 'Invalid bot token.' };
            const body = await meRes.text();
            return { ok: false, error: `Discord API error (${meRes.status}): ${body}` };
        }
        // Step 2: the authenticated account must be flagged as a bot.
        const me = await meRes.json();
        if (!me.bot)
            return { ok: false, error: 'This token belongs to a user account, not a bot.' };
        // Step 3: the bot must be allowed to request gateway connection info.
        const gatewayRes = await fetch(`${DISCORD_API}/gateway/bot`, { headers });
        if (!gatewayRes.ok) {
            const body = await gatewayRes.text();
            return { ok: false, error: `Cannot fetch gateway info (${gatewayRes.status}): ${body}` };
        }
        return { ok: true };
    }
    catch (err) {
        // fetch() rejects on DNS/connection failures and json() on malformed
        // bodies; surface those through the documented result shape.
        const reason = err instanceof Error ? err.message : String(err);
        return { ok: false, error: `Could not reach the Discord API: ${reason}` };
    }
}
85
/**
 * Probes the Discord gateway to find out whether the bot may use the
 * MessageContent intent.
 *
 * The Discord REST API doesn't expose which intents are enabled, so this
 * opens a short-lived WebSocket, sends IDENTIFY with
 * Guilds | GuildMessages | MessageContent and watches how the gateway reacts.
 *
 * @param token Raw bot token (without the "Bot " prefix).
 * @returns A promise that always settles, with:
 *   - a warning string when the gateway URL cannot be fetched, or the socket
 *     is closed with 4014 (disallowed intents) or 4004 (authentication failed);
 *   - null on READY, on any other close code, on a network error, or when
 *     nothing conclusive happens within 10 seconds (setup must not be blocked
 *     by an inconclusive probe).
 */
export function checkGatewayIntents(token) {
    return new Promise((resolve) => {
        let ws = null;
        let timer = null;
        let settled = false;
        // Single exit point: resolves once, stops the timer and makes sure the
        // probe socket does not outlive the check (an open socket would keep
        // the CLI process alive).
        const finish = (result) => {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            if (ws) {
                try {
                    ws.close();
                }
                catch { /* socket already closing or closed */ }
            }
            resolve(result);
        };
        timer = setTimeout(() => finish(null), 10000); // assume OK if no response in 10s
        const probe = async () => {
            const gatewayRes = await fetch(`${DISCORD_API}/gateway/bot`, {
                headers: { Authorization: `Bot ${token}` },
            });
            if (!gatewayRes.ok) {
                finish('Could not fetch gateway URL. Check your bot token.');
                return;
            }
            const { url } = await gatewayRes.json();
            // Dynamic import for WebSocket (works in both Node and Bun)
            const WebSocket = (await import('ws')).default;
            if (settled)
                return; // timed out while fetching; don't open a socket nobody will close
            ws = new WebSocket(`${url}?v=10&encoding=json`);
            ws.on('message', (data) => {
                try {
                    const payload = JSON.parse(data.toString());
                    if (payload.op === 10) {
                        // HELLO received: send IDENTIFY with the intents we need
                        // Guilds=1, GuildMessages=512, MessageContent=32768
                        ws.send(JSON.stringify({
                            op: 2,
                            d: {
                                token,
                                intents: 1 | 512 | 32768,
                                properties: { os: 'linux', browser: 'onkol-setup', device: 'onkol-setup' },
                            },
                        }));
                    }
                    else if (payload.op === 0 && payload.t === 'READY') {
                        // All good — intents accepted
                        finish(null);
                    }
                }
                catch { /* ignore parse errors */ }
            });
            ws.on('close', (code) => {
                if (code === 4014) {
                    finish('MessageContent intent is not enabled for this bot.\n' +
                        ' Go to https://discord.com/developers/applications → your bot → Bot settings\n' +
                        ' → Privileged Gateway Intents → enable "Message Content Intent" → Save');
                }
                else if (code === 4004) {
                    finish('Invalid bot token (gateway rejected authentication).');
                }
                else {
                    // Any other close is inconclusive. Resolve anyway: the timer is
                    // cancelled on settle, so leaving this branch empty would hang
                    // the caller forever.
                    finish(null);
                }
            });
            ws.on('error', () => finish(null)); // network error, don't block setup
        };
        probe().catch(() => finish(null));
    });
}
package/dist/cli/index.js CHANGED
@@ -8,7 +8,7 @@ import { mkdirSync, writeFileSync, readFileSync, copyFileSync, existsSync } from
8
8
  import { resolve } from 'path';
9
9
  import { execSync } from 'child_process';
10
10
  import { runSetupPrompts } from './prompts.js';
11
- import { createCategory, createChannel } from './discord-api.js';
11
+ import { createCategory, createChannel, validateBotToken, checkGatewayIntents } from './discord-api.js';
12
12
  import { discoverServices, formatServicesMarkdown } from './auto-discover.js';
13
13
  import { renderOrchestratorClaude, renderSettings } from './templates.js';
14
14
  import { generateSystemdUnit, generateCrontab } from './systemd.js';
@@ -152,6 +152,25 @@ program
152
152
  if (answers.discordUserId.trim()) {
153
153
  allowedUsers.push(answers.discordUserId.trim());
154
154
  }
155
+ // --- Validate Discord bot token and intents ---
156
+ if (!skip('discord')) {
157
+ console.log(chalk.gray('Validating Discord bot token...'));
158
+ const tokenCheck = await validateBotToken(answers.botToken);
159
+ if (!tokenCheck.ok) {
160
+ console.error(chalk.red(`\nFATAL: ${tokenCheck.error}`));
161
+ console.error(chalk.yellow('\nYour answers have been saved. Fix the issue and run `npx onkol setup` again to resume.'));
162
+ process.exit(1);
163
+ }
164
+ console.log(chalk.green('✓ Bot token is valid'));
165
+ console.log(chalk.gray('Checking gateway intents...'));
166
+ const intentWarning = await checkGatewayIntents(answers.botToken);
167
+ if (intentWarning) {
168
+ console.error(chalk.red(`\nFATAL: ${intentWarning}`));
169
+ console.error(chalk.yellow('\nEnable the required intent and run `npx onkol setup` again to resume.'));
170
+ process.exit(1);
171
+ }
172
+ console.log(chalk.green('✓ Message Content intent is enabled'));
173
+ }
155
174
  // --- CRITICAL: Create Discord category and orchestrator channel ---
156
175
  let categoryId = checkpoint.categoryId || '';
157
176
  let orchChannelId = checkpoint.orchChannelId || '';
@@ -441,30 +460,49 @@ program
441
460
  console.log(chalk.gray('\nStarting orchestrator...'));
442
461
  let started = false;
443
462
  try {
444
- execSync(`sudo systemctl start onkol-${answers.nodeName}`, { stdio: 'pipe' });
445
- // Wait for tmux session to appear
446
- for (let i = 0; i < 10; i++) {
463
+ execSync(`sudo systemctl start onkol-${answers.nodeName}`, { stdio: 'pipe', timeout: 60000 });
464
+ // Wait for tmux session to appear (the start script itself verifies, but double-check)
465
+ for (let i = 0; i < 5; i++) {
447
466
  try {
448
467
  execSync(`tmux has-session -t onkol-${answers.nodeName}`, { stdio: 'pipe' });
449
468
  started = true;
450
469
  break;
451
470
  }
452
471
  catch { /* not ready yet */ }
453
- execSync('sleep 1', { stdio: 'pipe' });
472
+ execSync('sleep 2', { stdio: 'pipe' });
454
473
  }
455
474
  if (started) {
456
475
  console.log(chalk.green(`✓ Orchestrator started via systemd (tmux session "onkol-${answers.nodeName}")`));
457
476
  }
477
+ else {
478
+ // systemctl succeeded but tmux session not visible — likely PATH or env issue
479
+ console.log(chalk.yellow(`⚠ systemctl started but tmux session not found. Trying direct start...`));
480
+ try {
481
+ const logs = execSync(`sudo journalctl -u onkol-${answers.nodeName} --no-pager -n 10 2>&1`, { encoding: 'utf-8' });
482
+ if (logs.trim())
483
+ console.log(chalk.gray(` Journal: ${logs.trim().split('\n').slice(-3).join('\n ')}`));
484
+ }
485
+ catch { /* ignore */ }
486
+ }
487
+ }
488
+ catch (err) {
489
+ const msg = err instanceof Error ? err.message : String(err);
490
+ console.log(chalk.yellow(`⚠ systemctl start failed: ${msg.split('\n')[0]}`));
458
491
  }
459
- catch { /* systemctl start failed, try direct */ }
460
492
  if (!started) {
461
493
  try {
462
- execSync(`bash "${resolve(dir, 'scripts/start-orchestrator.sh')}"`, { stdio: 'pipe' });
494
+ execSync(`bash "${resolve(dir, 'scripts/start-orchestrator.sh')}"`, { stdio: 'pipe', timeout: 60000 });
495
+ // Verify the session is actually running
496
+ execSync(`tmux has-session -t onkol-${answers.nodeName}`, { stdio: 'pipe' });
497
+ started = true;
463
498
  console.log(chalk.green(`✓ Orchestrator started in tmux session "onkol-${answers.nodeName}"`));
464
499
  }
465
500
  catch {
466
- console.log(chalk.yellow(`⚠ Could not start orchestrator automatically.`));
467
- console.log(chalk.yellow(` Start manually: ${dir}/scripts/start-orchestrator.sh`));
501
+ console.log(chalk.red(`✗ Could not start orchestrator. The tmux session failed to stay alive.`));
502
+ console.log(chalk.yellow(` Debug steps:`));
503
+ console.log(chalk.yellow(` 1. Run manually: bash ${dir}/scripts/start-orchestrator.sh`));
504
+ console.log(chalk.yellow(` 2. Check: tmux attach -t onkol-${answers.nodeName}`));
505
+ console.log(chalk.yellow(` 3. Verify claude works: claude --version`));
468
506
  }
469
507
  }
470
508
  // Setup complete — clear checkpoint
@@ -1,15 +1,33 @@
1
1
  export function generateSystemdUnit(nodeName, user, onkolDir) {
2
+ // Resolve PATH additions for claude and bun at generation time
3
+ const homeDir = process.env.HOME || `/home/${user}`;
4
+ const extraPaths = [
5
+ `${homeDir}/.local/bin`,
6
+ `${homeDir}/.bun/bin`,
7
+ ].filter(p => {
8
+ try {
9
+ return require('fs').existsSync(p);
10
+ }
11
+ catch {
12
+ return false;
13
+ }
14
+ });
15
+ const pathEnv = extraPaths.length > 0
16
+ ? `Environment=PATH=${extraPaths.join(':')}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin`
17
+ : '';
2
18
  return `[Unit]
3
19
  Description=Onkol Node: ${nodeName}
4
20
  After=network.target
5
21
 
6
22
  [Service]
7
- Type=forking
23
+ Type=oneshot
24
+ RemainAfterExit=yes
8
25
  User=${user}
26
+ ${pathEnv}
27
+ Environment=HOME=${homeDir}
9
28
  ExecStart=${onkolDir}/scripts/start-orchestrator.sh
10
29
  ExecStop=/usr/bin/tmux kill-session -t onkol-${nodeName}
11
- Restart=on-failure
12
- RestartSec=10
30
+ TimeoutStartSec=60
13
31
 
14
32
  [Install]
15
33
  WantedBy=multi-user.target
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "onkol",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Decentralized on-call agent system powered by Claude Code",
5
5
  "type": "module",
6
6
  "bin": {
@@ -19,16 +19,18 @@
19
19
  },
20
20
  "dependencies": {
21
21
  "@modelcontextprotocol/sdk": "^1.0.0",
22
+ "chalk": "^5.0.0",
23
+ "commander": "^13.0.0",
22
24
  "discord.js": "^14.0.0",
23
25
  "handlebars": "^4.7.0",
24
26
  "inquirer": "^12.0.0",
25
- "chalk": "^5.0.0",
26
- "commander": "^13.0.0"
27
+ "ws": "^8.20.0"
27
28
  },
28
29
  "devDependencies": {
29
30
  "@types/node": "^22.0.0",
30
- "typescript": "^5.7.0",
31
- "bun-types": "^1.2.0"
31
+ "@types/ws": "^8.18.1",
32
+ "bun-types": "^1.2.0",
33
+ "typescript": "^5.7.0"
32
34
  },
33
35
  "engines": {
34
36
  "node": ">=18.0.0"
@@ -82,7 +82,8 @@ cat > "$WORKER_DIR/.mcp.json" << MCPEOF
82
82
  "env": {
83
83
  "DISCORD_BOT_TOKEN": "$BOT_TOKEN",
84
84
  "DISCORD_CHANNEL_ID": "$CHANNEL_ID",
85
- "DISCORD_ALLOWED_USERS": "$ALLOWED_USERS_ESCAPED"
85
+ "DISCORD_ALLOWED_USERS": "$ALLOWED_USERS_ESCAPED",
86
+ "TMUX_TARGET": "${TMUX_SESSION}:${WORKER_NAME}"
86
87
  }
87
88
  }
88
89
  }
@@ -177,8 +178,13 @@ cat >> "$WORKER_DIR/CLAUDE.md" << STARTEOF
177
178
  Immediately when you start:
178
179
  1. Read $WORKER_DIR/task.md for your task
179
180
  2. Read $WORKER_DIR/context.md for context
180
- 3. Begin work according to your intent
181
- 4. Report progress and results using the reply tool to your Discord channel
181
+ 3. Use the \`reply\` tool to send "Starting work on: <brief task summary>" to Discord
182
+ 4. Begin work send progress updates via \`reply\` every few steps
183
+ 5. When done, send your full results/summary via \`reply\` (split into <2000 char messages)
184
+ 6. For file deliverables, use \`replyWithFile\` to attach them
185
+
186
+ IMPORTANT: The user CANNOT see your terminal. The ONLY way to communicate is the reply tool.
187
+ If you complete work without sending results via reply, the user will never see your output.
182
188
  Do NOT wait for a message. Start working as soon as you boot.
183
189
  STARTEOF
184
190
 
@@ -196,7 +202,7 @@ TMUX_TARGET="${TMUX_SESSION}:${WORKER_NAME}"
196
202
  if echo "\$PANE_CONTENT" | grep -q "^❯"; then
197
203
  # Claude is ready — send the initial prompt via tmux keys
198
204
  sleep 1
199
- tmux send-keys -t "\$TMUX_TARGET" "Read $WORKER_DIR/task.md and $WORKER_DIR/context.md, then begin work per CLAUDE.md." Enter
205
+ tmux send-keys -t "\$TMUX_TARGET" "Read $WORKER_DIR/task.md and $WORKER_DIR/context.md, then begin work. IMPORTANT: You MUST use the reply tool from the discord-filtered MCP server for ALL communication — send a starting message now, progress updates as you work, and final results when done. The user cannot see your terminal." Enter
200
206
  break
201
207
  fi
202
208
  tmux send-keys -t "\$TMUX_TARGET" Enter 2>/dev/null || true
@@ -1,4 +1,6 @@
1
1
  #!/bin/bash
2
+ set -euo pipefail
3
+
2
4
  ONKOL_DIR="$(cd "$(dirname "$0")/.." && pwd)"
3
5
  CONFIG="$ONKOL_DIR/config.json"
4
6
  NODE_NAME=$(jq -r '.nodeName' "$CONFIG")
@@ -9,24 +11,69 @@ if tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then
9
11
  exit 0
10
12
  fi
11
13
 
14
+ # Resolve full paths to binaries — critical for systemd which uses a minimal PATH
15
+ CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "")
16
+ if [ -z "$CLAUDE_BIN" ]; then
17
+ # Check common install locations
18
+ for candidate in "$HOME/.local/bin/claude" /usr/local/bin/claude /usr/bin/claude; do
19
+ if [ -x "$candidate" ]; then
20
+ CLAUDE_BIN="$candidate"
21
+ break
22
+ fi
23
+ done
24
+ fi
25
+ if [ -z "$CLAUDE_BIN" ]; then
26
+ echo "ERROR: claude not found in PATH or common locations." >&2
27
+ exit 1
28
+ fi
29
+
30
+ BUN_BIN=$(command -v bun 2>/dev/null || echo "")
31
+ if [ -z "$BUN_BIN" ]; then
32
+ for candidate in "$HOME/.bun/bin/bun" /usr/local/bin/bun /usr/bin/bun; do
33
+ if [ -x "$candidate" ]; then
34
+ BUN_BIN="$candidate"
35
+ break
36
+ fi
37
+ done
38
+ fi
39
+
40
+ # Build PATH that includes directories for claude and bun so MCP plugins can find them
41
+ EXTRA_PATH=""
42
+ [ -n "$CLAUDE_BIN" ] && EXTRA_PATH="$(dirname "$CLAUDE_BIN")"
43
+ if [ -n "$BUN_BIN" ]; then
44
+ BUN_DIR="$(dirname "$BUN_BIN")"
45
+ if [ -n "$EXTRA_PATH" ]; then
46
+ EXTRA_PATH="$BUN_DIR:$EXTRA_PATH"
47
+ else
48
+ EXTRA_PATH="$BUN_DIR"
49
+ fi
50
+ fi
51
+ FULL_PATH="${EXTRA_PATH:+$EXTRA_PATH:}${PATH}"
52
+
12
53
  tmux new-session -d -s "$TMUX_SESSION" \
13
- "cd '$ONKOL_DIR' && claude \
54
+ "export PATH='$FULL_PATH'; cd '$ONKOL_DIR' && '$CLAUDE_BIN' \
14
55
  --dangerously-skip-permissions \
15
56
  --dangerously-load-development-channels server:discord-filtered \
16
- --mcp-config '$ONKOL_DIR/.mcp.json'"
57
+ --mcp-config '$ONKOL_DIR/.mcp.json'; echo 'Claude exited with code '\$?'. Press Enter to close.'; read"
58
+
59
+ # Verify the session actually started and stayed alive
60
+ sleep 2
61
+ if ! tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then
62
+ echo "ERROR: tmux session '$TMUX_SESSION' died immediately after creation." >&2
63
+ echo "Check that claude is working: $CLAUDE_BIN --version" >&2
64
+ exit 1
65
+ fi
17
66
 
18
67
  # Auto-accept interactive prompts (trust dialog + dev channels warning)
19
- # Background loop sends Enter every 2 seconds until claude reaches the prompt
20
- (
21
- for i in $(seq 1 10); do
22
- sleep 2
23
- PANE_CONTENT=$(tmux capture-pane -t "$TMUX_SESSION" -p 2>/dev/null || echo "")
24
- if echo "$PANE_CONTENT" | grep -q "^❯"; then
25
- break
26
- fi
27
- tmux send-keys -t "$TMUX_SESSION" Enter 2>/dev/null || true
28
- done
29
- ) &
68
+ # Sends Enter every 2 seconds until claude reaches the prompt
69
+ for i in $(seq 1 10); do
70
+ sleep 2
71
+ PANE_CONTENT=$(tmux capture-pane -t "$TMUX_SESSION" -p 2>/dev/null || echo "")
72
+ if echo "$PANE_CONTENT" | grep -q "^❯"; then
73
+ break
74
+ fi
75
+ tmux send-keys -t "$TMUX_SESSION" Enter 2>/dev/null || true
76
+ done
30
77
 
31
78
  echo "Orchestrator started in tmux session '$TMUX_SESSION'."
32
79
  echo "Attach with: tmux attach -t $TMUX_SESSION"
@@ -0,0 +1,192 @@
1
#!/bin/bash
# Worker watchdog — meant to be run periodically against the active workers.
# Pane content is classified by an LLM rather than by regex; when no LLM
# provider is configured only the "tmux window disappeared" check runs.

set -uo pipefail

ONKOL_DIR="$(cd "$(dirname "$0")/.." && pwd)"
CONFIG="${ONKOL_DIR}/config.json"
TRACKING="${ONKOL_DIR}/workers/tracking.json"

# Nothing to watch when the tracking file is missing or lists no workers.
if [ ! -f "$TRACKING" ]; then
  exit 0
fi
if [ "$(jq length "$TRACKING")" -eq 0 ]; then
  exit 0
fi

# Node identity and Discord credentials.
NODE_NAME=$(jq -r '.nodeName' "$CONFIG")
TMUX_SESSION="onkol-${NODE_NAME}"
BOT_TOKEN=$(jq -r '.botToken' "$CONFIG")
ORCHESTRATOR_CHANNEL=$(jq -r '.orchestratorChannelId' "$CONFIG")

# Optional watchdog LLM settings; each is empty when absent from the config.
WATCHDOG_PROVIDER=$(jq -r '.watchdog.provider // empty' "$CONFIG")
WATCHDOG_MODEL=$(jq -r '.watchdog.model // empty' "$CONFIG")
WATCHDOG_API_KEY=$(jq -r '.watchdog.apiKey // empty' "$CONFIG")

# Names of the tmux windows currently open in the node's session
# (empty when the session cannot be listed).
WINDOWS=$(tmux list-windows -t "$TMUX_SESSION" -F '#{window_name}' 2>/dev/null || echo "")
27
+
28
# Post a plain-text message to a Discord channel through the REST API.
#   $1 - channel ID
#   $2 - message text
# Best effort: curl's output is discarded; its exit status is the return value.
discord_msg() {
  local channel="$1" text="$2"
  local body

  # Let jq build the JSON body from an argument. Piping through `echo` would
  # append a trailing newline to every message and can mangle text that
  # begins with -n / -e.
  body=$(jq -n --arg content "$text" '{content: $content}')

  # -m 15 keeps a stalled connection from hanging the whole watchdog run
  # (same limit as the LLM request).
  curl -s -m 15 -X POST \
    "https://discord.com/api/v10/channels/${channel}/messages" \
    -H "Authorization: Bot ${BOT_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "$body" \
    > /dev/null 2>&1
}
37
+
38
# Print the neutral verdict used whenever the LLM cannot be consulted.
#   $1 - short explanation, reported under the same "reason" key the LLM uses
_llm_fallback() {
  printf '{"status":"unknown","action":"none","reason":"%s"}\n' "$1"
}

# Ask the configured LLM to classify a worker's terminal output.
#   $1 - captured tmux pane content
#   $2 - worker name
# Prints one JSON object on stdout: {"status": "...", "action": "...", "reason": "..."}
#   status: working | done_replied | done_silent | error | idle | unknown
#   action: none | nudge_reply | nudge_error | nudge_idle
# Any failure (no provider, missing config, failed request, non-JSON answer)
# yields status "unknown" with action "none", so callers never act on a guess.
llm_analyze() {
  local pane_content="$1"
  local worker_name="$2"

  # All supported providers expose an OpenAI-style chat completions endpoint
  # with bearer authentication; only the URL differs.
  local api_url=""
  local auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
  local model="$WATCHDOG_MODEL"

  case "$WATCHDOG_PROVIDER" in
    openrouter)
      api_url="https://openrouter.ai/api/v1/chat/completions"
      ;;
    gemini)
      api_url="https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"
      ;;
    custom)
      api_url=$(jq -r '.watchdog.apiUrl // empty' "$CONFIG")
      ;;
    *)
      _llm_fallback "no llm configured"
      return
      ;;
  esac

  if [ -z "$api_url" ] || [ -z "$WATCHDOG_API_KEY" ]; then
    _llm_fallback "missing api config"
    return
  fi

  local sys_prompt="You analyze Claude Code terminal output to determine a worker's state. Respond with ONLY a JSON object, no markdown fences.

Keys:
- status: one of: working, done_replied, done_silent, error, idle
- action: one of: none, nudge_reply, nudge_error, nudge_idle
- reason: one short sentence explaining your assessment

Rules:
- working: Claude is actively executing tools, thinking, or generating output. Action: none
- done_replied: Worker finished AND used the discord-filtered reply MCP tool (you'll see 'discord-filtered - reply (MCP)' with result 'sent'). Action: none
- done_silent: Worker finished work (wrote files, completed analysis, etc.) but NEVER used the reply MCP tool to send results to Discord. Action: nudge_reply
- error: Worker hit a fatal error and stopped (Traceback, FATAL, crash at the prompt). Action: nudge_error. Note: errors from EARLIER that the worker recovered from do NOT count.
- idle: Worker is sitting at the prompt with no clear completion or error. Action: nudge_idle"

  # Use jq to build the payload — handles all JSON escaping correctly
  local payload
  payload=$(jq -n \
    --arg model "$model" \
    --arg sys "$sys_prompt" \
    --arg user "Worker name: ${worker_name}

Terminal output (last 100 lines):
${pane_content}" \
    '{
      model: $model,
      messages: [
        {role: "system", content: $sys},
        {role: "user", content: $user}
      ],
      temperature: 0,
      max_tokens: 150
    }')

  local response
  response=$(curl -s -m 15 "$api_url" \
    -H "$auth_header" \
    -H "Content-Type: application/json" \
    -d "$payload" 2>/dev/null)

  # Extract the assistant message; empty on transport errors or error bodies.
  # printf instead of echo so backslashes/leading dashes pass through untouched.
  local content
  content=$(printf '%s\n' "$response" | jq -r '.choices[0].message.content // empty' 2>/dev/null)

  if [ -z "$content" ]; then
    _llm_fallback "llm call failed"
    return
  fi

  # Strip markdown fences if present, then flatten to a single line
  content=$(printf '%s\n' "$content" | sed 's/^```json//; s/^```//; s/```$//' | tr -d '\n')

  # Only pass the answer on when it parses as JSON
  if printf '%s\n' "$content" | jq . >/dev/null 2>&1; then
    printf '%s\n' "$content"
  else
    _llm_fallback "invalid llm response"
  fi
}
133
+
134
# Walk every worker that tracking.json marks as active. A worker whose tmux
# window is gone is reported to the orchestrator channel; otherwise, when an
# LLM provider is configured, its pane is classified and the worker is nudged
# (at most once per 10 minutes) if it looks finished-but-silent, errored or idle.
jq -r '.[] | select(.status == "active") | .name' "$TRACKING" | while read -r WORKER; do
  WORKER_DIR="$ONKOL_DIR/workers/$WORKER"
  TMUX_TARGET="${TMUX_SESSION}:${WORKER}"

  # Case 1: tmux window is gone — worker crashed (no LLM needed).
  # -F -x compares the name literally and as a whole line, so names
  # containing regex metacharacters (".", "*", "[") cannot mis-match.
  if ! printf '%s\n' "$WINDOWS" | grep -Fxq -- "$WORKER"; then
    discord_msg "$ORCHESTRATOR_CHANNEL" \
      "[watchdog] Worker **${WORKER}** has crashed — its tmux window is gone. Please check and decide: respawn or dissolve."
    continue
  fi

  # Without an LLM there is nothing further to check (no regex fallback — too brittle)
  if [ -z "$WATCHDOG_PROVIDER" ]; then
    continue
  fi

  # Nudge cooldown: skip workers nudged within the last 10 minutes.
  # The flag file's mtime records the most recent nudge.
  NUDGE_FLAG="$WORKER_DIR/.watchdog-last-nudge"
  if [ -f "$NUDGE_FLAG" ] && [ -z "$(find "$NUDGE_FLAG" -mmin +10 2>/dev/null)" ]; then
    continue
  fi

  # Capture the last 100 lines of the pane only once we know we will analyze it
  PANE_FULL=$(tmux capture-pane -t "$TMUX_TARGET" -p -S -100 2>/dev/null || echo "")

  # Ask LLM to analyze the pane
  ANALYSIS=$(llm_analyze "$PANE_FULL" "$WORKER")
  ACTION=$(printf '%s\n' "$ANALYSIS" | jq -r '.action // "none"')
  REASON=$(printf '%s\n' "$ANALYSIS" | jq -r '.reason // ""')

  # Pick the text typed into the worker's pane and the note for the orchestrator
  case "$ACTION" in
    nudge_reply)
      NUDGE_PROMPT="You appear to have finished your work but haven't sent results to Discord. Use the reply tool from the discord-filtered MCP server to send a summary of what you did and your findings. Use replyWithFile for any file deliverables. The user CANNOT see your terminal output."
      NUDGE_NOTE="Nudged it to send results via Discord."
      ;;
    nudge_error)
      NUDGE_PROMPT="You encountered an error. Use the reply tool to report this error to the user on Discord, then try to recover or ask for help."
      NUDGE_NOTE="Nudged it to report via Discord."
      ;;
    nudge_idle)
      NUDGE_PROMPT="You've been idle for a while. If you're done, use the reply tool to send your results to Discord. If you're stuck, use the reply tool to ask for help. The user cannot see your terminal."
      NUDGE_NOTE="Nudged it to respond."
      ;;
    *)
      # Worker is fine (working or already replied) — do nothing
      continue
      ;;
  esac

  touch "$NUDGE_FLAG"
  tmux send-keys -t "$TMUX_TARGET" "$NUDGE_PROMPT" Enter
  discord_msg "$ORCHESTRATOR_CHANNEL" \
    "[watchdog] Worker **${WORKER}** — $REASON. $NUDGE_NOTE"
done
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env bun
2
2
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
3
+ import { execSync } from 'child_process'
3
4
  import { createMcpServer } from './mcp-server.js'
4
5
  import { createDiscordClient } from './discord-client.js'
5
6
  import { MessageBatcher } from './message-batcher.js'
@@ -7,6 +8,7 @@ import { MessageBatcher } from './message-batcher.js'
7
8
  const BOT_TOKEN = process.env.DISCORD_BOT_TOKEN
8
9
  const CHANNEL_ID = process.env.DISCORD_CHANNEL_ID
9
10
  const ALLOWED_USERS: string[] = JSON.parse(process.env.DISCORD_ALLOWED_USERS || '[]')
11
+ const TMUX_TARGET = process.env.TMUX_TARGET || ''
10
12
 
11
13
  if (!BOT_TOKEN) {
12
14
  console.error('[discord-filtered] DISCORD_BOT_TOKEN is required')
@@ -17,13 +19,57 @@ if (!CHANNEL_ID) {
17
19
  process.exit(1)
18
20
  }
19
21
 
22
/**
 * Interrupts the Claude Code session running in the tmux pane named by
 * TMUX_TARGET by sending it the Escape key.
 *
 * @returns true when tmux accepted the key press; false when TMUX_TARGET is
 *          not set or the tmux command failed (both cases are logged).
 */
function sendInterrupt(): boolean {
  if (!TMUX_TARGET) {
    console.error('[discord-filtered] !stop received but TMUX_TARGET not set — cannot interrupt')
    return false
  }
  // Quote for the shell with single quotes (closing and reopening around any
  // embedded quote). JSON-style double quotes would still let the shell expand
  // `$`, backticks and backslashes inside the target name.
  const quotedTarget = `'${TMUX_TARGET.replace(/'/g, `'\\''`)}'`
  try {
    // Escape is Claude Code's interrupt key
    execSync(`tmux send-keys -t ${quotedTarget} Escape`, { stdio: 'pipe' })
    console.error(`[discord-filtered] Sent interrupt (Escape) to ${TMUX_TARGET}`)
    return true
  } catch (err) {
    console.error(`[discord-filtered] Failed to send interrupt: ${err}`)
    return false
  }
}
37
+
20
38
  const discord = createDiscordClient(
21
39
  { botToken: BOT_TOKEN, channelId: CHANNEL_ID, allowedUsers: ALLOWED_USERS },
22
40
  async (message) => {
41
+ const content = message.content
42
+ const isInterrupt = /^!stop\b/i.test(content)
43
+
44
+ if (isInterrupt) {
45
+ sendInterrupt()
46
+ // Strip the !stop prefix and forward the rest as a normal message
47
+ const rest = content.replace(/^!stop\s*/i, '').trim()
48
+ // React to confirm the interrupt was received
49
+ try { await message.react('🛑') } catch { /* ignore */ }
50
+ // Small delay to let Claude Code process the Escape before the new message arrives
51
+ await new Promise(r => setTimeout(r, 1500))
52
+ // Forward the message (with or without remaining text)
53
+ await mcpServer.notification({
54
+ method: 'notifications/claude/channel',
55
+ params: {
56
+ content: rest || '[interrupted by user]',
57
+ meta: {
58
+ channel_id: message.channel.id,
59
+ sender: message.author.username,
60
+ sender_id: message.author.id,
61
+ message_id: message.id,
62
+ interrupt: true,
63
+ },
64
+ },
65
+ })
66
+ return
67
+ }
68
+
23
69
  await mcpServer.notification({
24
70
  method: 'notifications/claude/channel',
25
71
  params: {
26
- content: message.content,
72
+ content: content,
27
73
  meta: {
28
74
  channel_id: message.channel.id,
29
75
  sender: message.author.username,