npm - onkol - Versions diffs - 0.2.0 → 0.4.0 - Mend

onkol 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/cli/discord-api.d.ts +20 -0
package/dist/cli/discord-api.js +102 -0
package/dist/cli/index.js +122 -6
package/dist/cli/systemd.js +21 -3
package/package.json +7 -5
package/scripts/spawn-worker.sh +10 -4
package/scripts/start-orchestrator.sh +60 -13
package/scripts/worker-watchdog.sh +192 -0
package/src/plugin/index.ts +47 -1

package/dist/cli/discord-api.d.ts CHANGED Viewed

@@ -17,3 +17,23 @@ export declare function createChannel(token: string, guildId: string, name: stri
 }>;
 export declare function deleteChannel(token: string, channelId: string): Promise<void>;
 export declare function sendMessage(token: string, channelId: string, content: string): Promise<void>;
+/**
+ * Validates the bot token over REST only: `/users/@me` must succeed and report a
+ * bot account, and `/gateway/bot` must be fetchable. No gateway connection is
+ * opened, so intents are not verified here. Resolves to { ok: true } or { ok: false, error }.
+ */
+export declare function validateBotToken(token: string): Promise<{
+    ok: true;
+} | {
+    ok: false;
+    error: string;
+}>;
+/**
+ * Checks the MessageContent intent via a test gateway IDENTIFY. Resolves to a
+ * problem description (intent disabled, token rejected, gateway URL not fetchable),
+ * or null on READY — and also null on timeout/network error so setup isn't blocked.
+ *
+ * Note: The Discord REST API doesn't expose which intents are enabled.
+ * We do a quick WebSocket handshake to the gateway to detect DisallowedIntents.
+ */
+export declare function checkGatewayIntents(token: string): Promise<string | null>;

package/dist/cli/discord-api.js CHANGED Viewed

@@ -51,3 +51,105 @@ export async function sendMessage(token, channelId, content) {
     if (!res.ok)
         throw new Error(`Failed to send message: ${res.status} ${await res.text()}`);
 }
+/**
+ * Validates the bot token and checks if it can connect to the Discord gateway
+ * with the required intents (Guilds, GuildMessages, MessageContent).
+ * Returns { ok: true } or { ok: false, error: string }.
+ */
+export async function validateBotToken(token) {
+    // Step 1: Check the token is valid via /users/@me
+    const meRes = await fetch(`${DISCORD_API}/users/@me`, {
+        headers: { Authorization: `Bot ${token}` },
+    });
+    if (!meRes.ok) {
+        const body = await meRes.text();
+        if (meRes.status === 401)
+            return { ok: false, error: 'Invalid bot token.' };
+        return { ok: false, error: `Discord API error (${meRes.status}): ${body}` };
+    }
+    // Step 2: Get the bot's application to check if it's a bot token
+    const me = await meRes.json();
+    if (!me.bot)
+        return { ok: false, error: 'This token belongs to a user account, not a bot.' };
+    // Step 3: Try connecting to the gateway with the required intents to check for DisallowedIntents
+    // Intents: Guilds (1) | GuildMessages (512) | MessageContent (32768) = 33281
+    const gatewayRes = await fetch(`${DISCORD_API}/gateway/bot`, {
+        headers: { Authorization: `Bot ${token}` },
+    });
+    if (!gatewayRes.ok) {
+        const body = await gatewayRes.text();
+        return { ok: false, error: `Cannot fetch gateway info (${gatewayRes.status}): ${body}` };
+    }
+    return { ok: true };
+}
+/**
+ * Performs a lightweight check for MessageContent intent by attempting a
+ * test gateway connection. Returns a warning message if the intent appears
+ * to be disabled, or null if everything looks good.
+ *
+ * Note: The Discord REST API doesn't expose which intents are enabled.
+ * We do a quick WebSocket handshake to the gateway to detect DisallowedIntents.
+ */
+export function checkGatewayIntents(token) {
+    return new Promise(async (resolve) => {
+        const timeout = setTimeout(() => resolve(null), 10000); // assume OK if no response in 10s
+        try {
+            const gatewayRes = await fetch(`${DISCORD_API}/gateway/bot`, {
+                headers: { Authorization: `Bot ${token}` },
+            });
+            if (!gatewayRes.ok) {
+                clearTimeout(timeout);
+                resolve('Could not fetch gateway URL. Check your bot token.');
+                return;
+            }
+            const { url } = await gatewayRes.json();
+            // Dynamic import for WebSocket (works in both Node and Bun)
+            const WebSocket = (await import('ws')).default;
+            const ws = new WebSocket(`${url}?v=10&encoding=json`);
+            ws.on('message', (data) => {
+                try {
+                    const payload = JSON.parse(data.toString());
+                    if (payload.op === 10) {
+                        // Send IDENTIFY with the intents we need
+                        // Guilds=1, GuildMessages=512, MessageContent=32768
+                        ws.send(JSON.stringify({
+                            op: 2,
+                            d: {
+                                token,
+                                intents: 1 | 512 | 32768,
+                                properties: { os: 'linux', browser: 'onkol-setup', device: 'onkol-setup' },
+                            },
+                        }));
+                    }
+                    else if (payload.op === 0 && payload.t === 'READY') {
+                        // All good — intents accepted
+                        ws.close();
+                        clearTimeout(timeout);
+                        resolve(null);
+                    }
+                }
+                catch { /* ignore parse errors */ }
+            });
+            ws.on('close', (code) => {
+                clearTimeout(timeout);
+                if (code === 4014) {
+                    resolve('MessageContent intent is not enabled for this bot.\n' +
+                        '    Go to https://discord.com/developers/applications → your bot → Bot settings\n' +
+                        '    → Privileged Gateway Intents → enable "Message Content Intent" → Save');
+                }
+                else if (code === 4004) {
+                    resolve('Invalid bot token (gateway rejected authentication).');
+                }
+                // Other close codes are fine (we close it ourselves on READY)
+            });
+            ws.on('error', () => {
+                clearTimeout(timeout);
+                resolve(null); // network error, don't block setup
+            });
+        }
+        catch {
+            clearTimeout(timeout);
+            resolve(null);
+        }
+    });
+}

package/dist/cli/index.js CHANGED Viewed

@@ -8,7 +8,7 @@ import { mkdirSync, writeFileSync, readFileSync, copyFileSync, existsSync } from
 import { resolve } from 'path';
 import { execSync } from 'child_process';
 import { runSetupPrompts } from './prompts.js';
-import { createCategory, createChannel } from './discord-api.js';
+import { createCategory, createChannel, validateBotToken, checkGatewayIntents } from './discord-api.js';
 import { discoverServices, formatServicesMarkdown } from './auto-discover.js';
 import { renderOrchestratorClaude, renderSettings } from './templates.js';
 import { generateSystemdUnit, generateCrontab } from './systemd.js';
@@ -42,11 +42,69 @@ function markStep(homeDir, checkpoint, step) {
     checkpoint.completed.push(step);
     saveCheckpoint(homeDir, checkpoint);
 }
+function checkDependencies() {
+    console.log(chalk.bold('Checking dependencies...\n'));
+    const deps = [
+        {
+            name: 'claude',
+            check: 'claude --version',
+            installHint: 'Install Claude Code: https://docs.anthropic.com/en/docs/claude-code/getting-started',
+            required: true,
+        },
+        {
+            name: 'bun',
+            check: 'bun --version',
+            installHint: 'Install Bun: curl -fsSL https://bun.sh/install | bash',
+            required: true,
+        },
+        {
+            name: 'tmux',
+            check: 'tmux -V',
+            installHint: 'Install tmux:\n    Ubuntu/Debian: sudo apt install tmux\n    RHEL/CentOS:  sudo yum install tmux\n    Arch:         sudo pacman -S tmux\n    macOS:        brew install tmux',
+            required: true,
+        },
+        {
+            name: 'jq',
+            check: 'jq --version',
+            installHint: 'Install jq:\n    Ubuntu/Debian: sudo apt install jq\n    RHEL/CentOS:  sudo yum install jq\n    Arch:         sudo pacman -S jq\n    macOS:        brew install jq',
+            required: true,
+        },
+        {
+            name: 'curl',
+            check: 'curl --version',
+            installHint: 'Install curl:\n    Ubuntu/Debian: sudo apt install curl\n    RHEL/CentOS:  sudo yum install curl',
+            required: true,
+        },
+    ];
+    const missing = [];
+    for (const dep of deps) {
+        try {
+            execSync(dep.check, { stdio: 'pipe' });
+            console.log(chalk.green(`  ✓ ${dep.name}`));
+        }
+        catch {
+            console.log(chalk.red(`  ✗ ${dep.name} — not found`));
+            missing.push(dep);
+        }
+    }
+    if (missing.length > 0) {
+        console.log(chalk.red(`\nMissing ${missing.length} required dependencies:\n`));
+        for (const dep of missing) {
+            console.log(chalk.yellow(`  ${dep.name}:`));
+            console.log(chalk.gray(`    ${dep.installHint}\n`));
+        }
+        console.log(chalk.red('Install the missing dependencies and run `npx onkol setup` again.'));
+        process.exit(1);
+    }
+    console.log(chalk.green('\n  All dependencies found.\n'));
+}
 program
     .command('setup')
     .description('Set up an Onkol node on this VM')
     .action(async () => {
     console.log(chalk.bold('\nWelcome to Onkol Setup\n'));
+    // Check all dependencies before doing anything
+    checkDependencies();
     const homeDir = process.env.HOME || '/root';
     let answers;
     let checkpoint;
@@ -94,6 +152,25 @@ program
     if (answers.discordUserId.trim()) {
         allowedUsers.push(answers.discordUserId.trim());
     }
+    // --- Validate Discord bot token and intents ---
+    if (!skip('discord')) {
+        console.log(chalk.gray('Validating Discord bot token...'));
+        const tokenCheck = await validateBotToken(answers.botToken);
+        if (!tokenCheck.ok) {
+            console.error(chalk.red(`\nFATAL: ${tokenCheck.error}`));
+            console.error(chalk.yellow('\nYour answers have been saved. Fix the issue and run `npx onkol setup` again to resume.'));
+            process.exit(1);
+        }
+        console.log(chalk.green('✓ Bot token is valid'));
+        console.log(chalk.gray('Checking gateway intents...'));
+        const intentWarning = await checkGatewayIntents(answers.botToken);
+        if (intentWarning) {
+            console.error(chalk.red(`\nFATAL: ${intentWarning}`));
+            console.error(chalk.yellow('\nEnable the required intent and run `npx onkol setup` again to resume.'));
+            process.exit(1);
+        }
+        console.log(chalk.green('✓ Message Content intent is enabled'));
+    }
     // --- CRITICAL: Create Discord category and orchestrator channel ---
     let categoryId = checkpoint.categoryId || '';
     let orchChannelId = checkpoint.orchChannelId || '';
@@ -379,15 +456,54 @@ program
         }
         catch { /* ignore */ }
     }
-    // Start orchestrator
+    // Start orchestrator — try systemctl first (so service shows active), fall back to script
     console.log(chalk.gray('\nStarting orchestrator...'));
+    let started = false;
     try {
-        execSync(`bash "${resolve(dir, 'scripts/start-orchestrator.sh')}"`, { stdio: 'pipe' });
-        console.log(chalk.green(`✓ Orchestrator started in tmux session "onkol-${answers.nodeName}"`));
+        execSync(`sudo systemctl start onkol-${answers.nodeName}`, { stdio: 'pipe', timeout: 60000 });
+        // Wait for tmux session to appear (the start script itself verifies, but double-check)
+        for (let i = 0; i < 5; i++) {
+            try {
+                execSync(`tmux has-session -t onkol-${answers.nodeName}`, { stdio: 'pipe' });
+                started = true;
+                break;
+            }
+            catch { /* not ready yet */ }
+            execSync('sleep 2', { stdio: 'pipe' });
+        }
+        if (started) {
+            console.log(chalk.green(`✓ Orchestrator started via systemd (tmux session "onkol-${answers.nodeName}")`));
+        }
+        else {
+            // systemctl succeeded but tmux session not visible — likely PATH or env issue
+            console.log(chalk.yellow(`⚠ systemctl started but tmux session not found. Trying direct start...`));
+            try {
+                const logs = execSync(`sudo journalctl -u onkol-${answers.nodeName} --no-pager -n 10 2>&1`, { encoding: 'utf-8' });
+                if (logs.trim())
+                    console.log(chalk.gray(`  Journal: ${logs.trim().split('\n').slice(-3).join('\n  ')}`));
+            }
+            catch { /* ignore */ }
+        }
     }
     catch (err) {
-        console.log(chalk.yellow(`⚠ Could not start orchestrator automatically.`));
-        console.log(chalk.yellow(`  Start manually: ${dir}/scripts/start-orchestrator.sh`));
+        const msg = err instanceof Error ? err.message : String(err);
+        console.log(chalk.yellow(`⚠ systemctl start failed: ${msg.split('\n')[0]}`));
+    }
+    if (!started) {
+        try {
+            execSync(`bash "${resolve(dir, 'scripts/start-orchestrator.sh')}"`, { stdio: 'pipe', timeout: 60000 });
+            // Verify the session is actually running
+            execSync(`tmux has-session -t onkol-${answers.nodeName}`, { stdio: 'pipe' });
+            started = true;
+            console.log(chalk.green(`✓ Orchestrator started in tmux session "onkol-${answers.nodeName}"`));
+        }
+        catch {
+            console.log(chalk.red(`✗ Could not start orchestrator. The tmux session failed to stay alive.`));
+            console.log(chalk.yellow(`  Debug steps:`));
+            console.log(chalk.yellow(`    1. Run manually: bash ${dir}/scripts/start-orchestrator.sh`));
+            console.log(chalk.yellow(`    2. Check: tmux attach -t onkol-${answers.nodeName}`));
+            console.log(chalk.yellow(`    3. Verify claude works: claude --version`));
+        }
     }
     // Setup complete — clear checkpoint
     clearCheckpoint(homeDir);

package/dist/cli/systemd.js CHANGED Viewed

@@ -1,15 +1,33 @@
 export function generateSystemdUnit(nodeName, user, onkolDir) {
+    // Resolve PATH additions for claude and bun at generation time
+    const homeDir = process.env.HOME || `/home/${user}`;
+    const extraPaths = [
+        `${homeDir}/.local/bin`,
+        `${homeDir}/.bun/bin`,
+    ].filter(p => {
+        try {
+            return require('fs').existsSync(p);
+        }
+        catch {
+            return false;
+        }
+    });
+    const pathEnv = extraPaths.length > 0
+        ? `Environment=PATH=${extraPaths.join(':')}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin`
+        : '';
     return `[Unit]
 Description=Onkol Node: ${nodeName}
 After=network.target
 [Service]
-Type=forking
+Type=oneshot
+RemainAfterExit=yes
 User=${user}
+${pathEnv}
+Environment=HOME=${homeDir}
 ExecStart=${onkolDir}/scripts/start-orchestrator.sh
 ExecStop=/usr/bin/tmux kill-session -t onkol-${nodeName}
-Restart=on-failure
-RestartSec=10
+TimeoutStartSec=60
 [Install]
 WantedBy=multi-user.target

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "onkol",
-  "version": "0.2.0",
+  "version": "0.4.0",
   "description": "Decentralized on-call agent system powered by Claude Code",
   "type": "module",
   "bin": {
@@ -19,16 +19,18 @@
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.0.0",
+    "chalk": "^5.0.0",
+    "commander": "^13.0.0",
     "discord.js": "^14.0.0",
     "handlebars": "^4.7.0",
     "inquirer": "^12.0.0",
-    "chalk": "^5.0.0",
-    "commander": "^13.0.0"
+    "ws": "^8.20.0"
   },
   "devDependencies": {
     "@types/node": "^22.0.0",
-    "typescript": "^5.7.0",
-    "bun-types": "^1.2.0"
+    "@types/ws": "^8.18.1",
+    "bun-types": "^1.2.0",
+    "typescript": "^5.7.0"
   },
   "engines": {
     "node": ">=18.0.0"

package/scripts/spawn-worker.sh CHANGED Viewed

@@ -82,7 +82,8 @@ cat > "$WORKER_DIR/.mcp.json" << MCPEOF
       "env": {
         "DISCORD_BOT_TOKEN": "$BOT_TOKEN",
         "DISCORD_CHANNEL_ID": "$CHANNEL_ID",
-        "DISCORD_ALLOWED_USERS": "$ALLOWED_USERS_ESCAPED"
+        "DISCORD_ALLOWED_USERS": "$ALLOWED_USERS_ESCAPED",
+        "TMUX_TARGET": "${TMUX_SESSION}:${WORKER_NAME}"
       }
     }
   }
@@ -177,8 +178,13 @@ cat >> "$WORKER_DIR/CLAUDE.md" << STARTEOF
 Immediately when you start:
 1. Read $WORKER_DIR/task.md for your task
 2. Read $WORKER_DIR/context.md for context
-3. Begin work according to your intent
-4. Report progress and results using the reply tool to your Discord channel
+3. Use the \`reply\` tool to send "Starting work on: <brief task summary>" to Discord
+4. Begin work — send progress updates via \`reply\` every few steps
+5. When done, send your full results/summary via \`reply\` (split into <2000 char messages)
+6. For file deliverables, use \`replyWithFile\` to attach them
+IMPORTANT: The user CANNOT see your terminal. The ONLY way to communicate is the reply tool.
+If you complete work without sending results via reply, the user will never see your output.
 Do NOT wait for a message. Start working as soon as you boot.
 STARTEOF
@@ -196,7 +202,7 @@ TMUX_TARGET="${TMUX_SESSION}:${WORKER_NAME}"
     if echo "\$PANE_CONTENT" | grep -q "^❯"; then
       # Claude is ready — send the initial prompt via tmux keys
       sleep 1
-      tmux send-keys -t "\$TMUX_TARGET" "Read $WORKER_DIR/task.md and $WORKER_DIR/context.md, then begin work per CLAUDE.md." Enter
+      tmux send-keys -t "\$TMUX_TARGET" "Read $WORKER_DIR/task.md and $WORKER_DIR/context.md, then begin work. IMPORTANT: You MUST use the reply tool from the discord-filtered MCP server for ALL communication — send a starting message now, progress updates as you work, and final results when done. The user cannot see your terminal." Enter
       break
     fi
     tmux send-keys -t "\$TMUX_TARGET" Enter 2>/dev/null || true

package/scripts/start-orchestrator.sh CHANGED Viewed

@@ -1,4 +1,6 @@
 #!/bin/bash
+set -euo pipefail
 ONKOL_DIR="$(cd "$(dirname "$0")/.." && pwd)"
 CONFIG="$ONKOL_DIR/config.json"
 NODE_NAME=$(jq -r '.nodeName' "$CONFIG")
@@ -9,24 +11,69 @@ if tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then
   exit 0
 fi
+# Resolve full paths to binaries — critical for systemd which uses a minimal PATH
+CLAUDE_BIN=$(command -v claude 2>/dev/null || echo "")
+if [ -z "$CLAUDE_BIN" ]; then
+  # Check common install locations
+  for candidate in "$HOME/.local/bin/claude" /usr/local/bin/claude /usr/bin/claude; do
+    if [ -x "$candidate" ]; then
+      CLAUDE_BIN="$candidate"
+      break
+    fi
+  done
+fi
+if [ -z "$CLAUDE_BIN" ]; then
+  echo "ERROR: claude not found in PATH or common locations." >&2
+  exit 1
+fi
+BUN_BIN=$(command -v bun 2>/dev/null || echo "")
+if [ -z "$BUN_BIN" ]; then
+  for candidate in "$HOME/.bun/bin/bun" /usr/local/bin/bun /usr/bin/bun; do
+    if [ -x "$candidate" ]; then
+      BUN_BIN="$candidate"
+      break
+    fi
+  done
+fi
+# Build PATH that includes directories for claude and bun so MCP plugins can find them
+EXTRA_PATH=""
+[ -n "$CLAUDE_BIN" ] && EXTRA_PATH="$(dirname "$CLAUDE_BIN")"
+if [ -n "$BUN_BIN" ]; then
+  BUN_DIR="$(dirname "$BUN_BIN")"
+  if [ -n "$EXTRA_PATH" ]; then
+    EXTRA_PATH="$BUN_DIR:$EXTRA_PATH"
+  else
+    EXTRA_PATH="$BUN_DIR"
+  fi
+fi
+FULL_PATH="${EXTRA_PATH:+$EXTRA_PATH:}${PATH}"
 tmux new-session -d -s "$TMUX_SESSION" \
-  "cd '$ONKOL_DIR' && claude \
+  "export PATH='$FULL_PATH'; cd '$ONKOL_DIR' && '$CLAUDE_BIN' \
     --dangerously-skip-permissions \
     --dangerously-load-development-channels server:discord-filtered \
-    --mcp-config '$ONKOL_DIR/.mcp.json'"
+    --mcp-config '$ONKOL_DIR/.mcp.json'; echo 'Claude exited with code '\$?'. Press Enter to close.'; read"
+# Verify the session actually started and stayed alive
+sleep 2
+if ! tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then
+  echo "ERROR: tmux session '$TMUX_SESSION' died immediately after creation." >&2
+  echo "Check that claude is working: $CLAUDE_BIN --version" >&2
+  exit 1
+fi
 # Auto-accept interactive prompts (trust dialog + dev channels warning)
-# Background loop sends Enter every 2 seconds until claude reaches the ❯ prompt
-(
-  for i in $(seq 1 10); do
-    sleep 2
-    PANE_CONTENT=$(tmux capture-pane -t "$TMUX_SESSION" -p 2>/dev/null || echo "")
-    if echo "$PANE_CONTENT" | grep -q "^❯"; then
-      break
-    fi
-    tmux send-keys -t "$TMUX_SESSION" Enter 2>/dev/null || true
-  done
-) &
+# Sends Enter every 2 seconds until claude reaches the prompt
+for i in $(seq 1 10); do
+  sleep 2
+  PANE_CONTENT=$(tmux capture-pane -t "$TMUX_SESSION" -p 2>/dev/null || echo "")
+  if echo "$PANE_CONTENT" | grep -q "^❯"; then
+    break
+  fi
+  tmux send-keys -t "$TMUX_SESSION" Enter 2>/dev/null || true
+done
 echo "Orchestrator started in tmux session '$TMUX_SESSION'."
 echo "Attach with: tmux attach -t $TMUX_SESSION"

package/scripts/worker-watchdog.sh ADDED Viewed

@@ -0,0 +1,192 @@
+#!/bin/bash
+# Worker watchdog — runs periodically to check on active workers.
+# Uses an LLM to analyze tmux pane content instead of brittle regex.
+# Without an LLM configured, only the "tmux window is gone" crash check runs.
+set -uo pipefail
+ONKOL_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+CONFIG="$ONKOL_DIR/config.json"
+TRACKING="$ONKOL_DIR/workers/tracking.json"
+if [ ! -f "$TRACKING" ] || [ "$(jq length "$TRACKING")" -eq 0 ]; then
+  exit 0
+fi
+BOT_TOKEN=$(jq -r '.botToken' "$CONFIG")
+ORCHESTRATOR_CHANNEL=$(jq -r '.orchestratorChannelId' "$CONFIG")
+NODE_NAME=$(jq -r '.nodeName' "$CONFIG")
+TMUX_SESSION="onkol-${NODE_NAME}"
+# Watchdog LLM config (each value is the empty string when its key is null or absent)
+WATCHDOG_PROVIDER=$(jq -r '.watchdog.provider // empty' "$CONFIG")
+WATCHDOG_MODEL=$(jq -r '.watchdog.model // empty' "$CONFIG")
+WATCHDOG_API_KEY=$(jq -r '.watchdog.apiKey // empty' "$CONFIG")
+WINDOWS=$(tmux list-windows -t "$TMUX_SESSION" -F '#{window_name}' 2>/dev/null || echo "")
+# Post a plain-text message to a Discord channel as the bot.
+#   $1 — channel ID
+#   $2 — message text
+# Best-effort: curl output is discarded and the caller is not told about
+# failures, but the request is time-bounded so a stalled connection cannot
+# wedge a periodically scheduled watchdog run.
+discord_msg() {
+  local channel="$1" text="$2"
+  local body
+  # jq builds the whole JSON body, so quotes, backslashes and newlines in the
+  # text are escaped. Passing the text with --arg (instead of piping it through
+  # echo) also keeps text such as "-n" intact and avoids the trailing newline
+  # that echo appended to every message.
+  body=$(jq -n --arg content "$text" '{content: $content}')
+  curl -s -m 10 -X POST \
+    "https://discord.com/api/v10/channels/${channel}/messages" \
+    -H "Authorization: Bot ${BOT_TOKEN}" \
+    -H "Content-Type: application/json" \
+    -d "$body" \
+    > /dev/null 2>&1
+}
+# Ask the configured LLM to classify a worker's pane. $1 = pane text, $2 = worker name.
+# Prints one JSON object. Model replies are prompted for {status, action, reason};
+# local fallbacks print {"status":"unknown","action":"none","message":"..."} instead.
+# status: working | done_replied | done_silent | error | idle | unknown; action: none | nudge_reply | nudge_error | nudge_idle
+llm_analyze() {
+  local pane_content="$1"
+  local worker_name="$2"
+  # Determine API endpoint and headers based on provider
+  local api_url=""
+  local auth_header=""
+  local model="$WATCHDOG_MODEL"
+  case "$WATCHDOG_PROVIDER" in
+    openrouter)
+      api_url="https://openrouter.ai/api/v1/chat/completions"
+      auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
+      ;;
+    gemini)
+      api_url="https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"
+      auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
+      ;;
+    custom)
+      api_url=$(jq -r '.watchdog.apiUrl // empty' "$CONFIG")
+      auth_header="Authorization: Bearer ${WATCHDOG_API_KEY}"
+      ;;
+    *)
+      echo '{"status":"unknown","action":"none","message":"no llm configured"}'
+      return
+      ;;
+  esac
+  if [ -z "$api_url" ] || [ -z "$WATCHDOG_API_KEY" ]; then
+    echo '{"status":"unknown","action":"none","message":"missing api config"}'
+    return
+  fi
+  local sys_prompt="You analyze Claude Code terminal output to determine a worker's state. Respond with ONLY a JSON object, no markdown fences.
+Keys:
+- status: one of: working, done_replied, done_silent, error, idle
+- action: one of: none, nudge_reply, nudge_error, nudge_idle
+- reason: one short sentence explaining your assessment
+Rules:
+- working: Claude is actively executing tools, thinking, or generating output. Action: none
+- done_replied: Worker finished AND used the discord-filtered reply MCP tool (you'll see 'discord-filtered - reply (MCP)' with result 'sent'). Action: none
+- done_silent: Worker finished work (wrote files, completed analysis, etc.) but NEVER used the reply MCP tool to send results to Discord. Action: nudge_reply
+- error: Worker hit a fatal error and stopped (Traceback, FATAL, crash at the prompt). Action: nudge_error. Note: errors from EARLIER that the worker recovered from do NOT count.
+- idle: Worker is sitting at the prompt with no clear completion or error. Action: nudge_idle"
+  # Build the chat-completions payload with jq so every string is JSON-escaped correctly
+  local payload
+  payload=$(jq -n \
+    --arg model "$model" \
+    --arg sys "$sys_prompt" \
+    --arg user "Worker name: ${worker_name}
+Terminal output (last 100 lines):
+${pane_content}" \
+    '{
+      model: $model,
+      messages: [
+        {role: "system", content: $sys},
+        {role: "user", content: $user}
+      ],
+      temperature: 0,
+      max_tokens: 150
+    }')
+  local response
+  response=$(curl -s -m 15 "$api_url" \
+    -H "$auth_header" \
+    -H "Content-Type: application/json" \
+    -d "$payload" 2>/dev/null)
+  # Extract the assistant message text; empty when curl failed or the response has no choices
+  local content
+  content=$(echo "$response" | jq -r '.choices[0].message.content // empty' 2>/dev/null)
+  if [ -z "$content" ]; then
+    echo '{"status":"unknown","action":"none","message":"llm call failed"}'
+    return
+  fi
+  # Strip markdown fences if present, then join the remaining lines into one
+  content=$(echo "$content" | sed 's/^```json//; s/^```//; s/```$//' | tr -d '\n')
+  # Only pass the reply through if jq can parse it as JSON
+  if echo "$content" | jq . >/dev/null 2>&1; then
+    echo "$content"
+  else
+    echo '{"status":"unknown","action":"none","message":"invalid llm response"}'
+  fi
+}
+jq -r '.[] | select(.status == "active") | .name' "$TRACKING" | while read -r WORKER; do
+  WORKER_DIR="$ONKOL_DIR/workers/$WORKER"
+  WORKER_CHANNEL=$(jq -r ".[] | select(.name == \"$WORKER\") | .channelId" "$TRACKING")
+  TMUX_TARGET="${TMUX_SESSION}:${WORKER}"
+  # Case 1: tmux window is gone — worker crashed (no LLM needed)
+  if ! echo "$WINDOWS" | grep -q "^${WORKER}$"; then
+    discord_msg "$ORCHESTRATOR_CHANNEL" \
+      "[watchdog] Worker **${WORKER}** has crashed — its tmux window is gone. Please check and decide: respawn or dissolve."
+    continue
+  fi
+  # Capture pane content
+  PANE_FULL=$(tmux capture-pane -t "$TMUX_TARGET" -p -S -100 2>/dev/null || echo "")
+  # Use LLM if configured, otherwise skip (no more regex fallback — too brittle)
+  if [ -z "$WATCHDOG_PROVIDER" ]; then
+    continue
+  fi
+  # Check nudge cooldown (don't analyze more than once per 10 minutes per worker)
+  NUDGE_FLAG="$WORKER_DIR/.watchdog-last-nudge"
+  if [ -f "$NUDGE_FLAG" ] && [ -z "$(find "$NUDGE_FLAG" -mmin +10 2>/dev/null)" ]; then
+    continue
+  fi
+  # Ask LLM to analyze the pane
+  ANALYSIS=$(llm_analyze "$PANE_FULL" "$WORKER")
+  ACTION=$(echo "$ANALYSIS" | jq -r '.action // "none"')
+  STATUS=$(echo "$ANALYSIS" | jq -r '.status // "unknown"')
+  REASON=$(echo "$ANALYSIS" | jq -r '.reason // ""')
+  case "$ACTION" in
+    nudge_reply)
+      touch "$NUDGE_FLAG"
+      tmux send-keys -t "$TMUX_TARGET" \
+        "You appear to have finished your work but haven't sent results to Discord. Use the reply tool from the discord-filtered MCP server to send a summary of what you did and your findings. Use replyWithFile for any file deliverables. The user CANNOT see your terminal output." Enter
+      discord_msg "$ORCHESTRATOR_CHANNEL" \
+        "[watchdog] Worker **${WORKER}** — $REASON. Nudged it to send results via Discord."
+      ;;
+    nudge_error)
+      touch "$NUDGE_FLAG"
+      tmux send-keys -t "$TMUX_TARGET" \
+        "You encountered an error. Use the reply tool to report this error to the user on Discord, then try to recover or ask for help." Enter
+      discord_msg "$ORCHESTRATOR_CHANNEL" \
+        "[watchdog] Worker **${WORKER}** — $REASON. Nudged it to report via Discord."
+      ;;
+    nudge_idle)
+      touch "$NUDGE_FLAG"
+      tmux send-keys -t "$TMUX_TARGET" \
+        "You've been idle for a while. If you're done, use the reply tool to send your results to Discord. If you're stuck, use the reply tool to ask for help. The user cannot see your terminal." Enter
+      discord_msg "$ORCHESTRATOR_CHANNEL" \
+        "[watchdog] Worker **${WORKER}** — $REASON. Nudged it to respond."
+      ;;
+    none|*)
+      # Worker is fine (working or already replied) — do nothing
+      ;;
+  esac
+done

package/src/plugin/index.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 #!/usr/bin/env bun
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
+import { execFileSync, execSync } from 'child_process'
 import { createMcpServer } from './mcp-server.js'
 import { createDiscordClient } from './discord-client.js'
 import { MessageBatcher } from './message-batcher.js'
@@ -7,6 +8,7 @@ import { MessageBatcher } from './message-batcher.js'
 const BOT_TOKEN = process.env.DISCORD_BOT_TOKEN
 const CHANNEL_ID = process.env.DISCORD_CHANNEL_ID
 const ALLOWED_USERS: string[] = JSON.parse(process.env.DISCORD_ALLOWED_USERS || '[]')
+const TMUX_TARGET = process.env.TMUX_TARGET || ''
 if (!BOT_TOKEN) {
   console.error('[discord-filtered] DISCORD_BOT_TOKEN is required')
@@ -17,13 +19,57 @@ if (!CHANNEL_ID) {
   process.exit(1)
 }
+/**
+ * Interrupts the Claude Code session running in the tmux pane named by
+ * TMUX_TARGET by sending it the Escape key.
+ *
+ * tmux is invoked with an argument vector rather than through a shell, so a
+ * target containing `$`, backticks or quotes is passed to tmux verbatim instead
+ * of being expanded or executed by the shell.
+ *
+ * @returns true if tmux accepted the key; false if TMUX_TARGET is unset or tmux failed.
+ */
+function sendInterrupt(): boolean {
+  if (!TMUX_TARGET) {
+    console.error('[discord-filtered] !stop received but TMUX_TARGET not set — cannot interrupt')
+    return false
+  }
+  try {
+    // Escape is Claude Code's interrupt key
+    execFileSync('tmux', ['send-keys', '-t', TMUX_TARGET, 'Escape'], { stdio: 'pipe' })
+    console.error(`[discord-filtered] Sent interrupt (Escape) to ${TMUX_TARGET}`)
+    return true
+  } catch (err) {
+    console.error(`[discord-filtered] Failed to send interrupt: ${err}`)
+    return false
+  }
+}
 const discord = createDiscordClient(
   { botToken: BOT_TOKEN, channelId: CHANNEL_ID, allowedUsers: ALLOWED_USERS },
   async (message) => {
+    const content = message.content
+    const isInterrupt = /^!stop\b/i.test(content)
+    if (isInterrupt) {
+      sendInterrupt()
+      // Strip the !stop prefix and forward the rest as a normal message
+      const rest = content.replace(/^!stop\s*/i, '').trim()
+      // React to confirm the interrupt was received
+      try { await message.react('🛑') } catch { /* ignore */ }
+      // Small delay to let Claude Code process the Escape before the new message arrives
+      await new Promise(r => setTimeout(r, 1500))
+      // Forward the message (with or without remaining text)
+      await mcpServer.notification({
+        method: 'notifications/claude/channel',
+        params: {
+          content: rest || '[interrupted by user]',
+          meta: {
+            channel_id: message.channel.id,
+            sender: message.author.username,
+            sender_id: message.author.id,
+            message_id: message.id,
+            interrupt: true,
+          },
+        },
+      })
+      return
+    }
     await mcpServer.notification({
       method: 'notifications/claude/channel',
       params: {
-        content: message.content,
+        content: content,
         meta: {
           channel_id: message.channel.id,
           sender: message.author.username,