npm - nothumanallowed - Versions diffs - 13.3.1 → 13.4.6 - Mend

nothumanallowed 13.3.1 → 13.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +23 -2
package/package.json +1 -1
package/src/commands/ui.mjs +125 -27
package/src/constants.mjs +1 -1
package/src/services/tool-executor.mjs +217 -0
package/src/services/web-ui.mjs +725 -97

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # NotHumanAllowed
-**38 specialized AI agents, 80 tools, Studio visual workflows — all local, all free.** Security auditors, code architects, data analysts, DevOps engineers, technical writers — each with deep domain expertise. Use them individually, run complex multi-agent workflows in Studio, or let them deliberate together.
+**38 specialized AI agents, 81 tools, Studio visual workflows — all local, all free.** Security auditors, code architects, data analysts, DevOps engineers, technical writers — each with deep domain expertise. Use them individually, run complex multi-agent workflows in Studio, or let them deliberate together with Parliament mode.
 ## Quick Start
@@ -36,7 +36,8 @@ EmailAgent → WebSearchAgent → WriterAgent
 - **No configuration** — works with any LLM provider including Liara (free, no API key)
 - **Live canvas** — see each agent activate, stream output, and hand off to the next
-- **2–5 step workflows** — context flows automatically between steps
+- **HTML dashboard** — canvas generates a downloadable visual report (HTML + PDF)
+- **Parliament mode** — enable for 2+ specialist agents to cross-read and deliberate: R1 (independent), R2 (agents read each other), R3 (HERALD mediation), convergence score
 - Open `nha ui` → click **Studio** in the sidebar
 ## Daily Operations (PAO)
@@ -83,6 +84,26 @@ All data stored locally in `~/.nha/ops/`. Tokens encrypted with AES-256-GCM. You
 38 agents across 11 domains. Each agent is a standalone `.mjs` file you own locally — inspect it, modify it, run it offline.
+## Code Execution
+`execute_code` runs Python, JavaScript, or TypeScript in an isolated sandbox:
+```bash
+# Python with auto-installed packages
+nha chat
+> use execute_code to analyze this CSV with pandas
+# TypeScript
+> write and run a TypeScript script that parses this JSON
+```
+- **Isolated sandbox** — dedicated temp dir per run, deleted after execution
+- **Stripped environment** — subprocess never sees NHA API keys
+- **Package install** — `packages: ["pandas", "numpy"]` auto-installs via pip/npm
+- **Multi-file** — pass extra files (CSV, JSON, helper modules) via `files: [{path, content}]`
+- **SIGKILL on timeout** — 30s default, configurable up to 120s
+- **Returns** stdout, stderr, exit code, and list of files created in sandbox
 ### Security
 - **SABER** — Security audit, OWASP, threat modeling, pentest planning
 - **ZERO** — Vulnerability scanning, dependency audit, secret detection

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nothumanallowed",
-  "version": "13.3.1",
+  "version": "13.4.6",
   "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
   "type": "module",
   "bin": {

package/src/commands/ui.mjs CHANGED Viewed

@@ -2769,42 +2769,135 @@ export async function cmdUI(args) {
           return steps;
         };
-        // Use keyword plan directly — only fall back to LLM for genuinely ambiguous tasks
+        // ── Hybrid planning: keyword baseline + LLM refinement ──────────────────
+        //
+        // Strategy (3 tiers):
+        //
+        //   TIER 1 — Keyword baseline (always runs, <1ms, zero LLM):
+        //     Builds a solid plan from regex matches on the task. Reliable for all
+        //     known patterns. Already contains `reason` for each step.
+        //
+        //   TIER 2 — LLM refinement (runs when the baseline has ≥ 1 step and an LLM is configured):
+        //     Receives the task + the keyword plan as context. Can ADD missing steps,
+        //     REMOVE wrong ones, REORDER, and ADJUST prompts. Does NOT build from scratch.
+        //     Falls back to keyword plan on any parse/timeout error.
+        //
+        //   TIER 3 — LLM-only fallback (runs when keyword baseline is empty):
+        //     Task had zero keyword matches → pure LLM planning with full task text.
+        //     On error (or an empty LLM plan), falls back to a single WebSearchAgent step.
+        //
+        // Why this is safe now: SENTINEL's /api/studio/ is an intent-aware route.
+        // Prompt injection detection is disabled for this path — the body IS the task.
+        // Encoding attacks, rate limits, and toxicity checks remain fully active.
         const keywordSteps = buildKeywordPlan();
-        const taskIsComplex = !hasPdf && !hasEmail && !hasCalendar && !hasSearch && !hasGitHub && !hasSlack && !hasBriefing && !hasStrategy && !hasReputation && !hasCode && !hasWriting && !hasData && keywordSteps.length <= 1;
+        const hasKeywordPlan = keywordSteps.length > 0;
+        // Sanitize task for LLM: strip HTML tags and control chars (defensive, not SENTINEL).
+        const sanitizedTask = task.replace(/<[^>]*>/g, ' ').replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, '').trim();
+        // Build a compact JSON representation of the keyword plan for the LLM to refine.
+        const keywordPlanJson = hasKeywordPlan
+          ? JSON.stringify(keywordSteps.map(s => ({ agent: s.agent, label: s.label, reason: s.reason || '' })))
+          : '[]';
+        const planConfig = Object.assign({}, config, { thinking: 'off' });
         try {
-          let steps;
-          if (!taskIsComplex) {
-            // Use keyword plan directly — no LLM, no SENTINEL risk
-            process.stderr.write('[STUDIO PLAN KEYWORD] steps=' + keywordSteps.length + '\n');
-            steps = keywordSteps;
-          } else {
-            // Task is ambiguous — use LLM planner with sanitized short description
-            const shortTask = task.slice(0, 200).replace(/[`'"]/g, ' ');
-            const plannerLangStr = plannerLang;
-            const planPrompt = `Workflow planner. Goal: ${shortTask}\nLanguage: ${plannerLangStr}.\nOutput ONLY JSON:\n{"steps":[{"icon":"EMOJI","agent":"AGENT_NAME","label":"LABEL","prompt":"INSTRUCTION"}]}\nAgents: WebSearchAgent, EmailAgent, CalendarAgent, HERALD, ORACLE, ATHENA, CASSANDRA, MERCURY, QUILL, CanvasAgent (last, only if visual needed). 2-5 steps.`;
-            const planConfig = Object.assign({}, config, { thinking: 'off' });
-            const planRaw = await callLLM(planConfig, 'Output ONLY valid JSON. No explanation.', planPrompt, { max_tokens: 800 });
-            process.stderr.write('[STUDIO PLAN LLM RAW] ' + planRaw.slice(0, 400) + '\n');
+          let steps = keywordSteps;
+          // TIER 2 / 3: always attempt LLM if we have a working LLM config
+          if (config && (config.provider || config.apiKey || config.baseUrl)) {
             try {
+              let planPrompt;
+              let planSys;
+              if (hasKeywordPlan) {
+                // TIER 2: refine the keyword plan
+                planSys = `You are a workflow planner for NHA Studio. Output ONLY valid JSON — no explanation, no markdown.`;
+                planPrompt = `Task: ${sanitizedTask}
+Keyword-detected plan (JSON):
+${keywordPlanJson}
+Language for labels: ${plannerLang}.
+Review the plan above. You may:
+- ADD steps that are clearly needed but missing
+- REMOVE steps that are wrong for this task
+- REORDER steps to fix logical sequence (e.g. Notion before email)
+- ADJUST the "prompt" field of any step to better match the task
+- KEEP steps that are correct as-is
+Available agents: WebSearchAgent, DocumentReaderAgent, EmailAgent, CalendarAgent, GitHubAgent, SlackAgent, NotionAgent, HERALD, ORACLE, ATHENA, CASSANDRA, MERCURY, QUILL, DataAnalystAgent, polyglot, CanvasAgent (last, only if visual output needed).
+Output ONLY:
+{"steps":[{"icon":"EMOJI","agent":"AGENT_NAME","label":"LABEL","reason":"WHY THIS AGENT","prompt":"INSTRUCTION"}]}
+Rules:
+- 2 to 6 steps maximum
+- CanvasAgent only as the final step and only for complex multi-agent analyses
+- Keep existing reasons where step is unchanged, write a new reason when you add/change a step`;
+              } else {
+                // TIER 3: pure LLM planning — zero keyword matches
+                planSys = `You are a workflow planner for NHA Studio. Output ONLY valid JSON — no explanation, no markdown.`;
+                planPrompt = `Task: ${sanitizedTask}
+Language for labels: ${plannerLang}.
+Build a workflow plan for this task.
+Available agents: WebSearchAgent, DocumentReaderAgent, EmailAgent, CalendarAgent, GitHubAgent, SlackAgent, NotionAgent, HERALD, ORACLE, ATHENA, CASSANDRA, MERCURY, QUILL, DataAnalystAgent, polyglot, CanvasAgent.
+Output ONLY:
+{"steps":[{"icon":"EMOJI","agent":"AGENT_NAME","label":"LABEL","reason":"WHY THIS AGENT","prompt":"INSTRUCTION"}]}
+Rules:
+- 2 to 5 steps
+- HERALD = executive synthesis when no other specialist fits
+- CanvasAgent only as the final step for complex multi-agent workflows
+- reason = one sentence explaining why this agent was chosen`;
+              }
+              const planRaw = await callLLM(planConfig, planSys, planPrompt, { max_tokens: 900 });
+              process.stderr.write('[STUDIO PLAN LLM RAW] mode=' + (hasKeywordPlan ? 'refine' : 'pure') + ' len=' + planRaw.length + '\n');
+              // Parse LLM output — strip <think> blocks (Qwen3), markdown fences, extract JSON
               let clean = planRaw;
               let prev = '';
               while (prev !== clean) { prev = clean; clean = clean.replace(/<think>[\s\S]*?<\/think>/g, ''); }
-              clean = clean.trim().replace(/^```[\w]*\r?\n?/,'').replace(/\r?\n?```$/,'').trim();
+              clean = clean.trim().replace(/^```[\w]*\r?\n?/, '').replace(/\r?\n?```$/, '').trim();
               const jsonMatch = clean.match(/\{[\s\S]*\}/);
               const parsed = JSON.parse(jsonMatch ? jsonMatch[0] : clean);
-              steps = parsed.steps;
-            } catch (parseErr) {
-              process.stderr.write('[STUDIO PLAN PARSE ERR] ' + parseErr.message + '\n');
-              steps = keywordSteps;
+              if (Array.isArray(parsed.steps) && parsed.steps.length > 0) {
+                // Merge: LLM steps override keyword steps. Preserve `reason` from keyword where LLM kept same agent.
+                const keywordReasonMap = {};
+                keywordSteps.forEach(s => { keywordReasonMap[s.agent] = s.reason || ''; });
+                steps = parsed.steps.map(s => ({
+                  icon: s.icon || '\u{1F916}',
+                  agent: s.agent,
+                  label: s.label,
+                  reason: s.reason || keywordReasonMap[s.agent] || '',
+                  prompt: s.prompt,
+                }));
+                process.stderr.write('[STUDIO PLAN LLM OK] steps=' + steps.length + '\n');
+              } else {
+                process.stderr.write('[STUDIO PLAN LLM EMPTY] falling back to keyword plan\n');
+              }
+            } catch (llmErr) {
+              process.stderr.write('[STUDIO PLAN LLM ERR] ' + llmErr.message + ' — using keyword plan\n');
+              // steps already = keywordSteps, no action needed
             }
+          } else {
+            process.stderr.write('[STUDIO PLAN KEYWORD ONLY] no LLM config, steps=' + keywordSteps.length + '\n');
           }
+          // Final safety net: if everything failed and we have nothing, single web search step
           if (!Array.isArray(steps) || !steps.length) {
-            sendJSON(res, 500, { error: 'Empty workflow plan' });
-            logRequest(method, pathname, 500, Date.now() - start);
-            return;
+            steps = [{ icon: '\u{1F50D}', agent: 'WebSearchAgent', label: plannerLang === 'Italian' ? 'Ricerca web' : 'Web search', reason: plannerLang === 'Italian' ? 'Fallback: nessun piano costruito' : 'Fallback: no plan built', prompt: sanitizedTask }];
           }
           sendJSON(res, 200, { steps });
           logRequest(method, pathname, 200, Date.now() - start);
         } catch (e) {
@@ -3571,11 +3664,16 @@ ${writtenSoFar ? `## REPORT WRITTEN SO FAR (for consistency):\n${writtenSoFar.sl
               return out.join('');
             };
-            // If LLM output has no HTML tags → it's markdown → convert
-            if (!bodyHtml || !bodyHtml.includes('<')) {
-              const source = bodyHtml || context;
+            // Quality check: count <p> and <li> tags — if output is a sparse skeleton
+            // (many sections but almost no paragraph/list content), fall back to converting context directly.
+            const sectionCount = (bodyHtml.match(/<div[^>]*class="section/g) || []).length;
+            const contentCount = (bodyHtml.match(/<p[\s>]|<li[\s>]/g) || []).length;
+            const isSparse = bodyHtml.includes('<') && sectionCount > 0 && contentCount < sectionCount;
+            // If LLM output has no HTML tags, or is a sparse skeleton → use context directly
+            if (!bodyHtml || !bodyHtml.includes('<') || isSparse) {
+              const source = context || bodyHtml;
               const converted = mdToNhaHtml(source);
-              const agentNames = (stepDef && Array.isArray(stepDef)) ? '' : '';
               bodyHtml = `<div class="header"><h1>${reportTitle.replace(/</g,'&lt;')}</h1><p>NHA Studio Report \u00b7 ${today}</p><div class="meta"><span>${today}</span></div></div>` +
                 converted +
                 `<div class="footer">NHA Studio \u00b7 ${today}</div>`;

package/src/constants.mjs CHANGED Viewed

@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-export const VERSION = '13.3.1';
+export const VERSION = '13.4.6';
 export const BASE_URL = 'https://nothumanallowed.com/cli';
 export const API_BASE = 'https://nothumanallowed.com/api/v1';

package/src/services/tool-executor.mjs CHANGED Viewed

@@ -47,6 +47,17 @@ import {
 import { notify } from './notification.mjs';
+// ── execute_code: module-level tsx path cache ─────────────────────────────────
+// Resolved once lazily (first TypeScript execution) — avoids shell spawn on every call.
+import { execSync as _execSyncTsx } from 'child_process';
+let _tsxPath = undefined; // undefined = not yet resolved; null = not found; string = path
+function getTsxPath() {
+  if (_tsxPath !== undefined) return _tsxPath;
+  try { _tsxPath = _execSyncTsx('which tsx 2>/dev/null', { timeout: 3000 }).toString().trim() || null; }
+  catch { _tsxPath = null; }
+  return _tsxPath;
+}
 // ── Constants ────────────────────────────────────────────────────────────────
 /** Actions that mutate external state and require user confirmation. */
@@ -452,6 +463,21 @@ TOOLS:
     Download a file from Drive. For Google Docs/Sheets/Slides, exports as PDF.
     Returns the file as base64-encoded content. Use for binary files, PDFs, images.
+--- CODE EXECUTION ---
+81. execute_code(language: "python"|"javascript"|"typescript", code: string, files?: [{path: string, content: string}], packages?: string[], stdin?: string, timeout?: number)
+    Execute code in an isolated sandbox and return the full output.
+    - language: "python", "javascript", or "typescript"
+    - code: the main script to run
+    - files (optional): extra files to create in the sandbox before running (e.g. input CSVs, helper modules). Paths are relative to sandbox.
+    - packages (optional): pip or npm packages to install before execution (e.g. ["pandas","numpy"] for python, ["lodash"] for js)
+    - stdin (optional): text piped to the process stdin
+    - timeout (optional): seconds before SIGKILL, default 30, max 120
+    Returns: exit_code (0=success ✓, non-zero=failure ✗), stdout, stderr, list of files written in sandbox.
+    Sandbox: isolated temp dir, stripped env (no NHA API keys visible to subprocess), SIGKILL on timeout, sandbox deleted after run.
+    Use for: data analysis, algorithm verification, running Python scripts, CSV/JSON processing, math computations, generating files (charts, reports), testing TypeScript/JS logic.
+    Do NOT use for: network requests (use fetch_url), permanent file I/O (use file_write/file_read).
 RULES:
 - ABSOLUTE RULE: NEVER LIE. NEVER fabricate, invent, or guess information. If you do not know, say "I don't know." If a tool fails, say it failed. If you cannot see something, say so. Honesty is MORE important than being helpful.
 - CRITICAL ROUTING RULE — browser_open vs web_search:
@@ -1948,6 +1974,197 @@ export async function executeTool(action, params, config) {
       return `Found ${results.length} match${results.length > 1 ? 'es' : ''} for "${params.query}":\n\n${results.join('\n')}`;
     }
+    case 'execute_code': {
+      const {
+        language = 'python',
+        code,
+        files = [],    // [{path: string, content: string}] — extra files to write in sandbox
+        packages = [], // string[] — pip/npm packages to install before running
+        stdin = '',    // string — piped to process stdin
+        timeout = 30,  // seconds (max 120)
+      } = params;
+      if (!code || typeof code !== 'string') return 'execute_code: missing required param "code"';
+      // bash removed: unrestricted shell has full filesystem access and can exfiltrate data.
+      const SUPPORTED = ['python', 'javascript', 'typescript'];
+      if (!SUPPORTED.includes(language)) {
+        return `execute_code: unsupported language "${language}" — use: ${SUPPORTED.join(', ')}`;
+      }
+      const { spawn } = await import('child_process');
+      const os = await import('os');
+      const fs = await import('fs');
+      const path = await import('path');
+      const crypto = await import('crypto');
+      const MAX_OUTPUT_BYTES = 128 * 1024;   // 128 KB per stream
+      const TIMEOUT_MS = Math.min(Math.max(timeout, 5), 120) * 1000;
+      // ── Isolated sandbox directory ─────────────────────────────────────────
+      // Each execution gets its own temp dir — cleaned up after run.
+      // Subprocess never sees NHA's cwd or env vars (API keys etc.).
+      const sandboxId = crypto.default.randomBytes(8).toString('hex');
+      const sandboxDir = path.default.join(os.default.tmpdir(), `nha_sandbox_${sandboxId}`);
+      fs.default.mkdirSync(sandboxDir, { recursive: true });
+      // Stripped env — only safe POSIX vars, zero NHA secrets.
+      // NOTE: packages install runs with network access (pip/npm fetch from registries).
+      // This is an accepted risk for a local CLI tool — not suitable for server deployment.
+      const safeEnv = {
+        PATH: process.env.PATH || '/usr/local/bin:/usr/bin:/bin',
+        HOME: sandboxDir,
+        TMPDIR: sandboxDir,
+        LANG: 'en_US.UTF-8',
+        PYTHONDONTWRITEBYTECODE: '1',
+        PYTHONUNBUFFERED: '1',
+        NODE_NO_WARNINGS: '1',
+      };
+      const cleanup = () => {
+        try { fs.default.rmSync(sandboxDir, { recursive: true, force: true }); } catch { /* ignore */ }
+      };
+      try {
+        // ── Write extra files first ──────────────────────────────────────────
+        for (const f of (files || [])) {
+          if (!f.path || typeof f.content !== 'string') continue;
+          // Prevent path traversal — only allow relative paths inside sandbox
+          const safePath = path.default.join(sandboxDir, path.default.normalize(f.path).replace(/^(\.\.[/\\])+/, ''));
+          fs.default.mkdirSync(path.default.dirname(safePath), { recursive: true });
+          fs.default.writeFileSync(safePath, f.content, 'utf-8');
+        }
+        // ── Install packages ─────────────────────────────────────────────────
+        if (packages && packages.length > 0) {
+          const validPkgName = /^[a-zA-Z0-9@._/-]+$/;
+          const safePkgs = packages.filter(p => typeof p === 'string' && validPkgName.test(p) && p.length < 80);
+          if (safePkgs.length > 0) {
+            let installCmd, installArgs;
+            if (language === 'python') {
+              installCmd = 'pip3';
+              installArgs = ['install', '--quiet', '--target', path.default.join(sandboxDir, 'site-packages'), ...safePkgs];
+            } else {
+              // javascript / typescript
+              fs.default.writeFileSync(path.default.join(sandboxDir, 'package.json'), JSON.stringify({ type: 'module' }));
+              installCmd = 'npm';
+              installArgs = ['install', '--prefix', sandboxDir, '--no-save', '--quiet', ...safePkgs];
+            }
+            await new Promise((resolve) => {
+              const inst = spawn(installCmd, installArgs, { cwd: sandboxDir, env: safeEnv, timeout: 60_000 });
+              inst.on('close', resolve);
+              inst.on('error', resolve);
+            });
+          }
+        }
+        // ── Resolve runtime + write main entrypoint ──────────────────────────
+        let cmd, cmdArgs, mainFile;
+        if (language === 'python') {
+          mainFile = path.default.join(sandboxDir, 'main.py');
+          // Prepend sys.path so installed packages are found
+          const sitePkgs = path.default.join(sandboxDir, 'site-packages');
+          const preamble = `import sys; sys.path.insert(0, ${JSON.stringify(sitePkgs)})\n`;
+          fs.default.writeFileSync(mainFile, preamble + code, 'utf-8');
+          cmd = 'python3'; cmdArgs = [mainFile];
+        } else if (language === 'javascript') {
+          mainFile = path.default.join(sandboxDir, 'main.mjs');
+          fs.default.writeFileSync(mainFile, code, 'utf-8');
+          cmd = 'node'; cmdArgs = [mainFile];
+        } else if (language === 'typescript') {
+          // Prefer tsx (faster, more compatible), fallback to node --experimental-strip-types (Node 22+)
+          mainFile = path.default.join(sandboxDir, 'main.ts');
+          fs.default.writeFileSync(mainFile, code, 'utf-8');
+          const tsxPath = getTsxPath();
+          if (tsxPath) { cmd = tsxPath; cmdArgs = [mainFile]; }
+          else { cmd = 'node'; cmdArgs = ['--experimental-strip-types', mainFile]; }
+        }
+        // ── Execute ──────────────────────────────────────────────────────────
+        const result = await new Promise((resolve) => {
+          const child = spawn(cmd, cmdArgs, {
+            cwd: sandboxDir,
+            env: safeEnv,
+            stdio: ['pipe', 'pipe', 'pipe'],
+          });
+          // Feed stdin if provided
+          if (stdin) {
+            child.stdin.write(stdin);
+            child.stdin.end();
+          } else {
+            child.stdin.end();
+          }
+          let stdoutBuf = '';
+          let stderrBuf = '';
+          let stdoutTrunc = false;
+          let stderrTrunc = false;
+          child.stdout.on('data', (d) => {
+            if (stdoutBuf.length < MAX_OUTPUT_BYTES) stdoutBuf += d.toString();
+            else stdoutTrunc = true;
+          });
+          child.stderr.on('data', (d) => {
+            if (stderrBuf.length < MAX_OUTPUT_BYTES) stderrBuf += d.toString();
+            else stderrTrunc = true;
+          });
+          // Hard kill after timeout
+          const killer = setTimeout(() => {
+            try { child.kill('SIGKILL'); } catch {}
+            resolve({ exit_code: 124, stdout: stdoutBuf, stderr: stderrBuf, timed_out: true });
+          }, TIMEOUT_MS);
+          child.on('close', (exitCode) => {
+            clearTimeout(killer);
+            resolve({
+              exit_code: exitCode ?? 1,
+              stdout: stdoutBuf + (stdoutTrunc ? '\n[stdout truncated at 128 KB]' : ''),
+              stderr: stderrBuf + (stderrTrunc ? '\n[stderr truncated at 128 KB]' : ''),
+              timed_out: false,
+            });
+          });
+          child.on('error', (err) => {
+            clearTimeout(killer);
+            resolve({ exit_code: 1, stdout: '', stderr: `[spawn error] ${err.message}`, timed_out: false });
+          });
+        });
+        // ── Collect created/modified files ───────────────────────────────────
+        // List files written inside sandbox (excluding the main entrypoint and site-packages)
+        let createdFiles = [];
+        try {
+          const walk = (dir, base) => {
+            for (const entry of fs.default.readdirSync(dir, { withFileTypes: true })) {
+              const rel = base ? `${base}/${entry.name}` : entry.name;
+              if (entry.isDirectory()) {
+                if (!['site-packages', 'node_modules', '__pycache__'].includes(entry.name)) walk(path.default.join(dir, entry.name), rel);
+              } else if (!['main.py','main.mjs','main.ts','main.sh','package.json'].includes(entry.name)) {
+                const size = fs.default.statSync(path.default.join(dir, entry.name)).size;
+                createdFiles.push(`  ${rel} (${size} bytes)`);
+              }
+            }
+          };
+          walk(sandboxDir, '');
+        } catch {}
+        // ── Format response ──────────────────────────────────────────────────
+        const lines = [];
+        if (result.timed_out) lines.push(`⏱ TIMEOUT — execution exceeded ${timeout}s (exit 124)`);
+        lines.push(`exit_code: ${result.exit_code}${result.exit_code === 0 ? ' ✓' : ' ✗'}`);
+        if (result.stdout.trim()) lines.push(`\nstdout:\n${result.stdout.trimEnd()}`);
+        if (result.stderr.trim()) lines.push(`\nstderr:\n${result.stderr.trimEnd()}`);
+        if (!result.stdout.trim() && !result.stderr.trim()) lines.push('\n(no output)');
+        if (createdFiles.length > 0) lines.push(`\nfiles written in sandbox:\n${createdFiles.join('\n')}`);
+        return lines.join('\n');
+      } finally {
+        cleanup();
+      }
+    }
     default:
       return `Unknown action: ${action}`;
   }