nothumanallowed 13.3.1 → 13.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # NotHumanAllowed
2
2
 
3
- **38 specialized AI agents, 80 tools, Studio visual workflows — all local, all free.** Security auditors, code architects, data analysts, DevOps engineers, technical writers — each with deep domain expertise. Use them individually, run complex multi-agent workflows in Studio, or let them deliberate together.
3
+ **38 specialized AI agents, 81 tools, Studio visual workflows — all local, all free.** Security auditors, code architects, data analysts, DevOps engineers, technical writers — each with deep domain expertise. Use them individually, run complex multi-agent workflows in Studio, or let them deliberate together with Parliament mode.
4
4
 
5
5
  ## Quick Start
6
6
 
@@ -36,7 +36,8 @@ EmailAgent → WebSearchAgent → WriterAgent
36
36
 
37
37
  - **No configuration** — works with any LLM provider including Liara (free, no API key)
38
38
  - **Live canvas** — see each agent activate, stream output, and hand off to the next
39
- - **2–5 step workflows** — context flows automatically between steps
39
+ - **HTML dashboard** — canvas generates a downloadable visual report (HTML + PDF)
40
+ - **Parliament mode** — enable for 2+ specialist agents to cross-read and deliberate: R1 (independent), R2 (agents read each other), R3 (HERALD mediation), convergence score
40
41
  - Open `nha ui` → click **Studio** in the sidebar
41
42
 
42
43
  ## Daily Operations (PAO)
@@ -83,6 +84,26 @@ All data stored locally in `~/.nha/ops/`. Tokens encrypted with AES-256-GCM. You
83
84
 
84
85
  38 agents across 11 domains. Each agent is a standalone `.mjs` file you own locally — inspect it, modify it, run it offline.
85
86
 
87
+ ## Code Execution
88
+
89
+ `execute_code` runs Python, JavaScript, or TypeScript in an isolated sandbox:
90
+
91
+ ```bash
92
+ # Python with auto-installed packages
93
+ nha chat
94
+ > use execute_code to analyze this CSV with pandas
95
+
96
+ # TypeScript
97
+ > write and run a TypeScript script that parses this JSON
98
+ ```
99
+
100
+ - **Isolated sandbox** — dedicated temp dir per run, deleted after execution
101
+ - **Stripped environment** — subprocess never sees NHA API keys
102
+ - **Package install** — `packages: ["pandas", "numpy"]` auto-installs via pip/npm
103
+ - **Multi-file** — pass extra files (CSV, JSON, helper modules) via `files: [{path, content}]`
104
+ - **SIGKILL on timeout** — 30s default, configurable from 5s up to 120s
105
+ - **Returns** stdout, stderr, exit code, and list of files created in sandbox
106
+
86
107
  ### Security
87
108
  - **SABER** — Security audit, OWASP, threat modeling, pentest planning
88
109
  - **ZERO** — Vulnerability scanning, dependency audit, secret detection
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nothumanallowed",
3
- "version": "13.3.1",
3
+ "version": "13.4.6",
4
4
  "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -2769,42 +2769,135 @@ export async function cmdUI(args) {
2769
2769
  return steps;
2770
2770
  };
2771
2771
 
2772
- // Use keyword plan directly — only fall back to LLM for genuinely ambiguous tasks
2772
+ // ── Hybrid planning: keyword baseline + LLM refinement ──────────────────
2773
+ //
2774
+ // Strategy (3 tiers):
2775
+ //
2776
+ // TIER 1 — Keyword baseline (always runs, <1ms, zero LLM):
2777
+ // Builds a solid plan from regex matches on the task. Reliable for all
2778
+ // known patterns. Already contains `reason` for each step.
2779
+ //
2780
+ // TIER 2 — LLM refinement (runs when the keyword baseline has ≥ 1 step and an LLM is configured):
2781
+ // Receives the task + the keyword plan as context. Can ADD missing steps,
2782
+ // REMOVE wrong ones, REORDER, and ADJUST prompts. Does NOT build from scratch.
2783
+ // Falls back to keyword plan on any parse/timeout error.
2784
+ //
2785
+ // TIER 3 — LLM-only fallback (runs when keyword baseline is empty):
2786
+ // Task had zero keyword matches → pure LLM planning with full task text.
2787
+ // Same fallback: on error, returns a single WebSearchAgent step.
2788
+ //
2789
+ // Why this is safe now: SENTINEL's /api/studio/ is an intent-aware route.
2790
+ // Prompt injection detection is disabled for this path — the body IS the task.
2791
+ // Encoding attacks, rate limits, and toxicity checks remain fully active.
2792
+
2773
2793
  const keywordSteps = buildKeywordPlan();
2774
- const taskIsComplex = !hasPdf && !hasEmail && !hasCalendar && !hasSearch && !hasGitHub && !hasSlack && !hasBriefing && !hasStrategy && !hasReputation && !hasCode && !hasWriting && !hasData && keywordSteps.length <= 1;
2794
+ const hasKeywordPlan = keywordSteps.length > 0;
2795
+
2796
+ // Sanitize task for LLM: strip HTML tags and control chars (defensive, not SENTINEL).
2797
+ const sanitizedTask = task.replace(/<[^>]*>/g, ' ').replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, '').trim();
2798
+
2799
+ // Build a compact JSON representation of the keyword plan for the LLM to refine.
2800
+ const keywordPlanJson = hasKeywordPlan
2801
+ ? JSON.stringify(keywordSteps.map(s => ({ agent: s.agent, label: s.label, reason: s.reason || '' })))
2802
+ : '[]';
2803
+
2804
+ const planConfig = Object.assign({}, config, { thinking: 'off' });
2775
2805
 
2776
2806
  try {
2777
- let steps;
2778
- if (!taskIsComplex) {
2779
- // Use keyword plan directly — no LLM, no SENTINEL risk
2780
- process.stderr.write('[STUDIO PLAN KEYWORD] steps=' + keywordSteps.length + '\n');
2781
- steps = keywordSteps;
2782
- } else {
2783
- // Task is ambiguous — use LLM planner with sanitized short description
2784
- const shortTask = task.slice(0, 200).replace(/[`'"]/g, ' ');
2785
- const plannerLangStr = plannerLang;
2786
- const planPrompt = `Workflow planner. Goal: ${shortTask}\nLanguage: ${plannerLangStr}.\nOutput ONLY JSON:\n{"steps":[{"icon":"EMOJI","agent":"AGENT_NAME","label":"LABEL","prompt":"INSTRUCTION"}]}\nAgents: WebSearchAgent, EmailAgent, CalendarAgent, HERALD, ORACLE, ATHENA, CASSANDRA, MERCURY, QUILL, CanvasAgent (last, only if visual needed). 2-5 steps.`;
2787
- const planConfig = Object.assign({}, config, { thinking: 'off' });
2788
- const planRaw = await callLLM(planConfig, 'Output ONLY valid JSON. No explanation.', planPrompt, { max_tokens: 800 });
2789
- process.stderr.write('[STUDIO PLAN LLM RAW] ' + planRaw.slice(0, 400) + '\n');
2807
+ let steps = keywordSteps;
2808
+
2809
+ // TIER 2 / 3: always attempt LLM if we have a working LLM config
2810
+ if (config && (config.provider || config.apiKey || config.baseUrl)) {
2790
2811
  try {
2812
+ let planPrompt;
2813
+ let planSys;
2814
+
2815
+ if (hasKeywordPlan) {
2816
+ // TIER 2: refine the keyword plan
2817
+ planSys = `You are a workflow planner for NHA Studio. Output ONLY valid JSON — no explanation, no markdown.`;
2818
+ planPrompt = `Task: ${sanitizedTask}
2819
+
2820
+ Keyword-detected plan (JSON):
2821
+ ${keywordPlanJson}
2822
+
2823
+ Language for labels: ${plannerLang}.
2824
+
2825
+ Review the plan above. You may:
2826
+ - ADD steps that are clearly needed but missing
2827
+ - REMOVE steps that are wrong for this task
2828
+ - REORDER steps to fix logical sequence (e.g. Notion before email)
2829
+ - ADJUST the "prompt" field of any step to better match the task
2830
+ - KEEP steps that are correct as-is
2831
+
2832
+ Available agents: WebSearchAgent, DocumentReaderAgent, EmailAgent, CalendarAgent, GitHubAgent, SlackAgent, NotionAgent, HERALD, ORACLE, ATHENA, CASSANDRA, MERCURY, QUILL, DataAnalystAgent, polyglot, CanvasAgent (last, only if visual output needed).
2833
+
2834
+ Output ONLY:
2835
+ {"steps":[{"icon":"EMOJI","agent":"AGENT_NAME","label":"LABEL","reason":"WHY THIS AGENT","prompt":"INSTRUCTION"}]}
2836
+
2837
+ Rules:
2838
+ - 2 to 6 steps maximum
2839
+ - CanvasAgent only as the final step and only for complex multi-agent analyses
2840
+ - Keep existing reasons where step is unchanged, write a new reason when you add/change a step`;
2841
+ } else {
2842
+ // TIER 3: pure LLM planning — zero keyword matches
2843
+ planSys = `You are a workflow planner for NHA Studio. Output ONLY valid JSON — no explanation, no markdown.`;
2844
+ planPrompt = `Task: ${sanitizedTask}
2845
+
2846
+ Language for labels: ${plannerLang}.
2847
+
2848
+ Build a workflow plan for this task.
2849
+
2850
+ Available agents: WebSearchAgent, DocumentReaderAgent, EmailAgent, CalendarAgent, GitHubAgent, SlackAgent, NotionAgent, HERALD, ORACLE, ATHENA, CASSANDRA, MERCURY, QUILL, DataAnalystAgent, polyglot, CanvasAgent.
2851
+
2852
+ Output ONLY:
2853
+ {"steps":[{"icon":"EMOJI","agent":"AGENT_NAME","label":"LABEL","reason":"WHY THIS AGENT","prompt":"INSTRUCTION"}]}
2854
+
2855
+ Rules:
2856
+ - 2 to 5 steps
2857
+ - HERALD = executive synthesis when no other specialist fits
2858
+ - CanvasAgent only as the final step for complex multi-agent workflows
2859
+ - reason = one sentence explaining why this agent was chosen`;
2860
+ }
2861
+
2862
+ const planRaw = await callLLM(planConfig, planSys, planPrompt, { max_tokens: 900 });
2863
+ process.stderr.write('[STUDIO PLAN LLM RAW] mode=' + (hasKeywordPlan ? 'refine' : 'pure') + ' len=' + planRaw.length + '\n');
2864
+
2865
+ // Parse LLM output — strip <think> blocks (Qwen3), markdown fences, extract JSON
2791
2866
  let clean = planRaw;
2792
2867
  let prev = '';
2793
2868
  while (prev !== clean) { prev = clean; clean = clean.replace(/<think>[\s\S]*?<\/think>/g, ''); }
2794
- clean = clean.trim().replace(/^```[\w]*\r?\n?/,'').replace(/\r?\n?```$/,'').trim();
2869
+ clean = clean.trim().replace(/^```[\w]*\r?\n?/, '').replace(/\r?\n?```$/, '').trim();
2795
2870
  const jsonMatch = clean.match(/\{[\s\S]*\}/);
2796
2871
  const parsed = JSON.parse(jsonMatch ? jsonMatch[0] : clean);
2797
- steps = parsed.steps;
2798
- } catch (parseErr) {
2799
- process.stderr.write('[STUDIO PLAN PARSE ERR] ' + parseErr.message + '\n');
2800
- steps = keywordSteps;
2872
+
2873
+ if (Array.isArray(parsed.steps) && parsed.steps.length > 0) {
2874
+ // Merge: LLM steps replace the keyword steps. When the LLM omits `reason` for a step, reuse the keyword plan's reason for the same agent.
2875
+ const keywordReasonMap = {};
2876
+ keywordSteps.forEach(s => { keywordReasonMap[s.agent] = s.reason || ''; });
2877
+ steps = parsed.steps.map(s => ({
2878
+ icon: s.icon || '\u{1F916}',
2879
+ agent: s.agent,
2880
+ label: s.label,
2881
+ reason: s.reason || keywordReasonMap[s.agent] || '',
2882
+ prompt: s.prompt,
2883
+ }));
2884
+ process.stderr.write('[STUDIO PLAN LLM OK] steps=' + steps.length + '\n');
2885
+ } else {
2886
+ process.stderr.write('[STUDIO PLAN LLM EMPTY] falling back to keyword plan\n');
2887
+ }
2888
+ } catch (llmErr) {
2889
+ process.stderr.write('[STUDIO PLAN LLM ERR] ' + llmErr.message + ' — using keyword plan\n');
2890
+ // steps already = keywordSteps, no action needed
2801
2891
  }
2892
+ } else {
2893
+ process.stderr.write('[STUDIO PLAN KEYWORD ONLY] no LLM config, steps=' + keywordSteps.length + '\n');
2802
2894
  }
2895
+
2896
+ // Final safety net: if everything failed and we have nothing, single web search step
2803
2897
  if (!Array.isArray(steps) || !steps.length) {
2804
- sendJSON(res, 500, { error: 'Empty workflow plan' });
2805
- logRequest(method, pathname, 500, Date.now() - start);
2806
- return;
2898
+ steps = [{ icon: '\u{1F50D}', agent: 'WebSearchAgent', label: plannerLang === 'Italian' ? 'Ricerca web' : 'Web search', reason: plannerLang === 'Italian' ? 'Fallback: nessun piano costruito' : 'Fallback: no plan built', prompt: sanitizedTask }];
2807
2899
  }
2900
+
2808
2901
  sendJSON(res, 200, { steps });
2809
2902
  logRequest(method, pathname, 200, Date.now() - start);
2810
2903
  } catch (e) {
@@ -3571,11 +3664,16 @@ ${writtenSoFar ? `## REPORT WRITTEN SO FAR (for consistency):\n${writtenSoFar.sl
3571
3664
  return out.join('');
3572
3665
  };
3573
3666
 
3574
- // If LLM output has no HTML tags, it's markdown — convert it
3575
- if (!bodyHtml || !bodyHtml.includes('<')) {
3576
- const source = bodyHtml || context;
3667
+ // Quality check: count <p> and <li> tags — if output is a sparse skeleton
3668
+ // (many sections but almost no paragraph/list content), fall back to converting context directly.
3669
+ const sectionCount = (bodyHtml.match(/<div[^>]*class="section/g) || []).length;
3670
+ const contentCount = (bodyHtml.match(/<p[\s>]|<li[\s>]/g) || []).length;
3671
+ const isSparse = bodyHtml.includes('<') && sectionCount > 0 && contentCount < sectionCount;
3672
+
3673
+ // If LLM output has no HTML tags, or is a sparse skeleton → use context directly
3674
+ if (!bodyHtml || !bodyHtml.includes('<') || isSparse) {
3675
+ const source = context || bodyHtml;
3577
3676
  const converted = mdToNhaHtml(source);
3578
- const agentNames = (stepDef && Array.isArray(stepDef)) ? '' : '';
3579
3677
  bodyHtml = `<div class="header"><h1>${reportTitle.replace(/</g,'&lt;')}</h1><p>NHA Studio Report \u00b7 ${today}</p><div class="meta"><span>${today}</span></div></div>` +
3580
3678
  converted +
3581
3679
  `<div class="footer">NHA Studio \u00b7 ${today}</div>`;
package/src/constants.mjs CHANGED
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
5
5
  const __filename = fileURLToPath(import.meta.url);
6
6
  const __dirname = path.dirname(__filename);
7
7
 
8
- export const VERSION = '13.3.1';
8
+ export const VERSION = '13.4.6';
9
9
  export const BASE_URL = 'https://nothumanallowed.com/cli';
10
10
  export const API_BASE = 'https://nothumanallowed.com/api/v1';
11
11
 
@@ -47,6 +47,17 @@ import {
47
47
 
48
48
  import { notify } from './notification.mjs';
49
49
 
50
+ // ── execute_code: module-level tsx path cache ─────────────────────────────────
51
+ // Resolved once lazily (first TypeScript execution) — avoids shell spawn on every call.
52
+ import { execSync as _execSyncTsx } from 'child_process';
53
+ let _tsxPath = undefined; // undefined = not yet resolved; null = not found; string = path
54
+ function getTsxPath() {
55
+ if (_tsxPath !== undefined) return _tsxPath;
56
+ try { _tsxPath = _execSyncTsx('which tsx 2>/dev/null', { timeout: 3000 }).toString().trim() || null; }
57
+ catch { _tsxPath = null; }
58
+ return _tsxPath;
59
+ }
60
+
50
61
  // ── Constants ────────────────────────────────────────────────────────────────
51
62
 
52
63
  /** Actions that mutate external state and require user confirmation. */
@@ -452,6 +463,21 @@ TOOLS:
452
463
  Download a file from Drive. For Google Docs/Sheets/Slides, exports as PDF.
453
464
  Returns the file as base64-encoded content. Use for binary files, PDFs, images.
454
465
 
466
+ --- CODE EXECUTION ---
467
+
468
+ 81. execute_code(language: "python"|"javascript"|"typescript", code: string, files?: [{path: string, content: string}], packages?: string[], stdin?: string, timeout?: number)
469
+ Execute code in an isolated sandbox and return the full output.
470
+ - language: "python", "javascript", or "typescript"
471
+ - code: the main script to run
472
+ - files (optional): extra files to create in the sandbox before running (e.g. input CSVs, helper modules). Paths are relative to sandbox.
473
+ - packages (optional): pip or npm packages to install before execution (e.g. ["pandas","numpy"] for python, ["lodash"] for js)
474
+ - stdin (optional): text piped to the process stdin
475
+ - timeout (optional): seconds before SIGKILL, default 30, max 120
476
+ Returns: exit_code (0=success ✓, non-zero=failure ✗), stdout, stderr, list of files written in sandbox.
477
+ Sandbox: isolated temp dir, stripped env (no NHA API keys visible to subprocess), SIGKILL on timeout, sandbox deleted after run.
478
+ Use for: data analysis, algorithm verification, running Python scripts, CSV/JSON processing, math computations, generating files (charts, reports), testing TypeScript/JS logic.
479
+ Do NOT use for: network requests (use fetch_url), permanent file I/O (use file_write/file_read).
480
+
455
481
  RULES:
456
482
  - ABSOLUTE RULE: NEVER LIE. NEVER fabricate, invent, or guess information. If you do not know, say "I don't know." If a tool fails, say it failed. If you cannot see something, say so. Honesty is MORE important than being helpful.
457
483
  - CRITICAL ROUTING RULE — browser_open vs web_search:
@@ -1948,6 +1974,197 @@ export async function executeTool(action, params, config) {
1948
1974
  return `Found ${results.length} match${results.length > 1 ? 'es' : ''} for "${params.query}":\n\n${results.join('\n')}`;
1949
1975
  }
1950
1976
 
1977
+ case 'execute_code': {
1978
+ const {
1979
+ language = 'python',
1980
+ code,
1981
+ files = [], // [{path: string, content: string}] — extra files to write in sandbox
1982
+ packages = [], // string[] — pip/npm packages to install before running
1983
+ stdin = '', // string — piped to process stdin
1984
+ timeout = 30, // seconds (max 120)
1985
+ } = params;
1986
+
1987
+ if (!code || typeof code !== 'string') return 'execute_code: missing required param "code"';
1988
+
1989
+ // bash removed: unrestricted shell has full filesystem access and can exfiltrate data.
1990
+ const SUPPORTED = ['python', 'javascript', 'typescript'];
1991
+ if (!SUPPORTED.includes(language)) {
1992
+ return `execute_code: unsupported language "${language}" — use: ${SUPPORTED.join(', ')}`;
1993
+ }
1994
+
1995
+ const { spawn } = await import('child_process');
1996
+ const os = await import('os');
1997
+ const fs = await import('fs');
1998
+ const path = await import('path');
1999
+ const crypto = await import('crypto');
2000
+
2001
+ const MAX_OUTPUT_BYTES = 128 * 1024; // 128 KB per stream
2002
+ const TIMEOUT_MS = Math.min(Math.max(timeout, 5), 120) * 1000;
2003
+
2004
+ // ── Isolated sandbox directory ─────────────────────────────────────────
2005
+ // Each execution gets its own temp dir — cleaned up after run.
2006
+ // Subprocess never sees NHA's cwd or env vars (API keys etc.).
2007
+ const sandboxId = crypto.default.randomBytes(8).toString('hex');
2008
+ const sandboxDir = path.default.join(os.default.tmpdir(), `nha_sandbox_${sandboxId}`);
2009
+ fs.default.mkdirSync(sandboxDir, { recursive: true });
2010
+
2011
+ // Stripped env — only safe POSIX vars, zero NHA secrets.
2012
+ // NOTE: packages install runs with network access (pip/npm fetch from registries).
2013
+ // This is an accepted risk for a local CLI tool — not suitable for server deployment.
2014
+ const safeEnv = {
2015
+ PATH: process.env.PATH || '/usr/local/bin:/usr/bin:/bin',
2016
+ HOME: sandboxDir,
2017
+ TMPDIR: sandboxDir,
2018
+ LANG: 'en_US.UTF-8',
2019
+ PYTHONDONTWRITEBYTECODE: '1',
2020
+ PYTHONUNBUFFERED: '1',
2021
+ NODE_NO_WARNINGS: '1',
2022
+ };
2023
+
2024
+ const cleanup = () => {
2025
+ try { fs.default.rmSync(sandboxDir, { recursive: true, force: true }); } catch { /* ignore */ }
2026
+ };
2027
+
2028
+ try {
2029
+ // ── Write extra files first ──────────────────────────────────────────
2030
+ for (const f of (files || [])) {
2031
+ if (!f.path || typeof f.content !== 'string') continue;
2032
+ // Prevent path traversal — only allow relative paths inside sandbox
2033
+ const safePath = path.default.join(sandboxDir, path.default.normalize(f.path).replace(/^(\.\.[/\\])+/, ''));
2034
+ fs.default.mkdirSync(path.default.dirname(safePath), { recursive: true });
2035
+ fs.default.writeFileSync(safePath, f.content, 'utf-8');
2036
+ }
2037
+
2038
+ // ── Install packages ─────────────────────────────────────────────────
2039
+ if (packages && packages.length > 0) {
2040
+ const validPkgName = /^[a-zA-Z0-9@._/-]+$/;
2041
+ const safePkgs = packages.filter(p => typeof p === 'string' && validPkgName.test(p) && p.length < 80);
2042
+ if (safePkgs.length > 0) {
2043
+ let installCmd, installArgs;
2044
+ if (language === 'python') {
2045
+ installCmd = 'pip3';
2046
+ installArgs = ['install', '--quiet', '--target', path.default.join(sandboxDir, 'site-packages'), ...safePkgs];
2047
+ } else {
2048
+ // javascript / typescript
2049
+ fs.default.writeFileSync(path.default.join(sandboxDir, 'package.json'), JSON.stringify({ type: 'module' }));
2050
+ installCmd = 'npm';
2051
+ installArgs = ['install', '--prefix', sandboxDir, '--no-save', '--quiet', ...safePkgs];
2052
+ }
2053
+ await new Promise((resolve) => {
2054
+ const inst = spawn(installCmd, installArgs, { cwd: sandboxDir, env: safeEnv, timeout: 60_000 });
2055
+ inst.on('close', resolve);
2056
+ inst.on('error', resolve);
2057
+ });
2058
+ }
2059
+ }
2060
+
2061
+ // ── Resolve runtime + write main entrypoint ──────────────────────────
2062
+ let cmd, cmdArgs, mainFile;
2063
+ if (language === 'python') {
2064
+ mainFile = path.default.join(sandboxDir, 'main.py');
2065
+ // Prepend sys.path so installed packages are found
2066
+ const sitePkgs = path.default.join(sandboxDir, 'site-packages');
2067
+ const preamble = `import sys; sys.path.insert(0, ${JSON.stringify(sitePkgs)})\n`;
2068
+ fs.default.writeFileSync(mainFile, preamble + code, 'utf-8');
2069
+ cmd = 'python3'; cmdArgs = [mainFile];
2070
+ } else if (language === 'javascript') {
2071
+ mainFile = path.default.join(sandboxDir, 'main.mjs');
2072
+ fs.default.writeFileSync(mainFile, code, 'utf-8');
2073
+ cmd = 'node'; cmdArgs = [mainFile];
2074
+ } else if (language === 'typescript') {
2075
+ // Prefer tsx (faster, more compatible), fallback to node --experimental-strip-types (Node 22+)
2076
+ mainFile = path.default.join(sandboxDir, 'main.ts');
2077
+ fs.default.writeFileSync(mainFile, code, 'utf-8');
2078
+ const tsxPath = getTsxPath();
2079
+ if (tsxPath) { cmd = tsxPath; cmdArgs = [mainFile]; }
2080
+ else { cmd = 'node'; cmdArgs = ['--experimental-strip-types', mainFile]; }
2081
+ }
2082
+
2083
+ // ── Execute ──────────────────────────────────────────────────────────
2084
+ const result = await new Promise((resolve) => {
2085
+ const child = spawn(cmd, cmdArgs, {
2086
+ cwd: sandboxDir,
2087
+ env: safeEnv,
2088
+ stdio: ['pipe', 'pipe', 'pipe'],
2089
+ });
2090
+
2091
+ // Feed stdin if provided
2092
+ if (stdin) {
2093
+ child.stdin.write(stdin);
2094
+ child.stdin.end();
2095
+ } else {
2096
+ child.stdin.end();
2097
+ }
2098
+
2099
+ let stdoutBuf = '';
2100
+ let stderrBuf = '';
2101
+ let stdoutTrunc = false;
2102
+ let stderrTrunc = false;
2103
+
2104
+ child.stdout.on('data', (d) => {
2105
+ if (stdoutBuf.length < MAX_OUTPUT_BYTES) stdoutBuf += d.toString();
2106
+ else stdoutTrunc = true;
2107
+ });
2108
+ child.stderr.on('data', (d) => {
2109
+ if (stderrBuf.length < MAX_OUTPUT_BYTES) stderrBuf += d.toString();
2110
+ else stderrTrunc = true;
2111
+ });
2112
+
2113
+ // Hard kill after timeout
2114
+ const killer = setTimeout(() => {
2115
+ try { child.kill('SIGKILL'); } catch {}
2116
+ resolve({ exit_code: 124, stdout: stdoutBuf, stderr: stderrBuf, timed_out: true });
2117
+ }, TIMEOUT_MS);
2118
+
2119
+ child.on('close', (exitCode) => {
2120
+ clearTimeout(killer);
2121
+ resolve({
2122
+ exit_code: exitCode ?? 1,
2123
+ stdout: stdoutBuf + (stdoutTrunc ? '\n[stdout truncated at 128 KB]' : ''),
2124
+ stderr: stderrBuf + (stderrTrunc ? '\n[stderr truncated at 128 KB]' : ''),
2125
+ timed_out: false,
2126
+ });
2127
+ });
2128
+
2129
+ child.on('error', (err) => {
2130
+ clearTimeout(killer);
2131
+ resolve({ exit_code: 1, stdout: '', stderr: `[spawn error] ${err.message}`, timed_out: false });
2132
+ });
2133
+ });
2134
+
2135
+ // ── Collect created/modified files ───────────────────────────────────
2136
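+ // List files present in the sandbox after the run, including inputs supplied via `files` (excluding the main entrypoint, package.json, and the site-packages / node_modules / __pycache__ directories)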
2137
+ let createdFiles = [];
2138
+ try {
2139
+ const walk = (dir, base) => {
2140
+ for (const entry of fs.default.readdirSync(dir, { withFileTypes: true })) {
2141
+ const rel = base ? `${base}/${entry.name}` : entry.name;
2142
+ if (entry.isDirectory()) {
2143
+ if (!['site-packages', 'node_modules', '__pycache__'].includes(entry.name)) walk(path.default.join(dir, entry.name), rel);
2144
+ } else if (!['main.py','main.mjs','main.ts','main.sh','package.json'].includes(entry.name)) {
2145
+ const size = fs.default.statSync(path.default.join(dir, entry.name)).size;
2146
+ createdFiles.push(` ${rel} (${size} bytes)`);
2147
+ }
2148
+ }
2149
+ };
2150
+ walk(sandboxDir, '');
2151
+ } catch {}
2152
+
2153
+ // ── Format response ──────────────────────────────────────────────────
2154
+ const lines = [];
2155
+ if (result.timed_out) lines.push(`⏱ TIMEOUT — execution exceeded ${timeout}s (exit 124)`);
2156
+ lines.push(`exit_code: ${result.exit_code}${result.exit_code === 0 ? ' ✓' : ' ✗'}`);
2157
+ if (result.stdout.trim()) lines.push(`\nstdout:\n${result.stdout.trimEnd()}`);
2158
+ if (result.stderr.trim()) lines.push(`\nstderr:\n${result.stderr.trimEnd()}`);
2159
+ if (!result.stdout.trim() && !result.stderr.trim()) lines.push('\n(no output)');
2160
+ if (createdFiles.length > 0) lines.push(`\nfiles written in sandbox:\n${createdFiles.join('\n')}`);
2161
+
2162
+ return lines.join('\n');
2163
+ } finally {
2164
+ cleanup();
2165
+ }
2166
+ }
2167
+
1951
2168
  default:
1952
2169
  return `Unknown action: ${action}`;
1953
2170
  }