@mammothb/pi-eval 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 mammothb
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6
+ associated documentation files (the "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the
9
+ following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or substantial
12
+ portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
15
+ LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
16
+ EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # pi-eval
2
+
3
+ A [pi](https://pi.dev) extension that adds an `eval` tool for executing
4
+ JavaScript and Python code in isolated subprocesses.
5
+
6
+ ## Usage
7
+
8
+ Once installed, the LLM can call the `eval` tool to run code snippets without
9
+ managing temp files. Each call spawns a fresh subprocess — no state persists
10
+ between calls.
11
+
12
+ ### Tool parameters
13
+
14
+ | Parameter | Type | Default | Description |
15
+ | ------------------ | ------ | ---------- | ----------- |
16
+ | `language` | string | _(required)_ | Programming language: `"javascript"` or `"python"` |
17
+ | `code` | string | _(required)_ | Code to execute |
18
+ | `nodeModulesPath` | string | — | Path to node_modules for `require()` resolution |
19
+ | `pythonPath` | string | — | Path to python3 binary (e.g., `.venv/bin/python3`); when omitted, `python3` is located via `PATH` |
20
+
21
+ ### Features
22
+
23
+ - **JavaScript**: Writes code to a temp file, spawns `node` as a subprocess.
24
+ Console output is captured as labeled `STDOUT:` / `STDERR:` sections.
25
+ - **Python**: Spawns `python3` with `-c`, capturing stdout/stderr identically.
26
+ Supports virtual environments via the `pythonPath` parameter.
27
+ - **Safety**: 30-second timeout, 1 MB output cap, abort-on-Escape support.
28
+ - **Dependency isolation**: Use `nodeModulesPath` to resolve packages from a
29
+ project's `node_modules/`. Use `pythonPath` to target a venv.
package/index.ts ADDED
@@ -0,0 +1,6 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { createEvalTool } from "./src/eval.js";
3
+
4
/**
 * Extension entry point: creates the eval tool and registers it on the
 * extension API it is given.
 *
 * @param pi - Extension API on which the tool is registered.
 */
export default function (pi: ExtensionAPI) {
  const evalTool = createEvalTool();
  pi.registerTool(evalTool);
}
package/package.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "name": "@mammothb/pi-eval",
3
+ "version": "1.0.0",
4
+ "description": "A pi extension that adds an eval tool for executing JavaScript and Python code in isolated subprocesses",
5
+ "keywords": [
6
+ "pi-package"
7
+ ],
8
+ "license": "MIT",
9
+ "files": [
10
+ "index.ts",
11
+ "src"
12
+ ],
13
+ "pi": {
14
+ "extensions": [
15
+ "./index.ts"
16
+ ]
17
+ },
18
+ "devDependencies": {
19
+ "typebox": "^1.2.0"
20
+ },
21
+ "peerDependencies": {
22
+ "@earendil-works/pi-ai": "*",
23
+ "@earendil-works/pi-coding-agent": "*",
24
+ "@earendil-works/pi-tui": "*",
25
+ "typebox": "*"
26
+ }
27
+ }
package/src/eval.ts ADDED
@@ -0,0 +1,85 @@
1
+ import type { ToolDefinition } from "@earendil-works/pi-coding-agent";
2
+ import { Type } from "typebox";
3
+ import { executeJavaScript } from "./javascript.js";
4
+ import { executePython } from "./python.js";
5
+ import {
6
+ EvalCancelledError,
7
+ type EvalDetails,
8
+ EvalUnsupportedLanguageError,
9
+ } from "./types.js";
10
+
11
/** Delay, in milliseconds, given to `AbortSignal.timeout` for each evaluation (30 seconds). */
export const TIMEOUT_MS = 30 * 1000;
12
+
13
/**
 * Input schema for the `eval` tool: a required `language` and `code`, plus
 * two optional overrides (`pythonPath`, `nodeModulesPath`).
 */
const Parameters = Type.Object({
  // Only these two literals are accepted.
  language: Type.Union([Type.Literal("javascript"), Type.Literal("python")]),
  code: Type.String({ description: "Code to execute" }),
  // Optional interpreter override used for Python evaluations.
  pythonPath: Type.Optional(
    Type.String({
      description: [
        "Path to python3 binary (e.g., '.venv/bin/python3' for venvs).",
        "Defaults to 'python3'.",
      ].join(" "),
    }),
  ),
  // Optional module directory used for JavaScript evaluations.
  nodeModulesPath: Type.Optional(
    Type.String({
      description: [
        "Path to a node_modules directory. When set, NODE_PATH is passed",
        "to the subprocess so require() resolves from this directory.",
        "Use './node_modules' for project-local packages.",
      ].join(" "),
    }),
  ),
});
32
+
33
/**
 * Build the `eval` tool definition.
 *
 * The tool's `execute` handler validates the requested language, refuses to
 * start if the caller's signal is already aborted, creates a timeout signal,
 * and then delegates to the Python or JavaScript executor.
 *
 * @returns The tool definition, ready to be registered.
 */
export function createEvalTool(): ToolDefinition<
  typeof Parameters,
  EvalDetails
> {
  return {
    name: "eval",
    label: "Eval",
    description: `Execute JavaScript or Python code in an isolated subprocess.

- Each call is a fresh subprocess — no state persists between calls
- 30-second timeout; press Escape to cancel a running evaluation
- Working directory is the agent's current working directory (like bash)
- Use nodeModulesPath to resolve require() from a project directory
- Use pythonPath to target a virtual environment`,
    promptSnippet:
      "Execute JavaScript or Python code in an isolated subprocess",
    parameters: Parameters,
    /**
     * Run one evaluation.
     *
     * @throws {EvalUnsupportedLanguageError} if `language` is neither
     *   "javascript" nor "python".
     * @throws {EvalCancelledError} if `signal` is already aborted on entry.
     */
    async execute(_toolCallId, params, signal, _onUpdate, ctx) {
      const { language, code, pythonPath, nodeModulesPath } = params;

      // Belt-and-suspenders: the schema already constrains `language`, but a
      // raw API call could bypass validation.
      if (language !== "javascript" && language !== "python") {
        throw new EvalUnsupportedLanguageError(language);
      }

      if (signal?.aborted) {
        throw new EvalCancelledError();
      }

      // The timeout signal is passed separately from the user signal so the
      // subprocess runner can tell a timeout from a user cancel. A signal
      // created by AbortSignal.timeout() only aborts after its delay has
      // elapsed, so checking `.aborted` right here would be dead code.
      const timeoutSignal = AbortSignal.timeout(TIMEOUT_MS);

      if (language === "python") {
        return executePython(code, pythonPath, signal, timeoutSignal, ctx.cwd);
      }

      // JavaScript: executed from a temp file by a node subprocess.
      return executeJavaScript(
        code,
        nodeModulesPath,
        signal,
        timeoutSignal,
        ctx.cwd,
      );
    },
  };
}
package/src/format.ts ADDED
@@ -0,0 +1,59 @@
1
+ import type { AgentToolResult } from "@earendil-works/pi-coding-agent";
2
+ import {
3
+ type EvalDetails,
4
+ EvalToolError,
5
+ type SubprocessResult,
6
+ } from "./types.js";
7
+
8
/** Cap on the amount of subprocess output that is kept (1024 * 1024). */
export const MAX_OUTPUT = 2 ** 20; // 1 MB
9
+
10
/** Inputs accepted by {@link formatOutput}. */
export interface FormatOutputOptions {
  /** Captured standard output; an empty string is rendered as "(no output)". */
  stdout: string;
  /** Captured standard error; the STDERR section is omitted when this is empty. */
  stderr: string;
  /** When truthy, a truncation notice is appended. */
  truncated?: boolean;
  /** When set to a non-empty string, a "killed by signal" notice is appended. */
  exitSignal?: string | null;
}

/**
 * Render captured output as labeled sections separated by blank lines.
 *
 * The STDOUT section is always present; the STDERR section, the truncation
 * notice and the signal notice are each included only when applicable.
 *
 * @param opts - Captured streams plus optional truncation / signal flags.
 * @returns The sections joined with "\n\n".
 */
export function formatOutput(opts: FormatOutputOptions): string {
  const { stdout, stderr, truncated, exitSignal } = opts;

  // Optional sections collapse to "" and are filtered out below; the STDOUT
  // section is never empty, so it always survives the filter.
  const sections = [
    `STDOUT:\n${stdout || "(no output)"}`,
    stderr ? `STDERR:\n${stderr}` : "",
    truncated ? "[Output truncated at 1 MB]" : "",
    exitSignal ? `[Process killed by signal: ${exitSignal}]` : "",
  ];

  return sections.filter((section) => section !== "").join("\n\n");
}
31
+
32
/**
 * Turn a finished subprocess into a tool result, or throw if it failed.
 *
 * The formatted output is used both as the text of a successful result and
 * as the message of the thrown error.
 *
 * @param language - Language label copied into the result details.
 * @param result - Captured output and exit status of the subprocess.
 * @returns A text result whose details carry language, exit code and signal.
 * @throws {EvalToolError} with code "NON_ZERO_EXIT" if exitCode is non-zero
 *   or the process was killed by a signal.
 */
export function assertSuccessOrThrow(
  language: string,
  result: SubprocessResult,
): AgentToolResult<EvalDetails> {
  const { stdout, stderr, truncated, exitCode, exitSignal } = result;
  const text = formatOutput({ stdout, stderr, truncated, exitSignal });

  // Success means a zero exit code AND no terminating signal.
  const succeeded = exitCode === 0 && exitSignal == null;
  if (!succeeded) {
    throw new EvalToolError(text, "NON_ZERO_EXIT");
  }

  return {
    content: [{ type: "text" as const, text }],
    details: { language, exitCode, exitSignal },
  };
}
@@ -0,0 +1,42 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { rm, writeFile } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { isAbsolute, join, resolve } from "node:path";
5
+ import type { AgentToolResult } from "@earendil-works/pi-coding-agent";
6
+ import { assertSuccessOrThrow } from "./format.js";
7
+ import { run } from "./subprocess.js";
8
+ import type { EvalDetails } from "./types.js";
9
+
10
/**
 * Run JavaScript by writing it to a uniquely named temp file and executing
 * that file with `node` in `cwd`.
 *
 * @param code - JavaScript source to execute.
 * @param nodeModulesPath - Optional directory exported to the child as
 *   NODE_PATH; a relative path is resolved against `cwd`.
 * @param userSignal - Optional cancellation signal, forwarded to the runner.
 * @param timeoutSignal - Timeout signal, forwarded to the runner.
 * @param cwd - Working directory for the subprocess.
 * @returns The tool result produced by `assertSuccessOrThrow` for the run.
 */
export async function executeJavaScript(
  code: string,
  nodeModulesPath: string | undefined,
  userSignal: AbortSignal | undefined,
  timeoutSignal: AbortSignal,
  cwd: string,
): Promise<AgentToolResult<EvalDetails>> {
  const tmpFile = join(tmpdir(), `pi-eval-${randomUUID()}.js`);

  try {
    // The temp directory is shared, so create the file owner-only (0o600)
    // and exclusively ("wx" fails instead of writing into an existing path).
    await writeFile(tmpFile, code, {
      encoding: "utf-8",
      mode: 0o600,
      flag: "wx",
    });

    // Child inherits the parent's environment, plus NODE_PATH when requested.
    const env: NodeJS.ProcessEnv = { ...process.env };
    if (nodeModulesPath) {
      env.NODE_PATH = isAbsolute(nodeModulesPath)
        ? nodeModulesPath
        : resolve(cwd, nodeModulesPath);
    }

    const result = await run(
      "node",
      [tmpFile],
      cwd,
      env,
      userSignal,
      timeoutSignal,
    );
    return assertSuccessOrThrow("javascript", result);
  } finally {
    // Awaited so cleanup completes before the result or error propagates.
    await rm(tmpFile, { force: true });
  }
}
package/src/python.ts ADDED
@@ -0,0 +1,63 @@
1
+ import { constants } from "node:fs";
2
+ import { access } from "node:fs/promises";
3
+ import { delimiter, isAbsolute, join, resolve } from "node:path";
4
+ import type { AgentToolResult } from "@earendil-works/pi-coding-agent";
5
+ import { assertSuccessOrThrow } from "./format.js";
6
+ import { run } from "./subprocess.js";
7
+ import { EvalBinaryNotFoundError, type EvalDetails } from "./types.js";
8
+
9
/**
 * Locate the Python interpreter to run.
 *
 * With an explicit `pythonPath`, the path is resolved against `cwd` (when
 * relative) and must be executable. Without one, each directory on `PATH` is
 * probed for `python3`, followed by two fixed fallback locations.
 *
 * @param pythonPath - Optional explicit interpreter path.
 * @param cwd - Base directory for resolving a relative `pythonPath`.
 * @returns Path of the first executable candidate.
 * @throws {EvalBinaryNotFoundError} if no executable candidate is found.
 */
export async function resolvePythonBinary(
  pythonPath: string | undefined,
  cwd: string,
): Promise<string> {
  if (pythonPath) {
    // Explicit path: resolve relative to cwd, then validate.
    const resolved = isAbsolute(pythonPath)
      ? pythonPath
      : resolve(cwd, pythonPath);
    try {
      await access(resolved, constants.X_OK);
    } catch {
      throw new EvalBinaryNotFoundError(resolved);
    }
    return resolved;
  }

  // Default: search PATH for python3. Empty entries (unset PATH, leading or
  // trailing delimiter, "::") are skipped: join("", "python3") is the bare
  // name "python3", which would be probed against the process's current
  // directory and then returned as a command name rather than a file path.
  const pathDirs = (process.env.PATH ?? "")
    .split(delimiter)
    .filter((dir) => dir.length > 0);
  const candidates = [
    ...pathDirs.map((dir) => join(dir, "python3")),
    "/usr/bin/python3",
    "/usr/local/bin/python3",
  ];

  // Probed sequentially so the earliest PATH entry wins.
  for (const candidate of candidates) {
    try {
      await access(candidate, constants.X_OK);
      return candidate;
    } catch {
      // Not executable or missing: try the next candidate.
    }
  }

  throw new EvalBinaryNotFoundError("python3");
}
45
+
46
/**
 * Run Python source with `<python> -c <code>` in `cwd`.
 *
 * @param code - Python source to execute.
 * @param pythonPath - Optional explicit interpreter path (see
 *   `resolvePythonBinary`).
 * @param userSignal - Optional cancellation signal, forwarded to the runner.
 * @param timeoutSignal - Timeout signal, forwarded to the runner.
 * @param cwd - Working directory for the subprocess.
 * @returns The tool result produced by `assertSuccessOrThrow` for the run.
 * @throws {EvalBinaryNotFoundError} if no usable interpreter is found.
 */
export async function executePython(
  code: string,
  pythonPath: string | undefined,
  userSignal: AbortSignal | undefined,
  timeoutSignal: AbortSignal,
  cwd: string,
): Promise<AgentToolResult<EvalDetails>> {
  const bin = await resolvePythonBinary(pythonPath, cwd);

  // Give the child a copy of the parent's environment, matching the
  // JavaScript executor. An empty object here would start the interpreter
  // with no environment variables at all (no PATH, HOME, locale, ...).
  const env: NodeJS.ProcessEnv = { ...process.env };

  const result = await run(
    bin,
    ["-c", code],
    cwd,
    env,
    userSignal,
    timeoutSignal,
  );
  return assertSuccessOrThrow("python", result);
}
@@ -0,0 +1,130 @@
1
+ import { spawn } from "node:child_process";
2
+ import { MAX_OUTPUT } from "./format.js";
3
+ import {
4
+ EvalCancelledError,
5
+ EvalSpawnError,
6
+ EvalTimeoutError,
7
+ type SubprocessResult,
8
+ } from "./types.js";
9
+
10
/**
 * Spawn a subprocess and capture stdout/stderr.
 *
 * Output is buffered as raw bytes and decoded as UTF-8 once the process has
 * closed, so a multi-byte character split across two chunks is decoded
 * intact and the MAX_OUTPUT cap is measured consistently in bytes across
 * both streams. When the cap is exceeded the child is killed and the promise
 * resolves with the partial output, `truncated: true`, `exitCode: 0` and
 * `exitSignal: null`.
 *
 * @param file - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param cwd - Working directory of the child.
 * @param env - Environment of the child.
 * @param userSignal - Optional user-cancellation signal.
 * @param timeoutSignal - Timeout signal.
 * @returns Captured output together with the exit code / signal.
 * @throws {EvalTimeoutError} when {@link timeoutSignal} fires
 * @throws {EvalCancelledError} when {@link userSignal} fires
 * @throws {EvalSpawnError} when the process fails to start
 */
export function run(
  file: string,
  args: string[],
  cwd: string,
  env: NodeJS.ProcessEnv,
  userSignal: AbortSignal | undefined,
  timeoutSignal: AbortSignal,
): Promise<SubprocessResult> {
  return new Promise((resolvePromise, reject) => {
    // Combine signals for spawn (either one aborting kills the child).
    const combinedSignal = userSignal
      ? AbortSignal.any([userSignal, timeoutSignal])
      : timeoutSignal;

    const child = spawn(file, args, {
      cwd,
      env,
      signal: combinedSignal,
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Raw chunks per stream; decoding is deferred until the process closes.
    const captured: Record<"stdout" | "stderr", Buffer[]> = {
      stdout: [],
      stderr: [],
    };
    let capturedBytes = 0;
    let truncated = false;

    const onData = (target: "stdout" | "stderr") => (chunk: Buffer) => {
      if (truncated) {
        // Cap already hit and the child already killed: drop late data.
        return;
      }
      // Keep only what still fits under the shared byte budget.
      const fits = Math.min(chunk.length, MAX_OUTPUT - capturedBytes);
      if (fits > 0) {
        captured[target].push(
          fits === chunk.length ? chunk : chunk.subarray(0, fits),
        );
        capturedBytes += fits;
      }
      if (fits < chunk.length) {
        // Mark as truncated immediately (don't wait for the next chunk).
        truncated = true;
        child.kill();
      }
    };

    child.stdout.on("data", onData("stdout"));
    child.stderr.on("data", onData("stderr"));

    const decode = (target: "stdout" | "stderr"): string =>
      Buffer.concat(captured[target]).toString("utf-8");

    // Both "error" and "close" can fire for one child; only the first settles.
    let settled = false;

    child.on("close", (exitCode, exitSignal) => {
      if (settled) {
        return;
      }
      settled = true;

      // Truncation kill (we killed it): resolve with the partial output.
      // The truncated flag communicates the condition, not the exit signal.
      if (truncated) {
        resolvePromise({
          stdout: decode("stdout"),
          stderr: decode("stderr"),
          exitCode: 0,
          exitSignal: null,
          truncated: true,
        });
        return;
      }
      // Check our abort signals first (they kill the child via spawn's
      // signal option); timeout takes precedence over user cancel.
      if (timeoutSignal.aborted) {
        reject(new EvalTimeoutError());
        return;
      }
      if (userSignal?.aborted) {
        reject(new EvalCancelledError());
        return;
      }
      // Normal exit (exitCode may be null if killed by an external signal).
      resolvePromise({
        stdout: decode("stdout"),
        stderr: decode("stderr"),
        exitCode,
        exitSignal,
        truncated,
      });
    });

    child.on("error", (err) => {
      if (settled) {
        return;
      }
      settled = true;

      // An abort surfaces here as an error; work out which signal caused it
      // before treating it as a genuine spawn failure.
      if (timeoutSignal.aborted) {
        reject(new EvalTimeoutError());
      } else if (userSignal?.aborted) {
        reject(new EvalCancelledError());
      } else {
        reject(new EvalSpawnError(file, err.message));
      }
    });
  });
}
package/src/types.ts ADDED
@@ -0,0 +1,60 @@
1
/**
 * Base class for the errors raised by the eval tool.
 *
 * Carries a machine-readable `code` alongside the human-readable message.
 */
export class EvalToolError extends Error {
  /** Short identifier for the failure category (e.g. "TIMEOUT"). */
  public readonly code: string;

  constructor(message: string, code: string) {
    super(message);
    this.code = code;
    this.name = "EvalToolError";
  }
}
10
+
11
/** Error with code "TIMEOUT", raised when the timeout signal has fired. */
export class EvalTimeoutError extends EvalToolError {
  constructor() {
    const message = "Evaluation cancelled or timed out after 30 seconds";
    super(message, "TIMEOUT");
  }
}
16
+
17
/** Error with code "CANCELLED", raised when an evaluation is cancelled. */
export class EvalCancelledError extends EvalToolError {
  constructor() {
    const message = "Evaluation cancelled";
    super(message, "CANCELLED");
  }
}
22
+
23
/**
 * Error with code "BINARY_NOT_FOUND", raised when the Python interpreter is
 * missing or not executable.
 */
export class EvalBinaryNotFoundError extends EvalToolError {
  /**
   * @param binary - Path or name of the interpreter that was looked for.
   */
  constructor(binary: string) {
    const message = `Python binary not found or not executable: ${binary}`;
    super(message, "BINARY_NOT_FOUND");
  }
}
31
+
32
/** Error with code "SPAWN_FAILED", raised when a subprocess cannot be started. */
export class EvalSpawnError extends EvalToolError {
  /**
   * @param file - Executable that failed to start.
   * @param cause - Description of the underlying failure.
   */
  constructor(file: string, cause: string) {
    const message = `Failed to spawn ${file}: ${cause}`;
    super(message, "SPAWN_FAILED");
  }
}
37
+
38
/**
 * Error with code "UNSUPPORTED_LANGUAGE", raised for a language other than
 * "javascript" or "python".
 */
export class EvalUnsupportedLanguageError extends EvalToolError {
  /**
   * @param language - The rejected language value, echoed in the message.
   */
  constructor(language: string) {
    const message = `Unsupported language: "${language}". Use "javascript" or "python".`;
    super(message, "UNSUPPORTED_LANGUAGE");
  }
}
46
+
47
/** Captured output and exit status of a finished subprocess. */
export interface SubprocessResult {
  /** Text captured from the child's standard output. */
  stdout: string;
  /** Text captured from the child's standard error. */
  stderr: string;
  /** null when the process was killed by a signal rather than exiting normally */
  exitCode: number | null;
  /** Name of the terminating signal, or null if there was none. */
  exitSignal: string | null;
  /** Whether the captured output was cut short. */
  truncated: boolean;
}
55
+
56
/** Structured details attached to a successful eval tool result. */
export interface EvalDetails {
  /** Language label of the evaluation that produced the result. */
  language: string;
  /** Exit code of the subprocess, or null if it has none. */
  exitCode: number | null;
  /** Name of the terminating signal, or null if there was none. */
  exitSignal: string | null;
}