@oh-my-pi/pi-coding-agent 14.9.2 → 14.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +89 -0
- package/package.json +7 -7
- package/scripts/format-prompts.ts +3 -3
- package/src/async/job-manager.ts +66 -9
- package/src/capability/rule.ts +20 -0
- package/src/config/model-registry.ts +13 -0
- package/src/config/model-resolver.ts +8 -2
- package/src/config/prompt-templates.ts +0 -5
- package/src/config/settings-schema.ts +39 -1
- package/src/edit/index.ts +8 -0
- package/src/edit/renderer.ts +6 -1
- package/src/edit/streaming.ts +53 -2
- package/src/eval/eval.lark +10 -31
- package/src/eval/index.ts +1 -0
- package/src/eval/js/context-manager.ts +1 -38
- package/src/eval/js/prelude.txt +0 -2
- package/src/eval/parse.ts +156 -255
- package/src/eval/py/executor.ts +24 -8
- package/src/eval/py/index.ts +1 -0
- package/src/eval/py/prelude.py +11 -80
- package/src/eval/sniff.ts +28 -0
- package/src/export/html/template.css +50 -0
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +229 -17
- package/src/extensibility/plugins/loader.ts +31 -6
- package/src/extensibility/skills.ts +20 -0
- package/src/hashline/constants.ts +20 -0
- package/src/hashline/grammar.lark +16 -23
- package/src/hashline/hash.ts +4 -34
- package/src/hashline/input.ts +16 -2
- package/src/hashline/parser.ts +12 -1
- package/src/internal-urls/agent-protocol.ts +64 -52
- package/src/internal-urls/artifact-protocol.ts +52 -51
- package/src/internal-urls/docs-index.generated.ts +34 -1
- package/src/internal-urls/index.ts +6 -19
- package/src/internal-urls/local-protocol.ts +50 -7
- package/src/internal-urls/mcp-protocol.ts +3 -8
- package/src/internal-urls/memory-protocol.ts +90 -59
- package/src/internal-urls/pi-protocol.ts +1 -0
- package/src/internal-urls/router.ts +40 -23
- package/src/internal-urls/rule-protocol.ts +3 -20
- package/src/internal-urls/skill-protocol.ts +5 -27
- package/src/internal-urls/types.ts +18 -2
- package/src/main.ts +1 -1
- package/src/mcp/manager.ts +17 -0
- package/src/modes/components/session-observer-overlay.ts +2 -2
- package/src/modes/components/tool-execution.ts +6 -0
- package/src/modes/components/tree-selector.ts +4 -0
- package/src/modes/controllers/event-controller.ts +23 -2
- package/src/modes/controllers/mcp-command-controller.ts +7 -10
- package/src/modes/interactive-mode.ts +2 -2
- package/src/modes/theme/theme.ts +27 -27
- package/src/modes/types.ts +1 -1
- package/src/modes/utils/ui-helpers.ts +14 -9
- package/src/prompts/commands/orchestrate.md +1 -0
- package/src/prompts/system/custom-system-prompt.md +0 -2
- package/src/prompts/system/project-prompt.md +10 -0
- package/src/prompts/system/subagent-system-prompt.md +18 -9
- package/src/prompts/system/subagent-user-prompt.md +1 -10
- package/src/prompts/system/system-prompt.md +159 -232
- package/src/prompts/tools/ask.md +0 -1
- package/src/prompts/tools/bash.md +0 -34
- package/src/prompts/tools/eval.md +27 -16
- package/src/prompts/tools/github.md +6 -5
- package/src/prompts/tools/hashline.md +1 -0
- package/src/prompts/tools/job.md +14 -6
- package/src/prompts/tools/task.md +20 -3
- package/src/registry/agent-registry.ts +2 -1
- package/src/sdk.ts +87 -89
- package/src/session/agent-session.ts +107 -37
- package/src/session/artifacts.ts +7 -4
- package/src/session/session-manager.ts +30 -1
- package/src/ssh/connection-manager.ts +32 -16
- package/src/ssh/sshfs-mount.ts +10 -7
- package/src/system-prompt.ts +3 -9
- package/src/task/executor.ts +23 -7
- package/src/task/index.ts +57 -36
- package/src/tool-discovery/tool-index.ts +21 -8
- package/src/tools/ast-edit.ts +3 -2
- package/src/tools/ast-grep.ts +3 -2
- package/src/tools/bash.ts +30 -50
- package/src/tools/browser/tab-supervisor.ts +12 -2
- package/src/tools/eval.ts +59 -44
- package/src/tools/fetch.ts +1 -1
- package/src/tools/gh.ts +140 -4
- package/src/tools/index.ts +12 -11
- package/src/tools/job.ts +48 -12
- package/src/tools/path-utils.ts +21 -1
- package/src/tools/read.ts +74 -31
- package/src/tools/search.ts +16 -3
- package/src/tools/todo-write.ts +1 -1
- package/src/utils/file-display-mode.ts +11 -5
- package/src/web/scrapers/mastodon.ts +1 -1
- package/src/web/scrapers/repology.ts +7 -7
- package/src/internal-urls/jobs-protocol.ts +0 -119
- package/src/task/template.ts +0 -47
- package/src/tools/bash-normalize.ts +0 -107
|
@@ -32,9 +32,6 @@ interface VmHelperOptions {
|
|
|
32
32
|
reverse?: boolean;
|
|
33
33
|
unique?: boolean;
|
|
34
34
|
count?: boolean;
|
|
35
|
-
cwd?: string;
|
|
36
|
-
timeoutMs?: number;
|
|
37
|
-
timeout?: number;
|
|
38
35
|
}
|
|
39
36
|
|
|
40
37
|
interface VmContextState {
|
|
@@ -303,41 +300,6 @@ async function createHelpers(state: VmContextState) {
|
|
|
303
300
|
emitStatus(state, { op: "tree", path: root, entries: entryCount, preview: result.slice(0, 1000) });
|
|
304
301
|
return result;
|
|
305
302
|
},
|
|
306
|
-
run: async (
|
|
307
|
-
command: string,
|
|
308
|
-
options: VmHelperOptions = {},
|
|
309
|
-
): Promise<{ stdout: string; stderr: string; exit_code: number }> => {
|
|
310
|
-
const cwd = options.cwd ? resolvePath(state, options.cwd) : state.cwd;
|
|
311
|
-
const timeoutMs =
|
|
312
|
-
typeof options.timeoutMs === "number"
|
|
313
|
-
? options.timeoutMs
|
|
314
|
-
: typeof options.timeout === "number"
|
|
315
|
-
? options.timeout * 1000
|
|
316
|
-
: undefined;
|
|
317
|
-
const timeoutSignal =
|
|
318
|
-
typeof timeoutMs === "number" && Number.isFinite(timeoutMs) && timeoutMs > 0
|
|
319
|
-
? AbortSignal.timeout(timeoutMs)
|
|
320
|
-
: undefined;
|
|
321
|
-
const signal =
|
|
322
|
-
state.currentRun?.signal && timeoutSignal
|
|
323
|
-
? AbortSignal.any([state.currentRun.signal, timeoutSignal])
|
|
324
|
-
: (state.currentRun?.signal ?? timeoutSignal);
|
|
325
|
-
const child = Bun.spawn(["bash", "-lc", command], {
|
|
326
|
-
cwd,
|
|
327
|
-
env: getMergedEnv(state),
|
|
328
|
-
stdout: "pipe",
|
|
329
|
-
stderr: "pipe",
|
|
330
|
-
signal,
|
|
331
|
-
});
|
|
332
|
-
const [stdout, stderr, exit_code] = await Promise.all([
|
|
333
|
-
new Response(child.stdout as ReadableStream<Uint8Array>).text(),
|
|
334
|
-
new Response(child.stderr as ReadableStream<Uint8Array>).text(),
|
|
335
|
-
child.exited,
|
|
336
|
-
]);
|
|
337
|
-
const output = `${stdout}${stderr}`.slice(0, 500);
|
|
338
|
-
emitStatus(state, { op: "run", cmd: command.slice(0, 120), code: exit_code, output });
|
|
339
|
-
return { stdout, stderr, exit_code };
|
|
340
|
-
},
|
|
341
303
|
env: (key?: string, value?: string): string | Record<string, string> | undefined => {
|
|
342
304
|
if (!key) {
|
|
343
305
|
const env = Object.fromEntries(Object.entries(getMergedEnv(state)).sort(([a], [b]) => a.localeCompare(b)));
|
|
@@ -419,6 +381,7 @@ async function createVmState(
|
|
|
419
381
|
atob,
|
|
420
382
|
btoa,
|
|
421
383
|
Buffer,
|
|
384
|
+
Bun,
|
|
422
385
|
process: createProcessSubset(cwd),
|
|
423
386
|
require: buildRequire(cwd),
|
|
424
387
|
createRequire,
|
package/src/eval/js/prelude.txt
CHANGED
|
@@ -12,7 +12,6 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
12
12
|
const counter = (items, opts = {}) => callHelper("counter", items, toOptions(opts));
|
|
13
13
|
const diff = (a, b) => callHelper("diff", a, b);
|
|
14
14
|
const tree = (path = ".", opts = {}) => callHelper("tree", path, toOptions(opts));
|
|
15
|
-
const run = (cmd, opts = {}) => callHelper("run", cmd, toOptions(opts));
|
|
16
15
|
const env = (key, value) => callHelper("env", key, value);
|
|
17
16
|
|
|
18
17
|
const tool = new Proxy(
|
|
@@ -67,6 +66,5 @@ if (!globalThis.__omp_js_prelude_loaded__) {
|
|
|
67
66
|
globalThis.counter = counter;
|
|
68
67
|
globalThis.diff = diff;
|
|
69
68
|
globalThis.tree = tree;
|
|
70
|
-
globalThis.run = run;
|
|
71
69
|
globalThis.env = env;
|
|
72
70
|
}
|
package/src/eval/parse.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { sniffEvalLanguage } from "./sniff";
|
|
1
2
|
import type { EvalLanguage } from "./types";
|
|
2
3
|
|
|
3
4
|
export type EvalLanguageOrigin = "default" | "header";
|
|
@@ -14,324 +15,224 @@ export interface ParsedEvalCell {
|
|
|
14
15
|
|
|
15
16
|
export interface ParsedEvalInput {
|
|
16
17
|
cells: ParsedEvalCell[];
|
|
18
|
+
/**
|
|
19
|
+
* True when the parser encountered `*** Abort` (recovery sentinel emitted
|
|
20
|
+
* by the agent loop's harmony-leak mitigation; see
|
|
21
|
+
* `docs/ERRATA-GPT5-HARMONY.md`). The cell containing the marker, if any,
|
|
22
|
+
* is dropped — its body is incomplete and unsafe to execute.
|
|
23
|
+
*/
|
|
24
|
+
aborted?: boolean;
|
|
17
25
|
}
|
|
18
26
|
|
|
19
27
|
const DEFAULT_TIMEOUT_MS = 30_000;
|
|
28
|
+
const DEFAULT_LANGUAGE: EvalLanguage = "python";
|
|
20
29
|
|
|
21
30
|
/**
|
|
22
|
-
* Canonical language tokens
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
* advertised in the tool's prompt — the lark grammar describes the
|
|
26
|
-
* canonical surface we encourage callers to emit.
|
|
31
|
+
* Canonical language tokens plus common long-form aliases. The grammar
|
|
32
|
+
* advertises only `PY` / `JS` / `TS`, but unconstrained models reach for
|
|
33
|
+
* `Python` / `JavaScript` / `TypeScript` often enough that we accept them.
|
|
27
34
|
*/
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
const LANGUAGE_MAP: Record<string, EvalLanguage> = {
|
|
36
|
+
PY: "python",
|
|
37
|
+
PYTHON: "python",
|
|
38
|
+
IPY: "python",
|
|
39
|
+
IPYTHON: "python",
|
|
40
|
+
JS: "js",
|
|
41
|
+
JAVASCRIPT: "js",
|
|
42
|
+
TS: "js",
|
|
43
|
+
TYPESCRIPT: "js",
|
|
37
44
|
};
|
|
38
45
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
46
|
+
// Markers are case-insensitive, accept ≥2 leading stars (so `**Begin` and
|
|
47
|
+
// `*** Begin` both work), and tolerate any whitespace (including tabs)
|
|
48
|
+
// between tokens. Models that can't constrain-sample frequently emit minor
|
|
49
|
+
// variations like `**End`, `*** end py`, or `***\tTitle: foo`.
|
|
50
|
+
const STARS = String.raw`\*{2,}`;
|
|
51
|
+
const BEGIN_RE = new RegExp(`^${STARS}\\s*Begin\\b\\s*(\\S+)?\\s*$`, "i");
|
|
52
|
+
const END_RE = new RegExp(`^${STARS}\\s*End\\b.*$`, "i");
|
|
53
|
+
const TITLE_RE = new RegExp(`^${STARS}\\s*Title\\s*:\\s*(.+?)\\s*$`, "i");
|
|
54
|
+
const TIMEOUT_RE = new RegExp(`^${STARS}\\s*Timeout\\s*:\\s*(\\S+)\\s*$`, "i");
|
|
55
|
+
const RESET_RE = new RegExp(`^${STARS}\\s*Reset\\s*$`, "i");
|
|
56
|
+
const ABORT_RE = new RegExp(`^${STARS}\\s*Abort\\s*$`, "i");
|
|
42
57
|
|
|
43
58
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
* synonyms the LLM is likely to reach for instead of the short canonical.
|
|
59
|
+
* Warning text appended to the eval tool result when parsing terminated on
|
|
60
|
+
* `*** Abort`. Tells the model that earlier cells (if any) ran normally and
|
|
61
|
+
* that any aborted cell needs to be re-issued.
|
|
48
62
|
*/
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
const
|
|
63
|
+
export const ABORT_WARNING =
|
|
64
|
+
"Tool stream truncated mid-call due to detected output corruption. Earlier cells (if any) executed normally; their state persists. Re-issue the aborted cell.";
|
|
65
|
+
const DURATION_RE = /^(\d+)(ms|s|m)?$/i;
|
|
52
66
|
|
|
53
|
-
function
|
|
54
|
-
|
|
55
|
-
if (T_KEYS.has(key)) return "t";
|
|
56
|
-
if (RST_KEYS.has(key)) return "rst";
|
|
57
|
-
return null;
|
|
67
|
+
function resolveLang(token: string | undefined): EvalLanguage | undefined {
|
|
68
|
+
return token ? LANGUAGE_MAP[token.toUpperCase()] : undefined;
|
|
58
69
|
}
|
|
59
70
|
|
|
60
|
-
interface HeaderInfo {
|
|
61
|
-
language?: EvalLanguage;
|
|
62
|
-
title?: string;
|
|
63
|
-
timeoutMs?: number;
|
|
64
|
-
reset?: boolean;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
/**
|
|
68
|
-
* Match a header line: `={5,} <info>? ={5,}`. Both bars MUST be on the
|
|
69
|
-
* same line and each MUST be at least five equal signs (lengths need not
|
|
70
|
-
* match — a 5/6 split is fine).
|
|
71
|
-
*/
|
|
72
|
-
const HEADER_RE = /^={5,}([^=].*?)?={5,}\s*$/;
|
|
73
|
-
const EMPTY_HEADER_RE = /^={5,}\s*$/;
|
|
74
|
-
|
|
75
|
-
const ATTR_TOKEN_RE = /^([a-zA-Z][\w-]*)(?::(?:"([^"]*)"|'([^']*)'|(.*)))?$/;
|
|
76
|
-
const DURATION_TOKEN_RE = /^\d+(?:ms|s|m)?$/;
|
|
77
|
-
|
|
78
71
|
function parseDurationMs(raw: string, lineNumber: number): number {
|
|
79
|
-
const match =
|
|
72
|
+
const match = DURATION_RE.exec(raw.trim());
|
|
80
73
|
if (!match) {
|
|
81
74
|
throw new Error(
|
|
82
75
|
`Eval line ${lineNumber}: invalid duration \`${raw}\`; use a number with optional ms, s, or m units.`,
|
|
83
76
|
);
|
|
84
77
|
}
|
|
85
78
|
const value = Number.parseInt(match[1], 10);
|
|
86
|
-
const unit = match[2] ?? "s";
|
|
79
|
+
const unit = (match[2] ?? "s").toLowerCase();
|
|
87
80
|
if (unit === "ms") return value;
|
|
88
81
|
if (unit === "s") return value * 1000;
|
|
89
82
|
return value * 60_000;
|
|
90
83
|
}
|
|
91
84
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
return undefined;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
function trimOuterBlankLines(lines: string[]): string[] {
|
|
100
|
-
let start = 0;
|
|
101
|
-
let end = lines.length;
|
|
102
|
-
while (start < end && lines[start].trim() === "") start++;
|
|
103
|
-
while (end > start && lines[end - 1].trim() === "") end--;
|
|
104
|
-
return lines.slice(start, end);
|
|
105
|
-
}
|
|
85
|
+
// Markdown fence wrapping a single bare cell, e.g. "```py\n...\n```" or
|
|
86
|
+
// "```\n...\n```". Used by models that wrap eval input in code fences.
|
|
87
|
+
const FENCE_OPEN_RE = /^```\s*([A-Za-z]\w*)?\s*$/;
|
|
88
|
+
const FENCE_CLOSE_RE = /^```\s*$/;
|
|
106
89
|
|
|
107
90
|
/**
|
|
108
|
-
*
|
|
109
|
-
*
|
|
110
|
-
*
|
|
111
|
-
*
|
|
112
|
-
* A line that contains text but only one bar (e.g. `===== title`) is NOT
|
|
113
|
-
* a header — it's normal code that happens to start with equal signs.
|
|
91
|
+
* Last-resort fallback when the input has no recognizable `*** Begin` header.
|
|
92
|
+
* Models that can't constrain-sample sometimes pass bare code or wrap it in
|
|
93
|
+
* a markdown fence (```py / ```python / bare ```). Treat the whole input as
|
|
94
|
+
* a single implicit cell, sniffing the language from the body.
|
|
114
95
|
*/
|
|
115
|
-
function
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
*/
|
|
128
|
-
function tokenizeInfoString(info: string): string[] {
|
|
129
|
-
const tokens: string[] = [];
|
|
130
|
-
let i = 0;
|
|
131
|
-
while (i < info.length) {
|
|
132
|
-
while (i < info.length && /\s/.test(info[i])) i++;
|
|
133
|
-
if (i >= info.length) break;
|
|
134
|
-
let token = "";
|
|
135
|
-
while (i < info.length && !/\s/.test(info[i])) {
|
|
136
|
-
const ch = info[i];
|
|
137
|
-
if (ch === '"' || ch === "'") {
|
|
138
|
-
token += ch;
|
|
139
|
-
i++;
|
|
140
|
-
while (i < info.length && info[i] !== ch) {
|
|
141
|
-
token += info[i];
|
|
142
|
-
i++;
|
|
143
|
-
}
|
|
144
|
-
if (i < info.length) {
|
|
145
|
-
token += info[i];
|
|
146
|
-
i++;
|
|
147
|
-
}
|
|
148
|
-
} else {
|
|
149
|
-
token += ch;
|
|
150
|
-
i++;
|
|
151
|
-
}
|
|
96
|
+
function parseImplicitCell(lines: string[]): ParsedEvalCell {
|
|
97
|
+
let body = lines.slice();
|
|
98
|
+
while (body.length > 0 && body[0].trim() === "") body.shift();
|
|
99
|
+
while (body.length > 0 && body[body.length - 1].trim() === "") body.pop();
|
|
100
|
+
|
|
101
|
+
let fenceLang: string | undefined;
|
|
102
|
+
if (body.length >= 2) {
|
|
103
|
+
const open = FENCE_OPEN_RE.exec(body[0]);
|
|
104
|
+
const closeIdx = body.length - 1;
|
|
105
|
+
if (open && FENCE_CLOSE_RE.test(body[closeIdx])) {
|
|
106
|
+
fenceLang = open[1];
|
|
107
|
+
body = body.slice(1, closeIdx);
|
|
152
108
|
}
|
|
153
|
-
tokens.push(token);
|
|
154
109
|
}
|
|
155
|
-
return tokens;
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
/**
|
|
159
|
-
* Decode a header info string into language, title, timeout, and reset flag.
|
|
160
|
-
*
|
|
161
|
-
* Token forms (all optional, any order):
|
|
162
|
-
* - `py` / `js` / `ts` bare language
|
|
163
|
-
* - `py:"..."` / `js:"..."` / `ts:"..."` language + title shorthand
|
|
164
|
-
* - `id:"..."` cell title
|
|
165
|
-
* - `t:<duration>` per-cell timeout
|
|
166
|
-
* - `<duration>` bare positional duration (lenient)
|
|
167
|
-
* - `rst` reset flag
|
|
168
|
-
* - `rst:true|false` reset flag with explicit value
|
|
169
|
-
*
|
|
170
|
-
* Fallback aliases (accepted but not advertised in the prompt):
|
|
171
|
-
* - id: title, name, cell, file, label
|
|
172
|
-
* - t: timeout, duration, time
|
|
173
|
-
* - rst: reset
|
|
174
|
-
*
|
|
175
|
-
* Truly unknown keys are silently dropped. First occurrence wins when a
|
|
176
|
-
* key is repeated (canonical or alias). Anything that doesn't classify
|
|
177
|
-
* accumulates as a positional title fragment joined by spaces.
|
|
178
|
-
*/
|
|
179
|
-
function parseHeaderInfo(info: string, lineNumber: number): HeaderInfo {
|
|
180
|
-
const tokens = tokenizeInfoString(info);
|
|
181
|
-
if (tokens.length === 0) return {};
|
|
182
|
-
|
|
183
|
-
let language: EvalLanguage | undefined;
|
|
184
|
-
let titleAttr: string | undefined;
|
|
185
|
-
let positionalDurationMs: number | undefined;
|
|
186
|
-
let tAttr: string | undefined;
|
|
187
|
-
let rstAttr: string | undefined;
|
|
188
|
-
let bareReset = false;
|
|
189
|
-
const titleParts: string[] = [];
|
|
190
|
-
|
|
191
|
-
for (const token of tokens) {
|
|
192
|
-
// Bare reset flag.
|
|
193
|
-
if (RST_KEYS.has(token.toLowerCase())) {
|
|
194
|
-
bareReset = true;
|
|
195
|
-
continue;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
const attrMatch = ATTR_TOKEN_RE.exec(token);
|
|
199
|
-
if (attrMatch && token.includes(":")) {
|
|
200
|
-
const key = attrMatch[1].toLowerCase();
|
|
201
|
-
const value = attrMatch[2] ?? attrMatch[3] ?? attrMatch[4] ?? "";
|
|
202
110
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
// unknown / repeated keys silently dropped
|
|
216
|
-
continue;
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
// Bare language token (no colon).
|
|
220
|
-
const lang = resolveLanguageAlias(token);
|
|
221
|
-
if (lang && language === undefined) {
|
|
222
|
-
language = lang;
|
|
223
|
-
continue;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// Bare positional duration (lenient — `t:` is canonical).
|
|
227
|
-
if (positionalDurationMs === undefined && DURATION_TOKEN_RE.test(token)) {
|
|
228
|
-
positionalDurationMs = parseDurationMs(token, lineNumber);
|
|
229
|
-
continue;
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
titleParts.push(token);
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
const explicitTitle = (titleAttr ?? "").trim();
|
|
236
|
-
const positionalTitle = titleParts.join(" ").trim();
|
|
237
|
-
const title = explicitTitle.length > 0 ? explicitTitle : positionalTitle.length > 0 ? positionalTitle : undefined;
|
|
238
|
-
|
|
239
|
-
let timeoutMs: number | undefined;
|
|
240
|
-
if (tAttr !== undefined) {
|
|
241
|
-
timeoutMs = parseDurationMs(tAttr, lineNumber);
|
|
242
|
-
} else if (positionalDurationMs !== undefined) {
|
|
243
|
-
timeoutMs = positionalDurationMs;
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
let reset: boolean | undefined;
|
|
247
|
-
if (rstAttr !== undefined) {
|
|
248
|
-
const parsed = parseBoolean(rstAttr);
|
|
249
|
-
if (parsed === undefined) {
|
|
250
|
-
throw new Error(`Eval line ${lineNumber}: invalid rst value \`${rstAttr}\`; use true or false.`);
|
|
251
|
-
}
|
|
252
|
-
reset = parsed;
|
|
253
|
-
} else if (bareReset) {
|
|
254
|
-
reset = true;
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
return { language, title, timeoutMs, reset };
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
interface ExpansionState {
|
|
261
|
-
language: EvalLanguage;
|
|
262
|
-
languageOrigin: EvalLanguageOrigin;
|
|
111
|
+
const code = body.join("\n");
|
|
112
|
+
const explicitLanguage = resolveLang(fenceLang);
|
|
113
|
+
const language = explicitLanguage ?? sniffEvalLanguage(code) ?? DEFAULT_LANGUAGE;
|
|
114
|
+
return {
|
|
115
|
+
index: 0,
|
|
116
|
+
title: undefined,
|
|
117
|
+
code,
|
|
118
|
+
language,
|
|
119
|
+
languageOrigin: explicitLanguage ? "header" : "default",
|
|
120
|
+
timeoutMs: DEFAULT_TIMEOUT_MS,
|
|
121
|
+
reset: false,
|
|
122
|
+
};
|
|
263
123
|
}
|
|
264
124
|
|
|
265
125
|
export function parseEvalInput(input: string): ParsedEvalInput {
|
|
266
126
|
const normalized = input.replace(/\r\n?/g, "\n");
|
|
267
127
|
const lines = normalized.split("\n");
|
|
268
|
-
// `split("\n")` produces a trailing empty element when the input ends with
|
|
269
|
-
// a newline. Drop it so we don't emit phantom blank trailing code lines.
|
|
270
128
|
if (lines.length > 0 && lines[lines.length - 1] === "") lines.pop();
|
|
271
129
|
|
|
272
|
-
const state: ExpansionState = { language: "python", languageOrigin: "default" };
|
|
273
130
|
const cells: ParsedEvalCell[] = [];
|
|
131
|
+
let aborted = false;
|
|
274
132
|
let i = 0;
|
|
275
133
|
|
|
276
|
-
//
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
cells.push({
|
|
287
|
-
index: cells.length,
|
|
288
|
-
title: undefined,
|
|
289
|
-
code: trimmed.join("\n"),
|
|
290
|
-
language: state.language,
|
|
291
|
-
languageOrigin: state.languageOrigin,
|
|
292
|
-
timeoutMs: DEFAULT_TIMEOUT_MS,
|
|
293
|
-
reset: false,
|
|
294
|
-
});
|
|
134
|
+
// Skip leading blank lines.
|
|
135
|
+
while (i < lines.length && lines[i].trim() === "") i++;
|
|
136
|
+
|
|
137
|
+
// Lenient fallback: if the input has no recognizable begin marker, treat
|
|
138
|
+
// the entire input as one implicit cell — unless that content contains
|
|
139
|
+
// `*** Abort`, in which case the body is incomplete/unsafe and we drop it.
|
|
140
|
+
if (i < lines.length && !BEGIN_RE.test(lines[i])) {
|
|
141
|
+
const tail = lines.slice(i);
|
|
142
|
+
if (tail.some(line => ABORT_RE.test(line))) {
|
|
143
|
+
return { cells, aborted: true };
|
|
295
144
|
}
|
|
145
|
+
const cell = parseImplicitCell(tail);
|
|
146
|
+
if (cell.code.length > 0) cells.push(cell);
|
|
147
|
+
return { cells };
|
|
296
148
|
}
|
|
297
149
|
|
|
298
150
|
while (i < lines.length) {
|
|
299
|
-
const
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
151
|
+
const beginMatch = BEGIN_RE.exec(lines[i])!;
|
|
152
|
+
const langToken = beginMatch[1];
|
|
153
|
+
const explicitLanguage = resolveLang(langToken);
|
|
154
|
+
i++;
|
|
155
|
+
|
|
156
|
+
let title: string | undefined;
|
|
157
|
+
let timeoutMs: number | undefined;
|
|
158
|
+
let reset = false;
|
|
159
|
+
|
|
160
|
+
while (i < lines.length) {
|
|
161
|
+
const line = lines[i];
|
|
162
|
+
const lineNumber = i + 1;
|
|
163
|
+
const titleMatch = TITLE_RE.exec(line);
|
|
164
|
+
if (titleMatch) {
|
|
165
|
+
if (title === undefined) title = titleMatch[1];
|
|
166
|
+
i++;
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
const timeoutMatch = TIMEOUT_RE.exec(line);
|
|
170
|
+
if (timeoutMatch) {
|
|
171
|
+
if (timeoutMs === undefined) timeoutMs = parseDurationMs(timeoutMatch[1], lineNumber);
|
|
172
|
+
i++;
|
|
173
|
+
continue;
|
|
174
|
+
}
|
|
175
|
+
if (RESET_RE.test(line)) {
|
|
176
|
+
reset = true;
|
|
177
|
+
i++;
|
|
178
|
+
continue;
|
|
179
|
+
}
|
|
180
|
+
break;
|
|
304
181
|
}
|
|
305
|
-
const headerLineNumber = i + 1;
|
|
306
|
-
const info = parseHeaderInfo(headerInfo, headerLineNumber);
|
|
307
|
-
i++; // consume header line
|
|
308
182
|
|
|
183
|
+
// Collect cell body. Close on `*** End` OR on the next `*** Begin`
|
|
184
|
+
// (implicit end — leniency for models that drop end markers between
|
|
185
|
+
// back-to-back cells). `*** Abort` (recovery sentinel) drops the
|
|
186
|
+
// in-progress cell entirely: its body is partial and unsafe to run.
|
|
309
187
|
const codeLines: string[] = [];
|
|
310
|
-
|
|
311
|
-
|
|
188
|
+
let cellAborted = false;
|
|
189
|
+
while (i < lines.length) {
|
|
190
|
+
const line = lines[i];
|
|
191
|
+
if (ABORT_RE.test(line)) {
|
|
192
|
+
cellAborted = true;
|
|
193
|
+
aborted = true;
|
|
194
|
+
i++;
|
|
195
|
+
break;
|
|
196
|
+
}
|
|
197
|
+
if (END_RE.test(line)) {
|
|
198
|
+
i++;
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
if (BEGIN_RE.test(line)) break;
|
|
202
|
+
codeLines.push(line);
|
|
312
203
|
i++;
|
|
313
204
|
}
|
|
205
|
+
|
|
206
|
+
if (cellAborted) break;
|
|
207
|
+
|
|
314
208
|
// Strip trailing blank lines so visual spacing between cells doesn't
|
|
315
209
|
// leak into the preceding cell's code.
|
|
316
210
|
while (codeLines.length > 0 && codeLines[codeLines.length - 1].trim() === "") {
|
|
317
211
|
codeLines.pop();
|
|
318
212
|
}
|
|
213
|
+
const code = codeLines.join("\n");
|
|
319
214
|
|
|
320
|
-
const language =
|
|
321
|
-
const languageOrigin: EvalLanguageOrigin =
|
|
215
|
+
const language = explicitLanguage ?? sniffEvalLanguage(code) ?? DEFAULT_LANGUAGE;
|
|
216
|
+
const languageOrigin: EvalLanguageOrigin = explicitLanguage ? "header" : "default";
|
|
322
217
|
|
|
323
218
|
cells.push({
|
|
324
219
|
index: cells.length,
|
|
325
|
-
title
|
|
326
|
-
code
|
|
220
|
+
title,
|
|
221
|
+
code,
|
|
327
222
|
language,
|
|
328
223
|
languageOrigin,
|
|
329
|
-
timeoutMs:
|
|
330
|
-
reset
|
|
224
|
+
timeoutMs: timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
|
225
|
+
reset,
|
|
331
226
|
});
|
|
332
|
-
|
|
333
|
-
|
|
227
|
+
|
|
228
|
+
// Skip blank separator lines between cells; an `*** Abort` here
|
|
229
|
+
// terminates parsing while keeping previously-collected cells.
|
|
230
|
+
while (i < lines.length && lines[i].trim() === "") i++;
|
|
231
|
+
if (i < lines.length && ABORT_RE.test(lines[i])) {
|
|
232
|
+
aborted = true;
|
|
233
|
+
break;
|
|
234
|
+
}
|
|
334
235
|
}
|
|
335
236
|
|
|
336
|
-
return { cells };
|
|
237
|
+
return aborted ? { cells, aborted: true } : { cells };
|
|
337
238
|
}
|
package/src/eval/py/executor.ts
CHANGED
|
@@ -39,6 +39,13 @@ export interface PythonExecutorOptions {
|
|
|
39
39
|
useSharedGateway?: boolean;
|
|
40
40
|
/** Session file path for accessing task outputs */
|
|
41
41
|
sessionFile?: string;
|
|
42
|
+
/**
|
|
43
|
+
* Effective artifacts directory for the current session. Subagents share
|
|
44
|
+
* the parent's directory, so this can differ from `sessionFile`'s sibling
|
|
45
|
+
* dir. When present, exported to the kernel as `PI_ARTIFACTS_DIR` and
|
|
46
|
+
* preferred over `PI_SESSION_FILE`-derived paths.
|
|
47
|
+
*/
|
|
48
|
+
artifactsDir?: string;
|
|
42
49
|
/** Artifact path/id for full output storage */
|
|
43
50
|
artifactPath?: string;
|
|
44
51
|
artifactId?: string;
|
|
@@ -102,6 +109,7 @@ let cleanupTimer: NodeJS.Timeout | null = null;
|
|
|
102
109
|
interface KernelSessionExecutionOptions {
|
|
103
110
|
useSharedGateway?: boolean;
|
|
104
111
|
sessionFile?: string;
|
|
112
|
+
artifactsDir?: string;
|
|
105
113
|
signal?: AbortSignal;
|
|
106
114
|
deadlineMs?: number;
|
|
107
115
|
kernelOwnerId?: string;
|
|
@@ -123,6 +131,19 @@ function getExecutionDeadlineMs(options?: Pick<PythonExecutorOptions, "deadlineM
|
|
|
123
131
|
return Date.now() + options.timeoutMs;
|
|
124
132
|
}
|
|
125
133
|
|
|
134
|
+
/**
|
|
135
|
+
* Build the env block exposed to the Python kernel. Includes the session file
|
|
136
|
+
* (for things that need the raw session path) and the effective artifacts
|
|
137
|
+
* directory (preferred by the prelude when resolving output IDs, so subagents
|
|
138
|
+
* see the parent's flat dir instead of a non-existent sibling).
|
|
139
|
+
*/
|
|
140
|
+
function buildKernelEnv(options: { sessionFile?: string; artifactsDir?: string }): Record<string, string> | undefined {
|
|
141
|
+
const env: Record<string, string> = {};
|
|
142
|
+
if (options.sessionFile) env.PI_SESSION_FILE = options.sessionFile;
|
|
143
|
+
if (options.artifactsDir) env.PI_ARTIFACTS_DIR = options.artifactsDir;
|
|
144
|
+
return Object.keys(env).length > 0 ? env : undefined;
|
|
145
|
+
}
|
|
146
|
+
|
|
126
147
|
function getRemainingTimeoutMs(deadlineMs?: number): number | undefined {
|
|
127
148
|
if (deadlineMs === undefined) return undefined;
|
|
128
149
|
return deadlineMs - Date.now();
|
|
@@ -523,9 +544,7 @@ async function createKernelSession(
|
|
|
523
544
|
isRetry?: boolean,
|
|
524
545
|
): Promise<KernelSession> {
|
|
525
546
|
requireRemainingTimeoutMs(options.deadlineMs);
|
|
526
|
-
const env
|
|
527
|
-
? { PI_SESSION_FILE: options.sessionFile }
|
|
528
|
-
: undefined;
|
|
547
|
+
const env = buildKernelEnv(options);
|
|
529
548
|
const startOptions = buildKernelStartOptions(cwd, env, options);
|
|
530
549
|
|
|
531
550
|
let kernel: PythonKernel;
|
|
@@ -586,9 +605,7 @@ async function restartKernelSession(
|
|
|
586
605
|
});
|
|
587
606
|
}
|
|
588
607
|
}
|
|
589
|
-
const env
|
|
590
|
-
? { PI_SESSION_FILE: options.sessionFile }
|
|
591
|
-
: undefined;
|
|
608
|
+
const env = buildKernelEnv(options);
|
|
592
609
|
const startOptions = buildKernelStartOptions(cwd, env, options);
|
|
593
610
|
const kernel = await PythonKernel.start(startOptions);
|
|
594
611
|
session.kernel = kernel;
|
|
@@ -936,10 +953,9 @@ export async function executePython(code: string, options?: PythonExecutorOption
|
|
|
936
953
|
await ensureKernelAvailable(cwd);
|
|
937
954
|
|
|
938
955
|
const kernelMode = executionOptions.kernelMode ?? "session";
|
|
939
|
-
const sessionFile = executionOptions.sessionFile;
|
|
940
956
|
|
|
941
957
|
if (kernelMode === "per-call") {
|
|
942
|
-
const env
|
|
958
|
+
const env = buildKernelEnv(executionOptions);
|
|
943
959
|
requireRemainingTimeoutMs(deadlineMs);
|
|
944
960
|
const startOptions = buildKernelStartOptions(cwd, env, executionOptions);
|
|
945
961
|
const kernel = await PythonKernel.start(startOptions);
|
package/src/eval/py/index.ts
CHANGED