@oh-my-pi/pi-coding-agent 14.9.1 → 14.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/package.json +7 -7
- package/scripts/format-prompts.ts +3 -3
- package/src/config/prompt-templates.ts +0 -5
- package/src/config/settings-schema.ts +38 -0
- package/src/eval/eval.lark +10 -31
- package/src/eval/index.ts +1 -0
- package/src/eval/parse.ts +156 -255
- package/src/eval/sniff.ts +28 -0
- package/src/export/html/template.css +38 -0
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +209 -15
- package/src/extensibility/extensions/runner.ts +173 -177
- package/src/hashline/apply.ts +8 -24
- package/src/hashline/constants.ts +20 -0
- package/src/hashline/execute.ts +0 -1
- package/src/hashline/grammar.lark +16 -27
- package/src/hashline/hash.ts +4 -34
- package/src/hashline/input.ts +16 -2
- package/src/hashline/parser.ts +12 -40
- package/src/hashline/types.ts +1 -2
- package/src/internal-urls/agent-protocol.ts +1 -0
- package/src/internal-urls/artifact-protocol.ts +1 -0
- package/src/internal-urls/docs-index.generated.ts +2 -1
- package/src/internal-urls/jobs-protocol.ts +1 -0
- package/src/internal-urls/local-protocol.ts +1 -0
- package/src/internal-urls/mcp-protocol.ts +1 -0
- package/src/internal-urls/memory-protocol.ts +1 -0
- package/src/internal-urls/pi-protocol.ts +1 -0
- package/src/internal-urls/router.ts +2 -1
- package/src/internal-urls/rule-protocol.ts +1 -0
- package/src/internal-urls/skill-protocol.ts +1 -0
- package/src/internal-urls/types.ts +18 -2
- package/src/mcp/transports/http.ts +49 -47
- package/src/prompts/system/custom-system-prompt.md +0 -2
- package/src/prompts/system/now-prompt.md +7 -0
- package/src/prompts/system/project-prompt.md +2 -0
- package/src/prompts/system/subagent-system-prompt.md +18 -9
- package/src/prompts/system/subagent-user-prompt.md +1 -10
- package/src/prompts/system/system-prompt.md +154 -233
- package/src/prompts/tools/bash.md +0 -24
- package/src/prompts/tools/eval.md +26 -13
- package/src/prompts/tools/hashline.md +1 -4
- package/src/sdk.ts +12 -22
- package/src/session/agent-session.ts +49 -17
- package/src/system-prompt.ts +38 -104
- package/src/task/executor.ts +15 -9
- package/src/task/index.ts +38 -33
- package/src/task/render.ts +4 -2
- package/src/tools/bash.ts +15 -41
- package/src/tools/eval.ts +13 -36
- package/src/tools/index.ts +0 -3
- package/src/tools/path-utils.ts +21 -1
- package/src/tools/read.ts +71 -49
- package/src/tools/search.ts +13 -1
- package/src/utils/file-display-mode.ts +11 -5
- package/src/workspace-tree.ts +210 -410
- package/src/task/template.ts +0 -47
- package/src/tools/bash-normalize.ts +0 -107
package/src/eval/parse.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { sniffEvalLanguage } from "./sniff";
|
|
1
2
|
import type { EvalLanguage } from "./types";
|
|
2
3
|
|
|
3
4
|
export type EvalLanguageOrigin = "default" | "header";
|
|
@@ -14,324 +15,224 @@ export interface ParsedEvalCell {
|
|
|
14
15
|
|
|
15
16
|
export interface ParsedEvalInput {
|
|
16
17
|
cells: ParsedEvalCell[];
|
|
18
|
+
/**
|
|
19
|
+
* True when the parser encountered `*** Abort` (recovery sentinel emitted
|
|
20
|
+
* by the agent loop's harmony-leak mitigation; see
|
|
21
|
+
* `docs/ERRATA-GPT5-HARMONY.md`). The cell containing the marker, if any,
|
|
22
|
+
* is dropped — its body is incomplete and unsafe to execute.
|
|
23
|
+
*/
|
|
24
|
+
aborted?: boolean;
|
|
17
25
|
}
|
|
18
26
|
|
|
19
27
|
const DEFAULT_TIMEOUT_MS = 30_000;
|
|
28
|
+
const DEFAULT_LANGUAGE: EvalLanguage = "python";
|
|
20
29
|
|
|
21
30
|
/**
|
|
22
|
-
* Canonical language tokens
|
|
23
|
-
*
|
|
24
|
-
*
|
|
25
|
-
* advertised in the tool's prompt — the lark grammar describes the
|
|
26
|
-
* canonical surface we encourage callers to emit.
|
|
31
|
+
* Canonical language tokens plus common long-form aliases. The grammar
|
|
32
|
+
* advertises only `PY` / `JS` / `TS`, but unconstrained models reach for
|
|
33
|
+
* `Python` / `JavaScript` / `TypeScript` often enough that we accept them.
|
|
27
34
|
*/
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
const LANGUAGE_MAP: Record<string, EvalLanguage> = {
|
|
36
|
+
PY: "python",
|
|
37
|
+
PYTHON: "python",
|
|
38
|
+
IPY: "python",
|
|
39
|
+
IPYTHON: "python",
|
|
40
|
+
JS: "js",
|
|
41
|
+
JAVASCRIPT: "js",
|
|
42
|
+
TS: "js",
|
|
43
|
+
TYPESCRIPT: "js",
|
|
37
44
|
};
|
|
38
45
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
46
|
+
// Markers are case-insensitive, accept ≥2 leading stars (so `**Begin` and
|
|
47
|
+
// `*** Begin` both work), and tolerate any whitespace (including tabs)
|
|
48
|
+
// between tokens. Models that can't constrain-sample frequently emit minor
|
|
49
|
+
// variations like `**End`, `*** end py`, or `***\tTitle: foo`.
|
|
50
|
+
const STARS = String.raw`\*{2,}`;
|
|
51
|
+
const BEGIN_RE = new RegExp(`^${STARS}\\s*Begin\\b\\s*(\\S+)?\\s*$`, "i");
|
|
52
|
+
const END_RE = new RegExp(`^${STARS}\\s*End\\b.*$`, "i");
|
|
53
|
+
const TITLE_RE = new RegExp(`^${STARS}\\s*Title\\s*:\\s*(.+?)\\s*$`, "i");
|
|
54
|
+
const TIMEOUT_RE = new RegExp(`^${STARS}\\s*Timeout\\s*:\\s*(\\S+)\\s*$`, "i");
|
|
55
|
+
const RESET_RE = new RegExp(`^${STARS}\\s*Reset\\s*$`, "i");
|
|
56
|
+
const ABORT_RE = new RegExp(`^${STARS}\\s*Abort\\s*$`, "i");
|
|
42
57
|
|
|
43
58
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
* synonyms the LLM is likely to reach for instead of the short canonical.
|
|
59
|
+
* Warning text appended to the eval tool result when parsing terminated on
|
|
60
|
+
* `*** Abort`. Tells the model that earlier cells (if any) ran normally and
|
|
61
|
+
* that any aborted cell needs to be re-issued.
|
|
48
62
|
*/
|
|
49
|
-
const
|
|
50
|
-
|
|
51
|
-
const
|
|
63
|
+
export const ABORT_WARNING =
|
|
64
|
+
"Tool stream truncated mid-call due to detected output corruption. Earlier cells (if any) executed normally; their state persists. Re-issue the aborted cell.";
|
|
65
|
+
const DURATION_RE = /^(\d+)(ms|s|m)?$/i;
|
|
52
66
|
|
|
53
|
-
function
|
|
54
|
-
|
|
55
|
-
if (T_KEYS.has(key)) return "t";
|
|
56
|
-
if (RST_KEYS.has(key)) return "rst";
|
|
57
|
-
return null;
|
|
67
|
+
function resolveLang(token: string | undefined): EvalLanguage | undefined {
|
|
68
|
+
return token ? LANGUAGE_MAP[token.toUpperCase()] : undefined;
|
|
58
69
|
}
|
|
59
70
|
|
|
60
|
-
interface HeaderInfo {
|
|
61
|
-
language?: EvalLanguage;
|
|
62
|
-
title?: string;
|
|
63
|
-
timeoutMs?: number;
|
|
64
|
-
reset?: boolean;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
/**
|
|
68
|
-
* Match a header line: `={5,} <info>? ={5,}`. Both bars MUST be on the
|
|
69
|
-
* same line and each MUST be at least five equal signs (lengths need not
|
|
70
|
-
* match — a 5/6 split is fine).
|
|
71
|
-
*/
|
|
72
|
-
const HEADER_RE = /^={5,}([^=].*?)?={5,}\s*$/;
|
|
73
|
-
const EMPTY_HEADER_RE = /^={5,}\s*$/;
|
|
74
|
-
|
|
75
|
-
const ATTR_TOKEN_RE = /^([a-zA-Z][\w-]*)(?::(?:"([^"]*)"|'([^']*)'|(.*)))?$/;
|
|
76
|
-
const DURATION_TOKEN_RE = /^\d+(?:ms|s|m)?$/;
|
|
77
|
-
|
|
78
71
|
function parseDurationMs(raw: string, lineNumber: number): number {
|
|
79
|
-
const match =
|
|
72
|
+
const match = DURATION_RE.exec(raw.trim());
|
|
80
73
|
if (!match) {
|
|
81
74
|
throw new Error(
|
|
82
75
|
`Eval line ${lineNumber}: invalid duration \`${raw}\`; use a number with optional ms, s, or m units.`,
|
|
83
76
|
);
|
|
84
77
|
}
|
|
85
78
|
const value = Number.parseInt(match[1], 10);
|
|
86
|
-
const unit = match[2] ?? "s";
|
|
79
|
+
const unit = (match[2] ?? "s").toLowerCase();
|
|
87
80
|
if (unit === "ms") return value;
|
|
88
81
|
if (unit === "s") return value * 1000;
|
|
89
82
|
return value * 60_000;
|
|
90
83
|
}
|
|
91
84
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
return undefined;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
function trimOuterBlankLines(lines: string[]): string[] {
|
|
100
|
-
let start = 0;
|
|
101
|
-
let end = lines.length;
|
|
102
|
-
while (start < end && lines[start].trim() === "") start++;
|
|
103
|
-
while (end > start && lines[end - 1].trim() === "") end--;
|
|
104
|
-
return lines.slice(start, end);
|
|
105
|
-
}
|
|
85
|
+
// Markdown fence wrapping a single bare cell, e.g. "```py\n...\n```" or
|
|
86
|
+
// "```\n...\n```". Used by models that wrap eval input in code fences.
|
|
87
|
+
const FENCE_OPEN_RE = /^```\s*([A-Za-z]\w*)?\s*$/;
|
|
88
|
+
const FENCE_CLOSE_RE = /^```\s*$/;
|
|
106
89
|
|
|
107
90
|
/**
|
|
108
|
-
*
|
|
109
|
-
*
|
|
110
|
-
*
|
|
111
|
-
*
|
|
112
|
-
* A line that contains text but only one bar (e.g. `===== title`) is NOT
|
|
113
|
-
* a header — it's normal code that happens to start with equal signs.
|
|
91
|
+
* Last-resort fallback when the input has no recognizable `*** Begin` header.
|
|
92
|
+
* Models that can't constrain-sample sometimes pass bare code or wrap it in
|
|
93
|
+
* a markdown fence (```py / ```python / bare ```). Treat the whole input as
|
|
94
|
+
* a single implicit cell, sniffing the language from the body.
|
|
114
95
|
*/
|
|
115
|
-
function
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
*/
|
|
128
|
-
function tokenizeInfoString(info: string): string[] {
|
|
129
|
-
const tokens: string[] = [];
|
|
130
|
-
let i = 0;
|
|
131
|
-
while (i < info.length) {
|
|
132
|
-
while (i < info.length && /\s/.test(info[i])) i++;
|
|
133
|
-
if (i >= info.length) break;
|
|
134
|
-
let token = "";
|
|
135
|
-
while (i < info.length && !/\s/.test(info[i])) {
|
|
136
|
-
const ch = info[i];
|
|
137
|
-
if (ch === '"' || ch === "'") {
|
|
138
|
-
token += ch;
|
|
139
|
-
i++;
|
|
140
|
-
while (i < info.length && info[i] !== ch) {
|
|
141
|
-
token += info[i];
|
|
142
|
-
i++;
|
|
143
|
-
}
|
|
144
|
-
if (i < info.length) {
|
|
145
|
-
token += info[i];
|
|
146
|
-
i++;
|
|
147
|
-
}
|
|
148
|
-
} else {
|
|
149
|
-
token += ch;
|
|
150
|
-
i++;
|
|
151
|
-
}
|
|
96
|
+
function parseImplicitCell(lines: string[]): ParsedEvalCell {
|
|
97
|
+
let body = lines.slice();
|
|
98
|
+
while (body.length > 0 && body[0].trim() === "") body.shift();
|
|
99
|
+
while (body.length > 0 && body[body.length - 1].trim() === "") body.pop();
|
|
100
|
+
|
|
101
|
+
let fenceLang: string | undefined;
|
|
102
|
+
if (body.length >= 2) {
|
|
103
|
+
const open = FENCE_OPEN_RE.exec(body[0]);
|
|
104
|
+
const closeIdx = body.length - 1;
|
|
105
|
+
if (open && FENCE_CLOSE_RE.test(body[closeIdx])) {
|
|
106
|
+
fenceLang = open[1];
|
|
107
|
+
body = body.slice(1, closeIdx);
|
|
152
108
|
}
|
|
153
|
-
tokens.push(token);
|
|
154
109
|
}
|
|
155
|
-
return tokens;
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
/**
|
|
159
|
-
* Decode a header info string into language, title, timeout, and reset flag.
|
|
160
|
-
*
|
|
161
|
-
* Token forms (all optional, any order):
|
|
162
|
-
* - `py` / `js` / `ts` bare language
|
|
163
|
-
* - `py:"..."` / `js:"..."` / `ts:"..."` language + title shorthand
|
|
164
|
-
* - `id:"..."` cell title
|
|
165
|
-
* - `t:<duration>` per-cell timeout
|
|
166
|
-
* - `<duration>` bare positional duration (lenient)
|
|
167
|
-
* - `rst` reset flag
|
|
168
|
-
* - `rst:true|false` reset flag with explicit value
|
|
169
|
-
*
|
|
170
|
-
* Fallback aliases (accepted but not advertised in the prompt):
|
|
171
|
-
* - id: title, name, cell, file, label
|
|
172
|
-
* - t: timeout, duration, time
|
|
173
|
-
* - rst: reset
|
|
174
|
-
*
|
|
175
|
-
* Truly unknown keys are silently dropped. First occurrence wins when a
|
|
176
|
-
* key is repeated (canonical or alias). Anything that doesn't classify
|
|
177
|
-
* accumulates as a positional title fragment joined by spaces.
|
|
178
|
-
*/
|
|
179
|
-
function parseHeaderInfo(info: string, lineNumber: number): HeaderInfo {
|
|
180
|
-
const tokens = tokenizeInfoString(info);
|
|
181
|
-
if (tokens.length === 0) return {};
|
|
182
|
-
|
|
183
|
-
let language: EvalLanguage | undefined;
|
|
184
|
-
let titleAttr: string | undefined;
|
|
185
|
-
let positionalDurationMs: number | undefined;
|
|
186
|
-
let tAttr: string | undefined;
|
|
187
|
-
let rstAttr: string | undefined;
|
|
188
|
-
let bareReset = false;
|
|
189
|
-
const titleParts: string[] = [];
|
|
190
|
-
|
|
191
|
-
for (const token of tokens) {
|
|
192
|
-
// Bare reset flag.
|
|
193
|
-
if (RST_KEYS.has(token.toLowerCase())) {
|
|
194
|
-
bareReset = true;
|
|
195
|
-
continue;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
const attrMatch = ATTR_TOKEN_RE.exec(token);
|
|
199
|
-
if (attrMatch && token.includes(":")) {
|
|
200
|
-
const key = attrMatch[1].toLowerCase();
|
|
201
|
-
const value = attrMatch[2] ?? attrMatch[3] ?? attrMatch[4] ?? "";
|
|
202
110
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
// unknown / repeated keys silently dropped
|
|
216
|
-
continue;
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
// Bare language token (no colon).
|
|
220
|
-
const lang = resolveLanguageAlias(token);
|
|
221
|
-
if (lang && language === undefined) {
|
|
222
|
-
language = lang;
|
|
223
|
-
continue;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// Bare positional duration (lenient — `t:` is canonical).
|
|
227
|
-
if (positionalDurationMs === undefined && DURATION_TOKEN_RE.test(token)) {
|
|
228
|
-
positionalDurationMs = parseDurationMs(token, lineNumber);
|
|
229
|
-
continue;
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
titleParts.push(token);
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
const explicitTitle = (titleAttr ?? "").trim();
|
|
236
|
-
const positionalTitle = titleParts.join(" ").trim();
|
|
237
|
-
const title = explicitTitle.length > 0 ? explicitTitle : positionalTitle.length > 0 ? positionalTitle : undefined;
|
|
238
|
-
|
|
239
|
-
let timeoutMs: number | undefined;
|
|
240
|
-
if (tAttr !== undefined) {
|
|
241
|
-
timeoutMs = parseDurationMs(tAttr, lineNumber);
|
|
242
|
-
} else if (positionalDurationMs !== undefined) {
|
|
243
|
-
timeoutMs = positionalDurationMs;
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
let reset: boolean | undefined;
|
|
247
|
-
if (rstAttr !== undefined) {
|
|
248
|
-
const parsed = parseBoolean(rstAttr);
|
|
249
|
-
if (parsed === undefined) {
|
|
250
|
-
throw new Error(`Eval line ${lineNumber}: invalid rst value \`${rstAttr}\`; use true or false.`);
|
|
251
|
-
}
|
|
252
|
-
reset = parsed;
|
|
253
|
-
} else if (bareReset) {
|
|
254
|
-
reset = true;
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
return { language, title, timeoutMs, reset };
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
interface ExpansionState {
|
|
261
|
-
language: EvalLanguage;
|
|
262
|
-
languageOrigin: EvalLanguageOrigin;
|
|
111
|
+
const code = body.join("\n");
|
|
112
|
+
const explicitLanguage = resolveLang(fenceLang);
|
|
113
|
+
const language = explicitLanguage ?? sniffEvalLanguage(code) ?? DEFAULT_LANGUAGE;
|
|
114
|
+
return {
|
|
115
|
+
index: 0,
|
|
116
|
+
title: undefined,
|
|
117
|
+
code,
|
|
118
|
+
language,
|
|
119
|
+
languageOrigin: explicitLanguage ? "header" : "default",
|
|
120
|
+
timeoutMs: DEFAULT_TIMEOUT_MS,
|
|
121
|
+
reset: false,
|
|
122
|
+
};
|
|
263
123
|
}
|
|
264
124
|
|
|
265
125
|
export function parseEvalInput(input: string): ParsedEvalInput {
|
|
266
126
|
const normalized = input.replace(/\r\n?/g, "\n");
|
|
267
127
|
const lines = normalized.split("\n");
|
|
268
|
-
// `split("\n")` produces a trailing empty element when the input ends with
|
|
269
|
-
// a newline. Drop it so we don't emit phantom blank trailing code lines.
|
|
270
128
|
if (lines.length > 0 && lines[lines.length - 1] === "") lines.pop();
|
|
271
129
|
|
|
272
|
-
const state: ExpansionState = { language: "python", languageOrigin: "default" };
|
|
273
130
|
const cells: ParsedEvalCell[] = [];
|
|
131
|
+
let aborted = false;
|
|
274
132
|
let i = 0;
|
|
275
133
|
|
|
276
|
-
//
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
cells.push({
|
|
287
|
-
index: cells.length,
|
|
288
|
-
title: undefined,
|
|
289
|
-
code: trimmed.join("\n"),
|
|
290
|
-
language: state.language,
|
|
291
|
-
languageOrigin: state.languageOrigin,
|
|
292
|
-
timeoutMs: DEFAULT_TIMEOUT_MS,
|
|
293
|
-
reset: false,
|
|
294
|
-
});
|
|
134
|
+
// Skip leading blank lines.
|
|
135
|
+
while (i < lines.length && lines[i].trim() === "") i++;
|
|
136
|
+
|
|
137
|
+
// Lenient fallback: if the input has no recognizable begin marker, treat
|
|
138
|
+
// the entire input as one implicit cell — unless that content contains
|
|
139
|
+
// `*** Abort`, in which case the body is incomplete/unsafe and we drop it.
|
|
140
|
+
if (i < lines.length && !BEGIN_RE.test(lines[i])) {
|
|
141
|
+
const tail = lines.slice(i);
|
|
142
|
+
if (tail.some(line => ABORT_RE.test(line))) {
|
|
143
|
+
return { cells, aborted: true };
|
|
295
144
|
}
|
|
145
|
+
const cell = parseImplicitCell(tail);
|
|
146
|
+
if (cell.code.length > 0) cells.push(cell);
|
|
147
|
+
return { cells };
|
|
296
148
|
}
|
|
297
149
|
|
|
298
150
|
while (i < lines.length) {
|
|
299
|
-
const
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
151
|
+
const beginMatch = BEGIN_RE.exec(lines[i])!;
|
|
152
|
+
const langToken = beginMatch[1];
|
|
153
|
+
const explicitLanguage = resolveLang(langToken);
|
|
154
|
+
i++;
|
|
155
|
+
|
|
156
|
+
let title: string | undefined;
|
|
157
|
+
let timeoutMs: number | undefined;
|
|
158
|
+
let reset = false;
|
|
159
|
+
|
|
160
|
+
while (i < lines.length) {
|
|
161
|
+
const line = lines[i];
|
|
162
|
+
const lineNumber = i + 1;
|
|
163
|
+
const titleMatch = TITLE_RE.exec(line);
|
|
164
|
+
if (titleMatch) {
|
|
165
|
+
if (title === undefined) title = titleMatch[1];
|
|
166
|
+
i++;
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
const timeoutMatch = TIMEOUT_RE.exec(line);
|
|
170
|
+
if (timeoutMatch) {
|
|
171
|
+
if (timeoutMs === undefined) timeoutMs = parseDurationMs(timeoutMatch[1], lineNumber);
|
|
172
|
+
i++;
|
|
173
|
+
continue;
|
|
174
|
+
}
|
|
175
|
+
if (RESET_RE.test(line)) {
|
|
176
|
+
reset = true;
|
|
177
|
+
i++;
|
|
178
|
+
continue;
|
|
179
|
+
}
|
|
180
|
+
break;
|
|
304
181
|
}
|
|
305
|
-
const headerLineNumber = i + 1;
|
|
306
|
-
const info = parseHeaderInfo(headerInfo, headerLineNumber);
|
|
307
|
-
i++; // consume header line
|
|
308
182
|
|
|
183
|
+
// Collect cell body. Close on `*** End` OR on the next `*** Begin`
|
|
184
|
+
// (implicit end — leniency for models that drop end markers between
|
|
185
|
+
// back-to-back cells). `*** Abort` (recovery sentinel) drops the
|
|
186
|
+
// in-progress cell entirely: its body is partial and unsafe to run.
|
|
309
187
|
const codeLines: string[] = [];
|
|
310
|
-
|
|
311
|
-
|
|
188
|
+
let cellAborted = false;
|
|
189
|
+
while (i < lines.length) {
|
|
190
|
+
const line = lines[i];
|
|
191
|
+
if (ABORT_RE.test(line)) {
|
|
192
|
+
cellAborted = true;
|
|
193
|
+
aborted = true;
|
|
194
|
+
i++;
|
|
195
|
+
break;
|
|
196
|
+
}
|
|
197
|
+
if (END_RE.test(line)) {
|
|
198
|
+
i++;
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
if (BEGIN_RE.test(line)) break;
|
|
202
|
+
codeLines.push(line);
|
|
312
203
|
i++;
|
|
313
204
|
}
|
|
205
|
+
|
|
206
|
+
if (cellAborted) break;
|
|
207
|
+
|
|
314
208
|
// Strip trailing blank lines so visual spacing between cells doesn't
|
|
315
209
|
// leak into the preceding cell's code.
|
|
316
210
|
while (codeLines.length > 0 && codeLines[codeLines.length - 1].trim() === "") {
|
|
317
211
|
codeLines.pop();
|
|
318
212
|
}
|
|
213
|
+
const code = codeLines.join("\n");
|
|
319
214
|
|
|
320
|
-
const language =
|
|
321
|
-
const languageOrigin: EvalLanguageOrigin =
|
|
215
|
+
const language = explicitLanguage ?? sniffEvalLanguage(code) ?? DEFAULT_LANGUAGE;
|
|
216
|
+
const languageOrigin: EvalLanguageOrigin = explicitLanguage ? "header" : "default";
|
|
322
217
|
|
|
323
218
|
cells.push({
|
|
324
219
|
index: cells.length,
|
|
325
|
-
title
|
|
326
|
-
code
|
|
220
|
+
title,
|
|
221
|
+
code,
|
|
327
222
|
language,
|
|
328
223
|
languageOrigin,
|
|
329
|
-
timeoutMs:
|
|
330
|
-
reset
|
|
224
|
+
timeoutMs: timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
|
225
|
+
reset,
|
|
331
226
|
});
|
|
332
|
-
|
|
333
|
-
|
|
227
|
+
|
|
228
|
+
// Skip blank separator lines between cells; an `*** Abort` here
|
|
229
|
+
// terminates parsing while keeping previously-collected cells.
|
|
230
|
+
while (i < lines.length && lines[i].trim() === "") i++;
|
|
231
|
+
if (i < lines.length && ABORT_RE.test(lines[i])) {
|
|
232
|
+
aborted = true;
|
|
233
|
+
break;
|
|
234
|
+
}
|
|
334
235
|
}
|
|
335
236
|
|
|
336
|
-
return { cells };
|
|
237
|
+
return aborted ? { cells, aborted: true } : { cells };
|
|
337
238
|
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { EvalLanguage } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Best-effort language sniff for cells with no explicit `language`.
|
|
5
|
+
*
|
|
6
|
+
* Order:
|
|
7
|
+
* 1. Shebang on first line (`#!/usr/bin/env python`, `#!/usr/bin/env node`, etc.)
|
|
8
|
+
* 2. Strong syntactic markers unique to one language. Bias false negatives over
|
|
9
|
+
* false positives — anything ambiguous returns `undefined` and the caller
|
|
10
|
+
* falls back to the default-backend rules.
|
|
11
|
+
*/
|
|
12
|
+
export function sniffEvalLanguage(code: string): EvalLanguage | undefined {
|
|
13
|
+
const stripped = code.replace(/^\s+/, "");
|
|
14
|
+
if (stripped.startsWith("#!")) {
|
|
15
|
+
const firstLine = stripped.split("\n", 1)[0]!.toLowerCase();
|
|
16
|
+
if (/(\bpython\d?\b|\bipython\b)/.test(firstLine)) return "python";
|
|
17
|
+
if (/(\bnode\b|\bbun\b|\bdeno\b|\bjavascript\b|\bjs\b)/.test(firstLine)) return "js";
|
|
18
|
+
}
|
|
19
|
+
const jsMarkers =
|
|
20
|
+
/(^|\n)\s*(const|let|var|async\s+function|function\s*\*?\s*[\w$]*\s*\(|import\s+[^\n]+\sfrom\s|export\s+(default|const|let|function|class|async)|require\s*\(|console\.\w+\s*\(|=>|;\s*$)/m;
|
|
21
|
+
const pyMarkers =
|
|
22
|
+
/(^|\n)\s*(def\s+\w+\s*\(|from\s+[\w.]+\s+import|import\s+\w+(\s+as\s+\w+)?\s*$|class\s+\w+\s*[(:]|print\s*\(|elif\s+[^\n]*:|with\s+[^\n]+:\s*$|@[\w.]+\s*$)/m;
|
|
23
|
+
const hasJs = jsMarkers.test(code);
|
|
24
|
+
const hasPy = pyMarkers.test(code);
|
|
25
|
+
if (hasJs && !hasPy) return "js";
|
|
26
|
+
if (hasPy && !hasJs) return "python";
|
|
27
|
+
return undefined;
|
|
28
|
+
}
|
|
@@ -650,6 +650,7 @@
|
|
|
650
650
|
padding: var(--line-height);
|
|
651
651
|
border-radius: 4px;
|
|
652
652
|
margin-bottom: var(--line-height);
|
|
653
|
+
cursor: pointer;
|
|
653
654
|
}
|
|
654
655
|
|
|
655
656
|
.tools-header {
|
|
@@ -658,6 +659,35 @@
|
|
|
658
659
|
margin-bottom: var(--line-height);
|
|
659
660
|
}
|
|
660
661
|
|
|
662
|
+
.tools-list.collapsed .tools-header {
|
|
663
|
+
margin-bottom: 0;
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
.tools-list.collapsed .tools-content {
|
|
667
|
+
display: none;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
.tools-list:not(.collapsed) .tools-collapsed {
|
|
671
|
+
display: none;
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
.tools-collapsed {
|
|
675
|
+
margin-top: 6px;
|
|
676
|
+
display: flex;
|
|
677
|
+
flex-wrap: wrap;
|
|
678
|
+
gap: 4px;
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
.tool-name-chip {
|
|
682
|
+
display: inline-block;
|
|
683
|
+
padding: 1px 6px;
|
|
684
|
+
border-radius: 3px;
|
|
685
|
+
background: var(--container-bg);
|
|
686
|
+
color: var(--text);
|
|
687
|
+
font-size: 11px;
|
|
688
|
+
font-weight: 500;
|
|
689
|
+
}
|
|
690
|
+
|
|
661
691
|
.tool-item {
|
|
662
692
|
font-size: 11px;
|
|
663
693
|
}
|
|
@@ -726,6 +756,14 @@
|
|
|
726
756
|
color: var(--warning);
|
|
727
757
|
}
|
|
728
758
|
|
|
759
|
+
.tool-intent {
|
|
760
|
+
color: var(--muted);
|
|
761
|
+
font-style: italic;
|
|
762
|
+
font-size: 11px;
|
|
763
|
+
margin-bottom: 4px;
|
|
764
|
+
opacity: 0.85;
|
|
765
|
+
}
|
|
766
|
+
|
|
729
767
|
.tool-args {
|
|
730
768
|
margin-top: 4px;
|
|
731
769
|
color: var(--toolOutput);
|