jeo-code 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +2 -2
- package/README.ko.md +2 -2
- package/README.md +2 -2
- package/README.zh.md +2 -2
- package/package.json +1 -1
- package/src/agent/opik-tracer.ts +364 -0
- package/src/autopilot.ts +35 -17
- package/src/commands/launch.ts +72 -86
package/README.ja.md
CHANGED
|
@@ -150,11 +150,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
|
|
|
150
150
|
## 変更履歴 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.5.8]** (2026-06-15) — Native Opik observability for the turn loop (opt-in `JEO_OPIK`, pure-TS no-op when unset) + autopilot convergence tracking.
|
|
154
|
+
- **[0.5.7]** (2026-06-15) — `/model` picker is default-only, `/clear` resets to the initial screen, ESC clears the input box, and a launch process-listener leak is fixed.
|
|
153
155
|
- **[0.5.6]** (2026-06-15) — `/model` sets only the default thinking; per-role reasoning moved to `/agents`.
|
|
154
156
|
- **[0.5.5]** (2026-06-15) — Full multi-line visibility — the input box scrolls to the caret and the submitted card shows every line.
|
|
155
157
|
- **[0.5.4]** (2026-06-15) — Reliable multi-line input is ON by default — a paste fills the box and submits as one message.
|
|
156
|
-
- **[0.5.3]** (2026-06-15) — `$` chains multiple skills in one line (all run, in order), plus multi-line prompt input — paste-merge and gated Shift+Enter.
|
|
157
|
-
- **[0.5.2]** (2026-06-14) — `$skill` prompt invocation with prefix/fuzzy suggestions, and a per-session input-box hue (amber in cmd-mode).
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.ko.md
CHANGED
|
@@ -150,11 +150,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
|
|
|
150
150
|
## 변경 이력 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.5.8]** (2026-06-15) — Native Opik observability for the turn loop (opt-in `JEO_OPIK`, pure-TS no-op when unset) + autopilot convergence tracking.
|
|
154
|
+
- **[0.5.7]** (2026-06-15) — `/model` picker is default-only, `/clear` resets to the initial screen, ESC clears the input box, and a launch process-listener leak is fixed.
|
|
153
155
|
- **[0.5.6]** (2026-06-15) — `/model` sets only the default thinking; per-role reasoning moved to `/agents`.
|
|
154
156
|
- **[0.5.5]** (2026-06-15) — Full multi-line visibility — the input box scrolls to the caret and the submitted card shows every line.
|
|
155
157
|
- **[0.5.4]** (2026-06-15) — Reliable multi-line input is ON by default — a paste fills the box and submits as one message.
|
|
156
|
-
- **[0.5.3]** (2026-06-15) — `$` chains multiple skills in one line (all run, in order), plus multi-line prompt input — paste-merge and gated Shift+Enter.
|
|
157
|
-
- **[0.5.2]** (2026-06-14) — `$skill` prompt invocation with prefix/fuzzy suggestions, and a per-session input-box hue (amber in cmd-mode).
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.md
CHANGED
|
@@ -150,11 +150,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
|
|
|
150
150
|
## Changelog
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.5.8]** (2026-06-15) — Native Opik observability for the turn loop (opt-in `JEO_OPIK`, pure-TS no-op when unset) + autopilot convergence tracking.
|
|
154
|
+
- **[0.5.7]** (2026-06-15) — `/model` picker is default-only, `/clear` resets to the initial screen, ESC clears the input box, and a launch process-listener leak is fixed.
|
|
153
155
|
- **[0.5.6]** (2026-06-15) — `/model` sets only the default thinking; per-role reasoning moved to `/agents`.
|
|
154
156
|
- **[0.5.5]** (2026-06-15) — Full multi-line visibility — the input box scrolls to the caret and the submitted card shows every line.
|
|
155
157
|
- **[0.5.4]** (2026-06-15) — Reliable multi-line input is ON by default — a paste fills the box and submits as one message.
|
|
156
|
-
- **[0.5.3]** (2026-06-15) — `$` chains multiple skills in one line (all run, in order), plus multi-line prompt input — paste-merge and gated Shift+Enter.
|
|
157
|
-
- **[0.5.2]** (2026-06-14) — `$skill` prompt invocation with prefix/fuzzy suggestions, and a per-session input-box hue (amber in cmd-mode).
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.zh.md
CHANGED
|
@@ -150,11 +150,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
|
|
|
150
150
|
## 更新日志 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.5.8]** (2026-06-15) — Native Opik observability for the turn loop (opt-in `JEO_OPIK`, pure-TS no-op when unset) + autopilot convergence tracking.
|
|
154
|
+
- **[0.5.7]** (2026-06-15) — `/model` picker is default-only, `/clear` resets to the initial screen, ESC clears the input box, and a launch process-listener leak is fixed.
|
|
153
155
|
- **[0.5.6]** (2026-06-15) — `/model` sets only the default thinking; per-role reasoning moved to `/agents`.
|
|
154
156
|
- **[0.5.5]** (2026-06-15) — Full multi-line visibility — the input box scrolls to the caret and the submitted card shows every line.
|
|
155
157
|
- **[0.5.4]** (2026-06-15) — Reliable multi-line input is ON by default — a paste fills the box and submits as one message.
|
|
156
|
-
- **[0.5.3]** (2026-06-15) — `$` chains multiple skills in one line (all run, in order), plus multi-line prompt input — paste-merge and gated Shift+Enter.
|
|
157
|
-
- **[0.5.2]** (2026-06-14) — `$skill` prompt invocation with prefix/fuzzy suggestions, and a per-session input-box hue (amber in cmd-mode).
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
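package/package.json
CHANGED
(hunk not captured in this rendering — the summary above lists +1 -1)
package/src/agent/opik-tracer.ts
ADDED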
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Opik observability for the jeo agent turn loop (spec-stack · Run phase).
|
|
3
|
+
*
|
|
4
|
+
* Each agent turn becomes ONE Opik trace; each step/tool becomes a span; token
|
|
5
|
+
* usage and the eval feedback scores (`completed` / `verified` / `efficiency`)
|
|
6
|
+
* are attached to the trace. Pure TypeScript over `fetch` — no Python, no
|
|
7
|
+
* `opik` npm package — consistent with jeo's zero-native-dependency constraint.
|
|
8
|
+
*
|
|
9
|
+
* Hard invariants (see .specify/specs/opik-observability/seed.md):
|
|
10
|
+
* - I1: `JEO_OPIK` unset => the tracer is a no-op; zero Opik HTTP calls.
|
|
11
|
+
* - I2: no tracer error ever propagates out of an events callback.
|
|
12
|
+
* - I3: no secret is logged; the key only travels in the `Authorization` header.
|
|
13
|
+
* - I4: engine output is identical regardless of tracing outcome.
|
|
14
|
+
*
|
|
15
|
+
* Opik REST surface (private v1), confirmed against the installed SDK:
|
|
16
|
+
* - POST {base}/v1/private/traces/batch { traces: [...] }
|
|
17
|
+
* - POST {base}/v1/private/spans/batch { spans: [...] }
|
|
18
|
+
* - PUT {base}/v1/private/traces/feedback-scores { scores: [...] }
|
|
19
|
+
* Headers: `Authorization: <api_key>`, `Comet-Workspace: <workspace>`.
|
|
20
|
+
*/
|
|
21
|
+
import { jeoEnv } from "../util/env";
|
|
22
|
+
import type { AgentLoopEvents, ToolInvocation } from "./engine";
|
|
23
|
+
|
|
24
|
+
/** Environment map shape; `process.env` is used as the default wherever this type appears. */
type Env = Record<string, string | undefined>;
/** Type of the injectable `fetch` implementation. */
type FetchImpl = typeof fetch;

/** Base URL used when `OPIK_URL_OVERRIDE` is not set. */
const DEFAULT_BASE = "https://www.comet.com/opik/api";
/** Project name used when `OPIK_PROJECT_NAME` is not set or is blank. */
const DEFAULT_PROJECT = "jeo";
/** Workspace used when `COMET_WORKSPACE` is not set or is blank. */
const DEFAULT_WORKSPACE = "jeo";
/** Verification signal (mirrors engine.ts VERIFY_SIGNAL_RE) — used for the eval score. */
const VERIFY_SIGNAL_RE = /\b(test|tests|tsc|typecheck|lint|build|check|spec|pytest|vitest|jest)\b/i;
|
|
32
|
+
|
|
33
|
+
/**
 * Master switch for Opik tracing (`JEO_OPIK`, read through `jeoEnv("OPIK", …)`).
 *
 * The raw setting is trimmed and lower-cased before comparison, so `" ON "`
 * counts as enabled.
 *
 * @param env Environment map to read from; defaults to `process.env`.
 * @returns `true` only when the setting is `1`, `true`, `yes`, or `on`;
 *          `false` for anything else, including an unset value.
 */
export function opikEnabled(env: Env = process.env): boolean {
  const setting = jeoEnv("OPIK", env);
  const normalized = (setting ?? "").trim().toLowerCase();
  return ["1", "true", "yes", "on"].includes(normalized);
}
|
|
38
|
+
|
|
39
|
+
/** Connection settings for the Opik REST API, as produced by `resolveOpikConfig`. */
export interface OpikConfig {
  /** API key sent as the `Authorization` header; `undefined` when none is configured. */
  apiKey?: string;
  /** Workspace name sent as the `Comet-Workspace` header. */
  workspace: string;
  /** Base URL that request paths are appended to (no trailing slash). */
  baseUrl: string;
  /** Project name written into every trace, span, and score payload. */
  projectName: string;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Resolve Opik connection config from the environment (no I/O).
 *
 * Reads `OPIK_API_KEY`, `COMET_WORKSPACE`, `OPIK_URL_OVERRIDE`, and
 * `OPIK_PROJECT_NAME`. Every value is trimmed. A missing or blank workspace,
 * project, or base URL falls back to its built-in default, so a blank
 * `OPIK_URL_OVERRIDE` can no longer produce an empty base URL (which would
 * turn every request into a relative path).
 *
 * @param env Environment map to read from; defaults to `process.env`.
 * @returns The resolved config. `apiKey` is `undefined` when unset or blank;
 *          `baseUrl` never ends with a slash.
 */
export function resolveOpikConfig(env: Env = process.env): OpikConfig {
  // Trim, then treat "missing" and "blank" the same way for every setting.
  const orDefault = (value: string | undefined, fallback: string): string =>
    (value ?? "").trim() || fallback;

  // Strip trailing slashes so path joins are predictable; an override made
  // only of slashes collapses to "" and must fall back as well.
  const baseUrl =
    orDefault(env.OPIK_URL_OVERRIDE, DEFAULT_BASE).replace(/\/+$/, "") || DEFAULT_BASE;

  return {
    apiKey: env.OPIK_API_KEY?.trim() || undefined,
    workspace: orDefault(env.COMET_WORKSPACE, DEFAULT_WORKSPACE),
    baseUrl,
    projectName: orDefault(env.OPIK_PROJECT_NAME, DEFAULT_PROJECT),
  };
}
|
|
58
|
+
|
|
59
|
+
/**
 * RFC-9562 UUIDv7 (time-ordered) — Opik orders traces/spans by id.
 *
 * Layout: 48-bit big-endian millisecond timestamp, 4-bit version (`7`),
 * 2-bit variant (`10`), and random bits everywhere else.
 *
 * @param now Timestamp in milliseconds since the epoch. Fractions are
 *            truncated and negatives clamp to 0. A non-finite value
 *            (`NaN`/`Infinity`) falls back to `Date.now()`; previously
 *            `Infinity` produced a malformed id and `NaN` a meaningless one.
 * @param rnd Random source returning values in [0, 1); called exactly 16 times.
 * @returns The UUID as lower-case hex in 8-4-4-4-12 form.
 */
export function uuidv7(now: number = Date.now(), rnd: () => number = Math.random): string {
  const ms = Number.isFinite(now) ? Math.max(0, Math.trunc(now)) : Date.now();

  const bytes: number[] = [];
  for (let i = 0; i < 16; i++) bytes.push(Math.floor(rnd() * 256) & 0xff);

  // 48-bit big-endian timestamp. Division (not bit shifts) is used because
  // the value exceeds 32 bits; only the low 48 bits are kept.
  for (let i = 0; i < 6; i++) bytes[i] = Math.floor(ms / 2 ** (8 * (5 - i))) % 256;

  bytes[6] = 0x70 | ((bytes[6] ?? 0) & 0x0f); // version 7
  bytes[8] = 0x80 | ((bytes[8] ?? 0) & 0x3f); // variant

  const hex = bytes.map(x => x.toString(16).padStart(2, "0")).join("");
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Format an epoch-millisecond timestamp as ISO-8601 with milliseconds
 * (Opik expects RFC-3339 timestamps).
 *
 * @param ms Milliseconds since the Unix epoch.
 * @returns The UTC timestamp, e.g. `1970-01-01T00:00:00.000Z`.
 * @throws RangeError when `ms` does not form a valid date (from `toISOString`).
 */
function iso(ms: number): string {
  const instant = new Date(ms);
  return instant.toISOString();
}
|
|
77
|
+
|
|
78
|
+
/** Descriptive data recorded on the trace for one agent turn. */
export interface TurnMeta {
  /** Human-readable turn name (the user intent / first message). */
  name: string;
  /** The user input recorded on the trace. */
  input?: string;
  /** Extra metadata (model, cwd, …). */
  metadata?: Record<string, unknown>;
  /** Trace tags; `buildTracePayload` substitutes `["jeo"]` when omitted. */
  tags?: string[];
}
|
|
87
|
+
|
|
88
|
+
/** One observed tool result; each record becomes one Opik span. */
export interface StepRecord {
  /** Step number the tool result belongs to. */
  step: number;
  /** Name of the tool that ran. */
  tool: string;
  /** Whether the tool reported success. */
  success: boolean;
  /** Tool output text (the span payload keeps at most the first 4000 characters). */
  output: string;
  /** Start of the step, in milliseconds since the epoch. */
  startTime: number;
  /** Time the tool result was observed, in milliseconds since the epoch. */
  endTime: number;
}
|
|
96
|
+
|
|
97
|
+
/** Eval feedback scores attached to a trace; `computeScores` keeps each in [0,1]. */
export interface TurnScores {
  /** 1 when the turn ended in `done`, otherwise 0. */
  completed: number;
  /** 1 when a verification signal ran in-turn, otherwise 0. */
  verified: number;
  /** 1/sqrt(steps), rounded to four decimals. */
  efficiency: number;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Eval scoring (the *evaluation* surface). Every score lies in [0,1].
 *
 * - completed:  1 when the turn ended in `done`, else 0.
 * - verified:   1 when a verification signal (test/tsc/build/…) ran in-turn, else 0.
 * - efficiency: 1/sqrt(steps), rounded to four decimals — 1 for a one-step
 *               turn and decaying as the step count grows.
 *
 * @param args.done            Whether the turn ended in `done`.
 * @param args.steps           Step count; truncated to an integer, and any
 *                             value below 1 (or `NaN`) is treated as 1.
 * @param args.verificationRan Whether a verification step was observed.
 * @returns The three scores.
 */
export function computeScores(args: {
  done: boolean;
  steps: number;
  verificationRan: boolean;
}): TurnScores {
  const { done, verificationRan } = args;

  // Clamp to at least one whole step; NaN fails the comparison and lands on 1.
  const whole = Math.trunc(args.steps);
  const stepCount = whole >= 1 ? whole : 1;

  const rawEfficiency = Math.min(1, 1 / Math.sqrt(stepCount));
  const scores: TurnScores = {
    completed: done ? 1 : 0,
    verified: verificationRan ? 1 : 0,
    efficiency: Number(rawEfficiency.toFixed(4)),
  };
  return scores;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Whether a tool name + output looks like an in-turn verification signal.
 *
 * Only the `bash` tool qualifies, and only when its output matches
 * `VERIFY_SIGNAL_RE`.
 *
 * @param tool   Name of the tool that ran.
 * @param output Output text produced by the tool.
 * @returns `true` when the step counts as verification.
 */
export function isVerificationStep(tool: string, output: string): boolean {
  return tool === "bash" && VERIFY_SIGNAL_RE.test(output);
}
|
|
129
|
+
|
|
130
|
+
// ---- Pure payload builders (unit-tested without network) --------------------
|
|
131
|
+
|
|
132
|
+
/**
 * Build the JSON body of one trace for `traces/batch` (pure, no network).
 *
 * @param args.id        Trace id.
 * @param args.project   Project name, written as `project_name`.
 * @param args.meta      Turn name, optional input, metadata, and tags.
 * @param args.startTime Turn start, milliseconds since the epoch.
 * @param args.endTime   Turn end, milliseconds since the epoch.
 * @param args.output    Optional final output, wrapped as `{ result }`.
 * @param args.usage     Optional token totals, stored under `metadata.usage`
 *                       as prompt/completion/total tokens.
 * @returns The payload. `input` and `output` are omitted when absent, and
 *          `tags` defaults to `["jeo"]`.
 */
export function buildTracePayload(args: {
  id: string;
  project: string;
  meta: TurnMeta;
  startTime: number;
  endTime: number;
  output?: string;
  usage?: { inputTokens: number; outputTokens: number };
}): Record<string, unknown> {
  const { meta, usage } = args;

  // Copy so the caller's metadata object is never mutated.
  const metadata: Record<string, unknown> = { ...(meta.metadata ?? {}) };
  if (usage) {
    const { inputTokens, outputTokens } = usage;
    metadata.usage = {
      prompt_tokens: inputTokens,
      completion_tokens: outputTokens,
      total_tokens: inputTokens + outputTokens,
    };
  }

  // Keys are inserted in the same order as the serialized payload.
  const payload: Record<string, unknown> = {
    id: args.id,
    project_name: args.project,
    name: meta.name,
    start_time: iso(args.startTime),
    end_time: iso(args.endTime),
  };
  if (meta.input != null) payload.input = { message: meta.input };
  if (args.output != null) payload.output = { result: args.output };
  payload.metadata = metadata;
  payload.tags = meta.tags ?? ["jeo"];
  return payload;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Build the JSON body of one span for `spans/batch` (pure, no network).
 *
 * @param args.id      Span id.
 * @param args.traceId Id of the trace the span belongs to.
 * @param args.project Project name, written as `project_name`.
 * @param args.rec     The recorded step to describe.
 * @returns The payload; the recorded output is cut to its first 4000 characters.
 */
export function buildSpanPayload(args: {
  id: string;
  traceId: string;
  project: string;
  rec: StepRecord;
}): Record<string, unknown> {
  const { step, tool, success, output, startTime, endTime } = args.rec;
  const clippedOutput = output.slice(0, 4000);

  return {
    id: args.id,
    trace_id: args.traceId,
    project_name: args.project,
    name: `step ${step}: ${tool}`,
    type: "general",
    start_time: iso(startTime),
    end_time: iso(endTime),
    input: { tool },
    output: { success, output: clippedOutput },
    metadata: { step, success },
  };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Build the JSON body for `traces/feedback-scores` (pure, no network).
 *
 * Emits one entry per score, in the order `completed`, `verified`,
 * `efficiency`, each addressed to the trace and carrying a fixed explanation.
 *
 * @param args.traceId Id of the trace the scores attach to.
 * @param args.project Project name, written as `project_name`.
 * @param args.scores  The computed scores.
 * @returns `{ scores: [...] }` with three entries.
 */
export function buildScorePayload(args: {
  traceId: string;
  project: string;
  scores: TurnScores;
}): Record<string, unknown> {
  const { traceId, project, scores } = args;

  const rows: Array<[name: string, value: number, reason: string]> = [
    ["completed", scores.completed, "1 when the turn ended in `done`"],
    ["verified", scores.verified, "1 when a verification signal ran in-turn"],
    ["efficiency", scores.efficiency, "1/sqrt(steps); fewer steps score higher"],
  ];

  return {
    scores: rows.map(([name, value, reason]) => ({
      id: traceId,
      project_name: project,
      name,
      value,
      source: "sdk" as const,
      reason,
    })),
  };
}
|
|
204
|
+
|
|
205
|
+
// ---- Tracer -----------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
/** Per-turn tracer contract, implemented by both the live and the no-op tracer. */
export interface OpikTracer {
  /** `false` on the no-op tracer; `wrapEvents` returns events untouched in that case. */
  readonly enabled: boolean;
  /** Mark the start of the turn. */
  startTurn(): void;
  /** Record one tool result. */
  step(rec: StepRecord): void;
  /** Record token usage for the turn. */
  usage(u: { inputTokens: number; outputTokens: number }): void;
  /** Finish the turn and report it. */
  endTurn(result: { done: boolean; steps: number; output?: string }): Promise<void>;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Shared do-nothing tracer: `enabled` is `false` and every method has an empty
 * body, so using it performs no work and no network calls.
 */
const NOOP_TRACER: OpikTracer = {
  enabled: false,
  startTurn() {},
  step() {},
  usage() {},
  async endTurn() {},
};
|
|
222
|
+
|
|
223
|
+
/**
 * Tracer that reports one agent turn to Opik.
 *
 * Steps and usage are buffered in memory; nothing is sent until `endTurn`,
 * which posts the trace, then its spans, then the feedback scores. Send
 * failures are swallowed so tracing can never break the turn (I2/I4).
 */
class LiveOpikTracer implements OpikTracer {
  /** Upper bound for a single request so a hung endpoint cannot stall `endTurn`. */
  private static readonly SEND_TIMEOUT_MS = 10000;

  readonly enabled = true;
  private readonly traceId = uuidv7();
  /**
   * Recorded steps, each paired with its own span id. Ids are stored per
   * record rather than per step number: several tool results can share a step
   * number (for example when the same tracer observes a second agent loop
   * whose step numbers overlap the first), and a step-number key would give
   * those spans the same id within one batch.
   */
  private readonly spans: Array<{ id: string; rec: StepRecord }> = [];
  private startedAt = Date.now();
  private usageAcc = { inputTokens: 0, outputTokens: 0 };
  private sawUsage = false;
  private verificationRan = false;
  private ended = false;

  /**
   * @param meta      Turn name/input/metadata/tags recorded on the trace.
   * @param cfg       Resolved connection settings.
   * @param fetchImpl `fetch` implementation used for every request.
   */
  constructor(
    private readonly meta: TurnMeta,
    private readonly cfg: OpikConfig,
    private readonly fetchImpl: FetchImpl,
  ) {}

  /** Request headers; the API key travels only in `Authorization` (I3). */
  private headers(): Record<string, string> {
    const h: Record<string, string> = {
      "Content-Type": "application/json",
      "Comet-Workspace": this.cfg.workspace,
    };
    if (this.cfg.apiKey) h["Authorization"] = this.cfg.apiKey;
    return h;
  }

  /**
   * Send one JSON request. Any failure — network error, serialization error,
   * or timeout — is swallowed (I2/I4). The response is not inspected.
   *
   * @param path   Path appended to the configured base URL.
   * @param body   Value serialized as the JSON request body.
   * @param method HTTP method; defaults to `POST`.
   */
  private async send(path: string, body: unknown, method: "POST" | "PUT" = "POST"): Promise<void> {
    // Abort after a fixed delay so the awaited endTurn() always settles.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), LiveOpikTracer.SEND_TIMEOUT_MS);
    try {
      await this.fetchImpl(`${this.cfg.baseUrl}/${path}`, {
        method,
        headers: this.headers(),
        body: JSON.stringify(body),
        signal: controller.signal,
      });
    } catch {
      /* never break the turn */
    } finally {
      clearTimeout(timer);
    }
  }

  /** Reset the turn start time to now. */
  startTurn(): void {
    this.startedAt = Date.now();
  }

  /** Buffer one tool result under a fresh span id and note verification signals. */
  step(rec: StepRecord): void {
    this.spans.push({ id: uuidv7(rec.startTime), rec });
    if (isVerificationStep(rec.tool, rec.output)) this.verificationRan = true;
  }

  /** Add to the running token totals; missing or zero counts add nothing. */
  usage(u: { inputTokens: number; outputTokens: number }): void {
    this.usageAcc.inputTokens += u.inputTokens || 0;
    this.usageAcc.outputTokens += u.outputTokens || 0;
    this.sawUsage = true;
  }

  /**
   * Report the turn: trace first, then spans (when any were recorded), then
   * feedback scores. Only the first call does anything; later calls return
   * immediately.
   *
   * @param result Outcome of the turn (`done`, step count, optional output).
   */
  async endTurn(result: { done: boolean; steps: number; output?: string }): Promise<void> {
    if (this.ended) return;
    this.ended = true;
    const endedAt = Date.now();
    const project = this.cfg.projectName;

    const trace = buildTracePayload({
      id: this.traceId,
      project,
      meta: this.meta,
      startTime: this.startedAt,
      endTime: endedAt,
      output: result.output,
      // Usage is attached only when at least one usage event was seen.
      usage: this.sawUsage ? this.usageAcc : undefined,
    });
    const spans = this.spans.map(({ id, rec }) =>
      buildSpanPayload({ id, traceId: this.traceId, project, rec }),
    );
    const scores = computeScores({
      done: result.done,
      steps: result.steps,
      verificationRan: this.verificationRan,
    });
    const scorePayload = buildScorePayload({ traceId: this.traceId, project, scores });

    await this.send("v1/private/traces/batch", { traces: [trace] });
    if (spans.length > 0) await this.send("v1/private/spans/batch", { spans });
    await this.send("v1/private/traces/feedback-scores", scorePayload, "PUT");
  }
}
|
|
308
|
+
|
|
309
|
+
/**
 * Build a tracer for one turn.
 *
 * A live tracer is returned only when `JEO_OPIK` is on AND an API key is
 * configured. In every other case the shared no-op tracer is returned, so no
 * network call is ever made without explicit opt-in and credentials.
 *
 * @param meta      Turn name/input/metadata/tags recorded on the trace.
 * @param env       Environment map to read from; defaults to `process.env`.
 * @param fetchImpl `fetch` implementation handed to the live tracer.
 * @returns A live tracer, or the no-op tracer.
 */
export function createOpikTracer(
  meta: TurnMeta,
  env: Env = process.env,
  fetchImpl: FetchImpl = fetch,
): OpikTracer {
  if (opikEnabled(env)) {
    const cfg = resolveOpikConfig(env);
    // no creds => stay silent, never guess
    if (cfg.apiKey) return new LiveOpikTracer(meta, cfg, fetchImpl);
  }
  return NOOP_TRACER;
}
|
|
323
|
+
|
|
324
|
+
/**
 * Compose an existing `AgentLoopEvents` with tracer hooks.
 *
 * With a disabled tracer the given events are returned as-is (or a fresh
 * empty object when there are none). Otherwise a new events object is
 * returned that carries over every original callback and additionally:
 * - `onStep` remembers the current step number and its start time;
 * - `onToolResult` records a `StepRecord` on the tracer before delegating
 *   (step number 1 is used until the first `onStep`);
 * - `onUsage` forwards token usage to the tracer before delegating.
 *
 * Errors thrown by the tracer are swallowed (I2); errors thrown by the
 * original callbacks still propagate.
 *
 * @param events Original callbacks, if any.
 * @param tracer Tracer observing the loop.
 * @returns The composed callbacks.
 */
export function wrapEvents(events: AgentLoopEvents | undefined, tracer: OpikTracer): AgentLoopEvents {
  const base: AgentLoopEvents = events ?? {};
  if (!tracer.enabled) return base;

  let activeStep = 0;
  let activeStepStart = Date.now();

  return {
    ...base,
    onStep(step: number) {
      // The tracer has no per-step write; only bookkeeping happens here.
      activeStep = step;
      activeStepStart = Date.now();
      base.onStep?.(step);
    },
    onAssistant(raw: string, invocation: ToolInvocation | null) {
      base.onAssistant?.(raw, invocation);
    },
    onToolResult(tool: string, success: boolean, output: string) {
      try {
        const rec = {
          step: activeStep || 1,
          tool,
          success,
          output,
          startTime: activeStepStart,
          endTime: Date.now(),
        };
        tracer.step(rec);
      } catch {
        /* I2 */
      }
      base.onToolResult?.(tool, success, output);
    },
    onUsage(usage: { inputTokens: number; outputTokens: number }) {
      try {
        tracer.usage(usage);
      } catch {
        /* I2 */
      }
      base.onUsage?.(usage);
    },
  };
}
|
package/src/autopilot.ts
CHANGED
|
@@ -142,6 +142,33 @@ function isImprovement(goal: Goal, score: number, best: number | undefined): boo
|
|
|
142
142
|
return true; // gate handled via passed, not score
|
|
143
143
|
}
|
|
144
144
|
|
|
145
|
+
/**
 * Single source of truth for the ratchet keep/revert decision. Shared by step,
 * loop, and status so they can never diverge.
 *
 * - gate goal: keep iff the eval passed (the score is ignored).
 * - min/max goal: a non-measurable (NaN) score can never prove improvement, so
 *   it is always reverted; otherwise keep iff `isImprovement` says the score
 *   beats the best so far.
 *
 * @param goal   The optimisation goal.
 * @param score  Score measured for this step (may be NaN).
 * @param passed Whether the eval passed.
 * @param best   Best score so far, or `undefined` when there is none.
 * @returns `"keep"` or `"revert"`.
 */
export function decideStep(
  goal: Goal,
  score: number,
  passed: boolean,
  best: number | undefined,
): "keep" | "revert" {
  let keep: boolean;
  if (goal === "gate") {
    keep = passed;
  } else if (Number.isNaN(score)) {
    keep = false;
  } else {
    keep = isImprovement(goal, score, best);
  }
  return keep ? "keep" : "revert";
}
|
|
162
|
+
|
|
163
|
+
/**
 * Convergence is a streak of consecutive no-progress steps (reverts) reaching
 * patience — for every goal, gate included. A gate loop that keeps failing has
 * made no forward progress and must stop early instead of burning the budget.
 *
 * @param sinceImprove Number of consecutive steps without a keep.
 * @param patience     Streak length at which the loop counts as converged.
 * @returns `true` once the streak has reached (or passed) `patience`.
 */
export function isConverged(sinceImprove: number, patience: number): boolean {
  const streakReachedPatience = patience <= sinceImprove;
  return streakReachedPatience;
}
|
|
171
|
+
|
|
145
172
|
function hasBaseline(): boolean {
|
|
146
173
|
return readLog().some((e) => e.type === "baseline");
|
|
147
174
|
}
|
|
@@ -189,14 +216,7 @@ function cmdStep(flags: Record<string, string>): void {
|
|
|
189
216
|
const best = currentBest(s);
|
|
190
217
|
const { score, passed, output } = runEval(s);
|
|
191
218
|
|
|
192
|
-
|
|
193
|
-
if (s.goal === "gate") {
|
|
194
|
-
decision = passed ? "keep" : "revert";
|
|
195
|
-
} else if (Number.isNaN(score)) {
|
|
196
|
-
decision = "revert"; // no measurable score => cannot prove improvement
|
|
197
|
-
} else {
|
|
198
|
-
decision = isImprovement(s.goal, score, best) ? "keep" : "revert";
|
|
199
|
-
}
|
|
219
|
+
const decision = decideStep(s.goal, score, passed, best);
|
|
200
220
|
|
|
201
221
|
if (decision === "revert" && flags["on-revert"]) {
|
|
202
222
|
try {
|
|
@@ -242,10 +262,7 @@ function cmdLoop(flags: Record<string, string>): void {
|
|
|
242
262
|
|
|
243
263
|
const best = currentBest(s);
|
|
244
264
|
const { score, passed, output } = runEval(s);
|
|
245
|
-
|
|
246
|
-
if (s.goal === "gate") decision = passed ? "keep" : "revert";
|
|
247
|
-
else if (Number.isNaN(score)) decision = "revert";
|
|
248
|
-
else decision = isImprovement(s.goal, score, best) ? "keep" : "revert";
|
|
265
|
+
const decision = decideStep(s.goal, score, passed, best);
|
|
249
266
|
|
|
250
267
|
if (decision === "revert" && flags["on-revert"]) {
|
|
251
268
|
try {
|
|
@@ -255,11 +272,12 @@ function cmdLoop(flags: Record<string, string>): void {
|
|
|
255
272
|
}
|
|
256
273
|
}
|
|
257
274
|
appendLog({ type: "step", iteration: i, change: `loop#${i}`, score, passed, decision, prevBest: best ?? null, output });
|
|
258
|
-
|
|
259
|
-
|
|
275
|
+
// A keep is forward progress (min/max: provably an improvement; gate: a pass).
|
|
276
|
+
// Anything else extends the no-progress streak toward convergence.
|
|
277
|
+
sinceImprove = decision === "keep" ? 0 : sinceImprove + 1;
|
|
260
278
|
console.log(`jeo autopilot: loop ${i}/${max} ${decision.toUpperCase()} score=${fmt(score)} (sinceImprove=${sinceImprove})`);
|
|
261
279
|
|
|
262
|
-
if (
|
|
280
|
+
if (isConverged(sinceImprove, s.patience)) {
|
|
263
281
|
appendLog({ type: "stop", reason: "converged", iteration: i, patience: s.patience });
|
|
264
282
|
console.log(`jeo autopilot: stop — converged (no improvement in ${s.patience} steps)`);
|
|
265
283
|
return;
|
|
@@ -279,13 +297,13 @@ function cmdStatus(flags: Record<string, string>): void {
|
|
|
279
297
|
const best = currentBest(s);
|
|
280
298
|
const stop = [...log].reverse().find((e) => e.type === "stop");
|
|
281
299
|
|
|
282
|
-
// convergence: steps since last keep
|
|
300
|
+
// convergence: steps since last keep (forward progress)
|
|
283
301
|
let sinceImprove = 0;
|
|
284
302
|
for (const e of steps) {
|
|
285
303
|
if (e.decision === "keep") sinceImprove = 0;
|
|
286
304
|
else sinceImprove++;
|
|
287
305
|
}
|
|
288
|
-
const converged =
|
|
306
|
+
const converged = isConverged(sinceImprove, s.patience);
|
|
289
307
|
|
|
290
308
|
let recommendation: string;
|
|
291
309
|
if (stop) recommendation = `stopped: ${stop.reason as string}`;
|
package/src/commands/launch.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { createInterface } from "node:readline/promises";
|
|
|
2
2
|
import { emitKeypressEvents } from "node:readline";
|
|
3
3
|
import { PassThrough } from "node:stream";
|
|
4
4
|
import { runAgentLoop, executorSystemPrompt, DEFAULT_TOOLS, TOOL_PROTOCOL, WORKING_DISCIPLINE, type AgentLoopEvents } from "../agent/engine";
|
|
5
|
+
import { createOpikTracer, wrapEvents } from "../agent/opik-tracer";
|
|
5
6
|
import { initialDynamicStepLimit } from "../agent/step-budget";
|
|
6
7
|
import { memoryPromptSection, spawnDetachedDistill } from "../agent/memory";
|
|
7
8
|
import { createTaskTool, taskToolProtocolLine, type TaskSubEvent } from "../agent/task-tool";
|
|
@@ -1472,6 +1473,16 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1472
1473
|
subagent: createSubagentTool(subagentRegistry),
|
|
1473
1474
|
};
|
|
1474
1475
|
const tools = filterToolMap(fullTools, Array.from(allowedTools));
|
|
1476
|
+
// Opik observability (opt-in via JEO_OPIK): one trace per turn, spans per
|
|
1477
|
+
// step/tool, token usage, and completed/verified/efficiency eval scores.
|
|
1478
|
+
// No-op (zero network) when disabled or unconfigured; never breaks a turn.
|
|
1479
|
+
const opik = createOpikTracer({
|
|
1480
|
+
name: userInput.trim().slice(0, 80) || "jeo turn",
|
|
1481
|
+
input: userInput,
|
|
1482
|
+
metadata: { model: sessionModel, cwd },
|
|
1483
|
+
tags: ["jeo", "launch"],
|
|
1484
|
+
});
|
|
1485
|
+
opik.startTurn();
|
|
1475
1486
|
result = await runAgentLoop(history, {
|
|
1476
1487
|
cwd,
|
|
1477
1488
|
tools,
|
|
@@ -1480,7 +1491,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1480
1491
|
maxTokens: sessionThinking ? thinkingMaxTokens(sessionThinking) : undefined,
|
|
1481
1492
|
signal: ac.signal,
|
|
1482
1493
|
steer: drainSteer,
|
|
1483
|
-
events: { ...withToolDetailCapture(tui ? tui.events() : streamEvents), onBeforeDone },
|
|
1494
|
+
events: wrapEvents({ ...withToolDetailCapture(tui ? tui.events() : streamEvents), onBeforeDone }, opik),
|
|
1484
1495
|
});
|
|
1485
1496
|
if (result.done && looksLikeSkillEcho(result.doneReason ?? "", resolvedSkills)) {
|
|
1486
1497
|
history.push({
|
|
@@ -1498,7 +1509,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1498
1509
|
maxTokens: sessionThinking ? thinkingMaxTokens(sessionThinking) : undefined,
|
|
1499
1510
|
signal: ac.signal,
|
|
1500
1511
|
steer: drainSteer,
|
|
1501
|
-
events: withToolDetailCapture(tui ? tui.events() : streamEvents),
|
|
1512
|
+
events: wrapEvents(withToolDetailCapture(tui ? tui.events() : streamEvents), opik),
|
|
1502
1513
|
});
|
|
1503
1514
|
const usage =
|
|
1504
1515
|
result.usage && retry.usage
|
|
@@ -1509,6 +1520,8 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1509
1520
|
: retry.usage ?? result.usage;
|
|
1510
1521
|
result = { ...retry, steps: result.steps + retry.steps, usage };
|
|
1511
1522
|
}
|
|
1523
|
+
// Close the Opik trace once per turn (done or budget-stop). Errors swallowed.
|
|
1524
|
+
await opik.endTurn({ done: result.done, steps: result.steps, output: result.doneReason });
|
|
1512
1525
|
} finally {
|
|
1513
1526
|
harness.dispose();
|
|
1514
1527
|
subagentRegistry.cancelAll(); // #9: no detached run leaks past the turn
|
|
@@ -1931,6 +1944,15 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1931
1944
|
const multilineInput = !!process.stdin.isTTY && jeoEnv("NO_MULTILINE") !== "1";
|
|
1932
1945
|
const loneLfShiftEnter = jeoEnv("MULTILINE") === "1";
|
|
1933
1946
|
const expandSentinel = (s: string): string => (multilineInput ? s.split(SENTINEL).join("\n") : s);
|
|
1947
|
+
// Prompt-scoped process listeners (stdin data/keypress, stdout resize). Registered
|
|
1948
|
+
// once per launch but previously anonymous and never removed — benign for a single
|
|
1949
|
+
// CLI run, but repeated launch() (test harness) accumulated them past Node's
|
|
1950
|
+
// 10-listener default → MaxListenersExceededWarning + a real leak. Track each remover
|
|
1951
|
+
// and drain it on every exit path so the process listener set returns to baseline.
|
|
1952
|
+
const promptListenerCleanups: Array<() => void> = [];
|
|
1953
|
+
const drainPromptListeners = () => {
|
|
1954
|
+
for (const off of promptListenerCleanups.splice(0)) { try { off(); } catch { /* best effort */ } }
|
|
1955
|
+
};
|
|
1934
1956
|
let keyFilter: PassThrough | undefined;
|
|
1935
1957
|
if (multilineInput) {
|
|
1936
1958
|
const kf = new PassThrough();
|
|
@@ -1951,7 +1973,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1951
1973
|
// off) and the xterm "\x1b[27;2;13~" / kitty "\x1b[13;2u" sequences. Enter ("\r")
|
|
1952
1974
|
// passes through and submits.
|
|
1953
1975
|
let kfInPaste = false;
|
|
1954
|
-
|
|
1976
|
+
const kfDataHandler = (chunk: Buffer) => {
|
|
1955
1977
|
const data = chunk.toString("utf8");
|
|
1956
1978
|
let out = "";
|
|
1957
1979
|
let i = 0;
|
|
@@ -1972,7 +1994,9 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
1972
1994
|
out += data[i]; i += 1;
|
|
1973
1995
|
}
|
|
1974
1996
|
kf.write(out);
|
|
1975
|
-
}
|
|
1997
|
+
};
|
|
1998
|
+
process.stdin.on("data", kfDataHandler);
|
|
1999
|
+
promptListenerCleanups.push(() => process.stdin.off("data", kfDataHandler));
|
|
1976
2000
|
keyFilter = kf;
|
|
1977
2001
|
// readline now decodes keypresses on `keyFilter`; keep process.stdin emitting
|
|
1978
2002
|
// 'keypress' too so the footer-redraw / paste-marker / picker listeners (registered
|
|
@@ -2032,13 +2056,15 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
2032
2056
|
const pasteMerge: { buf: string[]; endWaiters: Array<() => void> } = { buf: [], endWaiters: [] };
|
|
2033
2057
|
let pasteLineFired = false; // the line that resolved rl.question came from inside a paste
|
|
2034
2058
|
if (process.stdin.isTTY) {
|
|
2035
|
-
|
|
2059
|
+
const pasteKeypressHandler = (_ch: string, key: { name?: string } | undefined) => {
|
|
2036
2060
|
if (key?.name === "paste-start") { promptPasteActive = true; pasteMerge.buf = []; }
|
|
2037
2061
|
else if (key?.name === "paste-end") {
|
|
2038
2062
|
promptPasteActive = false;
|
|
2039
2063
|
for (const w of pasteMerge.endWaiters.splice(0)) w();
|
|
2040
2064
|
}
|
|
2041
|
-
}
|
|
2065
|
+
};
|
|
2066
|
+
process.stdin.on("keypress", pasteKeypressHandler);
|
|
2067
|
+
promptListenerCleanups.push(() => process.stdin.off("keypress", pasteKeypressHandler));
|
|
2042
2068
|
// Enable bracketed paste for the REPL lifetime (restored on exit below):
|
|
2043
2069
|
// terminals only wrap pastes in the 200~/201~ markers once the app opts in.
|
|
2044
2070
|
process.stdout.write("\x1b[?2004h");
|
|
@@ -2540,28 +2566,13 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
2540
2566
|
const notReadyWarning = (st: { name: string; label: string }): string =>
|
|
2541
2567
|
` ! ${st.name} is not call-ready yet (${st.label}) — run /provider login antigravity before the first turn.`;
|
|
2542
2568
|
|
|
2543
|
-
|
|
2569
|
+
|
|
2544
2570
|
const MODEL_BADGE_ROLE_ORDER = ["planner", "architect", "executor", "critic"] as const;
|
|
2545
2571
|
|
|
2546
2572
|
const roleBadgeColor = (roleId: string): ModelAssignmentBadge["color"] =>
|
|
2547
2573
|
roleId === "executor" || roleId === "architect" || roleId === "planner" || roleId === "critic" ? roleId : "critic";
|
|
2548
2574
|
|
|
2549
|
-
|
|
2550
|
-
const roles = allSubagentRoles(config);
|
|
2551
|
-
const emitted = new Set<string>();
|
|
2552
|
-
const out: ReturnType<typeof allSubagentRoles> = [];
|
|
2553
|
-
for (const id of CORE_MODEL_ACTION_ROLE_ORDER) {
|
|
2554
|
-
const role = roles.find(r => r.id === id);
|
|
2555
|
-
if (role) {
|
|
2556
|
-
emitted.add(role.id);
|
|
2557
|
-
out.push(role);
|
|
2558
|
-
}
|
|
2559
|
-
}
|
|
2560
|
-
for (const role of roles) {
|
|
2561
|
-
if (!emitted.has(role.id)) out.push(role);
|
|
2562
|
-
}
|
|
2563
|
-
return out;
|
|
2564
|
-
};
|
|
2575
|
+
|
|
2565
2576
|
|
|
2566
2577
|
const modelPickerAssignments = async (): Promise<ModelAssignmentBadge[]> => {
|
|
2567
2578
|
const cfg = await readGlobalConfig();
|
|
@@ -2732,7 +2743,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
2732
2743
|
choices.push({
|
|
2733
2744
|
value: "heading:default",
|
|
2734
2745
|
label: "Set as DEFAULT (Default)",
|
|
2735
|
-
hint: `${config.defaultModel} (${currentDefaultThinking})`,
|
|
2746
|
+
hint: `${config.defaultModel} (${currentDefaultThinking}) · roles → /agents`,
|
|
2736
2747
|
disabled: true,
|
|
2737
2748
|
});
|
|
2738
2749
|
appendChildren([
|
|
@@ -2744,73 +2755,21 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
2744
2755
|
})),
|
|
2745
2756
|
]);
|
|
2746
2757
|
|
|
2747
|
-
for (const role of orderedModelRoles(config)) {
|
|
2748
|
-
const roleThinking = resolveSubagentThinking(role.id, config) ?? "inherit";
|
|
2749
|
-
choices.push({
|
|
2750
|
-
value: `heading:${role.id}`,
|
|
2751
|
-
label: `Set as ${role.title.toUpperCase()} (${role.title})`,
|
|
2752
|
-
hint: `${resolveSubagentModel(role.id, config)} (${roleThinking})`,
|
|
2753
|
-
disabled: true,
|
|
2754
|
-
});
|
|
2755
|
-
appendChildren([
|
|
2756
|
-
{ value: `${role.id}:keep`, label: "Set model only", hint: `keep thinking ${roleThinking} · set via /agents edit` },
|
|
2757
|
-
]);
|
|
2758
|
-
}
|
|
2759
|
-
|
|
2760
|
-
choices.push({
|
|
2761
|
-
value: "preset:openai-codex",
|
|
2762
|
-
label: "Apply OpenAI Codex role preset",
|
|
2763
|
-
hint: "Default medium · Executor low · Architect xhigh · Planner medium · Critic high",
|
|
2764
|
-
});
|
|
2765
2758
|
return choices;
|
|
2766
2759
|
};
|
|
2767
2760
|
|
|
2768
|
-
|
|
2769
|
-
const roleThinking: Record<(typeof CORE_MODEL_ACTION_ROLE_ORDER)[number], ThinkLevel> = {
|
|
2770
|
-
executor: "low",
|
|
2771
|
-
architect: "xhigh",
|
|
2772
|
-
planner: "medium",
|
|
2773
|
-
critic: "high",
|
|
2774
|
-
};
|
|
2775
|
-
await saveConfigPatch(raw => {
|
|
2776
|
-
let subagents = raw.subagents ?? {};
|
|
2777
|
-
for (const roleId of CORE_MODEL_ACTION_ROLE_ORDER) {
|
|
2778
|
-
subagents = withSubagentSetting({ subagents }, roleId, { model: target, thinking: roleThinking[roleId] });
|
|
2779
|
-
}
|
|
2780
|
-
return {
|
|
2781
|
-
...rememberModelPatch(raw, target),
|
|
2782
|
-
thinkingLevel: "medium",
|
|
2783
|
-
subagents,
|
|
2784
|
-
};
|
|
2785
|
-
});
|
|
2786
|
-
sessionModel = target;
|
|
2787
|
-
sessionThinking = "medium";
|
|
2788
|
-
const { resolved, provider } = await describeModel(target);
|
|
2789
|
-
const st = (await describeAllProviders(cfgForPick)).find(s => s.name === provider);
|
|
2790
|
-
console.log(`OpenAI Codex role preset applied to ${formatModelLine({ label: target, resolved, provider, ready: st?.ready })} — Default medium, Executor low, Architect xhigh, Planner medium, Critic high`);
|
|
2791
|
-
};
|
|
2761
|
+
|
|
2792
2762
|
|
|
2793
2763
|
|
|
2794
2764
|
const applyPickedModelWithTarget = async (target: string): Promise<boolean> => {
|
|
2795
2765
|
if (!process.stdin.isTTY || !process.stdout.isTTY) return false;
|
|
2796
2766
|
const cfgForPick = await readGlobalConfig();
|
|
2767
|
+
// `/model` only assigns the DEFAULT model + (optionally) the default thinking.
|
|
2768
|
+
// Per-role model and thinking are configured in /agents (and /agents edit).
|
|
2797
2769
|
const choice = await pickFromOptions(`Model Name: ${displayModelName(target)}\n\nAction for: ${target}`, modelActionChoices(cfgForPick)) ?? "default:keep";
|
|
2798
|
-
|
|
2799
|
-
await applyOpenAiCodexRolePreset(target, cfgForPick);
|
|
2800
|
-
return true;
|
|
2801
|
-
}
|
|
2802
|
-
const [applyTo, action = "keep"] = choice.split(":", 2);
|
|
2803
|
-
if (applyTo === "heading") return false;
|
|
2804
|
-
const roleTarget = applyTo !== "default" ? getSubagentRole(applyTo, cfgForPick) : undefined;
|
|
2770
|
+
const [, action = "keep"] = choice.split(":", 2);
|
|
2805
2771
|
const { resolved, provider } = await describeModel(target);
|
|
2806
2772
|
const st = (await describeAllProviders(cfgForPick)).find(s => s.name === provider);
|
|
2807
|
-
if (roleTarget) {
|
|
2808
|
-
const thinkPatch = action === "inherit" ? { thinking: undefined } : isThinkingLevel(action) ? { thinking: action } : {};
|
|
2809
|
-
await saveConfigPatch(raw => ({ subagents: withSubagentSetting(raw, roleTarget.id, { model: target, ...thinkPatch }) }));
|
|
2810
|
-
const thinkNote = action !== "keep" ? ` · thinking ${action}` : "";
|
|
2811
|
-
console.log(`Subagent '${roleTarget.id}' model set to ${formatModelLine({ label: target, resolved, provider, ready: st?.ready })}${thinkNote} — saved (change anytime via /model or /agents)`);
|
|
2812
|
-
return true;
|
|
2813
|
-
}
|
|
2814
2773
|
sessionModel = target;
|
|
2815
2774
|
const defaultThinking = isThinkingLevel(action) ? action : undefined;
|
|
2816
2775
|
if (defaultThinking) {
|
|
@@ -2820,7 +2779,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
2820
2779
|
...rememberModelPatch(raw, target),
|
|
2821
2780
|
...(defaultThinking ? { thinkingLevel: defaultThinking } : {}),
|
|
2822
2781
|
}));
|
|
2823
|
-
console.log(`Model set to ${formatModelLine({ label: target, resolved, provider, ready: st?.ready })}${defaultThinking ? ` · thinking ${defaultThinking}` : ""} — saved as default`);
|
|
2782
|
+
console.log(`Model set to ${formatModelLine({ label: target, resolved, provider, ready: st?.ready })}${defaultThinking ? ` · thinking ${defaultThinking}` : ""} — saved as default. Role models/thinking: /agents`);
|
|
2824
2783
|
return true;
|
|
2825
2784
|
};
|
|
2826
2785
|
|
|
@@ -2925,7 +2884,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
2925
2884
|
|
|
2926
2885
|
if (previewEnabled) {
|
|
2927
2886
|
process.once("exit", () => out.write("\x1b[?25h")); // safety net: never leave the cursor hidden
|
|
2928
|
-
|
|
2887
|
+
const footerKeypressHandler = (_ch: string, key: { name?: string; ctrl?: boolean; meta?: boolean } | undefined) => {
|
|
2929
2888
|
if (key?.ctrl && key.name === "c") {
|
|
2930
2889
|
forceExitFromCtrlC();
|
|
2931
2890
|
return;
|
|
@@ -3040,18 +2999,22 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
3040
2999
|
drawFooter(previewLines(typedLine));
|
|
3041
3000
|
} catch { /* ignore render races */ }
|
|
3042
3001
|
});
|
|
3043
|
-
}
|
|
3002
|
+
};
|
|
3003
|
+
process.stdin.on("keypress", footerKeypressHandler);
|
|
3004
|
+
promptListenerCleanups.push(() => process.stdin.off("keypress", footerKeypressHandler));
|
|
3044
3005
|
// Idle-prompt resize: re-reserve the footer at the new terminal height so the
|
|
3045
3006
|
// fixed reservation stays accurate (otherwise the next paint would target the
|
|
3046
3007
|
// old row count and either over-shoot or under-paint the reserved region).
|
|
3047
|
-
|
|
3008
|
+
const idleResizeHandler = () => {
|
|
3048
3009
|
if (!previewArmed) return;
|
|
3049
3010
|
try {
|
|
3050
3011
|
disarmPreview();
|
|
3051
3012
|
armPreview();
|
|
3052
3013
|
drawFooter(promptHistoryLines ? historyPreviewLines(promptHistoryLines) : previewLines(typedLine, navIdx));
|
|
3053
3014
|
} catch { /* ignore resize render races */ }
|
|
3054
|
-
}
|
|
3015
|
+
};
|
|
3016
|
+
process.stdout.on("resize", idleResizeHandler);
|
|
3017
|
+
promptListenerCleanups.push(() => process.stdout.off("resize", idleResizeHandler));
|
|
3055
3018
|
}
|
|
3056
3019
|
|
|
3057
3020
|
while (true) {
|
|
@@ -3162,7 +3125,14 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
3162
3125
|
}
|
|
3163
3126
|
if (input === "/clear") {
|
|
3164
3127
|
history.length = 1;
|
|
3165
|
-
|
|
3128
|
+
// Back to the initial screen: wipe the conversation, clear the terminal +
|
|
3129
|
+
// scrollback, and re-render the welcome banner so /clear looks like a fresh launch.
|
|
3130
|
+
if (process.stdout.isTTY) {
|
|
3131
|
+
disarmPreview();
|
|
3132
|
+
process.stdout.write("\x1b[2J\x1b[3J\x1b[H"); // clear screen + scrollback + cursor home
|
|
3133
|
+
console.log(renderWelcome(welcomeData).join("\n"));
|
|
3134
|
+
}
|
|
3135
|
+
console.log("(history cleared — back to the start screen)");
|
|
3166
3136
|
continue;
|
|
3167
3137
|
}
|
|
3168
3138
|
if (input === "/compact") {
|
|
@@ -3281,6 +3251,20 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
3281
3251
|
if (history[k]!.role === "assistant" && !lastReply) lastReply = String(history[k]!.content ?? "");
|
|
3282
3252
|
if (lastUserInput && lastReply) break;
|
|
3283
3253
|
}
|
|
3254
|
+
// Seed readline's input history so ↑ in the prompt recalls THIS session's
|
|
3255
|
+
// prior prompts (not just lines typed in the current run). readline history
|
|
3256
|
+
// is newest-first; unshift in chronological order so the session's newest
|
|
3257
|
+
// prompt lands at the front (first ↑). Skip injected/framed messages.
|
|
3258
|
+
const rli = rl as unknown as { history?: string[] };
|
|
3259
|
+
if (Array.isArray(rli.history)) {
|
|
3260
|
+
const priorPrompts = history
|
|
3261
|
+
.filter(m => m.role === "user")
|
|
3262
|
+
.map(m => String(m.content ?? "").trim())
|
|
3263
|
+
.filter(c => c && !c.startsWith("Tool [") && !c.startsWith("[mid-turn steering") && !c.startsWith("[Earlier conversation summary]"));
|
|
3264
|
+
for (const p of priorPrompts) {
|
|
3265
|
+
if (rli.history[0] !== p) rli.history.unshift(p);
|
|
3266
|
+
}
|
|
3267
|
+
}
|
|
3284
3268
|
const sep = "─".repeat(Math.min(48, Math.max(20, (process.stdout.columns ?? 80) - 1)));
|
|
3285
3269
|
logLines([
|
|
3286
3270
|
sep,
|
|
@@ -4359,6 +4343,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
4359
4343
|
} catch { /* best effort */ }
|
|
4360
4344
|
process.removeListener("SIGINT", forceExitFromCtrlC);
|
|
4361
4345
|
process.stdin.off("data", forceExitOnCtrlCByte);
|
|
4346
|
+
drainPromptListeners();
|
|
4362
4347
|
restorePromptRawMode();
|
|
4363
4348
|
process.exit(130);
|
|
4364
4349
|
}
|
|
@@ -4374,6 +4359,7 @@ export async function runLaunchCommand(args: string[]): Promise<void> {
|
|
|
4374
4359
|
if (sessionId && !flags.noSession) console.log(formatResumeHint(sessionId));
|
|
4375
4360
|
process.removeListener("SIGINT", forceExitFromCtrlC);
|
|
4376
4361
|
process.stdin.off("data", forceExitOnCtrlCByte);
|
|
4362
|
+
drainPromptListeners();
|
|
4377
4363
|
restorePromptRawMode();
|
|
4378
4364
|
gracefulReadlineClose = true;
|
|
4379
4365
|
rl.close();
|