@os-eco/overstory-cli 0.8.4 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +1 -1
- package/src/agents/manifest.test.ts +33 -8
- package/src/agents/manifest.ts +4 -3
- package/src/commands/inspect.test.ts +398 -1
- package/src/commands/inspect.ts +234 -0
- package/src/commands/sling.ts +1 -1
- package/src/events/tailer.test.ts +461 -0
- package/src/events/tailer.ts +235 -0
- package/src/index.ts +1 -1
- package/src/runtimes/claude.test.ts +1 -1
- package/src/runtimes/opencode.test.ts +325 -0
- package/src/runtimes/opencode.ts +185 -0
- package/src/runtimes/pi.test.ts +1 -1
- package/src/runtimes/registry.test.ts +21 -1
- package/src/runtimes/registry.ts +3 -0
- package/src/runtimes/sapling.test.ts +30 -0
- package/src/runtimes/sapling.ts +27 -24
- package/src/runtimes/types.ts +2 -2
- package/src/types.ts +2 -0
- package/src/watchdog/daemon.ts +57 -0
package/src/commands/inspect.ts
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* MetricsStore, and tmux capture-pane.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
import { readdir } from "node:fs/promises";
|
|
8
9
|
import { join } from "node:path";
|
|
9
10
|
import { Command } from "commander";
|
|
10
11
|
import { loadConfig } from "../config.ts";
|
|
@@ -86,6 +87,170 @@ async function captureTmux(sessionName: string, lines: number): Promise<string |
|
|
|
86
87
|
}
|
|
87
88
|
}
|
|
88
89
|
|
|
90
|
+
/** One tool invocation reconstructed from a tool_start event and, if seen, its tool_end. */
interface StdoutLogToolCall {
	/** Tool name taken from the tool_start event ("unknown" when the event had none). */
	toolName: string;
	/** Argument summary taken from the tool_start event (empty string when absent). */
	argsSummary: string;
	/** Duration from the matching tool_end event; null when missing or the call is still pending. */
	durationMs: number | null;
	/** Timestamp recorded for the tool_start event. */
	timestamp: string;
}

/**
 * Summary extracted from the NDJSON event stream a headless agent writes
 * to its stdout.log.
 */
interface StdoutLogData {
	/** Completed tool calls followed by any calls that were still pending. */
	toolCalls: StdoutLogToolCall[];
	/** Sum of `inputTokens` over the parsed turn_end events. */
	cumulativeInputTokens: number;
	/** Sum of `outputTokens` over the parsed turn_end events. */
	cumulativeOutputTokens: number;
	/** Sum of `cacheReadTokens` over the parsed turn_end events. */
	cumulativeCacheReadTokens: number;
	/** Model named by the most recent turn_end that carried one; empty string if none did. */
	lastModel: string;
	/** Context utilization from the most recent turn_end that carried one; null if none did. */
	lastContextUtilization: number | null;
	/** Turn number of the latest turn_start event; 0 when no turn_start was seen. */
	currentTurn: number;
	/** True when the last parsed event was a tool_start. */
	isMidTool: boolean;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Find the most recent log directory for a headless agent.
 *
 * Lists logsBaseDir/{agentName}/ and returns the entry whose name sorts last.
 * Session directories are expected to be named with ISO timestamps, for which
 * lexicographic order equals chronological order.
 *
 * Plain files are ignored so that a stray file sorting after the timestamps
 * can never be mistaken for the latest session directory. Symlinks are kept
 * because they may point at a session directory.
 *
 * @param logsBaseDir - Directory holding one sub-directory per agent
 * @param agentName - Name of the agent whose log sessions are listed
 * @returns Path of the latest session directory, or null when the agent
 *   directory is missing, unreadable, or contains no session directories
 */
async function findLatestLogDir(logsBaseDir: string, agentName: string): Promise<string | null> {
	const agentLogsDir = join(logsBaseDir, agentName);
	try {
		const entries = await readdir(agentLogsDir, { withFileTypes: true });
		const sessionNames = entries
			.filter((entry) => entry.isDirectory() || entry.isSymbolicLink())
			.map((entry) => entry.name)
			.sort();
		const latest = sessionNames[sessionNames.length - 1];
		return latest === undefined ? null : join(agentLogsDir, latest);
	} catch {
		// Best-effort lookup: a missing or unreadable directory means "no logs".
		return null;
	}
}
|
|
125
|
+
|
|
126
|
+
/**
 * Parse the last 200 non-blank lines of a headless agent's stdout.log NDJSON file.
 *
 * Extracts tool call activity and token usage from the event stream written by
 * headless runtimes (e.g. Sapling/Codex). Recognized event types:
 * - `tool_start` / `tool_end`: paired by tool name into tool calls
 * - `turn_start`: updates the current turn number
 * - `turn_end`: accumulates token counts, model and context utilization
 *
 * Lines that are blank, truncated, malformed, or valid JSON but not an object
 * (e.g. `null`) are skipped without affecting the rest of the file.
 *
 * @param logPath - Absolute path to stdout.log
 * @returns Parsed data, or null if file missing or unreadable
 */
async function parseStdoutLog(logPath: string): Promise<StdoutLogData | null> {
	// Size of the tail window, counted in non-blank lines.
	const tailLineLimit = 200;

	const file = Bun.file(logPath);
	if (!(await file.exists())) return null;

	try {
		const text = await file.text();
		// Drop blank lines BEFORE tailing. Otherwise the empty string that
		// follows a trailing newline occupies one slot of the window and only
		// 199 events would be considered.
		const lines = text
			.split("\n")
			.map((line) => line.trim())
			.filter((line) => line.length > 0)
			.slice(-tailLineLimit);

		const toolCalls: StdoutLogData["toolCalls"] = [];

		// tool_start events that have not been closed by a tool_end yet.
		const pendingTools: Array<{
			toolName: string;
			argsSummary: string;
			timestamp: string;
		}> = [];

		let cumulativeInputTokens = 0;
		let cumulativeOutputTokens = 0;
		let cumulativeCacheReadTokens = 0;
		let lastModel = "";
		let lastContextUtilization: number | null = null;
		let currentTurn = 0;
		let lastEventType: string | null = null;

		for (const line of lines) {
			let parsed: unknown;
			try {
				parsed = JSON.parse(line);
			} catch {
				// Partial (mid-write) or malformed line.
				continue;
			}
			// JSON literals such as `null` or `42` parse successfully but are not
			// events; reading `.type` from null would throw and discard the file.
			if (typeof parsed !== "object" || parsed === null) continue;
			const event = parsed as Record<string, unknown>;

			const type = typeof event.type === "string" ? event.type : null;
			if (!type) continue;

			lastEventType = type;
			// Events without a timestamp are stamped with the time of parsing.
			const timestamp =
				typeof event.timestamp === "string" ? event.timestamp : new Date().toISOString();

			switch (type) {
				case "tool_start": {
					const toolName = typeof event.toolName === "string" ? event.toolName : "unknown";
					const argsSummary = typeof event.argsSummary === "string" ? event.argsSummary : "";
					pendingTools.push({ toolName, argsSummary, timestamp });
					break;
				}
				case "tool_end": {
					const toolName = typeof event.toolName === "string" ? event.toolName : "";
					const durationMs = typeof event.durationMs === "number" ? event.durationMs : null;

					// Close the most recent pending call with the same tool name.
					// A tool_end with no matching start (e.g. the start fell outside
					// the tail window) is ignored.
					let pendingIdx = -1;
					for (let i = pendingTools.length - 1; i >= 0; i--) {
						if (pendingTools[i]?.toolName === toolName) {
							pendingIdx = i;
							break;
						}
					}
					const pending = pendingIdx >= 0 ? pendingTools[pendingIdx] : undefined;
					if (pending) {
						pendingTools.splice(pendingIdx, 1);
						toolCalls.push({
							toolName: pending.toolName,
							argsSummary: pending.argsSummary,
							durationMs,
							timestamp: pending.timestamp,
						});
					}
					break;
				}
				case "turn_start": {
					// Fall back to incrementing when the event carries no turn number.
					currentTurn = typeof event.turn === "number" ? event.turn : currentTurn + 1;
					break;
				}
				case "turn_end": {
					if (typeof event.inputTokens === "number") {
						cumulativeInputTokens += event.inputTokens;
					}
					if (typeof event.outputTokens === "number") {
						cumulativeOutputTokens += event.outputTokens;
					}
					if (typeof event.cacheReadTokens === "number") {
						cumulativeCacheReadTokens += event.cacheReadTokens;
					}
					if (typeof event.model === "string" && event.model) {
						lastModel = event.model;
					}
					if (typeof event.contextUtilization === "number") {
						lastContextUtilization = event.contextUtilization;
					}
					break;
				}
				default:
					// Other event types only update lastEventType.
					break;
			}
		}

		// Any still-pending tool_starts are mid-execution — include them without durationMs
		for (const pending of pendingTools) {
			toolCalls.push({
				toolName: pending.toolName,
				argsSummary: pending.argsSummary,
				durationMs: null,
				timestamp: pending.timestamp,
			});
		}

		return {
			toolCalls,
			cumulativeInputTokens,
			cumulativeOutputTokens,
			cumulativeCacheReadTokens,
			lastModel,
			lastContextUtilization,
			currentTurn,
			isMidTool: lastEventType === "tool_start",
		};
	} catch {
		// Best-effort: an unreadable log is reported as "no data".
		return null;
	}
}
|
|
253
|
+
|
|
89
254
|
export interface InspectData {
|
|
90
255
|
session: AgentSession;
|
|
91
256
|
timeSinceLastActivity: number;
|
|
@@ -106,6 +271,12 @@ export interface InspectData {
|
|
|
106
271
|
modelUsed: string | null;
|
|
107
272
|
} | null;
|
|
108
273
|
tmuxOutput: string | null;
|
|
274
|
+
/** Turn progress for headless agents (populated from stdout.log). */
|
|
275
|
+
headlessTurnInfo: {
|
|
276
|
+
currentTurn: number;
|
|
277
|
+
contextUtilization: number | null;
|
|
278
|
+
isMidTool: boolean;
|
|
279
|
+
} | null;
|
|
109
280
|
}
|
|
110
281
|
|
|
111
282
|
/**
|
|
@@ -200,6 +371,51 @@ export async function gatherInspectData(
|
|
|
200
371
|
tmuxOutput = await captureTmux(session.tmuxSession, lines);
|
|
201
372
|
}
|
|
202
373
|
|
|
374
|
+
// Headless stdout.log fallback: parse NDJSON event stream for rich activity data.
|
|
375
|
+
// Used when tmuxSession is empty (headless agent: sapling, codex, etc.).
|
|
376
|
+
let headlessTurnInfo: InspectData["headlessTurnInfo"] = null;
|
|
377
|
+
if (session.tmuxSession === "") {
|
|
378
|
+
const logsBaseDir = join(overstoryDir, "logs");
|
|
379
|
+
const latestLogDir = await findLatestLogDir(logsBaseDir, agentName);
|
|
380
|
+
if (latestLogDir !== null) {
|
|
381
|
+
const stdoutData = await parseStdoutLog(join(latestLogDir, "stdout.log"));
|
|
382
|
+
if (stdoutData !== null) {
|
|
383
|
+
// Populate recentToolCalls from stdout.log when events.db had nothing.
|
|
384
|
+
if (recentToolCalls.length === 0 && stdoutData.toolCalls.length > 0) {
|
|
385
|
+
const limit = opts.limit ?? 20;
|
|
386
|
+
recentToolCalls = stdoutData.toolCalls.slice(0, limit).map((call) => ({
|
|
387
|
+
toolName: call.toolName,
|
|
388
|
+
args: call.argsSummary,
|
|
389
|
+
durationMs: call.durationMs,
|
|
390
|
+
timestamp: call.timestamp,
|
|
391
|
+
}));
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
// Populate tokenUsage from turn_end events when metrics.db had nothing.
|
|
395
|
+
if (
|
|
396
|
+
tokenUsage === null &&
|
|
397
|
+
(stdoutData.cumulativeInputTokens > 0 || stdoutData.cumulativeOutputTokens > 0)
|
|
398
|
+
) {
|
|
399
|
+
tokenUsage = {
|
|
400
|
+
inputTokens: stdoutData.cumulativeInputTokens,
|
|
401
|
+
outputTokens: stdoutData.cumulativeOutputTokens,
|
|
402
|
+
cacheReadTokens: stdoutData.cumulativeCacheReadTokens,
|
|
403
|
+
cacheCreationTokens: 0,
|
|
404
|
+
estimatedCostUsd: null,
|
|
405
|
+
modelUsed: stdoutData.lastModel || null,
|
|
406
|
+
};
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// Always populate turn progress info for headless agents.
|
|
410
|
+
headlessTurnInfo = {
|
|
411
|
+
currentTurn: stdoutData.currentTurn,
|
|
412
|
+
contextUtilization: stdoutData.lastContextUtilization,
|
|
413
|
+
isMidTool: stdoutData.isMidTool,
|
|
414
|
+
};
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
|
|
203
419
|
// Headless fallback: show recent events as live output when no tmux
|
|
204
420
|
if (!tmuxOutput && session.tmuxSession === "" && recentToolCalls.length > 0) {
|
|
205
421
|
const lines: string[] = ["[Headless agent — showing recent tool events]", ""];
|
|
@@ -219,6 +435,7 @@ export async function gatherInspectData(
|
|
|
219
435
|
toolStats,
|
|
220
436
|
tokenUsage,
|
|
221
437
|
tmuxOutput,
|
|
438
|
+
headlessTurnInfo,
|
|
222
439
|
};
|
|
223
440
|
} finally {
|
|
224
441
|
store.close();
|
|
@@ -256,6 +473,23 @@ export function printInspectData(data: InspectData): void {
|
|
|
256
473
|
w(`Current file: ${data.currentFile}\n\n`);
|
|
257
474
|
}
|
|
258
475
|
|
|
476
|
+
// Headless turn progress
|
|
477
|
+
if (data.headlessTurnInfo) {
|
|
478
|
+
const { currentTurn, contextUtilization, isMidTool } = data.headlessTurnInfo;
|
|
479
|
+
w("Turn Progress\n");
|
|
480
|
+
w(`${separator()}\n`);
|
|
481
|
+
if (currentTurn > 0) {
|
|
482
|
+
w(` Current turn: ${currentTurn}\n`);
|
|
483
|
+
}
|
|
484
|
+
if (contextUtilization !== null) {
|
|
485
|
+
const pct = (contextUtilization * 100).toFixed(1);
|
|
486
|
+
w(` Context usage: ${pct}%\n`);
|
|
487
|
+
}
|
|
488
|
+
const status = isMidTool ? "executing tool" : "between turns";
|
|
489
|
+
w(` Status: ${status}\n`);
|
|
490
|
+
w("\n");
|
|
491
|
+
}
|
|
492
|
+
|
|
259
493
|
// Token usage
|
|
260
494
|
if (data.tokenUsage) {
|
|
261
495
|
w("Token Usage\n");
|
package/src/commands/sling.ts
CHANGED
|
@@ -876,7 +876,7 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
876
876
|
const argv = runtime.buildDirectSpawn({
|
|
877
877
|
cwd: worktreePath,
|
|
878
878
|
env: directEnv,
|
|
879
|
-
model: resolvedModel.model,
|
|
879
|
+
...(resolvedModel.isExplicitOverride ? { model: resolvedModel.model } : {}),
|
|
880
880
|
instructionPath: runtime.instructionPath,
|
|
881
881
|
});
|
|
882
882
|
|