@os-eco/overstory-cli 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@
5
5
  * MetricsStore, and tmux capture-pane.
6
6
  */
7
7
 
8
+ import { readdir } from "node:fs/promises";
8
9
  import { join } from "node:path";
9
10
  import { Command } from "commander";
10
11
  import { loadConfig } from "../config.ts";
@@ -86,6 +87,170 @@ async function captureTmux(sessionName: string, lines: number): Promise<string |
86
87
  }
87
88
  }
88
89
 
90
/**
 * Summary extracted from a headless agent's stdout.log NDJSON event stream.
 */
interface StdoutLogData {
	/** Tool invocations seen in the parsed portion of the log. */
	toolCalls: {
		/** Name reported by the `tool_start` event. */
		toolName: string;
		/** Short argument summary reported by the `tool_start` event ("" when absent). */
		argsSummary: string;
		/** Duration reported by the matching `tool_end`; null when none was seen. */
		durationMs: number | null;
		/** Timestamp associated with the `tool_start` event. */
		timestamp: string;
	}[];
	/** Sum of `inputTokens` over the parsed `turn_end` events. */
	cumulativeInputTokens: number;
	/** Sum of `outputTokens` over the parsed `turn_end` events. */
	cumulativeOutputTokens: number;
	/** Sum of `cacheReadTokens` over the parsed `turn_end` events. */
	cumulativeCacheReadTokens: number;
	/** Model from the most recent `turn_end` that named one ("" when none did). */
	lastModel: string;
	/** Context utilization from the most recent `turn_end` that reported it. */
	lastContextUtilization: number | null;
	/** Turn number taken from the most recent `turn_start` event (0 when none). */
	currentTurn: number;
	/** True when the last recognised event in the log was a `tool_start`. */
	isMidTool: boolean;
}
106
+
107
/**
 * Find the most recent log directory for a headless agent.
 *
 * Looks under `logsBaseDir/{agentName}/` and returns the last *directory*
 * when names are sorted with the default string ordering (ISO timestamps
 * sort chronologically). Non-directory entries are ignored so that a stray
 * file in the agent's log folder can never be mistaken for a log directory.
 *
 * @param logsBaseDir - Directory that holds one sub-folder per agent
 * @param agentName - Name of the agent whose logs are wanted
 * @returns Path of the latest log directory, or null when the agent folder is
 *          missing, unreadable, or contains no directories
 */
async function findLatestLogDir(logsBaseDir: string, agentName: string): Promise<string | null> {
	const agentLogsDir = join(logsBaseDir, agentName);
	try {
		const entries = await readdir(agentLogsDir, { withFileTypes: true });
		const dirNames = entries
			.filter((entry) => entry.isDirectory())
			.map((entry) => entry.name)
			.sort();
		const latest = dirNames[dirNames.length - 1];
		if (latest === undefined) return null;
		return join(agentLogsDir, latest);
	} catch {
		// Best effort: a missing or unreadable folder simply means "no logs".
		return null;
	}
}
125
+
126
/**
 * Parse the last 200 non-blank lines of a headless agent's stdout.log NDJSON file.
 *
 * Extracts tool call activity and token usage from Sapling/Codex event streams.
 * A line is skipped (never fatal) when it is truncated, is not valid JSON, is
 * valid JSON but not an object (e.g. `null`), or lacks a non-empty string `type`.
 *
 * Limits worth knowing:
 * - The whole file is read; only the parsing is restricted to the tail window.
 * - The "cumulative" token counters sum only the `turn_end` events inside that window.
 * - A `tool_end` with no pending `tool_start` of the same name is ignored.
 *
 * @param logPath - Absolute path to stdout.log
 * @returns Parsed data, or null if file missing or unreadable
 */
async function parseStdoutLog(logPath: string): Promise<StdoutLogData | null> {
	const TAIL_LINES = 200;

	const file = Bun.file(logPath);
	if (!(await file.exists())) return null;

	try {
		const text = await file.text();
		// Drop blank lines BEFORE tailing so a trailing newline (or blank
		// separator lines) does not use up slots in the tail window.
		const lines = text
			.split("\n")
			.map((raw) => raw.trim())
			.filter((raw) => raw !== "")
			.slice(-TAIL_LINES);

		const toolCalls: StdoutLogData["toolCalls"] = [];

		// tool_start events still waiting for their tool_end (used for durationMs matching).
		const pendingTools: Array<{
			toolName: string;
			argsSummary: string;
			timestamp: string;
		}> = [];

		let cumulativeInputTokens = 0;
		let cumulativeOutputTokens = 0;
		let cumulativeCacheReadTokens = 0;
		let lastModel = "";
		let lastContextUtilization: number | null = null;
		let currentTurn = 0;
		let lastEventType: string | null = null;

		for (const line of lines) {
			let parsed: unknown;
			try {
				parsed = JSON.parse(line);
			} catch {
				continue; // partial or malformed line
			}
			// `null`, numbers, strings, ... are valid JSON but not events. Without this
			// guard a `null` line would throw on property access and void the whole parse.
			if (typeof parsed !== "object" || parsed === null) continue;
			const event = parsed as Record<string, unknown>;

			const type = typeof event.type === "string" ? event.type : null;
			if (!type) continue;

			lastEventType = type;

			if (type === "tool_start") {
				const toolName = typeof event.toolName === "string" ? event.toolName : "unknown";
				const argsSummary = typeof event.argsSummary === "string" ? event.argsSummary : "";
				// Fall back to "now" when the event carries no timestamp of its own.
				const timestamp =
					typeof event.timestamp === "string" ? event.timestamp : new Date().toISOString();
				pendingTools.push({ toolName, argsSummary, timestamp });
			} else if (type === "tool_end") {
				const toolName = typeof event.toolName === "string" ? event.toolName : "";
				const durationMs = typeof event.durationMs === "number" ? event.durationMs : null;

				// Pair with the most recent pending tool_start of the same name.
				const pendingIdx = pendingTools.map((p) => p.toolName).lastIndexOf(toolName);
				if (pendingIdx >= 0) {
					const [pending] = pendingTools.splice(pendingIdx, 1);
					if (pending) {
						toolCalls.push({
							toolName: pending.toolName,
							argsSummary: pending.argsSummary,
							durationMs,
							timestamp: pending.timestamp,
						});
					}
				}
			} else if (type === "turn_start") {
				// Use the reported turn number, otherwise assume the next one.
				currentTurn = typeof event.turn === "number" ? event.turn : currentTurn + 1;
			} else if (type === "turn_end") {
				cumulativeInputTokens += typeof event.inputTokens === "number" ? event.inputTokens : 0;
				cumulativeOutputTokens +=
					typeof event.outputTokens === "number" ? event.outputTokens : 0;
				cumulativeCacheReadTokens +=
					typeof event.cacheReadTokens === "number" ? event.cacheReadTokens : 0;

				const model = typeof event.model === "string" ? event.model : "";
				if (model) lastModel = model;

				if (typeof event.contextUtilization === "number") {
					lastContextUtilization = event.contextUtilization;
				}
			}
		}

		// Any still-pending tool_starts are mid-execution — include them without durationMs
		for (const pending of pendingTools) {
			toolCalls.push({
				toolName: pending.toolName,
				argsSummary: pending.argsSummary,
				durationMs: null,
				timestamp: pending.timestamp,
			});
		}

		return {
			toolCalls,
			cumulativeInputTokens,
			cumulativeOutputTokens,
			cumulativeCacheReadTokens,
			lastModel,
			lastContextUtilization,
			currentTurn,
			isMidTool: lastEventType === "tool_start",
		};
	} catch {
		// Unreadable file (or any unexpected failure): report "no data".
		return null;
	}
}
253
+
89
254
  export interface InspectData {
90
255
  session: AgentSession;
91
256
  timeSinceLastActivity: number;
@@ -106,6 +271,12 @@ export interface InspectData {
106
271
  modelUsed: string | null;
107
272
  } | null;
108
273
  tmuxOutput: string | null;
274
+ /** Turn progress for headless agents (populated from stdout.log). */
275
+ headlessTurnInfo: {
276
+ currentTurn: number;
277
+ contextUtilization: number | null;
278
+ isMidTool: boolean;
279
+ } | null;
109
280
  }
110
281
 
111
282
  /**
@@ -200,6 +371,51 @@ export async function gatherInspectData(
200
371
  tmuxOutput = await captureTmux(session.tmuxSession, lines);
201
372
  }
202
373
 
374
+ // Headless stdout.log fallback: parse NDJSON event stream for rich activity data.
375
+ // Used when tmuxSession is empty (headless agent: sapling, codex, etc.).
376
+ let headlessTurnInfo: InspectData["headlessTurnInfo"] = null;
377
+ if (session.tmuxSession === "") {
378
+ const logsBaseDir = join(overstoryDir, "logs");
379
+ const latestLogDir = await findLatestLogDir(logsBaseDir, agentName);
380
+ if (latestLogDir !== null) {
381
+ const stdoutData = await parseStdoutLog(join(latestLogDir, "stdout.log"));
382
+ if (stdoutData !== null) {
383
+ // Populate recentToolCalls from stdout.log when events.db had nothing.
384
+ if (recentToolCalls.length === 0 && stdoutData.toolCalls.length > 0) {
385
+ const limit = opts.limit ?? 20;
386
+ recentToolCalls = stdoutData.toolCalls.slice(0, limit).map((call) => ({
387
+ toolName: call.toolName,
388
+ args: call.argsSummary,
389
+ durationMs: call.durationMs,
390
+ timestamp: call.timestamp,
391
+ }));
392
+ }
393
+
394
+ // Populate tokenUsage from turn_end events when metrics.db had nothing.
395
+ if (
396
+ tokenUsage === null &&
397
+ (stdoutData.cumulativeInputTokens > 0 || stdoutData.cumulativeOutputTokens > 0)
398
+ ) {
399
+ tokenUsage = {
400
+ inputTokens: stdoutData.cumulativeInputTokens,
401
+ outputTokens: stdoutData.cumulativeOutputTokens,
402
+ cacheReadTokens: stdoutData.cumulativeCacheReadTokens,
403
+ cacheCreationTokens: 0,
404
+ estimatedCostUsd: null,
405
+ modelUsed: stdoutData.lastModel || null,
406
+ };
407
+ }
408
+
409
+ // Always populate turn progress info for headless agents.
410
+ headlessTurnInfo = {
411
+ currentTurn: stdoutData.currentTurn,
412
+ contextUtilization: stdoutData.lastContextUtilization,
413
+ isMidTool: stdoutData.isMidTool,
414
+ };
415
+ }
416
+ }
417
+ }
418
+
203
419
  // Headless fallback: show recent events as live output when no tmux
204
420
  if (!tmuxOutput && session.tmuxSession === "" && recentToolCalls.length > 0) {
205
421
  const lines: string[] = ["[Headless agent — showing recent tool events]", ""];
@@ -219,6 +435,7 @@ export async function gatherInspectData(
219
435
  toolStats,
220
436
  tokenUsage,
221
437
  tmuxOutput,
438
+ headlessTurnInfo,
222
439
  };
223
440
  } finally {
224
441
  store.close();
@@ -256,6 +473,23 @@ export function printInspectData(data: InspectData): void {
256
473
  w(`Current file: ${data.currentFile}\n\n`);
257
474
  }
258
475
 
476
+ // Headless turn progress
477
+ if (data.headlessTurnInfo) {
478
+ const { currentTurn, contextUtilization, isMidTool } = data.headlessTurnInfo;
479
+ w("Turn Progress\n");
480
+ w(`${separator()}\n`);
481
+ if (currentTurn > 0) {
482
+ w(` Current turn: ${currentTurn}\n`);
483
+ }
484
+ if (contextUtilization !== null) {
485
+ const pct = (contextUtilization * 100).toFixed(1);
486
+ w(` Context usage: ${pct}%\n`);
487
+ }
488
+ const status = isMidTool ? "executing tool" : "between turns";
489
+ w(` Status: ${status}\n`);
490
+ w("\n");
491
+ }
492
+
259
493
  // Token usage
260
494
  if (data.tokenUsage) {
261
495
  w("Token Usage\n");
@@ -876,7 +876,7 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
876
876
  const argv = runtime.buildDirectSpawn({
877
877
  cwd: worktreePath,
878
878
  env: directEnv,
879
- model: resolvedModel.model,
879
+ ...(resolvedModel.isExplicitOverride ? { model: resolvedModel.model } : {}),
880
880
  instructionPath: runtime.instructionPath,
881
881
  });
882
882