@os-eco/overstory-cli 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,14 @@ export class ClaudeRuntime implements AgentRuntime {
54
54
  const permMode = opts.permissionMode === "bypass" ? "bypassPermissions" : "default";
55
55
  let cmd = `claude --model ${opts.model} --permission-mode ${permMode}`;
56
56
 
57
- if (opts.appendSystemPrompt) {
57
+ if (opts.appendSystemPromptFile) {
58
+ // Read from file at shell expansion time — avoids tmux IPC message size
59
+ // limits (~8-16KB) that cause "command too long" errors when large agent
60
+ // definitions are inlined. The $(cat ...) expands inside the tmux pane's
61
+ // shell, so the tmux IPC message only carries the short command string.
62
+ const escaped = opts.appendSystemPromptFile.replace(/'/g, "'\\''");
63
+ cmd += ` --append-system-prompt "$(cat '${escaped}')"`;
64
+ } else if (opts.appendSystemPrompt) {
58
65
  // Single-quote the content for safe shell expansion.
59
66
  // POSIX single-quoted strings cannot contain single quotes, so escape
60
67
  // them using the standard technique: end quote, escaped quote, start quote.
@@ -0,0 +1,507 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdtemp } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { cleanupTempDir } from "../test-helpers.ts";
6
+ import type { ResolvedModel } from "../types.ts";
7
+ import { CopilotRuntime } from "./copilot.ts";
8
+ import type { SpawnOpts } from "./types.ts";
9
+
10
+ describe("CopilotRuntime", () => {
11
+ const runtime = new CopilotRuntime();
12
+
13
// Static identity checks: the id and instructionPath properties of the shared
// runtime instance are compared against fixed string literals.
+ describe("id and instructionPath", () => {
14
+ test("id is 'copilot'", () => {
15
+ expect(runtime.id).toBe("copilot");
16
+ });
17
+
18
// The expected path is relative (no leading slash).
+ test("instructionPath is .github/copilot-instructions.md", () => {
19
+ expect(runtime.instructionPath).toBe(".github/copilot-instructions.md");
20
+ });
21
+ });
22
+
23
// buildSpawnCommand suite: each test builds a SpawnOpts literal, calls
// runtime.buildSpawnCommand(opts) and inspects the returned command string.
+ describe("buildSpawnCommand", () => {
24
// bypass mode: the full command string is pinned, including --allow-all-tools.
+ test("bypass permission mode includes --allow-all-tools", () => {
25
+ const opts: SpawnOpts = {
26
+ model: "sonnet",
27
+ permissionMode: "bypass",
28
+ cwd: "/tmp/worktree",
29
+ env: {},
30
+ };
31
+ const cmd = runtime.buildSpawnCommand(opts);
32
+ expect(cmd).toBe("copilot --model sonnet --allow-all-tools");
33
+ });
34
+
35
// ask mode: the command is expected to carry only the model flag; the two
// negative checks guard against either permission-related flag appearing.
+ test("ask permission mode omits permission flag", () => {
36
+ const opts: SpawnOpts = {
37
+ model: "opus",
38
+ permissionMode: "ask",
39
+ cwd: "/tmp/worktree",
40
+ env: {},
41
+ };
42
+ const cmd = runtime.buildSpawnCommand(opts);
43
+ expect(cmd).toBe("copilot --model opus");
44
+ expect(cmd).not.toContain("--allow-all-tools");
45
+ expect(cmd).not.toContain("--permission-mode");
46
+ });
47
+
48
// An inline system prompt must leave the command unchanged: neither a flag
// name nor the prompt text may show up in the output.
+ test("appendSystemPrompt is ignored (copilot has no such flag)", () => {
49
+ const opts: SpawnOpts = {
50
+ model: "sonnet",
51
+ permissionMode: "bypass",
52
+ cwd: "/tmp/worktree",
53
+ env: {},
54
+ appendSystemPrompt: "You are a builder agent.",
55
+ };
56
+ const cmd = runtime.buildSpawnCommand(opts);
57
+ expect(cmd).toBe("copilot --model sonnet --allow-all-tools");
58
+ expect(cmd).not.toContain("append-system-prompt");
59
+ expect(cmd).not.toContain("You are a builder agent");
60
+ });
61
+
62
// Same expectation for the file-based prompt option: no cat substitution and
// no trace of the file name in the command.
+ test("appendSystemPromptFile is ignored (copilot has no such flag)", () => {
63
+ const opts: SpawnOpts = {
64
+ model: "opus",
65
+ permissionMode: "bypass",
66
+ cwd: "/project",
67
+ env: {},
68
+ appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
69
+ };
70
+ const cmd = runtime.buildSpawnCommand(opts);
71
+ expect(cmd).toBe("copilot --model opus --allow-all-tools");
72
+ expect(cmd).not.toContain("cat");
73
+ expect(cmd).not.toContain("coordinator.md");
74
+ });
75
+
76
// Neither the cwd value nor the env key or env value may leak into the string.
+ test("cwd and env are not embedded in command string", () => {
77
+ const opts: SpawnOpts = {
78
+ model: "sonnet",
79
+ permissionMode: "bypass",
80
+ cwd: "/some/specific/path",
81
+ env: { GITHUB_TOKEN: "gh-test-123" },
82
+ };
83
+ const cmd = runtime.buildSpawnCommand(opts);
84
+ expect(cmd).not.toContain("/some/specific/path");
85
+ expect(cmd).not.toContain("gh-test-123");
86
+ expect(cmd).not.toContain("GITHUB_TOKEN");
87
+ });
88
+
89
// Loops over several model identifiers (including one containing a slash) and
// only checks substring containment, not the complete command.
+ test("all model names pass through unchanged", () => {
90
+ for (const model of ["sonnet", "opus", "haiku", "gpt-4o", "openrouter/gpt-5"]) {
91
+ const opts: SpawnOpts = {
92
+ model,
93
+ permissionMode: "bypass",
94
+ cwd: "/tmp",
95
+ env: {},
96
+ };
97
+ const cmd = runtime.buildSpawnCommand(opts);
98
+ expect(cmd).toContain(`--model ${model}`);
99
+ }
100
+ });
101
+
102
// Calls the builder twice with the very same opts object and compares results.
+ test("produces identical output for same inputs (deterministic)", () => {
103
+ const opts: SpawnOpts = {
104
+ model: "sonnet",
105
+ permissionMode: "bypass",
106
+ cwd: "/tmp/worktree",
107
+ env: {},
108
+ };
109
+ const cmd1 = runtime.buildSpawnCommand(opts);
110
+ const cmd2 = runtime.buildSpawnCommand(opts);
111
+ expect(cmd1).toBe(cmd2);
112
+ });
113
+ });
114
+
115
// buildPrintCommand suite: the method is called with a prompt and an optional
// model, and the returned argv array is inspected.
+ describe("buildPrintCommand", () => {
116
// Without a model the expected argv has exactly four elements.
+ test("basic command without model includes --allow-all-tools", () => {
117
+ const argv = runtime.buildPrintCommand("Summarize this diff");
118
+ expect(argv).toEqual(["copilot", "-p", "Summarize this diff", "--allow-all-tools"]);
119
+ });
120
+
121
// With a model, the --model pair is expected after --allow-all-tools.
+ test("command with model override appends --model flag", () => {
122
+ const argv = runtime.buildPrintCommand("Classify this error", "haiku");
123
+ expect(argv).toEqual([
124
+ "copilot",
125
+ "-p",
126
+ "Classify this error",
127
+ "--allow-all-tools",
128
+ "--model",
129
+ "haiku",
130
+ ]);
131
+ });
132
+
133
// Passing undefined explicitly is expected to behave like omitting the model.
+ test("model undefined omits --model flag", () => {
134
+ const argv = runtime.buildPrintCommand("Hello", undefined);
135
+ expect(argv).not.toContain("--model");
136
+ expect(argv).toContain("--allow-all-tools");
137
+ });
138
+
139
+ test("--allow-all-tools always present regardless of model", () => {
140
+ const withModel = runtime.buildPrintCommand("prompt", "opus");
141
+ const withoutModel = runtime.buildPrintCommand("prompt");
142
+ expect(withModel).toContain("--allow-all-tools");
143
+ expect(withoutModel).toContain("--allow-all-tools");
144
+ });
145
+ });
146
+
147
// detectReady suite: feeds pane text to runtime.detectReady and expects an
// object with a phase of either loading or ready.
// Taken together the cases below imply that ready needs two signals at once:
// a prompt indicator (the U+276F glyph or the word copilot in any letter case)
// and a status hint (shift+tab or esc in any letter case). A single signal on
// its own is expected to stay in loading.
+ describe("detectReady", () => {
148
+ test("returns loading for empty pane", () => {
149
+ const state = runtime.detectReady("");
150
+ expect(state).toEqual({ phase: "loading" });
151
+ });
152
+
153
// Prompt glyph and the word Copilot are present, but no status hint.
+ test("returns loading for partial content (prompt only, no status bar)", () => {
154
+ const state = runtime.detectReady("Welcome to Copilot!\n\u276f");
155
+ expect(state).toEqual({ phase: "loading" });
156
+ });
157
+
158
// Status hint present, but neither the glyph nor the word copilot.
+ test("returns loading for partial content (status bar only, no prompt)", () => {
159
+ const state = runtime.detectReady("shift+tab to toggle");
160
+ expect(state).toEqual({ phase: "loading" });
161
+ });
162
+
163
+ test("returns ready for ❯ + shift+tab", () => {
164
+ const state = runtime.detectReady("GitHub Copilot\n\u276f\nshift+tab to chat");
165
+ expect(state).toEqual({ phase: "ready" });
166
+ });
167
+
168
+ test("returns ready for ❯ + esc", () => {
169
+ const state = runtime.detectReady("GitHub Copilot\n\u276f\nesc to cancel");
170
+ expect(state).toEqual({ phase: "ready" });
171
+ });
172
+
173
// No prompt glyph here: the word Copilot alone serves as the prompt indicator.
+ test("returns ready for 'copilot' keyword + shift+tab (case-insensitive)", () => {
174
+ const state = runtime.detectReady("Copilot Agent Ready\nshift+tab");
175
+ expect(state).toEqual({ phase: "ready" });
176
+ });
177
+
178
+ test("returns ready for 'copilot' keyword + esc (case-insensitive)", () => {
179
+ const state = runtime.detectReady("GitHub Copilot v1.0\npress esc to exit");
180
+ expect(state).toEqual({ phase: "ready" });
181
+ });
182
+
183
// Both the keyword and the status hint are upper-case in this input.
+ test("case-insensitive match for 'COPILOT'", () => {
184
+ const state = runtime.detectReady("GITHUB COPILOT\nESC");
185
+ expect(state).toEqual({ phase: "ready" });
186
+ });
187
+
188
+ test("returns loading for random pane content", () => {
189
+ const state = runtime.detectReady("Loading...\nPlease wait");
190
+ expect(state).toEqual({ phase: "loading" });
191
+ });
192
+
193
+ test("no trust dialog phase — trust text is ignored", () => {
194
+ // Copilot has no trust dialog; this should just test loading/ready states
195
+ const state = runtime.detectReady("trust this folder");
196
+ // Without prompt+statusbar indicators, remains loading
197
+ expect(state).toEqual({ phase: "loading" });
198
+ });
199
+
200
// Glyph plus a capitalised status hint; no copilot keyword in the input.
+ test("Shift+Tab (capital) is matched case-insensitively", () => {
201
+ const state = runtime.detectReady("\u276f\nShift+Tab to toggle");
202
+ expect(state).toEqual({ phase: "ready" });
203
+ });
204
+ });
205
+
206
// buildEnv suite: passes a ResolvedModel to runtime.buildEnv and checks the
// returned environment object.
+ describe("buildEnv", () => {
207
// The env property is absent from the model literal altogether.
+ test("returns empty object when model has no env", () => {
208
+ const model: ResolvedModel = { model: "sonnet" };
209
+ const env = runtime.buildEnv(model);
210
+ expect(env).toEqual({});
211
+ });
212
+
213
// The result is compared by deep equality with the env given on the model.
+ test("returns model.env when present", () => {
214
+ const model: ResolvedModel = {
215
+ model: "gpt-4o",
216
+ env: { GITHUB_TOKEN: "gh-test-123", COPILOT_API_URL: "https://api.github.com" },
217
+ };
218
+ const env = runtime.buildEnv(model);
219
+ expect(env).toEqual({
220
+ GITHUB_TOKEN: "gh-test-123",
221
+ COPILOT_API_URL: "https://api.github.com",
222
+ });
223
+ });
224
+
225
// Differs from the first test in that env is present but explicitly undefined.
+ test("returns empty object when model.env is undefined", () => {
226
+ const model: ResolvedModel = { model: "opus", env: undefined };
227
+ const env = runtime.buildEnv(model);
228
+ expect(env).toEqual({});
229
+ });
230
+
231
// Spreading the result next to another key must yield only that other key,
// i.e. the result contributes no entries when the model has no env.
+ test("env is safe to spread into session env", () => {
232
+ const model: ResolvedModel = { model: "sonnet" };
233
+ const env = runtime.buildEnv(model);
234
+ const combined = { ...env, OVERSTORY_AGENT_NAME: "builder-1" };
235
+ expect(combined).toEqual({ OVERSTORY_AGENT_NAME: "builder-1" });
236
+ });
237
+ });
238
+
239
// deployConfig suite: calls runtime.deployConfig(worktreePath, overlay, meta)
// against a temporary directory and inspects the resulting files via Bun.file.
+ describe("deployConfig", () => {
240
// Reassigned before every test by the beforeEach hook below.
+ let tempDir: string;
241
+
242
// Creates a fresh, uniquely named directory under the OS temp dir per test.
+ beforeEach(async () => {
243
+ tempDir = await mkdtemp(join(tmpdir(), "overstory-copilot-test-"));
244
+ });
245
+
246
// Hands the directory to the cleanupTempDir helper from ../test-helpers.ts
// (presumably removes it recursively; the helper is not visible here).
+ afterEach(async () => {
247
+ await cleanupTempDir(tempDir);
248
+ });
249
+
250
// worktreePath is only joined, never created by the test, so a passing run
// implies deployConfig creates any missing parent directories itself.
+ test("writes overlay to .github/copilot-instructions.md when provided", async () => {
251
+ const worktreePath = join(tempDir, "worktree");
252
+
253
+ await runtime.deployConfig(
254
+ worktreePath,
255
+ { content: "# Copilot Instructions\nYou are a builder." },
256
+ {
257
+ agentName: "test-builder",
258
+ capability: "builder",
259
+ worktreePath,
260
+ },
261
+ );
262
+
263
+ const overlayPath = join(worktreePath, ".github", "copilot-instructions.md");
264
+ const content = await Bun.file(overlayPath).text();
265
+ expect(content).toBe("# Copilot Instructions\nYou are a builder.");
266
+ });
267
+
268
// NOTE(review): despite its name, githubDirExists holds the existence of the
// instructions file; the directory is covered only indirectly, because the
// file could not exist without it.
+ test("creates .github directory if it does not exist", async () => {
269
+ const worktreePath = join(tempDir, "new-worktree");
270
+
271
+ await runtime.deployConfig(
272
+ worktreePath,
273
+ { content: "# Instructions" },
274
+ { agentName: "test", capability: "builder", worktreePath },
275
+ );
276
+
277
+ const githubDirExists = await Bun.file(
278
+ join(worktreePath, ".github", "copilot-instructions.md"),
279
+ ).exists();
280
+ expect(githubDirExists).toBe(true);
281
+ });
282
+
283
// The overlay argument is undefined here; the call must still resolve.
+ test("skips overlay write when overlay is undefined", async () => {
284
+ const worktreePath = join(tempDir, "worktree");
285
+
286
+ await runtime.deployConfig(worktreePath, undefined, {
287
+ agentName: "coordinator",
288
+ capability: "coordinator",
289
+ worktreePath,
290
+ });
291
+
292
+ // No overlay written — .github directory should not be created.
// NOTE(review): only the absence of the file is asserted below; whether the
// .github directory itself exists is not checked.
293
+ const overlayPath = join(worktreePath, ".github", "copilot-instructions.md");
294
+ const overlayExists = await Bun.file(overlayPath).exists();
295
+ expect(overlayExists).toBe(false);
296
+ });
297
+
298
// Negative check: deploying with an overlay must not produce a settings file
// under the .claude directory of the worktree.
+ test("does not write settings.local.json (no hook deployment)", async () => {
299
+ const worktreePath = join(tempDir, "worktree");
300
+
301
+ await runtime.deployConfig(
302
+ worktreePath,
303
+ { content: "# Instructions" },
304
+ { agentName: "test-builder", capability: "builder", worktreePath },
305
+ );
306
+
307
+ // Copilot does not deploy Claude Code hooks.
308
+ const settingsPath = join(worktreePath, ".claude", "settings.local.json");
309
+ const settingsExists = await Bun.file(settingsPath).exists();
310
+ expect(settingsExists).toBe(false);
311
+ });
312
+ });
313
+
314
// parseTranscript suite: each test writes a newline-delimited JSON transcript
// into a temporary directory with Bun.write, calls runtime.parseTranscript on
// the path, and checks the inputTokens, outputTokens and model fields of the
// result. Two entry shapes are exercised:
//   - type assistant with message.model and message.usage.input_tokens /
//     message.usage.output_tokens (called Claude-style in the test names)
//   - type message_end with top-level inputTokens / outputTokens (called
//     Pi-style in the test names)
+ describe("parseTranscript", () => {
315
// Reassigned before every test by the beforeEach hook below.
+ let tempDir: string;
316
+
317
// Creates a fresh, uniquely named directory under the OS temp dir per test.
+ beforeEach(async () => {
318
+ tempDir = await mkdtemp(join(tmpdir(), "overstory-copilot-transcript-test-"));
319
+ });
320
+
321
// Hands the directory to the cleanupTempDir helper from ../test-helpers.ts
// (presumably removes it recursively; the helper is not visible here).
+ afterEach(async () => {
322
+ await cleanupTempDir(tempDir);
323
+ });
324
+
325
// A missing file is expected to resolve to null rather than reject.
+ test("returns null for non-existent file", async () => {
326
+ const result = await runtime.parseTranscript(join(tempDir, "does-not-exist.jsonl"));
327
+ expect(result).toBeNull();
328
+ });
329
+
330
+ test("parses Claude-style transcript (type:assistant, message.usage)", async () => {
331
+ const transcriptPath = join(tempDir, "session.jsonl");
332
+ const entry = JSON.stringify({
333
+ type: "assistant",
334
+ message: {
335
+ model: "claude-sonnet-4-6",
336
+ usage: {
337
+ input_tokens: 100,
338
+ output_tokens: 50,
339
+ },
340
+ },
341
+ });
342
+ await Bun.write(transcriptPath, `${entry}\n`);
343
+
344
+ const result = await runtime.parseTranscript(transcriptPath);
345
+ expect(result).not.toBeNull();
346
+ expect(result?.inputTokens).toBe(100);
347
+ expect(result?.outputTokens).toBe(50);
348
+ expect(result?.model).toBe("claude-sonnet-4-6");
349
+ });
350
+
351
// Here the model comes from a separate model_change entry that precedes the
// token-carrying message_end entry.
+ test("parses Pi-style transcript (type:message_end, top-level tokens)", async () => {
352
+ const transcriptPath = join(tempDir, "session.jsonl");
353
+ const modelEntry = JSON.stringify({ type: "model_change", model: "gpt-4o" });
354
+ const tokenEntry = JSON.stringify({
355
+ type: "message_end",
356
+ inputTokens: 200,
357
+ outputTokens: 75,
358
+ });
359
+ await Bun.write(transcriptPath, `${modelEntry}\n${tokenEntry}\n`);
360
+
361
+ const result = await runtime.parseTranscript(transcriptPath);
362
+ expect(result).not.toBeNull();
363
+ expect(result?.inputTokens).toBe(200);
364
+ expect(result?.outputTokens).toBe(75);
365
+ expect(result?.model).toBe("gpt-4o");
366
+ });
367
+
368
// Token counts of both entries are expected to be summed (100+200, 50+75).
+ test("aggregates multiple Claude-style turns", async () => {
369
+ const transcriptPath = join(tempDir, "session.jsonl");
370
+ const entry1 = JSON.stringify({
371
+ type: "assistant",
372
+ message: {
373
+ model: "claude-sonnet-4-6",
374
+ usage: { input_tokens: 100, output_tokens: 50 },
375
+ },
376
+ });
377
+ const entry2 = JSON.stringify({
378
+ type: "assistant",
379
+ message: {
380
+ model: "claude-sonnet-4-6",
381
+ usage: { input_tokens: 200, output_tokens: 75 },
382
+ },
383
+ });
384
+ await Bun.write(transcriptPath, `${entry1}\n${entry2}\n`);
385
+
386
+ const result = await runtime.parseTranscript(transcriptPath);
387
+ expect(result?.inputTokens).toBe(300);
388
+ expect(result?.outputTokens).toBe(125);
389
+ });
390
+
391
// Same summing expectation for message_end entries (100+150, 40+60).
+ test("aggregates multiple Pi-style turns", async () => {
392
+ const transcriptPath = join(tempDir, "session.jsonl");
393
+ const entry1 = JSON.stringify({ type: "message_end", inputTokens: 100, outputTokens: 40 });
394
+ const entry2 = JSON.stringify({ type: "message_end", inputTokens: 150, outputTokens: 60 });
395
+ await Bun.write(transcriptPath, `${entry1}\n${entry2}\n`);
396
+
397
+ const result = await runtime.parseTranscript(transcriptPath);
398
+ expect(result?.inputTokens).toBe(250);
399
+ expect(result?.outputTokens).toBe(100);
400
+ });
401
+
402
// The first entry has no type at all, only a model; the assistant entry that
// follows carries usage but no model. Only inputTokens is asserted here.
+ test("top-level model field is picked up from any entry", async () => {
403
+ const transcriptPath = join(tempDir, "session.jsonl");
404
+ const modelEntry = JSON.stringify({ model: "copilot-4" });
405
+ const tokenEntry = JSON.stringify({
406
+ type: "assistant",
407
+ message: { usage: { input_tokens: 10, output_tokens: 5 } },
408
+ });
409
+ await Bun.write(transcriptPath, `${modelEntry}\n${tokenEntry}\n`);
410
+
411
+ const result = await runtime.parseTranscript(transcriptPath);
412
+ expect(result?.model).toBe("copilot-4");
413
+ expect(result?.inputTokens).toBe(10);
414
+ });
415
+
416
// A single entry carries two different model values to pin the precedence.
+ test("message.model takes precedence over top-level model when both present", async () => {
417
+ const transcriptPath = join(tempDir, "session.jsonl");
418
+ const entry = JSON.stringify({
419
+ type: "assistant",
420
+ model: "top-level-model",
421
+ message: {
422
+ model: "message-model",
423
+ usage: { input_tokens: 10, output_tokens: 5 },
424
+ },
425
+ });
426
+ await Bun.write(transcriptPath, `${entry}\n`);
427
+
428
+ const result = await runtime.parseTranscript(transcriptPath);
429
+ // message.model is processed after top-level model in same entry,
430
+ // so message.model wins for assistant entries.
431
+ expect(result?.model).toBe("message-model");
432
+ });
433
+
434
// Both entry shapes in one file are expected to contribute to the same totals
// (100+50, 40+20).
+ test("mixed Claude-style and Pi-style in same transcript", async () => {
435
+ const transcriptPath = join(tempDir, "session.jsonl");
436
+ const claudeEntry = JSON.stringify({
437
+ type: "assistant",
438
+ message: {
439
+ model: "claude-sonnet-4-6",
440
+ usage: { input_tokens: 100, output_tokens: 40 },
441
+ },
442
+ });
443
+ const piEntry = JSON.stringify({
444
+ type: "message_end",
445
+ inputTokens: 50,
446
+ outputTokens: 20,
447
+ });
448
+ await Bun.write(transcriptPath, `${claudeEntry}\n${piEntry}\n`);
449
+
450
+ const result = await runtime.parseTranscript(transcriptPath);
451
+ expect(result?.inputTokens).toBe(150);
452
+ expect(result?.outputTokens).toBe(60);
453
+ });
454
+
455
// The user entry has a message but no usage; totals must equal the assistant
// entry alone.
+ test("skips non-relevant entry types", async () => {
456
+ const transcriptPath = join(tempDir, "session.jsonl");
457
+ const userEntry = JSON.stringify({ type: "user", message: { content: "hello" } });
458
+ const assistantEntry = JSON.stringify({
459
+ type: "assistant",
460
+ message: {
461
+ model: "claude-sonnet-4-6",
462
+ usage: { input_tokens: 50, output_tokens: 25 },
463
+ },
464
+ });
465
+ await Bun.write(transcriptPath, `${userEntry}\n${assistantEntry}\n`);
466
+
467
+ const result = await runtime.parseTranscript(transcriptPath);
468
+ expect(result?.inputTokens).toBe(50);
469
+ expect(result?.outputTokens).toBe(25);
470
+ });
471
+
472
// The file starts with a non-JSON line and ends with a truncated object that
// has no trailing newline; only the valid middle line should be counted.
+ test("skips malformed lines and continues parsing", async () => {
473
+ const transcriptPath = join(tempDir, "session.jsonl");
474
+ const goodEntry = JSON.stringify({
475
+ type: "assistant",
476
+ message: { model: "gpt-4o", usage: { input_tokens: 30, output_tokens: 15 } },
477
+ });
478
+ await Bun.write(transcriptPath, `not json at all\n${goodEntry}\n{broken`);
479
+
480
+ const result = await runtime.parseTranscript(transcriptPath);
481
+ expect(result).not.toBeNull();
482
+ expect(result?.inputTokens).toBe(30);
483
+ expect(result?.outputTokens).toBe(15);
484
+ });
485
+
486
// An existing but empty file differs from a missing one: the result is a
// non-null object with zero counts and an empty model string.
+ test("returns zero tokens for empty transcript", async () => {
487
+ const transcriptPath = join(tempDir, "empty.jsonl");
488
+ await Bun.write(transcriptPath, "");
489
+
490
+ const result = await runtime.parseTranscript(transcriptPath);
491
+ expect(result).not.toBeNull();
492
+ expect(result?.inputTokens).toBe(0);
493
+ expect(result?.outputTokens).toBe(0);
494
+ expect(result?.model).toBe("");
495
+ });
496
+ });
497
+ });
498
+
499
// Registry wiring check: looks up the copilot id through getRuntime from
// ./registry.ts and verifies the returned object is a CopilotRuntime exposing
// the same id and instructionPath values asserted in the unit suite.
+ describe("CopilotRuntime integration: registry resolves 'copilot'", () => {
500
+ test("getRuntime('copilot') returns CopilotRuntime", async () => {
501
// The registry module is loaded with a dynamic import inside the test body
// rather than a static import at the top of the file.
+ const { getRuntime } = await import("./registry.ts");
502
+ const rt = getRuntime("copilot");
503
+ expect(rt).toBeInstanceOf(CopilotRuntime);
504
+ expect(rt.id).toBe("copilot");
505
+ expect(rt.instructionPath).toBe(".github/copilot-instructions.md");
506
+ });
507
+ });