fraim-framework 2.0.161 → 2.0.163

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,18 +8,23 @@ exports.parseSeekMentoringSignal = parseSeekMentoringSignal;
8
8
  exports.parseUsageSignal = parseUsageSignal;
9
9
  exports.parseAgentIdentitySignal = parseAgentIdentitySignal;
10
10
  exports.detectEmployees = detectEmployees;
11
+ exports.prepareCodexBrowserHome = prepareCodexBrowserHome;
12
+ exports.sharedBrowserHostConfig = sharedBrowserHostConfig;
11
13
  exports.buildStartPlan = buildStartPlan;
12
14
  exports.buildContinuePlan = buildContinuePlan;
13
15
  exports.supportsDirectPath = supportsDirectPath;
14
16
  exports.buildDirectStartPlan = buildDirectStartPlan;
15
17
  exports.buildDirectContinuePlan = buildDirectContinuePlan;
16
18
  exports.parseHostLine = parseHostLine;
19
+ exports.findGeminiSessionIdForPrompt = findGeminiSessionIdForPrompt;
17
20
  const crypto_1 = require("crypto");
18
21
  const child_process_1 = require("child_process");
19
22
  const fs_1 = __importDefault(require("fs"));
20
23
  const os_1 = __importDefault(require("os"));
21
24
  const path_1 = __importDefault(require("path"));
22
25
  const managed_agent_paths_1 = require("../cli/utils/managed-agent-paths");
26
+ const mcp_config_generator_1 = require("../cli/setup/mcp-config-generator");
27
+ const agent_token_prices_1 = require("../local-mcp-server/agent-token-prices");
23
28
  // Parse a single line of host stdout looking for a seekMentoring tool-use
24
29
  // signal. Returns null if the line does not contain one. Supports both
25
30
  // hosts FRAIM ships against today:
@@ -89,8 +94,9 @@ function parseSeekMentoringSignal(line) {
89
94
  // Issue #347 — extract per-turn usage from the host's JSON stream.
90
95
  // Codex: `{"type":"turn.completed","usage":{input_tokens, cached_input_tokens, output_tokens, reasoning_output_tokens}}`.
91
96
  // Claude Code: `{"type":"result", ..., "usage":{input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens}, "total_cost_usd": ...}`.
97
+ // Gemini CLI: `{"stats":{"models":{"model-id":{"tokens":{input,prompt,cached,candidates,thoughts,tool}}}}}`.
92
98
  function parseUsageSignal(line) {
93
- if (!line.includes('usage'))
99
+ if (!line.includes('usage') && !line.includes('"stats"'))
94
100
  return null;
95
101
  let parsed;
96
102
  try {
@@ -138,6 +144,62 @@ function parseUsageSignal(line) {
138
144
  costUsd: costUsd ?? undefined,
139
145
  };
140
146
  }
147
+ // Gemini CLI JSON output reports per-model stats. Its `input` bucket is
148
+ // fresh prompt input, `cached` is prompt-cache reads, and `thoughts` are
149
+ // billable output/reasoning tokens.
150
+ if (typeof obj.stats === 'object' && obj.stats !== null) {
151
+ const stats = obj.stats;
152
+ const models = stats.models;
153
+ if (models && typeof models === 'object') {
154
+ let sawTokens = false;
155
+ let nonCachedInputTokens = 0;
156
+ let cachedInputTokens = 0;
157
+ let outputTokens = 0;
158
+ let reasoningTokens = 0;
159
+ let costUsd = 0;
160
+ let hasCompletePricing = true;
161
+ for (const [modelId, modelStats] of Object.entries(models)) {
162
+ if (typeof modelStats !== 'object' || modelStats === null)
163
+ continue;
164
+ const tokens = modelStats.tokens;
165
+ if (!tokens || typeof tokens !== 'object')
166
+ continue;
167
+ const freshInput = numberOrNull(tokens.input) ?? 0;
168
+ const cachedInput = numberOrNull(tokens.cached) ?? 0;
169
+ const candidates = numberOrNull(tokens.candidates) ?? 0;
170
+ const thoughts = numberOrNull(tokens.thoughts) ?? 0;
171
+ const toolTokens = numberOrNull(tokens.tool) ?? 0;
172
+ const modelOutput = candidates + thoughts + toolTokens;
173
+ if (freshInput === 0 && cachedInput === 0 && modelOutput === 0)
174
+ continue;
175
+ sawTokens = true;
176
+ nonCachedInputTokens += freshInput;
177
+ cachedInputTokens += cachedInput;
178
+ outputTokens += modelOutput;
179
+ reasoningTokens += thoughts;
180
+ const price = (0, agent_token_prices_1.lookupPrice)('gemini', modelId.toLowerCase());
181
+ if (price) {
182
+ costUsd +=
183
+ (freshInput / 1_000_000) * price.inputPerMTok +
184
+ (cachedInput / 1_000_000) * price.cacheReadPerMTok +
185
+ (modelOutput / 1_000_000) * price.outputPerMTok;
186
+ }
187
+ else {
188
+ hasCompletePricing = false;
189
+ }
190
+ }
191
+ if (!sawTokens)
192
+ return null;
193
+ return {
194
+ nonCachedInputTokens,
195
+ cachedInputTokens,
196
+ cacheCreationTokens: 0,
197
+ outputTokens,
198
+ reasoningTokens: reasoningTokens || undefined,
199
+ costUsd: hasCompletePricing ? costUsd : undefined,
200
+ };
201
+ }
202
+ }
141
203
  return null;
142
204
  }
143
205
  // Issue #347 — extract agent identity from the fraim_connect tool call.
@@ -212,13 +274,58 @@ function extractSignalFromArgs(args) {
212
274
  const discriminant = typeof args.runDiscriminant === 'string' ? args.runDiscriminant : undefined;
213
275
  const jobName = typeof args.jobName === 'string' ? args.jobName : undefined;
214
276
  const jobId = typeof args.jobId === 'string' ? args.jobId : undefined;
215
- return { phaseId, phaseStatus, findingsText, discriminant, jobName, jobId };
277
+ const reviewHandoff = extractReviewHandoffFromArgs(args);
278
+ return { phaseId, phaseStatus, findingsText, discriminant, jobName, jobId, ...(reviewHandoff ? { reviewHandoff } : {}) };
279
+ }
280
/**
 * Looks for a review-handoff payload in tool-call arguments.
 *
 * The top-level `args.reviewHandoff` is tried first. When it yields nothing
 * and `args.evidence` is an object, `args.evidence.reviewHandoff` is tried.
 *
 * @param {object} args - Tool-call arguments.
 * @returns {object|null} Whatever readReviewHandoffCandidate produced, or null.
 */
function extractReviewHandoffFromArgs(args) {
    const topLevel = readReviewHandoffCandidate(args.reviewHandoff);
    if (topLevel) {
        return topLevel;
    }
    const nested = args.evidence;
    const hasEvidenceObject = Boolean(nested) && typeof nested === 'object';
    return hasEvidenceObject ? readReviewHandoffCandidate(nested.reviewHandoff) : null;
}
290
/**
 * Normalizes one possible review-handoff payload.
 *
 * Accepts either an object or a JSON string encoding one. The payload is
 * only recognized when it is a non-array object whose `reviewRequired` is a
 * boolean. Nested values follow the same "non-array object" rule as the
 * payload itself: a `reviewTarget` that is not such an object becomes null,
 * and `artifacts` entries that are not such objects are dropped.
 *
 * @param {unknown} value - Raw candidate (object, JSON string, or anything else).
 * @returns {{reviewRequired: boolean, reviewTarget: object|null, artifacts: object[], summary?: string, feedbackMode?: string}|null}
 *   The normalized handoff, or null when the candidate is not recognizable.
 */
function readReviewHandoffCandidate(value) {
    const isRecord = (item) => item !== null && typeof item === 'object' && !Array.isArray(item);
    let candidate = value;
    if (typeof candidate === 'string') {
        try {
            candidate = JSON.parse(candidate);
        }
        catch {
            // Not JSON: a plain string can never be a handoff payload.
            return null;
        }
    }
    if (!isRecord(candidate) || typeof candidate.reviewRequired !== 'boolean') {
        return null;
    }
    return {
        reviewRequired: candidate.reviewRequired,
        reviewTarget: isRecord(candidate.reviewTarget) ? candidate.reviewTarget : null,
        artifacts: Array.isArray(candidate.artifacts) ? candidate.artifacts.filter(isRecord) : [],
        // Optional fields are only emitted when present with the expected type.
        ...(typeof candidate.summary === 'string' ? { summary: candidate.summary } : {}),
        ...(typeof candidate.feedbackMode === 'string' ? { feedbackMode: candidate.feedbackMode } : {}),
    };
}
217
318
  const EMPLOYEE_LABELS = {
218
319
  codex: 'Codex',
219
320
  claude: 'Claude Code',
220
321
  gemini: 'Gemini CLI',
322
+ copilot: 'GitHub Copilot CLI',
221
323
  };
324
+ // GitHub Copilot CLI binary name after `npm install -g @github/copilot`.
325
+ // The @github/copilot package installs a binary named `copilot` on PATH.
326
+ // Note: the package name is @github/copilot (NOT @github/copilot-cli which
327
+ // does not exist on npm). The binary is `copilot` (NOT `github-copilot-cli`).
328
+ const COPILOT_BINARY = 'copilot';
222
329
  const executableName = (command) => command;
223
330
  function quoteWindowsArg(value) {
224
331
  if (value.length === 0) {
@@ -247,9 +354,15 @@ const availableByVersionProbe = (command) => {
247
354
  });
248
355
  return result.status === 0;
249
356
  };
357
// Maps an agent id to the executable name used for it: Copilot goes through
// its dedicated COPILOT_BINARY constant, every other id through executableName.
function agentBinaryName(id) {
    return id === 'copilot' ? COPILOT_BINARY : executableName(id);
}
250
363
  function detectEmployees() {
251
364
  return Object.keys(EMPLOYEE_LABELS).map((id) => {
252
- const available = availableByVersionProbe(executableName(id));
365
+ const available = availableByVersionProbe(agentBinaryName(id));
253
366
  return {
254
367
  id,
255
368
  label: EMPLOYEE_LABELS[id],
@@ -302,8 +415,40 @@ function transformHeadlessFraimMessage(message, kind) {
302
415
  if (parsed.remainder) {
303
416
  parts.push(`\n\nManager instructions: ${parsed.remainder}`);
304
417
  }
418
+ const storageGuard = machineLevelStorageGuard(parsed.jobId);
419
+ if (storageGuard) {
420
+ parts.push(`\n\n${storageGuard}`);
421
+ }
305
422
  return parts.join('');
306
423
  }
424
/**
 * Builds the "storage scope guardrail" prompt text for jobs whose artifacts
 * live under the user's home directory (~/.fraim/personalized-employee)
 * rather than in the repository.
 *
 * Only `manager-agreements` and `organization-onboarding` (matched
 * case-insensitively) produce a guard.
 *
 * @param {string} jobId - Job identifier taken from the parsed message.
 * @returns {string|null} Newline-joined guardrail text, or null when the job
 *   needs no guard or `jobId` is not a string.
 */
function machineLevelStorageGuard(jobId) {
    // A missing or non-string job id simply has no guard; do not crash the
    // message transformation on `.toLowerCase()`.
    if (typeof jobId !== 'string') {
        return null;
    }
    const personalizedRoot = path_1.default.join(os_1.default.homedir(), '.fraim', 'personalized-employee');
    // Both guards share the same skeleton; only the subject, the file-name
    // stem and (for manager agreements) one extra bullet differ.
    const buildGuard = (subject, stem, extraLines = []) => {
        const contextFile = `${stem}_context.md`;
        const rulesFile = `${stem}_rules.md`;
        const contextPath = path_1.default.join(personalizedRoot, 'context', contextFile);
        const rulesPath = path_1.default.join(personalizedRoot, 'rules', rulesFile);
        return [
            'Storage scope guardrail:',
            `- ${subject} artifacts are machine-level, not repo-level.`,
            `- Required write targets: ${contextPath} and ${rulesPath}.`,
            ...extraLines,
            `- Do not write, validate, call canonical, commit, or open a PR for repo-local fraim/personalized-employee/context/${contextFile} or fraim/personalized-employee/rules/${rulesFile} as substitutes.`,
            '- If the exact machine-level paths cannot be written, fail the phase and report the concrete filesystem error.',
        ].join('\n');
    };
    switch (jobId.toLowerCase()) {
        case 'manager-agreements':
            return buildGuard('Manager agreements', 'manager', [
                '- Keep the split crisp: manager_context.md is what is true about the manager; manager_rules.md is how employees must behave because of those truths.',
            ]);
        case 'organization-onboarding':
            return buildGuard('Organization onboarding', 'org');
        default:
            return null;
    }
}
307
452
  // If ~/.gemini/settings.json has a wrong/test FRAIM_API_KEY, patch it with the
308
453
  // real key from ~/.fraim/config.json so the FRAIM MCP server can authenticate.
309
454
  // This self-heals when a test run accidentally writes a test key to global config.
@@ -329,49 +474,201 @@ function ensureGeminiApiKey() {
329
474
  }
330
475
  catch { /* best-effort: never crash the Hub over a config patch */ }
331
476
  }
332
- function buildStartPlan(hostId, message) {
477
/**
 * Builds (idempotently) a Hub-managed CODEX_HOME so `codex exec` drives the
 * shared browser.
 *
 * The managed home lives in the OS temp directory and contains:
 *   - config.toml: the user's real config merged with a playwright MCP server
 *     entry that points at the given CDP endpoint;
 *   - best-effort copies of auth.json, session_index.jsonl and history.jsonl;
 *   - a `sessions` junction to the real sessions directory, so resuming
 *     existing sessions works and new sessions persist next to the user's.
 * The user's real Codex home is only read, apart from creating its `sessions`
 * directory when missing.
 *
 * @param {string} cdp - CDP endpoint passed to the playwright MCP server.
 * @param {object} [env=process.env] - Environment used to resolve CODEX_HOME.
 * @returns {string|null} Path of the managed home, or null on any failure
 *   (the caller then leaves Codex on its own browser).
 */
function prepareCodexBrowserHome(cdp, env = process.env) {
    try {
        const real = env['CODEX_HOME'] || path_1.default.join(os_1.default.homedir(), '.codex');
        // Derive the temp-home path from the real home so different real homes
        // (e.g. a test's fake home vs the user's ~/.codex) never share one temp home.
        const homeKey = (0, crypto_1.createHash)('sha1').update(real).digest('hex').slice(0, 12);
        const home = path_1.default.join(os_1.default.tmpdir(), 'fraim-codex-home-' + homeKey);
        // The directory receives a copy of auth.json, so keep it private to the
        // current user when it is first created (mode is ignored on Windows).
        fs_1.default.mkdirSync(home, { recursive: true, mode: 0o700 });
        // config.toml = real config with the playwright server redirected to the CDP endpoint.
        const realConfig = path_1.default.join(real, 'config.toml');
        const existing = fs_1.default.existsSync(realConfig) ? fs_1.default.readFileSync(realConfig, 'utf8') : '';
        // JSON string escaping keeps the endpoint a single well-formed quoted
        // TOML string even if it contains quotes or backslashes.
        const cdpLiteral = JSON.stringify(String(cdp));
        const pwBlock = `[mcp_servers.playwright]\ncommand = "npx"\nargs = ["-y", "@playwright/mcp@latest", "--cdp-endpoint", ${cdpLiteral}]\n`;
        const merged = (0, mcp_config_generator_1.mergeTomlMCPServers)(existing, pwBlock, ['playwright']).content;
        fs_1.default.writeFileSync(path_1.default.join(home, 'config.toml'), merged, { encoding: 'utf8', mode: 0o600 });
        // Auth + the session index (so resume can find existing rollouts by thread id).
        for (const f of ['auth.json', 'session_index.jsonl', 'history.jsonl']) {
            const src = path_1.default.join(real, f);
            if (fs_1.default.existsSync(src)) {
                try {
                    fs_1.default.copyFileSync(src, path_1.default.join(home, f));
                }
                catch { /* best effort */ }
            }
        }
        // Sessions: junction temp/sessions -> real/sessions. Re-point it when an
        // existing junction targets a different directory.
        const realSessions = path_1.default.join(real, 'sessions');
        fs_1.default.mkdirSync(realSessions, { recursive: true });
        const tmpSessions = path_1.default.join(home, 'sessions');
        try {
            let entryPresent = true;
            try {
                // lstat (unlike existsSync) does not follow the link, so a junction
                // whose target has been deleted is still detected and replaced.
                const st = fs_1.default.lstatSync(tmpSessions);
                if (st.isSymbolicLink()) {
                    const pointsAtReal = path_1.default.resolve(fs_1.default.readlinkSync(tmpSessions)) === path_1.default.resolve(realSessions);
                    if (!pointsAtReal) {
                        fs_1.default.unlinkSync(tmpSessions); // wrong target → drop the junction (not the target)
                        entryPresent = false;
                    }
                }
                // A real directory/file at this path is left untouched.
            }
            catch (err) {
                if (!err || err.code !== 'ENOENT')
                    throw err;
                entryPresent = false;
            }
            if (!entryPresent)
                fs_1.default.symlinkSync(realSessions, tmpSessions, 'junction');
        }
        catch { /* junction best-effort; new runs still work without resuming old sessions */ }
        return home;
    }
    catch {
        return null;
    }
}
533
/**
 * Computes the extra CLI args / environment a host needs to use the shared
 * browser exposed at FRAIM_BROWSER_CDP_ENDPOINT.
 *
 * Without that variable, for unknown hosts, or when writing the ephemeral
 * config fails, the result is the no-op `{ args: [] }`.
 *
 * @param {string} hostId - 'claude', 'codex', 'gemini', 'copilot', or other.
 * @param {object} [env=process.env] - Environment to read the endpoint from.
 * @returns {{args: string[], env?: object}} Per-invocation overrides.
 */
function sharedBrowserHostConfig(hostId, env = process.env) {
    const noOverride = () => ({ args: [] });
    const endpoint = env['FRAIM_BROWSER_CDP_ENDPOINT'];
    if (!endpoint) {
        return noOverride();
    }
    if (hostId === 'codex') {
        // Codex `exec` ignores -c overrides of mcp_servers, so CODEX_HOME is
        // pointed at a Hub-managed temp home prepared by prepareCodexBrowserHome.
        const codexHome = prepareCodexBrowserHome(endpoint, env);
        if (!codexHome) {
            return noOverride();
        }
        return { args: [], env: { CODEX_HOME: codexHome } };
    }
    // GitHub Copilot CLI does not yet publish a documented per-invocation
    // settings-file env var analogous to GEMINI_CLI_SYSTEM_SETTINGS_PATH, so
    // 'copilot' takes the same no-op path as unknown hosts (Option-B per spec
    // R5.2; the Hub's start-payload builder injects a browser-guidance note).
    if (hostId !== 'claude' && hostId !== 'gemini') {
        return noOverride();
    }
    const forClaude = hostId === 'claude';
    const payload = JSON.stringify({
        mcpServers: {
            playwright: {
                command: 'npx',
                args: ['-y', '@playwright/mcp@latest', '--cdp-endpoint', endpoint],
            },
        },
    });
    // Both hosts read the playwright override from an ephemeral temp file, so
    // the user's persisted ~/.claude.json / ~/.gemini/settings.json stay untouched.
    const target = path_1.default.join(os_1.default.tmpdir(), forClaude ? 'fraim-shared-browser-mcp.json' : 'fraim-gemini-browser-settings.json');
    try {
        fs_1.default.writeFileSync(target, payload, 'utf8');
    }
    catch {
        return noOverride();
    }
    if (forClaude) {
        // Claude Code's --mcp-config takes a FILE path (inline JSON is rejected
        // by the CLI's schema).
        return { args: ['--mcp-config', target] };
    }
    // Gemini CLI has no per-invocation MCP flag; it loads a SYSTEM settings file
    // from GEMINI_CLI_SYSTEM_SETTINGS_PATH which overrides the same-named server.
    return { args: [], env: { GEMINI_CLI_SYSTEM_SETTINGS_PATH: target } };
}
586
/**
 * Builds the spawn plan (command, args, stdin, env) that starts a new
 * FRAIM-managed run on the given host. Any host id other than 'codex',
 * 'gemini' or 'copilot' is handled as Claude Code.
 *
 * @param {string} hostId - Agent CLI to start.
 * @param {string} message - Manager message; sent on stdin after the headless transform.
 * @param {string} [sessionId] - Accepted for interface parity; not used here.
 * @returns {{command: string, args: string[], stdin: string, env: (object|undefined)}}
 */
function buildStartPlan(hostId, message, sessionId) {
    switch (hostId) {
        case 'codex': {
            const { args: browserArgs, env } = sharedBrowserHostConfig('codex');
            return {
                command: executableName('codex'),
                args: ['exec', '--json', '--skip-git-repo-check', '--dangerously-bypass-approvals-and-sandbox', ...browserArgs],
                stdin: transformHeadlessFraimMessage(message, 'start'),
                env,
            };
        }
        case 'gemini': {
            ensureGeminiApiKey();
            const stdin = transformHeadlessFraimMessage(message, 'start');
            const { args: browserArgs, env } = sharedBrowserHostConfig('gemini');
            return {
                command: executableName('gemini'),
                // Gemini CLI creates the durable session id itself. Hub captures
                // that real id from Gemini's chat log after start; pre-seeded UUIDs
                // are not reliably accepted by `gemini --resume`.
                args: ['--yolo', '--skip-trust', '-p', ' ', ...browserArgs],
                stdin,
                env,
            };
        }
        case 'copilot': {
            // Headless GitHub Copilot CLI: --yolo auto-approves all tool
            // permissions and the task is supplied on stdin (inline -p text is
            // cumbersome for multi-line FRAIM instructions). The binary assigns
            // its own session id, which the Hub reads from the output stream.
            const { args: browserArgs, env } = sharedBrowserHostConfig('copilot');
            return {
                command: COPILOT_BINARY,
                args: ['--yolo', ...browserArgs],
                stdin: transformHeadlessFraimMessage(message, 'start'),
                env,
            };
        }
        default: {
            const { args: browserArgs, env } = sharedBrowserHostConfig('claude');
            return {
                command: executableName('claude'),
                args: ['-p', '--verbose', '--output-format', 'stream-json', '--dangerously-skip-permissions', ...browserArgs],
                stdin: transformHeadlessFraimMessage(message, 'start'),
                env,
            };
        }
    }
}
354
634
  function buildContinuePlan(hostId, sessionId, message) {
355
635
  if (hostId === 'codex') {
636
+ const browser = sharedBrowserHostConfig('codex');
356
637
  return {
357
638
  command: executableName('codex'),
358
- args: ['exec', 'resume', '--json', '--skip-git-repo-check', '--dangerously-bypass-approvals-and-sandbox', sessionId],
639
+ args: ['exec', 'resume', '--json', '--skip-git-repo-check', '--dangerously-bypass-approvals-and-sandbox', sessionId, ...browser.args],
359
640
  stdin: transformHeadlessFraimMessage(message, 'continue'),
641
+ env: browser.env,
360
642
  };
361
643
  }
362
644
  if (hostId === 'gemini') {
363
- // Gemini CLI does not have a native session-resume flag; each message
364
- // is sent as a fresh invocation. The Hub still tracks state client-side.
645
+ ensureGeminiApiKey();
646
+ const prompt = transformHeadlessFraimMessage(message, 'continue');
647
+ const browser = sharedBrowserHostConfig('gemini');
365
648
  return {
366
649
  command: executableName('gemini'),
367
- args: ['--yolo', '--skip-trust'],
650
+ args: ['--resume', sessionId, '--yolo', '--skip-trust', '-p', ' ', ...browser.args],
651
+ stdin: prompt,
652
+ env: browser.env,
653
+ };
654
+ }
655
+ if (hostId === 'copilot') {
656
+ // Resume an existing GitHub Copilot CLI session.
657
+ // --resume <sessionId> accepts the session id returned on the first run.
658
+ const browser = sharedBrowserHostConfig('copilot');
659
+ return {
660
+ command: COPILOT_BINARY,
661
+ args: ['--yolo', '--resume', sessionId, ...browser.args],
368
662
  stdin: transformHeadlessFraimMessage(message, 'continue'),
663
+ env: browser.env,
369
664
  };
370
665
  }
666
+ const browser = sharedBrowserHostConfig('claude');
371
667
  return {
372
668
  command: executableName('claude'),
373
- args: ['-p', '--verbose', '--output-format', 'stream-json', '--dangerously-skip-permissions', '-r', sessionId],
669
+ args: ['-p', '--verbose', '--output-format', 'stream-json', '--dangerously-skip-permissions', '-r', sessionId, ...browser.args],
374
670
  stdin: transformHeadlessFraimMessage(message, 'continue'),
671
+ env: browser.env,
375
672
  };
376
673
  }
377
674
  // Issue #442: all agents support a direct-path invocation (no FRAIM, no
@@ -401,7 +698,7 @@ const DIRECT_PREAMBLE = 'DO NOT USE FRAIM FOR THIS SESSION. No phases, no seekMe
401
698
  // Issue #442: builds a CLI plan for the Direct (B) side of an A/B run.
402
699
  // All agents supported: Codex and Gemini run raw (no FRAIM preamble);
403
700
  // Claude uses --strict-mcp-config + --append-system-prompt for full isolation.
404
- function buildDirectStartPlan(hostId, message) {
701
+ function buildDirectStartPlan(hostId, message, sessionId) {
405
702
  if (hostId === 'codex') {
406
703
  return {
407
704
  command: executableName('codex'),
@@ -413,7 +710,15 @@ function buildDirectStartPlan(hostId, message) {
413
710
  ensureGeminiApiKey();
414
711
  return {
415
712
  command: executableName('gemini'),
416
- args: ['--yolo', '--skip-trust'],
713
+ args: ['--yolo', '--skip-trust', '-p', ' '],
714
+ stdin: DIRECT_PREAMBLE + message,
715
+ };
716
+ }
717
+ if (hostId === 'copilot') {
718
+ // Direct (A/B) mode for Copilot: headless, no FRAIM MCP wiring.
719
+ return {
720
+ command: COPILOT_BINARY,
721
+ args: ['--yolo'],
417
722
  stdin: DIRECT_PREAMBLE + message,
418
723
  };
419
724
  }
@@ -443,7 +748,15 @@ function buildDirectContinuePlan(hostId, sessionId, message) {
443
748
  ensureGeminiApiKey();
444
749
  return {
445
750
  command: executableName('gemini'),
446
- args: ['--yolo', '--skip-trust'],
751
+ args: ['--resume', sessionId, '--yolo', '--skip-trust', '-p', ' '],
752
+ stdin: DIRECT_PREAMBLE + message,
753
+ };
754
+ }
755
+ if (hostId === 'copilot') {
756
+ // Direct continue mode for Copilot: resume session, no FRAIM MCP wiring.
757
+ return {
758
+ command: COPILOT_BINARY,
759
+ args: ['--yolo', '--resume', sessionId],
447
760
  stdin: DIRECT_PREAMBLE + message,
448
761
  };
449
762
  }
@@ -502,10 +815,42 @@ function parseHostLine(hostId, line) {
502
815
  // message so it still surfaces in the Hub timeline.
503
816
  if (hostId === 'gemini') {
504
817
  try {
505
- JSON.parse(trimmed); // validate JSON — if this throws, fall through to plain text
818
+ const parsed = JSON.parse(trimmed);
819
+ if (typeof parsed.session_id === 'string' && parsed.session_id.trim().length > 0) {
820
+ return withSignal({ sessionId: parsed.session_id.trim(), raw: trimmed });
821
+ }
822
+ return withSignal({ raw: trimmed });
823
+ }
824
+ catch {
825
+ if (isGeminiCliNotice(trimmed)) {
826
+ return withSignal({ raw: trimmed });
827
+ }
828
+ return withSignal({ message: trimmed, raw: trimmed });
829
+ }
830
+ }
831
+ // GitHub Copilot CLI output: JSON stream where each event carries a `type`
832
+ // field. Known event shapes (from the agentic CLI stream):
833
+ // { "type": "session.started", "session_id": "..." } — session id
834
+ // { "type": "message", "role": "assistant", "content": "..." } — reply text
835
+ // { "type": "turn.completed", "usage": { ... } } — token usage (same shape as Codex)
836
+ // For any JSON event not matching the above, signal scanning (seekMentoring,
837
+ // agent identity) still runs because withSignal is applied to every parsed result.
838
+ // Non-JSON lines from Copilot are treated as plain-text employee messages.
839
+ if (hostId === 'copilot') {
840
+ try {
841
+ const parsed = JSON.parse(trimmed);
842
+ if (parsed.type === 'session.started' && typeof parsed.session_id === 'string' && parsed.session_id.length > 0) {
843
+ return withSignal({ sessionId: parsed.session_id, raw: trimmed });
844
+ }
845
+ if (parsed.type === 'message' && parsed.role === 'assistant' && typeof parsed.content === 'string') {
846
+ return withSignal({ message: parsed.content, raw: trimmed });
847
+ }
848
+ // All other JSON events: apply signal scanning and surface as raw.
506
849
  return withSignal({ raw: trimmed });
507
850
  }
508
851
  catch {
852
+ // Non-JSON line from Copilot: treat as a plain-text employee message,
853
+ // same pattern as Gemini CLI's non-JSON output.
509
854
  return withSignal({ message: trimmed, raw: trimmed });
510
855
  }
511
856
  }
@@ -531,6 +876,10 @@ function parseHostLine(hostId, line) {
531
876
  return withSignal({ raw: trimmed });
532
877
  }
533
878
  }
879
/**
 * Tells whether a line is exactly one of the fixed notices listed below
 * (the YOLO-mode notice and the ripgrep-fallback notice).
 *
 * @param {string} line - Candidate output line.
 * @returns {boolean} True only on an exact match.
 */
function isGeminiCliNotice(line) {
    const knownNotices = [
        'YOLO mode is enabled. All tool calls will be automatically approved.',
        'Ripgrep is not available. Falling back to GrepTool.',
    ];
    return knownNotices.includes(line);
}
534
883
  function wireHostProcess(hostId, child, handlers) {
535
884
  const wire = (buffer, channel) => {
536
885
  let pending = '';
@@ -561,6 +910,7 @@ function wireHostProcess(hostId, child, handlers) {
561
910
  }
562
911
  function spawnHostProcess(hostId, plan, projectPath, handlers) {
563
912
  const invocation = resolveHostInvocation(plan);
913
+ const startedAtMs = Date.now();
564
914
  const child = (0, child_process_1.spawn)(invocation.command, invocation.args, {
565
915
  cwd: projectPath,
566
916
  stdio: ['pipe', 'pipe', 'pipe'],
@@ -570,20 +920,114 @@ function spawnHostProcess(hostId, plan, projectPath, handlers) {
570
920
  child.stdin.write(plan.stdin);
571
921
  }
572
922
  child.stdin.end();
923
+ if (typeof plan.stdin === 'string' && !plan.args.includes('--resume')) {
924
+ child.once('close', () => {
925
+ const sessionId = discoverSessionIdAfterStart(hostId, projectPath, plan.stdin || '', startedAtMs);
926
+ if (sessionId) {
927
+ handlers.onEvent({ sessionId, raw: `${hostId}-session:${sessionId}` }, 'system');
928
+ }
929
+ });
930
+ }
573
931
  return wireHostProcess(hostId, child, handlers);
574
932
  }
933
/**
 * Post-run session-id discovery hook. Only Gemini is handled here, by
 * delegating to findGeminiSessionIdForPrompt; every other host yields null.
 *
 * @param {string} hostId - Host that was started.
 * @param {string} projectPath - Working directory of the run.
 * @param {string} prompt - Text that was written to the host's stdin.
 * @param {number} startedAtMs - Epoch milliseconds captured before spawning.
 * @returns {string|null} Discovered session id, or null.
 */
function discoverSessionIdAfterStart(hostId, projectPath, prompt, startedAtMs) {
    return hostId === 'gemini'
        ? findGeminiSessionIdForPrompt(projectPath, prompt, startedAtMs)
        : null;
}
938
/**
 * Picks the Gemini session that most plausibly belongs to a run just started
 * with `prompt`.
 *
 * Candidates are session records updated no earlier than 10 seconds before
 * `startedAtMs`, newest first. The newest candidate whose recorded user text
 * matches the prompt wins; when none matches, the newest candidate overall is
 * used as a fallback.
 *
 * A record with no recorded user text never counts as a prompt match (an
 * empty string is trivially "contained" in every prompt), so it cannot
 * outrank a session that really contains the prompt. It stays eligible for
 * the fallback.
 *
 * @param {string} _projectPath - Unused; kept for interface stability.
 * @param {string} prompt - Prompt text that was sent to Gemini.
 * @param {number} startedAtMs - Epoch milliseconds captured before spawning.
 * @returns {string|null} The chosen session id, or null when no recent session exists.
 */
function findGeminiSessionIdForPrompt(_projectPath, prompt, startedAtMs) {
    // Only the first 160 normalized characters of the prompt are compared.
    const promptNeedle = normalizeGeminiPromptForMatch(prompt).slice(0, 160);
    // filter() returns a fresh array, so sorting it does not reorder the reader's result.
    const recent = readGeminiSessionRecords()
        .filter((record) => record.updatedAtMs >= startedAtMs - 10_000)
        .sort((a, b) => b.updatedAtMs - a.updatedAtMs);
    const matchesPrompt = (record) => {
        if (!promptNeedle)
            return true;
        const userText = normalizeGeminiPromptForMatch(record.userText);
        if (!userText)
            return false;
        return userText.includes(promptNeedle) || promptNeedle.includes(userText.slice(0, 80));
    };
    const chosen = recent.find(matchesPrompt) ?? recent[0];
    return chosen?.sessionId || null;
}
957
/**
 * Reads every Gemini session log found under ~/.gemini/tmp and summarizes it.
 *
 * For each file the first non-empty line is parsed as metadata (its trimmed
 * `sessionId` is required), and the text parts of `user` entries among the
 * next seven non-empty lines are collected. Files that cannot be read or
 * parsed are skipped.
 *
 * @returns {Array<{sessionId: string, filePath: string, updatedAtMs: number, userText: string}>}
 *   One record per usable session file; empty when ~/.gemini/tmp does not exist.
 */
function readGeminiSessionRecords() {
    const root = path_1.default.join(os_1.default.homedir(), '.gemini', 'tmp');
    if (!fs_1.default.existsSync(root)) {
        return [];
    }
    // Text of one log line when it is a `user` entry with a content array, else ''.
    const userTextOf = (line) => {
        try {
            const entry = JSON.parse(line);
            const isUserEntry = entry.type === 'user' && Array.isArray(entry.content);
            if (!isUserEntry) {
                return '';
            }
            return entry.content
                .map((part) => (typeof part.text === 'string' ? part.text : ''))
                .join('\n');
        }
        catch {
            return '';
        }
    };
    const records = [];
    for (const filePath of collectGeminiSessionFiles(root)) {
        try {
            const { mtimeMs } = fs_1.default.statSync(filePath);
            const nonEmptyLines = fs_1.default
                .readFileSync(filePath, 'utf8')
                .split(/\r?\n/)
                .filter(Boolean);
            const [header = '{}'] = nonEmptyLines;
            const metadata = JSON.parse(header);
            const sessionId = typeof metadata.sessionId === 'string' ? metadata.sessionId.trim() : '';
            if (sessionId) {
                const userText = nonEmptyLines
                    .slice(1, 8)
                    .map(userTextOf)
                    .filter(Boolean)
                    .join('\n');
                records.push({ sessionId, filePath, updatedAtMs: mtimeMs, userText });
            }
        }
        catch {
            // Ignore malformed or concurrently-written session files.
        }
    }
    return records;
}
993
/**
 * Recursively lists files named `session-*.jsonl` (case-insensitive) under
 * `root`. Directories that cannot be read contribute nothing. Results keep
 * directory-listing order, with each subdirectory's matches placed where that
 * subdirectory appears in its parent's listing.
 *
 * @param {string} root - Directory to search.
 * @returns {string[]} Full paths of matching files.
 */
function collectGeminiSessionFiles(root) {
    const sessionFileName = /^session-.*\.jsonl$/i;
    const walk = (dir) => {
        let dirents;
        try {
            dirents = fs_1.default.readdirSync(dir, { withFileTypes: true });
        }
        catch {
            return [];
        }
        return dirents.flatMap((dirent) => {
            const fullPath = path_1.default.join(dir, dirent.name);
            if (dirent.isDirectory()) {
                return walk(fullPath);
            }
            return sessionFileName.test(dirent.name) ? [fullPath] : [];
        });
    };
    return walk(root);
}
1016
/**
 * Canonical form used when comparing prompt text: falsy input becomes '',
 * every run of whitespace collapses to one space, and the ends are trimmed.
 *
 * @param {unknown} value - Text to normalize.
 * @returns {string} The normalized text.
 */
function normalizeGeminiPromptForMatch(value) {
    const text = String(value || '');
    return text.split(/\s+/).join(' ').trim();
}
575
1019
  class CliHostRuntime {
576
1020
  detectEmployees() {
577
1021
  return detectEmployees();
578
1022
  }
579
- startRun(hostId, projectPath, message, handlers) {
580
- return spawnHostProcess(hostId, buildStartPlan(hostId, message), projectPath, handlers);
1023
+ startRun(hostId, projectPath, message, handlers, sessionId) {
1024
+ return spawnHostProcess(hostId, buildStartPlan(hostId, message, sessionId), projectPath, handlers);
581
1025
  }
582
1026
  continueRun(hostId, projectPath, sessionId, message, handlers) {
583
1027
  return spawnHostProcess(hostId, buildContinuePlan(hostId, sessionId, message), projectPath, handlers);
584
1028
  }
585
- startDirectRun(hostId, message, projectPath, handlers) {
586
- return spawnHostProcess(hostId, buildDirectStartPlan(hostId, message), projectPath, handlers);
1029
+ startDirectRun(hostId, message, projectPath, handlers, sessionId) {
1030
+ return spawnHostProcess(hostId, buildDirectStartPlan(hostId, message, sessionId), projectPath, handlers);
587
1031
  }
588
1032
  continueDirectRun(hostId, sessionId, message, projectPath, handlers) {
589
1033
  return spawnHostProcess(hostId, buildDirectContinuePlan(hostId, sessionId, message), projectPath, handlers);
@@ -596,18 +1040,21 @@ class FakeHostRuntime {
596
1040
  { id: 'codex', label: 'Codex', available: true, detail: 'Test double employee.', supportsRaw: true },
597
1041
  { id: 'claude', label: 'Claude Code', available: true, detail: 'Test double employee.', supportsRaw: true },
598
1042
  { id: 'gemini', label: 'Gemini CLI', available: true, detail: 'Test double employee.', supportsRaw: true },
1043
+ { id: 'copilot', label: 'GitHub Copilot CLI', available: true, detail: 'Test double agent tool.', supportsRaw: true },
599
1044
  ];
600
1045
  }
601
1046
  detectEmployees() {
602
1047
  return this.employees;
603
1048
  }
604
- startRun(hostId, _projectPath, message, handlers) {
1049
+ startRun(hostId, _projectPath, message, handlers, _sessionId) {
1050
+ this.lastStartMessage = message;
605
1051
  return this.fakeProcess(hostId, this.fakeEmployeeReply('start', message), handlers);
606
1052
  }
607
1053
  continueRun(hostId, _projectPath, sessionId, message, handlers) {
1054
+ this.lastContinueMessage = message;
608
1055
  return this.fakeProcess(hostId, this.fakeEmployeeReply('continue', message), handlers);
609
1056
  }
610
- startDirectRun(hostId, _message, _projectPath, handlers) {
1057
+ startDirectRun(hostId, _message, _projectPath, handlers, _sessionId) {
611
1058
  return this.fakeProcess(hostId, 'Understood. Working directly on that now.', handlers);
612
1059
  }
613
1060
  continueDirectRun(hostId, _sessionId, _message, _projectPath, handlers) {
@@ -676,6 +1123,7 @@ class ScriptedHostRuntime {
676
1123
  { id: 'codex', label: 'Codex', available: true, detail: 'Scripted test double.', supportsRaw: true },
677
1124
  { id: 'claude', label: 'Claude Code', available: true, detail: 'Scripted test double.', supportsRaw: true },
678
1125
  { id: 'gemini', label: 'Gemini CLI', available: true, detail: 'Scripted test double.', supportsRaw: true },
1126
+ { id: 'copilot', label: 'GitHub Copilot CLI', available: true, detail: 'Scripted test double.', supportsRaw: true },
679
1127
  ];
680
1128
  // Track each active run so the test can emit signals at it. Key is the
681
1129
  // sessionId we hand back on startRun; mapping sessionId → handlers
@@ -688,8 +1136,8 @@ class ScriptedHostRuntime {
688
1136
  detectEmployees() {
689
1137
  return this.employees;
690
1138
  }
691
- startRun(_hostId, _projectPath, _message, handlers) {
692
- const sessionId = (0, crypto_1.randomUUID)();
1139
+ startRun(_hostId, _projectPath, _message, handlers, requestedSessionId) {
1140
+ const sessionId = requestedSessionId || (0, crypto_1.randomUUID)();
693
1141
  handlers.onEvent({ sessionId, raw: 'scripted-session-start' }, 'system');
694
1142
  this.handlersBySession.set(sessionId, handlers);
695
1143
  return this.spawnDouble();
@@ -699,8 +1147,8 @@ class ScriptedHostRuntime {
699
1147
  handlers.onEvent({ sessionId, raw: 'scripted-session-resume' }, 'system');
700
1148
  return this.spawnDouble();
701
1149
  }
702
- startDirectRun(_hostId, _message, _projectPath, handlers) {
703
- const sessionId = (0, crypto_1.randomUUID)();
1150
+ startDirectRun(_hostId, _message, _projectPath, handlers, requestedSessionId) {
1151
+ const sessionId = requestedSessionId || (0, crypto_1.randomUUID)();
704
1152
  handlers.onEvent({ sessionId, raw: 'scripted-direct-session-start' }, 'system');
705
1153
  return this.spawnDouble();
706
1154
  }