@phnx-labs/agents-cli 1.20.17 → 1.20.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +1 -1
  3. package/dist/commands/budget.d.ts +14 -0
  4. package/dist/commands/budget.js +137 -0
  5. package/dist/commands/cost.d.ts +12 -0
  6. package/dist/commands/cost.js +139 -0
  7. package/dist/commands/exec.d.ts +20 -0
  8. package/dist/commands/exec.js +382 -5
  9. package/dist/commands/secrets.d.ts +15 -0
  10. package/dist/commands/secrets.js +250 -4
  11. package/dist/commands/sessions.js +4 -0
  12. package/dist/index.js +4 -0
  13. package/dist/lib/budget/config.d.ts +9 -0
  14. package/dist/lib/budget/config.js +115 -0
  15. package/dist/lib/budget/enforce.d.ts +94 -0
  16. package/dist/lib/budget/enforce.js +151 -0
  17. package/dist/lib/budget/ledger.d.ts +61 -0
  18. package/dist/lib/budget/ledger.js +107 -0
  19. package/dist/lib/budget/preflight.d.ts +110 -0
  20. package/dist/lib/budget/preflight.js +200 -0
  21. package/dist/lib/checkpoint.d.ts +54 -0
  22. package/dist/lib/checkpoint.js +56 -0
  23. package/dist/lib/cloud/rush.js +18 -0
  24. package/dist/lib/exec.d.ts +36 -0
  25. package/dist/lib/exec.js +192 -4
  26. package/dist/lib/git.d.ts +18 -0
  27. package/dist/lib/git.js +67 -4
  28. package/dist/lib/loop.d.ts +145 -0
  29. package/dist/lib/loop.js +330 -0
  30. package/dist/lib/mcp.d.ts +7 -0
  31. package/dist/lib/mcp.js +24 -0
  32. package/dist/lib/models.d.ts +11 -0
  33. package/dist/lib/models.js +21 -0
  34. package/dist/lib/plugins.js +5 -2
  35. package/dist/lib/pricing/cost.d.ts +46 -0
  36. package/dist/lib/pricing/cost.js +71 -0
  37. package/dist/lib/pricing/index.d.ts +8 -0
  38. package/dist/lib/pricing/index.js +8 -0
  39. package/dist/lib/pricing/prices.json +138 -0
  40. package/dist/lib/pricing/table.d.ts +17 -0
  41. package/dist/lib/pricing/table.js +73 -0
  42. package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
  43. package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
  44. package/dist/lib/secrets/agent.d.ts +134 -0
  45. package/dist/lib/secrets/agent.js +501 -0
  46. package/dist/lib/secrets/bundles.d.ts +21 -0
  47. package/dist/lib/secrets/bundles.js +43 -0
  48. package/dist/lib/session/db.d.ts +40 -0
  49. package/dist/lib/session/db.js +84 -2
  50. package/dist/lib/session/discover.d.ts +2 -0
  51. package/dist/lib/session/discover.js +126 -2
  52. package/dist/lib/session/render.d.ts +2 -0
  53. package/dist/lib/session/render.js +1 -1
  54. package/dist/lib/session/types.d.ts +4 -0
  55. package/dist/lib/teams/agents.d.ts +32 -0
  56. package/dist/lib/teams/agents.js +66 -3
  57. package/dist/lib/teams/api.js +20 -0
  58. package/dist/lib/teams/parsers.js +16 -4
  59. package/dist/lib/types.d.ts +48 -0
  60. package/dist/lib/workflows.d.ts +56 -0
  61. package/dist/lib/workflows.js +72 -5
  62. package/package.json +2 -1
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Harness-level loop checkpoint (issue #332).
3
+ *
4
+ * A checkpoint is the durable harness state for a `--loop` run: it records the
5
+ * iteration count, the pinned session id, the prompt being re-injected, and the
6
+ * loop config — everything `--resume-checkpoint` needs to continue a run that a
7
+ * SIGTERM, timeout, or machine sleep killed mid-flight.
8
+ *
9
+ * This is NOT provider-side state. `--session-id` resumes Claude's *conversation*
10
+ * (server-side); a checkpoint resumes the *harness* (iteration count, loop
11
+ * variables, prompt chain) — the part Claude's own resume cannot recover.
12
+ *
13
+ * Atomic write (temp + rename) mirrors `writeRunMeta` in routines.ts so a crash
14
+ * mid-write never leaves a half-written checkpoint that `readCheckpoint` would
15
+ * choke on. `readCheckpoint` returns null on a missing or corrupt file (mirrors
16
+ * `readRunMeta`) — a corrupt checkpoint is a "start fresh", never a throw.
17
+ */
18
+ import * as fs from 'fs';
19
+ import * as path from 'path';
20
+ import { getRunsDir } from './state.js';
21
/**
 * Resolve the on-disk location of a run's checkpoint.
 *
 * Layout: `<runsDir>/<runId>/checkpoint.json`, where `<runsDir>` is whatever
 * `getRunsDir()` returns at call time.
 *
 * @param runId Identifier of the run; used as the directory name.
 * @returns Path to that run's `checkpoint.json`.
 */
export function checkpointPath(runId) {
    const runDirectory = path.join(getRunsDir(), runId);
    return path.join(runDirectory, 'checkpoint.json');
}
25
/**
 * Write a checkpoint atomically (temp file + rename).
 *
 * The checkpoint is serialized to a sibling temp file
 * (`<target>.<pid>.tmp`) and then renamed over the target. The rename is
 * atomic on a single filesystem, so a reader never observes a partially
 * written file.
 *
 * If the write or the rename fails, the temp file is removed (best-effort)
 * before the original error is rethrown, so a failed write does not leave
 * `*.tmp` litter next to the checkpoint.
 *
 * @param c    Checkpoint to persist. `c.id` selects the default location.
 * @param file Optional explicit target path; defaults to `checkpointPath(c.id)`.
 * @throws The underlying `fs` error when the directory, write, or rename fails.
 */
export function writeCheckpoint(c, file) {
    const target = file ?? checkpointPath(c.id);
    fs.mkdirSync(path.dirname(target), { recursive: true });
    const tmp = `${target}.${process.pid}.tmp`;
    try {
        fs.writeFileSync(tmp, JSON.stringify(c, null, 2), 'utf-8');
        fs.renameSync(tmp, target);
    }
    catch (err) {
        // Clean up the orphaned temp file; never let cleanup mask the real error.
        try {
            fs.unlinkSync(tmp);
        }
        catch { /* temp file may not exist — nothing to clean up */ }
        throw err;
    }
}
37
/**
 * Read a checkpoint from disk.
 *
 * Returns null — never throws — when there is no resumable state: the file is
 * missing or unreadable, its contents are not valid JSON, the JSON is not an
 * object, `id` is not a string, or `iteration` is not a non-negative integer.
 * The caller treats null as a fresh start.
 *
 * `iteration` is checked with `Number.isInteger` rather than `typeof`, because
 * `JSON.parse` turns an overflowing literal such as `1e999` into `Infinity`,
 * and a negative or fractional count is equally unusable as a loop position.
 *
 * @param file Path to the checkpoint JSON file.
 * @returns The parsed checkpoint object, or null if missing/corrupt/invalid.
 */
export function readCheckpoint(file) {
    try {
        // Reading inside the try covers the missing-file case too, so no
        // separate existence check (and no check-then-read race) is needed.
        const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
        if (!parsed || typeof parsed !== 'object')
            return null;
        if (typeof parsed.id !== 'string')
            return null;
        if (!Number.isInteger(parsed.iteration) || parsed.iteration < 0)
            return null;
        return parsed;
    }
    catch {
        return null;
    }
}
@@ -341,6 +341,24 @@ export class RushCloudProvider {
341
341
  if (repos.length === 0) {
342
342
  throw new Error('Rush Cloud requires --repo <owner/repo> (or --repo repeated for multi-repo).');
343
343
  }
344
+ // Budget pre-flight gate (issue #346). Cloud dispatches inherit the local
345
+ // project's caps; we refuse to POST a run that would breach an on_exceed:block
346
+ // cap. The repo slug is the project attribution key. Server-side spend is
347
+ // authoritative for live enforcement; this pre-flight is the deterministic
348
+ // "don't even start it" guard. Dormant when no caps are configured.
349
+ {
350
+ const { runPreflightGate } = await import('../budget/preflight.js');
351
+ const projectKey = repos[0] ?? process.cwd();
352
+ const gate = runPreflightGate({
353
+ agent: options.agent ?? 'cloud',
354
+ model: options.model ?? `${options.agent ?? 'cloud'}-default`,
355
+ prompt: options.prompt,
356
+ project: projectKey,
357
+ });
358
+ if (!gate.dormant && !gate.decision.allow) {
359
+ throw new Error(`[budget] BLOCKED cloud dispatch (${projectKey}): ${gate.decision.reason}`);
360
+ }
361
+ }
344
362
  // Validate each repo's shape and resolve its installation_id up front.
345
363
  // Any bad entry fails the whole dispatch — we never want a half-started
346
364
  // multi-repo run that only found installations for some of the repos.
@@ -82,6 +82,23 @@ export interface ExecOptions {
82
82
  sessionId?: string;
83
83
  verbose?: boolean;
84
84
  env?: Record<string, string>;
85
+ /**
86
+ * Workflow capability scoping (Claude only). Sourced from WORKFLOW.md
87
+ * frontmatter `tools:` / `mcpServers:` and translated to Claude headless
88
+ * flags in buildExecCommand. Other agents ignore these.
89
+ *
90
+ * `toolsRestrict` is the AVAILABLE-tool allowlist: it maps to `--tools`, which
91
+ * restricts the built-in tool set the run can use at all (NOT `--allowedTools`,
92
+ * which only auto-approves without restricting availability). Declaring
93
+ * `[Read, Grep]` makes Write/Bash/Edit unavailable for the whole run.
94
+ */
95
+ toolsRestrict?: string[];
96
+ /**
97
+ * Path to an ephemeral mcp-config JSON. Emitted as `--mcp-config <path>`
98
+ * together with `--strict-mcp-config` so ONLY the named servers load
99
+ * (`--mcp-config` on its own merely ADDS to the existing server set).
100
+ */
101
+ mcpConfigPath?: string;
85
102
  }
86
103
  /**
87
104
  * Resolve interactive vs headless. Explicit flags are definitive and win over
@@ -90,6 +107,23 @@ export interface ExecOptions {
90
107
  * `--interactive` takes precedence over `--headless`; the CLI layer rejects passing both.
91
108
  */
92
109
  export declare function resolveInteractive(options: Pick<ExecOptions, 'interactive' | 'headless' | 'prompt'>): boolean;
110
+ /**
111
+ * Decide whether spawnAgent must capture (PIPE + tee) the child's stdout so the
112
+ * live budget watcher can parse it (issue #346, FIX 3).
113
+ *
114
+ * The bug this fixes: stdout used to be PIPED only when downstream output was
115
+ * piped (`piped = !isTTY`). For a normal headless run AT A TERMINAL, stdout was
116
+ * 'inherit', so `child.stdout` was null and the watcher — hence the mid-run
117
+ * hard-cap kill — was silently skipped. We now tap stdout for ALL
118
+ * non-interactive runs when caps are active, regardless of TTY, and tee it back
119
+ * so the user still sees output. Interactive REPLs are never tapped (the human
120
+ * owns the TTY; they rely on the pre-flight gate).
121
+ *
122
+ * @param interactive resolveInteractive() result for the run
123
+ * @param piped true when the parent's stdout is NOT a TTY (output piped)
124
+ * @param capsActive true when a budget watcher is attached (caps configured)
125
+ */
126
+ export declare function shouldTapStdout(interactive: boolean, piped: boolean, capsActive: boolean): boolean;
93
127
  /** Parse an array of KEY=VALUE strings into an env record. Returns undefined for empty input. */
94
128
  export declare function parseExecEnv(entries: string[]): Record<string, string> | undefined;
95
129
  /**
@@ -135,6 +169,8 @@ export declare function execAgent(options: ExecOptions): Promise<number>;
135
169
  * keeping version resolution in one place instead of reimplementing it in batch.
136
170
  */
137
171
  export declare function execShimPassthrough(agent: AgentId, rawArgs: string[], cwd: string, pinnedVersion?: string): Promise<number>;
172
+ /** Exit code spawnAgent resolves with when a run is killed for crossing a budget cap. */
173
+ export declare const BUDGET_KILL_EXIT_CODE = 7;
138
174
  /**
139
175
  * Patterns that indicate a rate/usage limit. Matching is intentionally broad
140
176
  * because providers phrase these differently -- Anthropic uses "5-hour limit"
package/dist/lib/exec.js CHANGED
@@ -114,6 +114,29 @@ export function resolveInteractive(options) {
114
114
  return false;
115
115
  return options.prompt === undefined;
116
116
  }
117
/**
 * Tell spawnAgent whether the child's stdout has to be captured (piped, then
 * tee'd back) instead of inherited (issue #346, FIX 3).
 *
 * Background: stdout used to be piped only when our own stdout was not a TTY.
 * A headless run at a terminal therefore inherited stdout, `child.stdout` was
 * null, and the live budget watcher — and with it the mid-run hard-cap kill —
 * was silently skipped. Non-interactive runs are now tapped whenever caps are
 * active, TTY or not. Interactive REPLs are never tapped: the human owns the
 * TTY and those runs rely on the pre-flight gate.
 *
 * @param interactive resolveInteractive() result for the run
 * @param piped       true when the parent's stdout is NOT a TTY (output piped)
 * @param capsActive  true when a budget watcher is attached (caps configured)
 * @returns false for interactive runs; otherwise `piped || capsActive`
 */
export function shouldTapStdout(interactive, piped, capsActive) {
    // Non-interactive: pipe when the caller pipes us downstream (keeps
    // composability) or when the watcher needs to read the stream at a TTY.
    return interactive ? false : (piped || capsActive);
}
117
140
  /** Pattern for valid environment variable names (C identifier rules). */
118
141
  const EXEC_ENV_KEY_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
119
142
  /** Parse a single KEY=VALUE string into a tuple, validating the key name. */
@@ -540,6 +563,39 @@ export function buildExecCommand(options) {
540
563
  cmd.push('--add-dir', dir);
541
564
  }
542
565
  }
566
+ // Claude-specific: workflow capability scoping. WORKFLOW.md frontmatter
567
+ // `tools:` / `mcpServers:` is translated to the headless flags that ACTUALLY
568
+ // restrict the run (verified against `claude --help` on the installed CLI):
569
+ //
570
+ // tools: -> `--tools <names...>` — restricts the AVAILABLE built-in
571
+ // tool set. This is the security boundary: tools NOT named
572
+ // here (e.g. Write, Bash, Edit) are unavailable for the whole
573
+ // run. `--allowedTools` would only auto-approve without
574
+ // restricting, so it is the WRONG flag for sandboxing.
575
+ // We also emit `--allowedTools <names...>` for the same set so
576
+ // the permitted tools don't prompt in headless `-p` mode.
577
+ // mcpServers: -> `--mcp-config <path>` PLUS `--strict-mcp-config`. The
578
+ // config flag alone ADDS servers to the existing set; only
579
+ // `--strict-mcp-config` makes the run use *only* the named
580
+ // servers, which is what scoping means.
581
+ //
582
+ // The command layer gates this behind the `allowlist` capability and assembles
583
+ // the mcp-config file; buildExecCommand stays a pure string-builder.
584
+ //
585
+ // `<tools...>` is variadic. Emit the names as separate argv tokens. The flags
586
+ // here are appended AFTER the positional prompt (added above), so the variadic
587
+ // never swallows the prompt; the trailing `--allowedTools` / `--strict-mcp-config`
588
+ // tokens also terminate the `--tools` variadic cleanly.
589
+ if (options.agent === 'claude') {
590
+ if (options.toolsRestrict && options.toolsRestrict.length > 0) {
591
+ cmd.push('--tools', ...options.toolsRestrict);
592
+ cmd.push('--allowedTools', ...options.toolsRestrict);
593
+ }
594
+ if (options.mcpConfigPath) {
595
+ cmd.push('--mcp-config', options.mcpConfigPath);
596
+ cmd.push('--strict-mcp-config');
597
+ }
598
+ }
543
599
  return cmd;
544
600
  }
545
601
  /** Spawn an agent and return its exit code. Convenience wrapper over spawnAgent. */
@@ -599,6 +655,15 @@ async function spawnAgent(options) {
599
655
  const timeoutMs = options.timeout ? parseTimeout(options.timeout) : undefined;
600
656
  const piped = !process.stdout.isTTY;
601
657
  const interactive = resolveInteractive(options);
658
+ // Budget live kill-switch (issue #346). For headless runs we incrementally
659
+ // parse stream-json usage off stdout, accumulate cost, and kill the child the
660
+ // moment a configured cap is crossed — exactly like the --timeout path, but
661
+ // resolving with a DISTINCT exit code so CI/headless can tell budget-kill from
662
+ // timeout. Spend is recorded to the shared ledger in the close handler. The
663
+ // watcher is dormant (and zero-cost) when no caps are configured.
664
+ const cwd = options.cwd || process.cwd();
665
+ const runId = randomUUID();
666
+ const watcherState = await setupBudgetWatcher(options, cwd, runId);
602
667
  maybeRotate();
603
668
  const timer = createTimer('agent.run', {
604
669
  agent: options.agent,
@@ -617,9 +682,13 @@ async function spawnAgent(options) {
617
682
  // rendering, raw-mode keystrokes, colored output). Headless mode pipes
618
683
  // stderr so we can scan for rate limits and feed fallback. stdout stays
619
684
  // inherited for TTY, piped when the caller pipes us downstream.
685
+ // PIPE (and later tee) stdout whenever the live budget watcher must read it
686
+ // — for ALL non-interactive runs when caps are active, regardless of TTY.
687
+ // See shouldTapStdout() for the rationale (FIX 3, issue #346).
688
+ const tapStdout = shouldTapStdout(interactive, piped, watcherState !== null);
620
689
  const stdio = interactive
621
690
  ? ['inherit', 'inherit', 'inherit']
622
- : ['inherit', piped ? 'pipe' : 'inherit', 'pipe'];
691
+ : ['inherit', tapStdout ? 'pipe' : 'inherit', 'pipe'];
623
692
  // On Windows, .cmd batch wrappers (npm-installed CLIs) require shell:true
624
693
  // whether addressed by name or absolute path.
625
694
  const useShell = process.platform === 'win32' && (!path.isAbsolute(executable) || executable.endsWith('.cmd'));
@@ -631,8 +700,29 @@ async function spawnAgent(options) {
631
700
  });
632
701
  // Mark startup time (time from function call to process spawn)
633
702
  timer.mark('startup');
634
- if (!interactive && piped && child.stdout) {
703
+ let budgetKilled = false;
704
+ let budgetKillTimer;
705
+ if (!interactive && tapStdout && child.stdout) {
706
+ // TEE the child's stdout back to the parent's so the user still sees
707
+ // output (mirrors stdio:'inherit') while we tap the same stream for usage.
635
708
  child.stdout.pipe(process.stdout);
709
+ // Tap the same stream for budget usage events without consuming the pipe
710
+ // (a 'data' listener and .pipe() both receive every chunk). Kill on breach.
711
+ if (watcherState) {
712
+ let pendingLine = '';
713
+ child.stdout.on('data', (chunk) => {
714
+ const { events, rest } = watcherState.extract(chunk.toString('utf-8'), pendingLine);
715
+ pendingLine = rest;
716
+ for (const ev of events)
717
+ watcherState.watcher.feedUsage(ev);
718
+ if (watcherState.watcher.breached() && !budgetKilled) {
719
+ budgetKilled = true;
720
+ process.stderr.write(`[budget] hard cap exceeded — terminating ${options.agent} run\n`);
721
+ child.kill('SIGTERM');
722
+ budgetKillTimer = setTimeout(() => child.kill('SIGKILL'), 5000);
723
+ }
724
+ });
725
+ }
636
726
  }
637
727
  let stderrBuffer = '';
638
728
  const STDERR_BUFFER_CAP = 64 * 1024;
@@ -663,11 +753,94 @@ async function spawnAgent(options) {
663
753
  child.on('close', (code) => {
664
754
  if (timeoutTimer)
665
755
  clearTimeout(timeoutTimer);
666
- timer.end({ exitCode: code ?? 0, status: code === 0 ? 'success' : 'failed' });
667
- resolve({ exitCode: code ?? 0, stderr: stderrBuffer });
756
+ // Clear the budget-kill SIGKILL escalation timer (mirror the --timeout
757
+ // timer cleanup) so a programmatic caller reusing execAgent (the #332 loop
758
+ // driver) never sees a stray 5s kill event fire after the child has exited.
759
+ if (budgetKillTimer)
760
+ clearTimeout(budgetKillTimer);
761
+ // Record final spend to the shared ledger (issue #346). Best-effort: a
762
+ // ledger write must never mask the run's own outcome.
763
+ if (watcherState) {
764
+ try {
765
+ watcherState.finalize();
766
+ }
767
+ catch { /* ledger write is non-critical */ }
768
+ // Release the watcher's references / stop accepting events (symmetry).
769
+ try {
770
+ watcherState.watcher.dispose();
771
+ }
772
+ catch { /* dispose is best-effort */ }
773
+ }
774
+ // Budget kill resolves with a DISTINCT non-zero exit so CI/headless and
775
+ // teams/cloud can tell a budget termination apart from a normal failure.
776
+ const exitCode = budgetKilled ? BUDGET_KILL_EXIT_CODE : (code ?? 0);
777
+ timer.end({ exitCode, status: budgetKilled ? 'budget_killed' : code === 0 ? 'success' : 'failed' });
778
+ resolve({ exitCode, stderr: stderrBuffer });
668
779
  });
669
780
  });
670
781
  }
782
+ /** Exit code spawnAgent resolves with when a run is killed for crossing a budget cap. */
783
+ export const BUDGET_KILL_EXIT_CODE = 7;
784
/**
 * Build the live budget watcher for one run, or return null when the run is
 * not watched.
 *
 * Returns null for interactive runs and when the resolved budget config has no
 * caps, so users without budgets skip the ledger load and watcher setup.
 * Otherwise the watcher is seeded from the spend
 * already on the ledger (today, this project, this agent today) and returned
 * together with:
 *   - `extract(chunk, pending)`: parses usage events out of a stdout chunk and
 *     remembers each one for the final ledger record; returns the parser result.
 *   - `finalize()`: appends one ledger record per remembered usage event.
 *
 * @param options Exec options for the run (agent, model, prompt, flags).
 * @param cwd     Working directory; used as the project attribution key.
 * @param runId   Identifier stamped on every ledger record for this run.
 */
async function setupBudgetWatcher(options, cwd, runId) {
    // Interactive sessions are never tapped, so there is nothing to watch.
    if (resolveInteractive(options)) {
        return null;
    }
    // Budget modules are loaded lazily so non-headless paths never import them.
    const [configModule, enforceModule, ledger] = await Promise.all([
        import('./budget/config.js'),
        import('./budget/enforce.js'),
        import('./budget/ledger.js'),
    ]);
    const { resolveBudgetConfig, hasAnyCap } = configModule;
    const { makeLiveSpendWatcher, capsFromConfig, extractUsageEvents } = enforceModule;
    const budgetConfig = resolveBudgetConfig(cwd);
    if (!hasAnyCap(budgetConfig)) {
        return null;
    }
    // Seed the caps with what has already been spent according to the ledger.
    const today = ledger.localDay();
    const ledgerEntries = ledger.loadLedger();
    const daySpend = ledger.spendForDay(today, ledgerEntries);
    const projectSpend = ledger.spendForProject(cwd, ledgerEntries);
    const agentDaySpend = {
        [options.agent]: ledger.spendForAgentDay(options.agent, today, ledgerEntries),
    };
    const caps = capsFromConfig(budgetConfig, { daySpend, projectSpend, agentDaySpend });
    const watcher = makeLiveSpendWatcher({ caps, onBreach: () => { } });
    // Usage events observed during the run, kept for the final ledger write.
    const observed = [];
    const fallbackModel = options.model ?? `${options.agent}-default`;
    const extract = (chunk, pending) => {
        const result = extractUsageEvents(chunk, pending, fallbackModel, options.agent);
        for (const event of result.events) {
            const usage = {
                inputTokens: event.inputTokens,
                outputTokens: event.outputTokens,
                cacheReadTokens: event.cacheReadTokens,
                cacheCreationTokens: event.cacheCreationTokens,
            };
            observed.push({ model: event.model ?? fallbackModel, usage });
        }
        return result;
    };
    const finalize = () => {
        observed.forEach((record) => {
            ledger.recordSpend({
                runId,
                agent: options.agent,
                project: cwd,
                model: record.model,
                usage: record.usage,
                source: 'run',
            });
        });
    };
    return { watcher, extract, finalize };
}
671
844
  /**
672
845
  * Patterns that indicate a rate/usage limit. Matching is intentionally broad
673
846
  * because providers phrase these differently -- Anthropic uses "5-hour limit"
@@ -733,6 +906,21 @@ export async function runWithFallback(options) {
733
906
  ];
734
907
  let prevAgent;
735
908
  let prevSessionId;
909
+ // Workflow capability scoping only takes effect on claude (buildExecCommand
910
+ // guards `--tools` / `--mcp-config` / `--strict-mcp-config` on agent==='claude').
911
+ // A fallback to any non-claude agent would run with NONE of that scoping — the
912
+ // declared sandbox silently evaporates. Warn loudly so a rate-limit handoff to
913
+ // an unscoped agent is never silent (issue #324 fail-open).
914
+ const scopingActive = (options.toolsRestrict && options.toolsRestrict.length > 0)
915
+ || !!options.mcpConfigPath;
916
+ if (scopingActive) {
917
+ const unscoped = options.fallback.filter(f => f.agent !== 'claude').map(f => f.agent);
918
+ if (unscoped.length > 0) {
919
+ process.stderr.write(`[agents] WARNING: workflow tool/MCP scoping is enforced on claude only. ` +
920
+ `Fallback agent(s) ${[...new Set(unscoped)].join(', ')} would run UNSCOPED ` +
921
+ `(no --tools / --strict-mcp-config restriction) if claude hits a rate limit.\n`);
922
+ }
923
+ }
736
924
  for (let i = 0; i < chain.length; i++) {
737
925
  const { agent, version } = chain[i];
738
926
  const pinnedSessionId = agent === 'claude' ? randomUUID() : undefined;
package/dist/lib/git.d.ts CHANGED
@@ -1,3 +1,21 @@
1
+ /**
2
+ * Validate that a clone/pull source uses a safe git transport before it is
3
+ * handed to `git`.
4
+ *
5
+ * Git's remote-helper transports (`ext::`, `fd::`, …) execute arbitrary
6
+ * commands at clone time, `http://`/`file://`/`git://` are unencrypted or unauthenticated, and a source
7
+ * beginning with `-` is parsed by `git` as a command-line flag (option
8
+ * injection). We therefore allow only:
9
+ * - `https://` (encrypted + authenticated)
10
+ * - `ssh://` and SCP-style `git@host:path` / `host:path`
11
+ * - local filesystem paths (callers handle these before reaching `git clone`)
12
+ *
13
+ * Pure string inspection — no filesystem or platform calls — so it behaves
14
+ * identically on Linux, macOS, and Windows.
15
+ *
16
+ * @throws Error if the source uses a disallowed transport.
17
+ */
18
+ export declare function assertSafeGitTransport(source: string): void;
1
19
  /** Parsed representation of a git source string (GitHub, generic URL, or local path). */
2
20
  export interface GitSource {
3
21
  type: 'github' | 'url' | 'local';
package/dist/lib/git.js CHANGED
@@ -11,17 +11,76 @@ import * as path from 'path';
11
11
  import { IS_WINDOWS, isWindowsAbsolutePath } from './platform/index.js';
12
12
  import { getPackageLocalPath } from './state.js';
13
13
  import { DEFAULT_SYSTEM_REPO, systemRepoSlug } from './types.js';
14
/**
 * Guard that rejects unsafe clone/pull sources before they reach `git`.
 *
 * Checks, in order, against the whitespace-trimmed source:
 *   1. a leading `-`, which `git` would parse as a command-line option;
 *   2. a remote-helper prefix `<name>::` (`ext::`, `fd::`, …), which can run
 *      arbitrary commands at clone time;
 *   3. an explicit `<scheme>://` other than `https` or `ssh` (compared
 *      case-insensitively), e.g. `http://`, `file://`, `git://`.
 *
 * Anything without a scheme passes: that is either SCP-style SSH
 * (`git@host:path` / `host:path`) or a local filesystem path.
 *
 * Pure string inspection — no filesystem or platform calls.
 *
 * @param source Clone/pull source exactly as supplied by the caller.
 * @throws Error if the source uses a disallowed transport.
 */
export function assertSafeGitTransport(source) {
    const candidate = source.trim();
    // Option injection: git treats a leading dash as a flag, not a source.
    if (candidate.startsWith('-')) {
        throw new Error(`Refusing to use git source "${source}": a source starting with "-" is interpreted as a git option.`);
    }
    // "<name>::…" selects a remote helper. SCP-style "git@host:path" has a
    // single ":" and therefore does not match.
    const usesRemoteHelper = /^[a-zA-Z][a-zA-Z0-9+.-]*::/.test(candidate);
    if (usesRemoteHelper) {
        throw new Error(`Refusing to use git source "${source}": git remote-helper transports (ext::, fd::, …) are not allowed.`);
    }
    const schemeMatch = /^([a-zA-Z][a-zA-Z0-9+.-]*):\/\//.exec(candidate);
    if (schemeMatch === null) {
        // No scheme: SCP-style SSH or a local path; both are accepted.
        return;
    }
    const name = schemeMatch[1].toLowerCase();
    const allowedSchemes = ['https', 'ssh'];
    if (!allowedSchemes.includes(name)) {
        throw new Error(`Refusing to use git source "${source}": "${name}://" is not an allowed transport (use https:// or ssh://).`);
    }
}
53
/**
 * Report whether the user has opted in to installing a repo's `.githooks/`.
 *
 * Installed hooks are executed by `git` on later operations, and a repo added
 * from an arbitrary source is untrusted, so installation is off unless the
 * environment variable `AGENTS_ENABLE_GITHOOKS` is exactly `1` or `true`.
 *
 * @returns true only for the exact values `1` and `true`; false otherwise
 *          (including unset, empty, or differently-cased values).
 */
function githooksEnabled() {
    switch (process.env.AGENTS_ENABLE_GITHOOKS) {
        case '1':
        case 'true':
            return true;
        default:
            return false;
    }
}
14
65
  /**
15
66
  * Install hooks from `.githooks/` by symlinking each entry into `.git/hooks/`.
16
67
  *
17
- * Why: `git config core.hooksPath` is a known sandbox-escape vector and is
18
- * blocked by some sandboxed environments (e.g. Claude Code). Symlinks inside
19
- * `.git/hooks/` sidestep that restriction entirely -- Git runs them the same way.
68
+ * Gated behind `AGENTS_ENABLE_GITHOOKS=1` (see {@link githooksEnabled}) because
69
+ * the hooks run code on git operations and the source repo may be untrusted.
70
+ *
71
+ * Why symlinks rather than `git config core.hooksPath`: `core.hooksPath` is a
72
+ * known sandbox-escape vector and is blocked by some sandboxed environments
73
+ * (e.g. Claude Code). Symlinks inside `.git/hooks/` run the same way.
20
74
  */
21
75
  function installGithooksSymlinks(repoDir) {
22
76
  const githooksDir = path.join(repoDir, '.githooks');
23
77
  if (!fs.existsSync(githooksDir))
24
78
  return;
79
+ if (!githooksEnabled()) {
80
+ console.error(`Skipped installing git hooks from ${githooksDir} (they run code on git operations).\n` +
81
+ ` Set AGENTS_ENABLE_GITHOOKS=1 to enable hooks for repos you trust.`);
82
+ return;
83
+ }
25
84
  const hooksDir = path.join(repoDir, '.git', 'hooks');
26
85
  fs.mkdirSync(hooksDir, { recursive: true });
27
86
  for (const name of fs.readdirSync(githooksDir)) {
@@ -121,7 +180,9 @@ export function parseSource(source) {
121
180
  ref: ref || 'main',
122
181
  };
123
182
  }
124
- // Generic URL
183
+ // Generic URL -- must be an encrypted, authenticated transport
184
+ // (rejects http://, file://, git://, ext::, and leading "-").
185
+ assertSafeGitTransport(cleanSource);
125
186
  return {
126
187
  type: 'url',
127
188
  url: cleanSource.endsWith('.git') ? cleanSource : `${cleanSource}.git`,
@@ -183,6 +244,7 @@ export async function cloneOrPull(source, targetDir) {
183
244
  const log = await repoGit.log({ maxCount: 1 });
184
245
  return { isNew: false, commit: log.latest?.hash.slice(0, 8) || 'unknown' };
185
246
  }
247
+ assertSafeGitTransport(source.url);
186
248
  fs.mkdirSync(targetDir, { recursive: true });
187
249
  await git.clone(source.url, targetDir);
188
250
  const repoGit = simpleGit(targetDir);
@@ -364,6 +426,7 @@ export async function cloneIntoExisting(source, targetDir) {
364
426
  const git = simpleGit();
365
427
  const tempDir = path.join(targetDir, '.git-clone-temp');
366
428
  try {
429
+ assertSafeGitTransport(parsed.url);
367
430
  // Clone to temp directory
368
431
  fs.mkdirSync(tempDir, { recursive: true });
369
432
  await git.clone(parsed.url, tempDir);
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Autonomous loop driver (issue #332).
3
+ *
4
+ * Re-injects an entrypoint each iteration until a stop condition is met. The
5
+ * driver is the deterministic skeleton; the entrypoint inside stays dynamic (it
6
+ * can spawn subagents freely). Every guard — `max_iterations`, `budget`, the
7
+ * `until: signal` condition, SIGINT/SIGTERM — lives OUTSIDE the agent, so the
8
+ * agent cannot vote past a kill-switch (the standard answer to runaway-loop and
9
+ * runaway-cost failure modes; see docs/07-entrypoints-and-loops.md).
10
+ *
11
+ * Structure mirrors the teams supervisor (`runSupervisor` in teams/supervisor.ts):
12
+ * a bounded for-loop with a hard cap, a SIGINT/SIGTERM trap that flips a stop
13
+ * flag, a per-iteration guard check, an interval sleep, and a typed `stoppedBy`
14
+ * union for the exit reason.
15
+ *
16
+ * Token accounting: the budget cap is a TOKEN hard-cap, enforced after each
17
+ * turn from the usage events parsed off the agent's stream-json output. Token
18
+ * extraction reuses `extractUsageEvents` from budget/enforce.ts (read-only
19
+ * import) rather than re-implementing the per-provider parsing.
20
+ */
21
+ import type { AgentId } from './types.js';
22
+ import type { ExecOptions } from './exec.js';
23
+ import { type Checkpoint } from './checkpoint.js';
24
+ /** Loop block config (docs/07-entrypoints-and-loops.md → "The loop block"). Passed to `runLoop` as its `loop` argument. */
25
+ export interface LoopConfig {
26
+ /** Stop condition. `signal` reads loop-signal.json; an absent or unparseable file is fail-closed (treated as continue:false). */
27
+ until?: 'signal';
28
+ /** Hard cap on iterations; running this many without an earlier stop ends the loop with stoppedBy `max`. */
29
+ maxIterations?: number;
30
+ /** Token hard-cap on cumulative tokens, checked after each turn outside the agent; crossing it ends the loop with stoppedBy `budget`. */
31
+ budget?: number;
32
+ /** Delay between iterations: "0" back-to-back, "30m" paces. An unparseable value makes parseLoopInterval throw instead of falling back to 0. */
33
+ interval?: string;
34
+ }
35
+ /** The loop-signal.json contract the entrypoint writes each iteration. With `until: 'signal'`, `continue: false` stops the loop as `condition-met`. `reason` is optional (presumably a human-readable explanation; confirm against the writer). */
36
+ export interface LoopSignal {
37
+ continue: boolean;
38
+ reason?: string;
39
+ }
40
+ /** Why the loop stopped. Mirrors the teams supervisor exit reasons. The `runLoop` doc comment describes every value except `stalled`. NOTE(review): confirm whether and when the loop driver returns `stalled`. */
41
+ export type LoopStoppedBy = 'condition-met' | 'budget' | 'stalled' | 'max' | 'signal' | 'error';
42
+ /** Result of a loop run, as resolved by `runLoop`. `stoppedBy` is the exit reason; `elapsedMs` is a duration in milliseconds (per its name; start and end points to be confirmed against the implementation). */
43
+ export interface LoopResult {
44
+ /** Iterations actually executed. */
45
+ iterations: number;
46
+ stoppedBy: LoopStoppedBy;
47
+ elapsedMs: number;
48
+ /** Cumulative tokens consumed across all iterations. NOTE(review): unclear from here whether this includes `LoopContext.startTokens` on a resume; confirm. */
49
+ tokens: number;
50
+ /** Last loop-signal read, if any. */
51
+ lastSignal?: LoopSignal;
52
+ }
53
+ /** What a single iteration's run function (`RunIteration`) returns. A non-zero `exitCode` stops the loop with stoppedBy `error`. */
54
+ export interface IterationResult {
55
+ exitCode: number;
56
+ /** Tokens consumed this iteration (input + output + cache). */
57
+ tokens: number;
58
+ }
59
+ /** Per-iteration run function — the injectable seam that makes the driver testable. Supplied through `LoopDeps.runIteration`; when omitted, `defaultRunIteration` is used. */
60
+ export type RunIteration = (options: ExecOptions) => Promise<IterationResult>;
61
+ /** Context the driver needs that isn't part of ExecOptions. `runDir` is presumably the run directory handed to `loopSignalPath` / `readLoopSignal` / `clearLoopSignal` (confirm in the implementation). `runId`, `agent` and `version` identify the run; their exact use is not visible from these declarations. */
62
+ export interface LoopContext {
63
+ runId: string;
64
+ runDir: string;
65
+ agent: AgentId;
66
+ version?: string;
67
+ /** Iteration to start at (1 for a fresh run, checkpoint.iteration+1 for a resume). */
68
+ startIteration?: number;
69
+ /** Tokens already consumed before this driver started (carried across a resume). */
70
+ startTokens?: number;
71
+ /**
72
+ * On a resume, the killed run's LAST iteration session id. The first resumed
73
+ * iteration `/continue`s from it to thread conversation memory forward.
74
+ * Undefined on a fresh run (iteration 1 mints its own id, no prior to continue).
75
+ */
76
+ sessionId?: string;
77
+ }
78
+ /** Dependency seams for testing. Every member is optional; each member's comment names the default used when it is omitted. Passed to `runLoop` as its optional `deps` argument. */
79
+ export interface LoopDeps {
80
+ /** Per-iteration runner. Defaults to a token-capturing spawn (defaultRunIteration). */
81
+ runIteration?: RunIteration;
82
+ /** Sleep function (ms). Defaults to setTimeout-backed. Injectable so tests don't wait. */
83
+ sleep?: (ms: number) => Promise<void>;
84
+ /** Checkpoint writer. Defaults to writeCheckpoint. */
85
+ writeCheckpoint?: (c: Checkpoint) => void;
86
+ }
87
+ /** Path to the loop-signal.json file of the run whose directory is `runDir`. */
88
+ export declare function loopSignalPath(runDir: string): string;
89
+ /**
90
+ * Build the prompt for iteration >= 2 so the agent CONTINUES the prior
91
+ * iteration's conversation instead of starting fresh.
92
+ *
93
+ * This reuses the repo's established cross-process Claude-continuity mechanism —
94
+ * the `/continue <id>` skill (see `buildFallbackPrompt` in exec.ts, which hands
95
+ * a rate-limit successor `/continue ${prevSessionId}`). The skill loads the
96
+ * prior transcript via `agents sessions <id>`, so continuity does NOT depend on
97
+ * the provider's native session being "active"; it reads the transcript off
98
+ * disk. That is why each loop iteration can safely pin a FRESH session id (the
99
+ * `--session-id` flag CREATES a session — re-passing one errors "Session ID
100
+ * already in use") while still threading the conversation forward via the
101
+ * prior id.
102
+ *
103
+ * The original entrypoint is re-appended after the continue directive so the
104
+ * agent both recalls the prior turn AND knows what to do this iteration.
+ *
+ * @param prevSessionId Session id of the prior iteration to continue from.
+ * @param entrypoint The original entrypoint text, re-appended after the continue directive.
+ * @returns The prompt string to hand to the next iteration.
105
+ */
106
+ export declare function buildLoopContinuePrompt(prevSessionId: string, entrypoint: string): string;
107
+ /**
108
+ * Resolve a loop interval string to milliseconds. `"0"` is an explicit
109
+ * back-to-back run (0ms). Any other string must parse via parseTimeout
110
+ * (e.g. "30m", "1h"); an unparseable value (e.g. "30s", "5", "abc") is a
111
+ * configuration error and must NOT silently coalesce to 0 (which would run the
112
+ * loop full-speed on a typo). Throws on bad input; validate at config build
113
+ * time (validateLoopInterval) so the error surfaces before the loop starts.
+ *
+ * NOTE(review): the result for an `undefined` interval is not stated here;
+ * confirm against the implementation.
+ *
+ * @param interval The `LoopConfig.interval` value, if any.
+ * @returns The delay between iterations in milliseconds.
+ * @throws When `interval` is a string that is neither `"0"` nor parseable.
114
+ */
115
+ export declare function parseLoopInterval(interval: string | undefined): number;
116
+ /**
117
+ * Read and parse loop-signal.json. Returns null when the file is absent or
118
+ * unparseable — the caller treats null as fail-closed (continue:false).
+ *
+ * @param runDir Directory of the run whose loop-signal.json is read.
+ * @returns The parsed signal, or null when the file is absent or unparseable.
119
+ */
120
+ export declare function readLoopSignal(runDir: string): LoopSignal | null;
121
+ /** Delete the loop-signal.json of the run in `runDir` so a stale signal never carries into the next iteration. */
122
+ export declare function clearLoopSignal(runDir: string): void;
123
+ /**
124
+ * Default per-iteration runner: spawn the agent, tee stdout, and sum token usage
125
+ * off the stream. This is a purpose-built token-capturing spawn for the loop's
126
+ * budget guard, not a re-implementation of exec's fallback/budget machinery —
127
+ * it reuses `buildExecCommand` / `buildExecEnv` (the canonical command/env
128
+ * builders) and `extractUsageEvents` (the canonical stream parser). The agent
129
+ * is forced to JSON/headless so the usage stream is parseable.
+ *
+ * @param options Exec options for this iteration's agent run.
+ * @returns The iteration's exit code and the tokens summed from its usage stream.
130
+ */
131
+ export declare function defaultRunIteration(options: ExecOptions): Promise<IterationResult>;
132
+ /**
133
+ * Run the autonomous loop. Returns when a guard trips, the until-condition is
134
+ * met, the iteration cap is reached, or an OS signal (SIGINT/SIGTERM) arrives.
135
+ *
136
+ * stoppedBy semantics:
137
+ * - `condition-met` — until=signal and the signal said stop (continue:false
138
+ * OR the file was absent/corrupt → fail-closed).
139
+ * - `budget` — cumulative tokens crossed the budget cap (checked after
140
+ * each turn, outside the agent).
141
+ * - `max` — ran maxIterations iterations without any earlier stop.
142
+ * - `signal` — SIGINT/SIGTERM arrived; checkpoint is written before exit.
143
+ * - `error` — an iteration threw or exited non-zero.
+ * - `stalled` — NOTE(review): present in LoopStoppedBy but not described
+ * here; confirm whether and when the driver returns it.
+ *
+ * @param execOptions Exec options used to run each iteration.
+ * @param loop Loop block config (stop condition, caps, interval).
+ * @param ctx Run context that is not part of ExecOptions (ids, run dir, resume state).
+ * @param deps Optional dependency seams; omitted members use their defaults.
+ * @returns The loop result, including the `stoppedBy` reason.
144
+ */
145
+ export declare function runLoop(execOptions: ExecOptions, loop: LoopConfig, ctx: LoopContext, deps?: LoopDeps): Promise<LoopResult>;