voratiq 0.1.0-beta.0 → 0.1.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/README.md +49 -23
  2. package/dist/{commands/run/agents/auth-stage.d.ts → agents/runtime/auth.d.ts} +8 -5
  3. package/dist/{commands/run/agents/auth-stage.js → agents/runtime/auth.js} +49 -19
  4. package/dist/agents/runtime/chat.d.ts +5 -0
  5. package/dist/agents/runtime/chat.js +28 -0
  6. package/dist/agents/runtime/errors.d.ts +27 -0
  7. package/dist/agents/runtime/errors.js +51 -0
  8. package/dist/{commands/run/agents → agents/runtime}/failures.d.ts +0 -1
  9. package/dist/agents/runtime/failures.js +106 -0
  10. package/dist/agents/runtime/harness.d.ts +2 -0
  11. package/dist/agents/runtime/harness.js +125 -0
  12. package/dist/{commands/run/agents/sandbox-launcher.d.ts → agents/runtime/launcher.d.ts} +20 -5
  13. package/dist/{commands/run/agents/sandbox-launcher.js → agents/runtime/launcher.js} +18 -38
  14. package/dist/{commands/run/agents/workspace-prep.d.ts → agents/runtime/manifest.d.ts} +5 -4
  15. package/dist/{commands/run/agents/workspace-prep.js → agents/runtime/manifest.js} +15 -9
  16. package/dist/agents/runtime/registry.d.ts +4 -0
  17. package/dist/agents/runtime/registry.js +54 -0
  18. package/dist/agents/runtime/sandbox.d.ts +42 -0
  19. package/dist/agents/runtime/sandbox.js +226 -0
  20. package/dist/agents/runtime/types.d.ts +83 -0
  21. package/dist/agents/runtime/types.js +1 -0
  22. package/dist/{commands/run/agents → agents/runtime}/watchdog.d.ts +6 -2
  23. package/dist/{commands/run/agents → agents/runtime}/watchdog.js +89 -4
  24. package/dist/auth/providers/codex.js +3 -2
  25. package/dist/auth/providers/gemini.js +4 -3
  26. package/dist/auth/providers/types.d.ts +1 -0
  27. package/dist/bin.js +203 -66
  28. package/dist/cli/apply.d.ts +1 -0
  29. package/dist/cli/apply.js +4 -1
  30. package/dist/cli/auto.d.ts +23 -0
  31. package/dist/cli/auto.js +225 -0
  32. package/dist/cli/errors.d.ts +3 -0
  33. package/dist/cli/errors.js +6 -0
  34. package/dist/cli/init.d.ts +3 -0
  35. package/dist/cli/init.js +19 -4
  36. package/dist/cli/output.d.ts +14 -0
  37. package/dist/cli/output.js +151 -3
  38. package/dist/cli/prune.d.ts +4 -3
  39. package/dist/cli/prune.js +31 -7
  40. package/dist/cli/review.d.ts +4 -0
  41. package/dist/cli/review.js +36 -5
  42. package/dist/cli/run.d.ts +20 -1
  43. package/dist/cli/run.js +36 -6
  44. package/dist/cli/spec.d.ts +17 -0
  45. package/dist/cli/spec.js +59 -0
  46. package/dist/commands/apply/command.d.ts +1 -0
  47. package/dist/commands/apply/command.js +75 -5
  48. package/dist/commands/apply/errors.d.ts +16 -0
  49. package/dist/commands/apply/errors.js +41 -3
  50. package/dist/commands/apply/types.d.ts +2 -1
  51. package/dist/commands/fetch.d.ts +1 -1
  52. package/dist/commands/fetch.js +2 -2
  53. package/dist/commands/init/agents.d.ts +2 -1
  54. package/dist/commands/init/agents.js +11 -6
  55. package/dist/commands/init/command.js +182 -7
  56. package/dist/commands/init/types.d.ts +4 -0
  57. package/dist/commands/list/command.js +1 -1
  58. package/dist/commands/prune/command.d.ts +2 -1
  59. package/dist/commands/prune/command.js +65 -8
  60. package/dist/commands/prune/errors.d.ts +0 -3
  61. package/dist/commands/prune/errors.js +1 -7
  62. package/dist/commands/prune/types.d.ts +21 -0
  63. package/dist/commands/review/command.d.ts +7 -1
  64. package/dist/commands/review/command.js +194 -5
  65. package/dist/commands/review/errors.d.ts +11 -0
  66. package/dist/commands/review/errors.js +21 -0
  67. package/dist/commands/review/manifest.d.ts +46 -0
  68. package/dist/commands/review/manifest.js +94 -0
  69. package/dist/commands/review/prompt.d.ts +13 -0
  70. package/dist/commands/review/prompt.js +79 -0
  71. package/dist/commands/run/agent-execution.d.ts +2 -1
  72. package/dist/commands/run/agent-execution.js +2 -1
  73. package/dist/commands/run/agents/artifacts.d.ts +17 -0
  74. package/dist/commands/run/agents/artifacts.js +168 -0
  75. package/dist/commands/run/agents/eval-runner.d.ts +4 -3
  76. package/dist/commands/run/agents/eval-runner.js +2 -4
  77. package/dist/commands/run/agents/lifecycle.d.ts +1 -1
  78. package/dist/commands/run/agents/lifecycle.js +60 -35
  79. package/dist/commands/run/agents/preparation.js +15 -86
  80. package/dist/commands/run/agents/run-context.d.ts +5 -2
  81. package/dist/commands/run/agents/run-context.js +17 -21
  82. package/dist/commands/run/agents/types.d.ts +5 -7
  83. package/dist/commands/run/agents/workspace.d.ts +22 -0
  84. package/dist/commands/run/agents/workspace.js +50 -0
  85. package/dist/commands/run/agents.d.ts +1 -0
  86. package/dist/commands/run/agents.js +2 -1
  87. package/dist/commands/run/command.d.ts +1 -1
  88. package/dist/commands/run/command.js +6 -6
  89. package/dist/commands/run/errors.d.ts +9 -0
  90. package/dist/commands/run/errors.js +49 -0
  91. package/dist/commands/run/lifecycle.d.ts +2 -2
  92. package/dist/commands/run/lifecycle.js +5 -5
  93. package/dist/commands/run/phases.d.ts +1 -1
  94. package/dist/commands/run/prompt.d.ts +5 -0
  95. package/dist/commands/run/prompt.js +19 -0
  96. package/dist/commands/run/record-init.js +1 -1
  97. package/dist/commands/run/reports.d.ts +1 -1
  98. package/dist/commands/run/shim/run-agent-shim.js +46 -2
  99. package/dist/commands/run/validation.d.ts +1 -1
  100. package/dist/commands/run/validation.js +29 -10
  101. package/dist/commands/shared/preview.d.ts +5 -0
  102. package/dist/commands/shared/preview.js +32 -0
  103. package/dist/commands/shared/prompt-helpers.d.ts +7 -0
  104. package/dist/commands/shared/prompt-helpers.js +14 -0
  105. package/dist/commands/shared/prune.d.ts +1 -0
  106. package/dist/commands/shared/prune.js +4 -0
  107. package/dist/commands/shared/session-id.d.ts +1 -0
  108. package/dist/commands/shared/session-id.js +22 -0
  109. package/dist/commands/spec/command.d.ts +23 -0
  110. package/dist/commands/spec/command.js +459 -0
  111. package/dist/commands/spec/errors.d.ts +17 -0
  112. package/dist/commands/spec/errors.js +33 -0
  113. package/dist/commands/spec/preview.d.ts +1 -0
  114. package/dist/commands/spec/preview.js +10 -0
  115. package/dist/commands/spec/prompt.d.ts +10 -0
  116. package/dist/commands/spec/prompt.js +44 -0
  117. package/dist/configs/agents/defaults.d.ts +6 -1
  118. package/dist/configs/agents/defaults.js +110 -30
  119. package/dist/configs/agents/errors.d.ts +9 -0
  120. package/dist/configs/agents/errors.js +21 -0
  121. package/dist/configs/agents/loader.d.ts +12 -1
  122. package/dist/configs/agents/loader.js +90 -6
  123. package/dist/configs/environment/detect.js +9 -4
  124. package/dist/configs/sandbox/defaults.js +1 -1
  125. package/dist/configs/sandbox/loader.d.ts +1 -1
  126. package/dist/configs/sandbox/loader.js +30 -0
  127. package/dist/configs/sandbox/schemas.d.ts +25 -0
  128. package/dist/configs/sandbox/schemas.js +11 -0
  129. package/dist/configs/sandbox/types.d.ts +9 -0
  130. package/dist/configs/settings/loader.d.ts +7 -0
  131. package/dist/configs/settings/loader.js +57 -0
  132. package/dist/configs/settings/types.d.ts +19 -0
  133. package/dist/configs/settings/types.js +11 -0
  134. package/dist/evals/runner.js +0 -2
  135. package/dist/preflight/branch.d.ts +9 -0
  136. package/dist/preflight/branch.js +48 -0
  137. package/dist/preflight/errors.d.ts +3 -0
  138. package/dist/preflight/errors.js +7 -0
  139. package/dist/preflight/index.d.ts +4 -0
  140. package/dist/preflight/index.js +5 -1
  141. package/dist/records/enhanced.d.ts +1 -38
  142. package/dist/records/enhanced.js +1 -139
  143. package/dist/records/errors.d.ts +1 -23
  144. package/dist/records/errors.js +1 -43
  145. package/dist/records/history-lock.d.ts +1 -27
  146. package/dist/records/history-lock.js +1 -184
  147. package/dist/records/mutators.d.ts +1 -17
  148. package/dist/records/mutators.js +1 -144
  149. package/dist/records/persistence.d.ts +1 -95
  150. package/dist/records/persistence.js +1 -459
  151. package/dist/records/types.d.ts +1 -238
  152. package/dist/records/types.js +1 -131
  153. package/dist/render/interactions/confirmation.js +4 -2
  154. package/dist/render/transcripts/apply.js +8 -2
  155. package/dist/render/transcripts/auto.d.ts +24 -0
  156. package/dist/render/transcripts/auto.js +17 -0
  157. package/dist/render/transcripts/init.d.ts +1 -0
  158. package/dist/render/transcripts/init.js +34 -16
  159. package/dist/render/transcripts/list.d.ts +1 -1
  160. package/dist/render/transcripts/list.js +1 -1
  161. package/dist/render/transcripts/prune.d.ts +7 -1
  162. package/dist/render/transcripts/prune.js +58 -11
  163. package/dist/render/transcripts/review.d.ts +7 -2
  164. package/dist/render/transcripts/review.js +19 -35
  165. package/dist/render/transcripts/run.d.ts +6 -2
  166. package/dist/render/transcripts/run.js +96 -38
  167. package/dist/render/transcripts/spec.d.ts +3 -0
  168. package/dist/render/transcripts/spec.js +12 -0
  169. package/dist/render/transcripts/update-check.d.ts +2 -0
  170. package/dist/render/transcripts/update-check.js +22 -0
  171. package/dist/render/utils/agents.d.ts +2 -2
  172. package/dist/render/utils/records.js +4 -4
  173. package/dist/render/utils/runs.d.ts +1 -0
  174. package/dist/render/utils/runs.js +1 -0
  175. package/dist/render/utils/timezone.d.ts +3 -0
  176. package/dist/render/utils/timezone.js +45 -0
  177. package/dist/render/utils/transcript.d.ts +7 -1
  178. package/dist/render/utils/transcript.js +12 -2
  179. package/dist/reviews/records/persistence.d.ts +27 -0
  180. package/dist/reviews/records/persistence.js +118 -0
  181. package/dist/reviews/records/types.d.ts +20 -0
  182. package/dist/reviews/records/types.js +29 -0
  183. package/dist/runs/records/enhanced.d.ts +37 -0
  184. package/dist/runs/records/enhanced.js +137 -0
  185. package/dist/runs/records/errors.d.ts +23 -0
  186. package/dist/runs/records/errors.js +43 -0
  187. package/dist/runs/records/history-lock.d.ts +27 -0
  188. package/dist/runs/records/history-lock.js +184 -0
  189. package/dist/runs/records/mutators.d.ts +17 -0
  190. package/dist/runs/records/mutators.js +144 -0
  191. package/dist/runs/records/persistence.d.ts +101 -0
  192. package/dist/runs/records/persistence.js +309 -0
  193. package/dist/runs/records/types.d.ts +246 -0
  194. package/dist/runs/records/types.js +144 -0
  195. package/dist/runs/records.d.ts +6 -0
  196. package/dist/runs/records.js +6 -0
  197. package/dist/sessions/errors.d.ts +26 -0
  198. package/dist/sessions/errors.js +49 -0
  199. package/dist/sessions/persistence.d.ts +92 -0
  200. package/dist/sessions/persistence.js +412 -0
  201. package/dist/specs/records/persistence.d.ts +50 -0
  202. package/dist/specs/records/persistence.js +157 -0
  203. package/dist/specs/records/types.d.ts +36 -0
  204. package/dist/specs/records/types.js +36 -0
  205. package/dist/status/index.d.ts +58 -7
  206. package/dist/status/index.js +81 -4
  207. package/dist/update-check/mvp.d.ts +24 -0
  208. package/dist/update-check/mvp.js +130 -0
  209. package/dist/update-check/prompt.d.ts +25 -0
  210. package/dist/update-check/prompt.js +62 -0
  211. package/dist/update-check/semver.d.ts +17 -0
  212. package/dist/update-check/semver.js +36 -0
  213. package/dist/update-check/state-path.d.ts +8 -0
  214. package/dist/update-check/state-path.js +18 -0
  215. package/dist/utils/git.d.ts +1 -0
  216. package/dist/utils/git.js +18 -8
  217. package/dist/utils/output.d.ts +5 -1
  218. package/dist/utils/output.js +4 -2
  219. package/dist/utils/slug.d.ts +2 -0
  220. package/dist/utils/slug.js +15 -0
  221. package/dist/workspace/agents.d.ts +0 -16
  222. package/dist/workspace/agents.js +5 -144
  223. package/dist/workspace/chat/artifacts.js +9 -10
  224. package/dist/workspace/layout.d.ts +14 -6
  225. package/dist/workspace/layout.js +37 -33
  226. package/dist/workspace/promotion.d.ts +32 -0
  227. package/dist/workspace/promotion.js +34 -0
  228. package/dist/workspace/prune.d.ts +12 -1
  229. package/dist/workspace/prune.js +14 -1
  230. package/dist/workspace/run.d.ts +1 -3
  231. package/dist/workspace/run.js +5 -14
  232. package/dist/workspace/setup.js +91 -15
  233. package/dist/workspace/structure.d.ts +41 -2
  234. package/dist/workspace/structure.js +136 -23
  235. package/dist/workspace/templates.d.ts +9 -2
  236. package/dist/workspace/templates.js +17 -6
  237. package/package.json +3 -3
  238. package/dist/commands/run/agents/chat-preserver.d.ts +0 -9
  239. package/dist/commands/run/agents/chat-preserver.js +0 -35
  240. package/dist/commands/run/agents/failures.js +0 -32
  241. package/dist/commands/run/prompts.d.ts +0 -4
  242. package/dist/commands/run/prompts.js +0 -16
  243. package/dist/commands/run/sandbox-registry.d.ts +0 -4
  244. package/dist/commands/run/sandbox-registry.js +0 -54
  245. package/dist/commands/run/sandbox.d.ts +0 -16
  246. package/dist/commands/run/sandbox.js +0 -96
package/README.md CHANGED
@@ -1,58 +1,84 @@
1
1
  # Voratiq
2
2
 
3
- Run multiple AI coding agents in parallel, compare their results, and apply the best solution.
3
+ Run coding agents against each other. Merge the winner.
4
4
 
5
- ![`voratiq run --spec specs/p1/agent-workspace-guardrails.md`](https://raw.githubusercontent.com/voratiq/voratiq/main/assets/run-demo.png)
5
+ ![`voratiq run --spec .voratiq/specs/standardize-docker-test-scripts.md`](https://raw.githubusercontent.com/voratiq/voratiq/main/assets/run-demo.png)
6
+
7
+ Why? Because no single model is best for every task. We use selection because it leads to [higher quality code](https://voratiq.com/blog/selection-rather-than-prediction/).
6
8
 
7
9
  ## Installation
8
10
 
9
- Voratiq is in public beta. Install via npm:
11
+ Install via npm:
10
12
 
11
13
  ```bash
12
- npm install -g voratiq@beta
14
+ npm install -g voratiq
13
15
  ```
14
16
 
15
17
  ### Requirements
16
18
 
17
- Core:
18
-
19
19
  - Node 20+
20
20
  - git
21
- - 1+ AI coding agent (Claude [(>=2.0.55)](https://github.com/anthropics/claude-code?tab=readme-ov-file#get-started), Codex [(>=0.66.0)](https://github.com/openai/codex?tab=readme-ov-file#quickstart), or Gemini [(>=0.19.4)](https://github.com/google-gemini/gemini-cli?tab=readme-ov-file#quick-install))
22
-
23
- Platform-specific:
24
-
21
+ - 1+ AI coding agent (Claude [>=2.0.55](https://github.com/anthropics/claude-code?tab=readme-ov-file#get-started), Codex [>=0.66.0](https://github.com/openai/codex?tab=readme-ov-file#quickstart), or Gemini [>=0.19.4](https://github.com/google-gemini/gemini-cli?tab=readme-ov-file#quick-install))
25
22
  - macOS: `ripgrep`
26
23
  - Linux (Debian/Ubuntu): `bubblewrap`, `socat`, `ripgrep`
27
24
 
28
- See the [sandbox runtime docs](https://github.com/anthropic-experimental/sandbox-runtime/blob/1bafa66a2c3ebc52569fc0c1a868e85e778f66a0/README.md#platform-specific-dependencies) for installation instructions.
25
+ See the [sandbox runtime docs](https://github.com/anthropic-experimental/sandbox-runtime/blob/1bafa66a2c3ebc52569fc0c1a868e85e778f66a0/README.md#platform-specific-dependencies) for guidance on the platform-specific dependencies.
29
26
 
30
- Note: Windows is not currently supported.
27
+ Windows is not currently supported.
31
28
 
32
29
  ## Quick Start
33
30
 
34
31
  ```bash
35
32
  # Initialize workspace
36
- voratiq init
33
+ voratiq init --yes
37
34
 
38
- # Write a spec
39
- cat > specs/fix-auth.md <<EOF
40
- # Fix Session Timeout Bug
41
- Users are logged out after 5 minutes instead of 30.
42
- Sessions should honor SESSION_TIMEOUT_MS (default 30 minutes).
43
- EOF
35
+ # Generate a spec
36
+ voratiq spec \
37
+ --description "add dark mode toggle with localStorage persistence" \
38
+ --agent <agent-id> \
39
+ --yes
44
40
 
45
- # Run agents in parallel
46
- voratiq run --spec specs/fix-auth.md
41
+ # Run agent ensemble against that spec
42
+ voratiq run --spec .voratiq/specs/add-dark-mode-toggle.md
47
43
 
48
44
  # Review results
49
- voratiq review --run <run-id>
45
+ voratiq review --run <run-id> --agent <agent-id>
50
46
 
51
47
  # Apply the best solution
52
48
  voratiq apply --run <run-id> --agent <agent-id>
49
+
50
+ # Clean up workspace
51
+ voratiq prune --run <run-id>
53
52
  ```
54
53
 
55
- See the [docs](https://github.com/voratiq/voratiq/blob/main/docs/index.md) for core concepts, CLI reference, and guides on configuring agents, evals, runtime environments, and sandbox restrictions.
54
+ For a detailed walkthrough, see the [tutorial](https://github.com/voratiq/voratiq/blob/main/docs/tutorial.md).
55
+
56
+ ## How It Works
57
+
58
+ Voratiq positions you as the architect and reviewer, and shifts implementation onto an ensemble of agents.
59
+
60
+ In practice, the same spec goes to all agents, evals run automatically, and you pick the winner.
61
+
62
+ <p align="center">
63
+ <img src="https://raw.githubusercontent.com/voratiq/voratiq/main/assets/voratiq-workflow.svg" alt="Voratiq workflow" width="500">
64
+ </p>
65
+
66
+ Every run (diffs, logs, eval results, and agent summaries) is local, configurable, inspectable, and fully auditable.
67
+
68
+ ## Documentation
69
+
70
+ Learn about the Voratiq workflow and CLI:
71
+
72
+ - [Tutorial](https://github.com/voratiq/voratiq/blob/main/docs/tutorial.md) - End-to-end walkthrough
73
+ - [Core Concepts](https://github.com/voratiq/voratiq/blob/main/docs/core-concepts.md) - Mental model and design philosophy
74
+ - [CLI Reference](https://github.com/voratiq/voratiq/blob/main/docs/cli-reference.md) - All commands and options
75
+
76
+ How to configure agents, evaluations, and execution environments:
77
+
78
+ - [Agents](https://github.com/voratiq/voratiq/blob/main/docs/configs/agents.md) - Define which agents run and how to invoke them
79
+ - [Environment](https://github.com/voratiq/voratiq/blob/main/docs/configs/environment.md) - Configure runtime environments
80
+ - [Evals](https://github.com/voratiq/voratiq/blob/main/docs/configs/evals.md) - Define checks that gate agent output
81
+ - [Sandbox](https://github.com/voratiq/voratiq/blob/main/docs/configs/sandbox.md) - Network and filesystem restrictions
56
82
 
57
83
  ## License
58
84
 
@@ -1,23 +1,26 @@
1
- import type { AuthProvider, AuthRuntimeContext } from "../../../auth/providers/types.js";
2
- import type { AgentDefinition } from "../../../configs/agents/types.js";
1
+ import type { AuthProvider, AuthRuntimeContext } from "../../auth/providers/types.js";
2
+ import type { AgentDefinition } from "../../configs/agents/types.js";
3
3
  export interface StagedAuthContext {
4
4
  provider: AuthProvider;
5
5
  sandboxPath: string;
6
6
  runtime: AuthRuntimeContext;
7
7
  agentId: string;
8
- runId: string;
9
8
  }
10
9
  export interface StageAuthOptions {
11
10
  agent: AgentDefinition;
12
11
  agentRoot: string;
13
- runId: string;
14
12
  root: string;
13
+ runId?: string;
15
14
  runtime?: AuthRuntimeContext;
16
15
  }
17
16
  export interface StageAuthResult {
18
17
  env: Record<string, string>;
19
18
  context: StagedAuthContext;
20
19
  }
21
- export declare function verifyAgentProviders(agents: readonly AgentDefinition[]): Promise<void>;
20
+ export interface AgentProviderPreflightIssue {
21
+ readonly agentId: string;
22
+ readonly message: string;
23
+ }
24
+ export declare function verifyAgentProviders(agents: readonly Pick<AgentDefinition, "id" | "provider">[]): Promise<readonly AgentProviderPreflightIssue[]>;
22
25
  export declare function stageAgentAuth(options: StageAuthOptions): Promise<StageAuthResult>;
23
26
  export declare function teardownAuthContext(context: StagedAuthContext | undefined): Promise<void>;
@@ -1,40 +1,57 @@
1
1
  import { rm } from "node:fs/promises";
2
- import { resolveAuthProvider } from "../../../auth/providers/index.js";
3
- import { buildAuthRuntimeContext } from "../../../auth/runtime.js";
4
- import { toErrorMessage } from "../../../utils/errors.js";
5
- import { isFileSystemError } from "../../../utils/fs.js";
6
- import { AuthProviderStageError, AuthProviderVerificationError, MissingAgentProviderError, RunCommandError, UnknownAuthProviderError, } from "../errors.js";
7
- import { getRunCommand } from "./sandbox-launcher.js";
2
+ import { resolveAuthProvider } from "../../auth/providers/index.js";
3
+ import { buildAuthRuntimeContext } from "../../auth/runtime.js";
4
+ import { loadRepoSettings } from "../../configs/settings/loader.js";
5
+ import { toErrorMessage } from "../../utils/errors.js";
6
+ import { isFileSystemError } from "../../utils/fs.js";
7
+ import { AuthProviderStageError, MissingAgentProviderError, UnknownAuthProviderError, } from "./errors.js";
8
+ import { getRunCommand } from "./launcher.js";
9
+ import { checkPlatformSupport } from "./sandbox.js";
8
10
  export async function verifyAgentProviders(agents) {
9
11
  if (agents.length === 0) {
10
- return;
12
+ return [];
11
13
  }
14
+ // Ensure platform and runtime dependencies are present.
15
+ checkPlatformSupport();
12
16
  await getRunCommand();
13
17
  const runtime = buildAuthRuntimeContext();
18
+ const issues = [];
14
19
  for (const agent of agents) {
15
- const provider = resolveAgentProvider(agent);
16
- try {
17
- await provider.verify({
20
+ const providerId = agent.provider?.trim();
21
+ if (!providerId) {
22
+ issues.push({ agentId: agent.id, message: "missing provider" });
23
+ continue;
24
+ }
25
+ const provider = resolveAuthProvider(providerId);
26
+ if (!provider) {
27
+ issues.push({
18
28
  agentId: agent.id,
19
- runtime,
29
+ message: `unknown auth provider "${providerId}"`,
20
30
  });
31
+ continue;
32
+ }
33
+ try {
34
+ await provider.verify({ agentId: agent.id, runtime });
21
35
  }
22
36
  catch (error) {
23
- throw new AuthProviderVerificationError(extractAuthProviderMessage(error));
37
+ pushIssueLines(issues, agent.id, extractAuthProviderMessage(error));
24
38
  }
25
39
  }
40
+ return issues;
26
41
  }
27
42
  export async function stageAgentAuth(options) {
28
- const { agent, agentRoot, runId } = options;
43
+ const { agent, agentRoot, runId, root } = options;
29
44
  const provider = resolveAgentProvider(agent);
30
45
  const runtime = options.runtime ?? buildAuthRuntimeContext();
46
+ const includeConfigToml = shouldIncludeCodexConfigToml(root);
31
47
  try {
32
48
  const stageResult = await provider.stage({
33
49
  agentId: agent.id,
34
50
  agentRoot,
35
51
  runtime,
36
- runId,
37
- root: options.root,
52
+ runId: runId ?? "runtime",
53
+ root,
54
+ includeConfigToml,
38
55
  });
39
56
  return {
40
57
  env: stageResult.env,
@@ -43,7 +60,6 @@ export async function stageAgentAuth(options) {
43
60
  sandboxPath: stageResult.sandboxPath,
44
61
  runtime,
45
62
  agentId: agent.id,
46
- runId,
47
63
  },
48
64
  };
49
65
  }
@@ -80,6 +96,10 @@ export async function teardownAuthContext(context) {
80
96
  }
81
97
  }
82
98
  const tornDownContexts = new WeakSet();
99
+ function shouldIncludeCodexConfigToml(root) {
100
+ const settings = loadRepoSettings({ root });
101
+ return settings.codex.globalConfigPolicy !== "ignore";
102
+ }
83
103
  function isIgnorableTeardownError(error) {
84
104
  if (!isFileSystemError(error)) {
85
105
  return false;
@@ -98,11 +118,21 @@ function resolveAgentProvider(agent) {
98
118
  return provider;
99
119
  }
100
120
  function extractAuthProviderMessage(error) {
101
- if (error instanceof RunCommandError) {
102
- return error.messageForDisplay();
103
- }
104
121
  if (error instanceof Error && error.message) {
105
122
  return error.message;
106
123
  }
107
124
  return toErrorMessage(error);
108
125
  }
126
+ function pushIssueLines(issues, agentId, message) {
127
+ const lines = message
128
+ .split(/\r?\n/u)
129
+ .map((line) => line.trim())
130
+ .filter((line) => line.length > 0);
131
+ if (lines.length === 0) {
132
+ issues.push({ agentId, message: "unknown error" });
133
+ return;
134
+ }
135
+ for (const line of lines) {
136
+ issues.push({ agentId, message: line });
137
+ }
138
+ }
@@ -0,0 +1,5 @@
1
+ import type { AgentRuntimeChatResult } from "./types.js";
2
+ export declare function captureAgentChatArtifacts(options: {
3
+ providerId: string | undefined;
4
+ agentRoot: string;
5
+ }): Promise<AgentRuntimeChatResult>;
@@ -0,0 +1,28 @@
1
+ import { preserveProviderChatTranscripts } from "../../workspace/chat/artifacts.js";
2
+ export async function captureAgentChatArtifacts(options) {
3
+ const providerId = options.providerId ?? "";
4
+ if (!providerId) {
5
+ return { captured: false };
6
+ }
7
+ const result = await preserveProviderChatTranscripts({
8
+ providerId,
9
+ agentRoot: options.agentRoot,
10
+ });
11
+ const format = result.format;
12
+ if ((result.status === "captured" || result.status === "already-exists") &&
13
+ format) {
14
+ return {
15
+ captured: true,
16
+ format,
17
+ artifactPath: result.artifactPath,
18
+ sourceCount: result.sourceCount,
19
+ };
20
+ }
21
+ if (result.status === "not-found") {
22
+ return { captured: false };
23
+ }
24
+ return {
25
+ captured: false,
26
+ error: result.status === "error" ? result.error : undefined,
27
+ };
28
+ }
@@ -0,0 +1,27 @@
1
+ import { DisplayableError } from "../../utils/errors.js";
2
+ export type AgentRuntimeErrorKind = "auth" | "manifest" | "sandbox" | "process";
3
+ export declare class AgentRuntimeError extends DisplayableError {
4
+ readonly kind: AgentRuntimeErrorKind;
5
+ constructor(kind: AgentRuntimeErrorKind, message: string);
6
+ }
7
+ export declare class MissingAgentProviderError extends AgentRuntimeError {
8
+ constructor(agentId: string);
9
+ }
10
+ export declare class UnknownAuthProviderError extends AgentRuntimeError {
11
+ constructor(providerId: string);
12
+ }
13
+ export declare class AuthProviderVerificationError extends AgentRuntimeError {
14
+ constructor(detail: string);
15
+ }
16
+ export declare class AuthProviderStageError extends AgentRuntimeError {
17
+ constructor(detail: string);
18
+ }
19
+ export declare class AgentRuntimeManifestError extends AgentRuntimeError {
20
+ constructor(detail: string);
21
+ }
22
+ export declare class AgentRuntimeSandboxError extends AgentRuntimeError {
23
+ constructor(detail: string);
24
+ }
25
+ export declare class AgentRuntimeProcessError extends AgentRuntimeError {
26
+ constructor(detail: string);
27
+ }
@@ -0,0 +1,51 @@
1
+ import { DisplayableError } from "../../utils/errors.js";
2
+ export class AgentRuntimeError extends DisplayableError {
3
+ kind;
4
+ constructor(kind, message) {
5
+ super(message);
6
+ this.kind = kind;
7
+ this.name = "AgentRuntimeError";
8
+ }
9
+ }
10
+ export class MissingAgentProviderError extends AgentRuntimeError {
11
+ constructor(agentId) {
12
+ super("auth", `Agent "${agentId}" missing provider.`);
13
+ this.name = "MissingAgentProviderError";
14
+ }
15
+ }
16
+ export class UnknownAuthProviderError extends AgentRuntimeError {
17
+ constructor(providerId) {
18
+ super("auth", `Unknown auth provider "${providerId}".`);
19
+ this.name = "UnknownAuthProviderError";
20
+ }
21
+ }
22
+ export class AuthProviderVerificationError extends AgentRuntimeError {
23
+ constructor(detail) {
24
+ super("auth", detail);
25
+ this.name = "AuthProviderVerificationError";
26
+ }
27
+ }
28
+ export class AuthProviderStageError extends AgentRuntimeError {
29
+ constructor(detail) {
30
+ super("auth", detail);
31
+ this.name = "AuthProviderStageError";
32
+ }
33
+ }
34
+ export class AgentRuntimeManifestError extends AgentRuntimeError {
35
+ constructor(detail) {
36
+ super("manifest", detail);
37
+ this.name = "AgentRuntimeManifestError";
38
+ }
39
+ }
40
+ export class AgentRuntimeSandboxError extends AgentRuntimeError {
41
+ constructor(detail) {
42
+ super("sandbox", detail);
43
+ this.name = "AgentRuntimeSandboxError";
44
+ }
45
+ }
46
+ export class AgentRuntimeProcessError extends AgentRuntimeError {
47
+ constructor(detail) {
48
+ super("process", detail);
49
+ this.name = "AgentRuntimeProcessError";
50
+ }
51
+ }
@@ -1,5 +1,4 @@
1
1
  export interface AgentFailureDetectionInput {
2
- agentId: string;
3
2
  provider: string;
4
3
  stdoutPath: string;
5
4
  stderrPath: string;
@@ -0,0 +1,106 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { CLAUDE_OAUTH_RELOGIN_HINT, CLAUDE_PROVIDER_ID, } from "../../auth/providers/claude/constants.js";
3
+ const GEMINI_PROVIDER_ID = "gemini";
4
+ const CODEX_PROVIDER_ID = "codex";
5
+ const CLAUDE_FAILURE_PATTERNS = [
6
+ /Please run \/login/i,
7
+ /OAuth token has expired/i,
8
+ ];
9
+ const JSON_MESSAGE_PATTERN = /"message"\s*:\s*"((?:\\.|[^"\\])*)"/;
10
+ export async function detectAgentProcessFailureDetail(input) {
11
+ if (input.provider !== CLAUDE_PROVIDER_ID &&
12
+ input.provider !== GEMINI_PROVIDER_ID &&
13
+ input.provider !== CODEX_PROVIDER_ID) {
14
+ return undefined;
15
+ }
16
+ const combinedLogs = await readCombinedLogs(input.stdoutPath, input.stderrPath);
17
+ if (!combinedLogs) {
18
+ return undefined;
19
+ }
20
+ if (input.provider === CLAUDE_PROVIDER_ID) {
21
+ if (CLAUDE_FAILURE_PATTERNS.some((pattern) => pattern.test(combinedLogs))) {
22
+ return CLAUDE_OAUTH_RELOGIN_HINT;
23
+ }
24
+ return undefined;
25
+ }
26
+ if (input.provider === GEMINI_PROVIDER_ID) {
27
+ return (extractFirstJsonMessage(combinedLogs) ??
28
+ extractGeminiFallbackLine(combinedLogs));
29
+ }
30
+ if (input.provider === CODEX_PROVIDER_ID) {
31
+ return (extractFirstJsonMessage(combinedLogs) ??
32
+ findFirstMatchingLine(combinedLogs, [
33
+ /invalid_request_error/,
34
+ /unsupported_value/,
35
+ /thread .* panicked/i,
36
+ ]));
37
+ }
38
+ return undefined;
39
+ }
40
+ function extractFirstJsonMessage(text) {
41
+ const match = JSON_MESSAGE_PATTERN.exec(text);
42
+ if (!match) {
43
+ return undefined;
44
+ }
45
+ const raw = match[1]?.trim();
46
+ if (!raw) {
47
+ return undefined;
48
+ }
49
+ try {
50
+ const parsed = JSON.parse(`"${raw}"`);
51
+ return isMeaningfulMessage(parsed) ? parsed : undefined;
52
+ }
53
+ catch {
54
+ return isMeaningfulMessage(raw) ? raw : undefined;
55
+ }
56
+ }
57
+ function isMeaningfulMessage(message) {
58
+ const normalized = message.trim();
59
+ if (!normalized) {
60
+ return false;
61
+ }
62
+ if (normalized === "[object Object]") {
63
+ return false;
64
+ }
65
+ return true;
66
+ }
67
+ function findFirstMatchingLine(text, matchers) {
68
+ const lines = text.split(/\r?\n/);
69
+ for (const line of lines) {
70
+ const trimmed = line.trim();
71
+ if (!trimmed)
72
+ continue;
73
+ for (const matcher of matchers) {
74
+ const match = matcher.exec(trimmed);
75
+ if (match) {
76
+ return trimmed.slice(match.index).trim();
77
+ }
78
+ }
79
+ }
80
+ return undefined;
81
+ }
82
+ function extractGeminiFallbackLine(text) {
83
+ return findFirstMatchingLine(text, [
84
+ /TerminalQuotaError:/,
85
+ /PERMISSION_DENIED/,
86
+ /RESOURCE_EXHAUSTED/,
87
+ /No capacity available/i,
88
+ /You have exhausted your capacity/i,
89
+ /exhausted your capacity/i,
90
+ ]);
91
+ }
92
+ async function readCombinedLogs(stdoutPath, stderrPath) {
93
+ const [stdout, stderr] = await Promise.all([
94
+ safeRead(stdoutPath),
95
+ safeRead(stderrPath),
96
+ ]);
97
+ return `${stdout}\n${stderr}`;
98
+ }
99
+ async function safeRead(path) {
100
+ try {
101
+ return await readFile(path, "utf8");
102
+ }
103
+ catch {
104
+ return "";
105
+ }
106
+ }
@@ -0,0 +1,2 @@
1
+ import type { AgentRuntimeHarnessInput, AgentRuntimeHarnessResult } from "./types.js";
2
+ export declare function runSandboxedAgent(input: AgentRuntimeHarnessInput): Promise<AgentRuntimeHarnessResult>;
@@ -0,0 +1,125 @@
1
+ import { randomBytes } from "node:crypto";
2
+ import { mkdir, rm, writeFile } from "node:fs/promises";
3
+ import { dirname, join } from "node:path";
4
+ import { loadSandboxProviderConfig } from "../../configs/sandbox/loader.js";
5
+ import { toErrorMessage } from "../../utils/errors.js";
6
+ import { stageAgentAuth } from "./auth.js";
7
+ import { captureAgentChatArtifacts } from "./chat.js";
8
+ import { AgentRuntimeError, AgentRuntimeProcessError, AgentRuntimeSandboxError, } from "./errors.js";
9
+ import { configureSandboxSettings, runAgentProcess } from "./launcher.js";
10
+ import { writeAgentManifest } from "./manifest.js";
11
+ import { registerStagedAuthContext, teardownRegisteredAuthContext, } from "./registry.js";
12
+ import { DEFAULT_DENIAL_BACKOFF } from "./sandbox.js";
13
+ const PROMPT_TMP_PREFIX = "prompt.ephemeral";
14
/**
 * Runs one agent end to end inside the sandbox.
 *
 * Steps, in order: create the parent directories of the manifest, sandbox
 * settings, stdout and stderr files; write the prompt to an ephemeral file;
 * stage agent auth (registering the context when a session id is given);
 * write the agent manifest; resolve the denial backoff; configure sandbox
 * settings; run the agent process; optionally capture chat artifacts.
 *
 * The ephemeral prompt file and the staged auth context are always cleaned up
 * afterwards, and failures during that cleanup are ignored.
 *
 * @param input Harness input (agent, prompt, paths, sandbox options, ...).
 * @returns Process outcome plus the sandbox settings and manifest env, and
 *          `chat` when chat capture produced a value.
 * @throws AgentRuntimeSandboxError when no provider id can be determined.
 * @throws AgentRuntimeError subclasses raised by the steps are rethrown as-is;
 *         any other failure is wrapped in AgentRuntimeProcessError.
 */
export async function runSandboxedAgent(input) {
    const {
        root,
        agent,
        prompt,
        environment,
        paths,
        sessionId,
        sandboxProviderId,
        sandboxPolicyOverrides,
        extraWriteProtectedPaths,
        extraReadProtectedPaths,
        captureChat = true,
        onWatchdogTrigger,
    } = input;
    // An explicit sandbox provider wins over the agent's own provider.
    const providerId = sandboxProviderId ?? agent.provider ?? "";
    if (!providerId) {
        throw new AgentRuntimeSandboxError(`Agent "${agent.id}" missing provider.`);
    }
    // Make sure every output file has a directory to land in.
    const outputFiles = [
        paths.runtimeManifestPath,
        paths.sandboxSettingsPath,
        paths.stdoutPath,
        paths.stderrPath,
    ];
    for (const outputFile of outputFiles) {
        await mkdir(dirname(outputFile), { recursive: true });
    }
    const promptPath = await writeEphemeralPrompt({
        runtimePath: paths.runtimePath,
        prompt,
    });
    let authContext;
    try {
        const stagedAuth = await stageAgentAuth({
            agent,
            agentRoot: paths.agentRoot,
            root,
            runId: sessionId,
        });
        authContext = stagedAuth.context;
        if (sessionId) {
            registerStagedAuthContext(sessionId, authContext);
        }
        const manifestEnv = await writeAgentManifest({
            agent,
            runtimeManifestPath: paths.runtimeManifestPath,
            promptPath,
            workspacePath: paths.workspacePath,
            env: stagedAuth.env,
            environment,
        });
        const denialBackoff = resolveDenialBackoff({
            root,
            providerId,
            override: input.denialBackoff,
        });
        const { sandboxSettings } = await configureSandboxSettings({
            sandboxHomePath: paths.sandboxHomePath,
            workspacePath: paths.workspacePath,
            providerId,
            root,
            sandboxSettingsPath: paths.sandboxSettingsPath,
            runtimePath: paths.runtimePath,
            artifactsPath: paths.artifactsPath,
            policyOverrides: sandboxPolicyOverrides,
            extraWriteProtectedPaths,
            extraReadProtectedPaths,
        });
        const outcome = await runAgentProcess({
            runtimeManifestPath: paths.runtimeManifestPath,
            agentRoot: paths.agentRoot,
            stdoutPath: paths.stdoutPath,
            stderrPath: paths.stderrPath,
            sandboxSettingsPath: paths.sandboxSettingsPath,
            providerId,
            denialBackoff,
            onWatchdogTrigger,
        });
        let chat;
        if (captureChat) {
            // Chat capture is keyed on the agent's provider, not the sandbox override.
            chat = await captureAgentChatArtifacts({
                providerId: agent.provider,
                agentRoot: paths.agentRoot,
            });
        }
        const result = {
            exitCode: outcome.exitCode,
            errorMessage: outcome.errorMessage,
            signal: outcome.signal,
            watchdog: outcome.watchdog,
            failFast: outcome.failFast,
            sandboxSettings,
            manifestEnv,
        };
        if (chat) {
            result.chat = chat;
        }
        return result;
    }
    catch (error) {
        // Runtime errors already carry their own classification; everything
        // else is reported as a process error.
        const failure = error instanceof AgentRuntimeError
            ? error
            : new AgentRuntimeProcessError(error instanceof Error ? error.message : toErrorMessage(error));
        throw failure;
    }
    finally {
        // Cleanup is best-effort: neither step may mask the real outcome.
        await rm(promptPath, { force: true }).catch(() => { });
        await teardownRegisteredAuthContext(sessionId ?? "runtime", authContext).catch(() => { });
    }
}
103
/**
 * Writes the prompt to a uniquely named file under the runtime directory.
 *
 * The file name is `${PROMPT_TMP_PREFIX}.<16 hex chars>.txt`, where the hex
 * part comes from 8 random bytes. The containing directory is created when
 * missing.
 *
 * @param options.runtimePath Directory that will hold the prompt file.
 * @param options.prompt Prompt text, written as UTF-8.
 * @returns The path of the file that was written.
 */
async function writeEphemeralPrompt(options) {
    const { runtimePath, prompt } = options;
    const suffix = randomBytes(8).toString("hex");
    const promptFile = join(runtimePath, `${PROMPT_TMP_PREFIX}.${suffix}.txt`);
    await mkdir(dirname(promptFile), { recursive: true });
    await writeFile(promptFile, prompt, { encoding: "utf8" });
    return promptFile;
}
111
/**
 * Picks the denial-backoff configuration for a run.
 *
 * Precedence: an explicit override, then the `denialBackoff` of the sandbox
 * provider config loaded for `root`/`providerId`, then DEFAULT_DENIAL_BACKOFF
 * when loading that config throws.
 *
 * @param options.override Caller-supplied backoff; used as-is when truthy.
 * @param options.root Root passed to the sandbox provider config loader.
 * @param options.providerId Provider whose sandbox config is consulted.
 * @returns The selected denial-backoff configuration.
 */
function resolveDenialBackoff(options) {
    const { override } = options;
    if (override) {
        return override;
    }
    try {
        const { root, providerId } = options;
        return loadSandboxProviderConfig({ root, providerId }).denialBackoff;
    }
    catch {
        // Config could not be loaded: use the built-in default.
        return DEFAULT_DENIAL_BACKOFF;
    }
}
@@ -1,5 +1,7 @@
1
- import type { WatchdogMetadata } from "../../../records/types.js";
2
- import type { AgentWorkspacePaths } from "../../../workspace/layout.js";
1
+ import type { DenialBackoffConfig } from "../../configs/sandbox/types.js";
2
+ import type { WatchdogMetadata } from "../../runs/records/types.js";
3
+ import { generateSandboxSettings, type SandboxFailFastInfo } from "./sandbox.js";
4
+ import type { SandboxPolicyOverrides } from "./types.js";
3
5
  import { type WatchdogTrigger } from "./watchdog.js";
4
6
  export interface AgentProcessOptions {
5
7
  runtimeManifestPath: string;
@@ -7,11 +9,12 @@ export interface AgentProcessOptions {
7
9
  stdoutPath: string;
8
10
  stderrPath: string;
9
11
  sandboxSettingsPath: string;
12
+ denialBackoff?: DenialBackoffConfig;
10
13
  resolveRunInvocation?: RunInvocationResolver;
11
14
  /** Provider ID for watchdog fatal pattern matching. */
12
15
  providerId?: string;
13
16
  /** Callback fired immediately when watchdog triggers, before process exits. */
14
- onWatchdogTrigger?: (trigger: WatchdogTrigger, reason: string) => void;
17
+ onWatchdogTrigger?: (trigger: WatchdogTrigger, reason: string, failFast?: SandboxFailFastInfo) => void;
15
18
  }
16
19
  export interface AgentProcessResult {
17
20
  exitCode: number;
@@ -19,6 +22,8 @@ export interface AgentProcessResult {
19
22
  signal?: NodeJS.Signals | null;
20
23
  /** Watchdog metadata showing enforced limits and trigger reason. */
21
24
  watchdog?: WatchdogMetadata;
25
+ /** Sandbox fail-fast metadata when repeated denials trigger an abort. */
26
+ failFast?: SandboxFailFastInfo;
22
27
  }
23
28
  export interface RunInvocationContext {
24
29
  agentRoot: string;
@@ -32,11 +37,21 @@ export interface RunInvocation {
32
37
  }
33
38
  export type RunInvocationResolver = (context: RunInvocationContext) => Promise<RunInvocation> | RunInvocation;
34
39
  export interface SandboxSettingsInput {
35
- workspacePaths: AgentWorkspacePaths;
40
+ sandboxHomePath: string;
41
+ workspacePath: string;
36
42
  providerId: string;
37
43
  root: string;
44
+ repoRootPath?: string;
45
+ sandboxSettingsPath: string;
46
+ runtimePath: string;
47
+ artifactsPath: string;
48
+ policyOverrides?: SandboxPolicyOverrides;
49
+ extraWriteProtectedPaths?: readonly string[];
50
+ extraReadProtectedPaths?: readonly string[];
38
51
  }
39
- export declare function configureSandboxSettings(input: SandboxSettingsInput): Promise<void>;
52
+ export declare function configureSandboxSettings(input: SandboxSettingsInput): Promise<{
53
+ sandboxSettings: ReturnType<typeof generateSandboxSettings>;
54
+ }>;
40
55
  export declare function getRunCommand(): Promise<string>;
41
56
  export declare function runAgentProcess(options: AgentProcessOptions): Promise<AgentProcessResult>;
42
57
  export declare function stageManifestForSandbox(options: {