skimpyclaw 0.3.6 → 0.3.9

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +14 -6
  2. package/dist/__tests__/api.test.js +1 -0
  3. package/dist/__tests__/channels.test.js +1 -1
  4. package/dist/__tests__/code-agents-orchestrator.test.js +74 -7
  5. package/dist/__tests__/code-agents-preflight.test.d.ts +1 -0
  6. package/dist/__tests__/code-agents-preflight.test.js +88 -0
  7. package/dist/__tests__/code-agents-sandbox.test.d.ts +1 -0
  8. package/dist/__tests__/code-agents-sandbox.test.js +163 -0
  9. package/dist/__tests__/code-agents-utils.test.js +12 -1
  10. package/dist/__tests__/context-manager.test.d.ts +1 -0
  11. package/dist/__tests__/context-manager.test.js +236 -0
  12. package/dist/__tests__/package-manager-detection.test.js +5 -5
  13. package/dist/__tests__/setup.test.js +7 -5
  14. package/dist/__tests__/skills.test.js +2 -2
  15. package/dist/__tests__/structured-context.test.d.ts +1 -0
  16. package/dist/__tests__/structured-context.test.js +100 -0
  17. package/dist/__tests__/tools.test.js +65 -3
  18. package/dist/agent.js +4 -5
  19. package/dist/api.js +10 -58
  20. package/dist/audit.js +5 -51
  21. package/dist/channels/telegram/handlers.js +2 -60
  22. package/dist/channels/telegram/index.js +0 -7
  23. package/dist/channels.js +1 -1
  24. package/dist/cli.js +151 -16
  25. package/dist/code-agents/executor.d.ts +9 -4
  26. package/dist/code-agents/executor.js +187 -13
  27. package/dist/code-agents/index.d.ts +1 -1
  28. package/dist/code-agents/index.js +30 -22
  29. package/dist/code-agents/orchestrator.d.ts +8 -2
  30. package/dist/code-agents/orchestrator.js +318 -27
  31. package/dist/code-agents/structured-context.d.ts +7 -0
  32. package/dist/code-agents/structured-context.js +54 -0
  33. package/dist/code-agents/types.d.ts +2 -0
  34. package/dist/code-agents/utils.d.ts +4 -0
  35. package/dist/code-agents/utils.js +38 -2
  36. package/dist/code-agents/worktree.d.ts +40 -0
  37. package/dist/code-agents/worktree.js +215 -0
  38. package/dist/config.d.ts +1 -0
  39. package/dist/config.js +5 -3
  40. package/dist/cron.js +18 -4
  41. package/dist/dashboard/assets/{index-CkonC7Cd.js → index-BoTHPby4.js} +20 -20
  42. package/dist/dashboard/assets/{index-EAg6lqF5.css → index-D4mufvBg.css} +1 -1
  43. package/dist/dashboard/index.html +2 -2
  44. package/dist/discord.js +4 -40
  45. package/dist/exec-approval.js +1 -1
  46. package/dist/file-lock.js +1 -1
  47. package/dist/gateway.js +3 -10
  48. package/dist/providers/anthropic.js +9 -5
  49. package/dist/providers/codex.js +10 -6
  50. package/dist/providers/context-manager.d.ts +22 -0
  51. package/dist/providers/context-manager.js +100 -0
  52. package/dist/providers/openai.js +9 -5
  53. package/dist/providers/types.d.ts +1 -0
  54. package/dist/security.js +9 -0
  55. package/dist/setup.js +122 -27
  56. package/dist/skills.js +9 -2
  57. package/dist/subagent.js +33 -2
  58. package/dist/tools/bash-tool.js +8 -0
  59. package/dist/tools/browser-tool.js +2 -1
  60. package/dist/tools/definitions.d.ts +0 -27
  61. package/dist/tools/definitions.js +0 -18
  62. package/dist/tools/execute-context.d.ts +4 -4
  63. package/dist/tools/file-tools.d.ts +1 -1
  64. package/dist/tools/file-tools.js +1 -1
  65. package/dist/tools.d.ts +5 -5
  66. package/dist/tools.js +87 -98
  67. package/dist/types.d.ts +14 -22
  68. package/dist/usage.d.ts +1 -0
  69. package/dist/usage.js +30 -46
  70. package/dist/utils.d.ts +18 -0
  71. package/dist/utils.js +71 -0
  72. package/dist/voice.js +9 -7
  73. package/package.json +26 -21
package/README.md CHANGED
@@ -21,7 +21,7 @@ Use SkimpyClaw if you live in Telegram or Discord, want to read and own every li
21
21
  - **Chat interface** — Telegram and Discord bots with persistent conversation history
22
22
  - **Tool-enabled agent** — file read/write, bash, browser (Playwright), MCP tools via mcporter
23
23
  - **Multi-modal support** — voice messages (STT/TTS), image analysis
24
- - **Subagents** — model autonomously spawns coding/research subagents with retry + concurrency control
24
+ - **Coding agents** — delegate to Claude Code / Codex CLI; `code_with_team` for parallel multi-agent work
25
25
  - **Code agents** — delegate coding tasks to Claude Code, Codex, or Kimi CLI with `code_with_agent` and `code_with_team`
26
26
  - **Cron scheduler** — run agent prompts or shell scripts on a schedule
27
27
  - **Web dashboard** — Preact/Vite SPA with status, cron, audit log, memory, templates, config editor, skills, approvals
@@ -49,7 +49,7 @@ flowchart LR
49
49
 
50
50
  subgraph Core
51
51
  agent["Agent Runtime"]
52
- subagents["Subagent Pool"]
52
+ codeAgents["Coding Agents"]
53
53
  cron["Cron"]
54
54
  hb["Heartbeat"]
55
55
  audit["Audit Log"]
@@ -61,7 +61,7 @@ flowchart LR
61
61
  browser --> dash --> api --> agent
62
62
  cron --> agent
63
63
  hb --> agent
64
- agent --> subagents
64
+ agent --> codeAgents
65
65
  agent --> audit
66
66
  agent --> skills
67
67
  agent --> approvals
@@ -85,6 +85,16 @@ Onboarding validates your Telegram token, provider auth, and creates:
85
85
  - `~/.skimpyclaw/config.json`
86
86
  - `~/.skimpyclaw/agents/main/*.md` (from templates)
87
87
 
88
+ **Sandbox setup (optional — isolates Bash commands in a container):**
89
+
90
+ ```bash
91
+ skimpyclaw sandbox init # detect runtime, build image, update config
92
+ skimpyclaw sandbox doctor # verify everything works
93
+ skimpyclaw restart # pick up sandbox config
94
+ ```
95
+
96
+ Requires Apple Containers (macOS 26+) or Docker. See [docs/guide/sandbox.md](docs/guide/sandbox.md) for options.
97
+
88
98
  **Stop/Restart daemon (macOS):**
89
99
 
90
100
  ```bash
@@ -136,8 +146,7 @@ src/
136
146
  providers/ # Provider routing + provider implementations (anthropic/openai/codex)
137
147
  code-agents/ # Background coding-agent runtime (executor/parser/orchestrator/registry)
138
148
  channels/ # Channel adapters/utilities (telegram/discord)
139
- subagent.ts # Background task dispatch: retry, concurrency, disk registry
140
- file-lock.ts # In-memory file lock for concurrent subagent writes
149
+ file-lock.ts # In-memory file lock for concurrent writes
141
150
  audit.ts # Append-only audit log (trace/event model, JSONL storage)
142
151
  cron.ts # Job scheduling + execution + cron logging
143
152
  heartbeat.ts # Periodic health/attention checks
@@ -196,7 +205,6 @@ dist/ # Compiled output + built dashboard assets
196
205
  | [docs/architecture.md](docs/architecture.md) | Component diagram, runtime flow, startup sequence, source layout |
197
206
  | [docs/configuration.md](docs/configuration.md) | Full config reference, all sections with examples |
198
207
  | [docs/tools.md](docs/tools.md) | Built-in tools, browser tool, MCP integration, code agents |
199
- | [docs/subagents.md](docs/subagents.md) | Subagent types, concurrency, file locking, flow diagram |
200
208
  | [docs/dashboard.md](docs/dashboard.md) | Web dashboard, all HTTP endpoints + API routes |
201
209
  | [docs/coding-agents.md](docs/coding-agents.md) | Coding-agent execution model and CLI backends |
202
210
  | [docs/cli.md](docs/cli.md) | CLI commands, service management |
@@ -116,6 +116,7 @@ vi.mock('../config.js', () => ({
116
116
  getSessionsDir: () => SESSIONS_DIR,
117
117
  getLogsDir: () => LOGS_DIR,
118
118
  getAgentDir: (agentId) => join(TEST_ROOT, 'agents', agentId),
119
+ isValidAgentId: (agentId) => /^[a-zA-Z0-9_-]+$/.test(agentId),
119
120
  listMemoryFiles: (agentId) => {
120
121
  const { existsSync, readdirSync, statSync } = require('fs');
121
122
  const memDir = join(TEST_ROOT, 'agents', agentId, 'memory', 'logs');
@@ -17,7 +17,7 @@ const discordMock = vi.hoisted(() => ({
17
17
  sendDiscordProactiveVoice: vi.fn(async () => { }),
18
18
  getDiscordDefaultTarget: vi.fn(() => '999'),
19
19
  }));
20
- vi.mock('../telegram.js', () => telegramMock);
20
+ vi.mock('../channels/telegram/index.js', () => telegramMock);
21
21
  vi.mock('../discord.js', () => discordMock);
22
22
  import { getActiveChannelId, initActiveChannel, sendActiveChannelProactiveMessage, } from '../channels.js';
23
23
  function makeConfig(overrides = {}) {
@@ -33,7 +33,7 @@ vi.mock('../audit.js', () => ({
33
33
  addEvent: vi.fn(),
34
34
  endTrace: vi.fn(async () => { }),
35
35
  }));
36
- import { computeWaves, decomposeTask, synthesizeResults } from '../code-agents/orchestrator.js';
36
+ import { computeWaves, decomposeTask, synthesizeResults, gatherCodebaseContext } from '../code-agents/orchestrator.js';
37
37
  import { runAgentTurn } from '../agent.js';
38
38
  import { getCodeAgent, writeCodeAgentTask } from '../code-agents/registry.js';
39
39
  const mockRunAgentTurn = vi.mocked(runAgentTurn);
@@ -98,19 +98,20 @@ describe('decomposeTask', () => {
98
98
  });
99
99
  });
100
100
  describe('synthesizeResults', () => {
101
- it('truncates child output to 300 chars', async () => {
101
+ it('uses structured context (summary capped at 500 chars, not raw 1000)', async () => {
102
102
  const longOutput = 'x'.repeat(1000);
103
103
  mockRunAgentTurn.mockResolvedValueOnce('Synthesis complete');
104
104
  const config = { providers: {} };
105
105
  await synthesizeResults('original task', [
106
106
  { subtask: 'sub1', status: 'completed', output: longOutput },
107
107
  ], config);
108
- // The prompt sent to runAgentTurn should contain truncated output
109
108
  const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
110
109
  const prompt = call[1];
111
- // Output should be sliced to 300, not 1000
112
- expect(prompt).toContain('Output: ' + 'x'.repeat(300));
113
- expect(prompt).not.toContain('x'.repeat(301));
110
+ // Summary should be capped at 500 chars, not the full 1000
111
+ expect(prompt).toContain('x'.repeat(500));
112
+ expect(prompt).not.toContain('x'.repeat(501));
113
+ // Should use the structured format (Summary: prefix)
114
+ expect(prompt).toContain('Summary:');
114
115
  });
115
116
  });
116
117
  describe('orchestrator - cancellation after wave spawn', () => {
@@ -134,7 +135,7 @@ describe('orchestrator - spawn failure marks child as failed', () => {
134
135
  expect(src).toContain("const child = getCodeAgent(childId);");
135
136
  expect(src).toContain("if (child && child.status === 'running')");
136
137
  expect(src).toContain("status: 'failed'");
137
- expect(src).toContain("error: err instanceof Error ? err.message : String(err)");
138
+ expect(src).toContain("error: toErrorMessage(err)");
138
139
  });
139
140
  });
140
141
  describe('orchestrator - skip redundant parent writes', () => {
@@ -147,3 +148,69 @@ describe('orchestrator - skip redundant parent writes', () => {
147
148
  expect(src).toContain('lastLiveOutput = newLiveOutput');
148
149
  });
149
150
  });
151
+ describe('gatherCodebaseContext', () => {
152
+ it('returns a non-empty string for the project root', () => {
153
+ // Use this project's own root as the workdir
154
+ const { resolve } = require('path');
155
+ const projectRoot = resolve(__dirname, '..', '..');
156
+ const context = gatherCodebaseContext(projectRoot);
157
+ // Should contain at least scripts or source files
158
+ expect(context.length).toBeGreaterThan(0);
159
+ expect(context.length).toBeLessThanOrEqual(2000);
160
+ });
161
+ it('returns empty string for nonexistent directory', () => {
162
+ const context = gatherCodebaseContext('/tmp/nonexistent-dir-12345');
163
+ // Should not throw, just return empty or minimal context
164
+ expect(typeof context).toBe('string');
165
+ });
166
+ });
167
+ describe('decomposeTask with workdir', () => {
168
+ it('passes workdir context to the decomposition prompt', async () => {
169
+ mockRunAgentTurn.mockResolvedValueOnce('{"subtasks": [{"description": "sub1", "dependsOn": []}, {"description": "sub2", "dependsOn": []}]}');
170
+ const config = { providers: {} };
171
+ const result = await decomposeTask('test task', 2, config, '/tmp');
172
+ expect(result).toHaveLength(2);
173
+ // Check the prompt sent to the model includes the richer decomposition instructions
174
+ const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
175
+ const prompt = call[1];
176
+ expect(prompt).toContain('task decomposition expert');
177
+ expect(prompt).toContain('git worktree');
178
+ });
179
+ });
180
+ describe('synthesizeResults with workdir', () => {
181
+ it('includes git diff info when workdir is a git repo', async () => {
182
+ mockRunAgentTurn.mockResolvedValueOnce('Synthesis complete');
183
+ const { resolve } = require('path');
184
+ const projectRoot = resolve(__dirname, '..', '..');
185
+ const config = { providers: {} };
186
+ await synthesizeResults('original task', [
187
+ { subtask: 'sub1', status: 'completed', output: 'done' },
188
+ ], config, projectRoot);
189
+ const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
190
+ const prompt = call[1];
191
+ // Should include the success/failure counts
192
+ expect(prompt).toContain('1 succeeded, 0 failed');
193
+ });
194
+ });
195
+ describe('orchestrator - per-wave validation and retry', () => {
196
+ it('source includes per-wave validation logic', async () => {
197
+ const { readFileSync } = await vi.importActual('fs');
198
+ const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
199
+ // Verify per-wave validation exists
200
+ expect(src).toContain('Per-wave validation: run build after each wave');
201
+ expect(src).toContain('wave_validation');
202
+ // Verify retry logic
203
+ expect(src).toContain('wave_retry_complete');
204
+ expect(src).toContain('retryPrompt');
205
+ });
206
+ });
207
+ describe('orchestrator - timeout budgeting', () => {
208
+ it('computes perChildTimeout based on wave count not team size', async () => {
209
+ const { readFileSync } = await vi.importActual('fs');
210
+ const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
211
+ // Verify budget-aware timeout
212
+ expect(src).toContain('overheadMinutes');
213
+ expect(src).toContain('availableForChildren');
214
+ expect(src).toContain('Math.floor(availableForChildren / waves.length)');
215
+ });
216
+ });
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,88 @@
1
+ import { afterEach, describe, expect, it, vi } from 'vitest';
2
+ const PRECHECK_ERROR = 'Error: No supported coding CLI found on PATH. Install Codex CLI (`codex`), Claude Code CLI (`claude` or `claude-code`), or Kimi CLI (`kimi`).';
3
+ const toolConfig = {
4
+ enabled: true,
5
+ allowedPaths: [process.cwd()],
6
+ maxIterations: 5,
7
+ bashTimeout: 5000,
8
+ };
9
+ async function loadSubject(preflightError) {
10
+ vi.resetModules();
11
+ const runCodeAgentBackground = vi.fn().mockResolvedValue(undefined);
12
+ const runTeamOrchestrator = vi.fn().mockResolvedValue(undefined);
13
+ vi.doMock('../code-agents/utils.js', async () => {
14
+ const actual = await vi.importActual('../code-agents/utils.js');
15
+ return {
16
+ ...actual,
17
+ getCodingCliPreflightError: () => preflightError,
18
+ };
19
+ });
20
+ vi.doMock('../code-agents/executor.js', () => ({
21
+ runCodeAgentBackground,
22
+ runValidation: vi.fn(),
23
+ buildValidationCommand: vi.fn(() => 'pnpm build && pnpm test'),
24
+ }));
25
+ vi.doMock('../code-agents/orchestrator.js', () => ({
26
+ runTeamOrchestrator,
27
+ computeWaves: vi.fn(),
28
+ decomposeTask: vi.fn(),
29
+ synthesizeResults: vi.fn(),
30
+ gatherCodebaseContext: vi.fn(),
31
+ }));
32
+ vi.doMock('../code-agents/registry.js', () => ({
33
+ getNextCodeAgentId: vi.fn(() => 'ca_test_1'),
34
+ storeCodeAgentTask: vi.fn(),
35
+ writeCodeAgentTask: vi.fn(),
36
+ getActiveCodeAgents: vi.fn(() => []),
37
+ getRecentCodeAgents: vi.fn(() => []),
38
+ getAllCodeAgents: vi.fn(() => []),
39
+ getCodeAgent: vi.fn(() => null),
40
+ cancelCodeAgent: vi.fn(),
41
+ restoreCodeAgentTasks: vi.fn(),
42
+ getCodeAgentsDir: vi.fn(() => process.cwd()),
43
+ }));
44
+ const subject = await import('../code-agents/index.js');
45
+ return { ...subject, runCodeAgentBackground, runTeamOrchestrator };
46
+ }
47
+ afterEach(() => {
48
+ vi.doUnmock('../code-agents/utils.js');
49
+ vi.doUnmock('../code-agents/executor.js');
50
+ vi.doUnmock('../code-agents/orchestrator.js');
51
+ vi.doUnmock('../code-agents/registry.js');
52
+ vi.clearAllMocks();
53
+ vi.resetModules();
54
+ });
55
+ describe('coding CLI preflight guard', () => {
56
+ it('fails code_with_agent before spawning when no supported CLI is available', async () => {
57
+ const { executeCodeWithAgent, runCodeAgentBackground } = await loadSubject(PRECHECK_ERROR);
58
+ const result = await executeCodeWithAgent({ task: 'Fix bug', workdir: process.cwd() }, toolConfig, {
59
+ fullConfig: { codeAgents: { maxConcurrent: 99 } },
60
+ });
61
+ expect(result).toBe(PRECHECK_ERROR);
62
+ expect(runCodeAgentBackground).not.toHaveBeenCalled();
63
+ });
64
+ it('fails code_with_team before spawning when no supported CLI is available', async () => {
65
+ const { executeCodeWithTeam, runTeamOrchestrator } = await loadSubject(PRECHECK_ERROR);
66
+ const result = await executeCodeWithTeam({ task: 'Refactor auth', workdir: process.cwd() }, toolConfig, {
67
+ fullConfig: { codeAgents: { maxConcurrent: 99 } },
68
+ });
69
+ expect(result).toBe(PRECHECK_ERROR);
70
+ expect(runTeamOrchestrator).not.toHaveBeenCalled();
71
+ });
72
+ it('allows code_with_agent when at least one supported CLI exists', async () => {
73
+ const { executeCodeWithAgent, runCodeAgentBackground } = await loadSubject(null);
74
+ const result = await executeCodeWithAgent({ task: 'Fix bug', workdir: process.cwd() }, toolConfig, {
75
+ fullConfig: { codeAgents: { maxConcurrent: 99 } },
76
+ });
77
+ expect(result).toContain('Started coding agent');
78
+ expect(runCodeAgentBackground).toHaveBeenCalledTimes(1);
79
+ });
80
+ it('allows code_with_team when at least one supported CLI exists', async () => {
81
+ const { executeCodeWithTeam, runTeamOrchestrator } = await loadSubject(null);
82
+ const result = await executeCodeWithTeam({ task: 'Refactor auth', workdir: process.cwd() }, toolConfig, {
83
+ fullConfig: { codeAgents: { maxConcurrent: 99 } },
84
+ });
85
+ expect(result).toContain('Started coding team');
86
+ expect(runTeamOrchestrator).toHaveBeenCalledTimes(1);
87
+ });
88
+ });
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,163 @@
1
+ import { describe, expect, it, vi, beforeEach } from 'vitest';
2
+ // Mock heavy dependencies before importing
3
+ vi.mock('../code-agents/registry.js', () => {
4
+ const tasks = new Map();
5
+ return {
6
+ getCodeAgentsDir: () => '/tmp/test-code-agents',
7
+ ensureCodeAgentsDir: vi.fn(),
8
+ writeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
9
+ storeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
10
+ getCodeAgent: vi.fn((id) => tasks.get(id) || null),
11
+ setCodeAgentCanceller: vi.fn(),
12
+ deleteCodeAgentCanceller: vi.fn(),
13
+ getNextCodeAgentId: vi.fn(() => 'ca-test-1'),
14
+ getActiveCodeAgents: vi.fn(() => []),
15
+ getRecentCodeAgents: vi.fn(() => []),
16
+ _tasks: tasks,
17
+ };
18
+ });
19
+ vi.mock('../code-agents/parser.js', () => ({
20
+ parseStreamJsonForLive: vi.fn((s) => s.slice(0, 200)),
21
+ parseClaudeOutput: vi.fn((s) => ({ text: s, totalCost: 0, inputTokens: 0, outputTokens: 0 })),
22
+ parseCodexOutput: vi.fn((s) => s),
23
+ }));
24
+ vi.mock('../code-agents/utils.js', () => ({
25
+ buildCodeAgentArgs: vi.fn(() => ({ cmd: 'echo', args: ['hello'] })),
26
+ notifyCodeAgentResult: vi.fn(async () => { }),
27
+ resolveModelAlias: vi.fn((m) => m || 'claude'),
28
+ resolveSelectedCodeAgent: vi.fn((_req, def) => def || 'claude'),
29
+ resolveWorkdir: vi.fn((_raw, _proj, root) => root),
30
+ }));
31
+ vi.mock('../audit.js', () => ({
32
+ startTrace: vi.fn(() => 'trace-1'),
33
+ addEvent: vi.fn(),
34
+ endTrace: vi.fn(async () => { }),
35
+ }));
36
+ vi.mock('../usage.js', () => ({
37
+ buildUsageRecord: vi.fn(() => ({})),
38
+ recordUsage: vi.fn(),
39
+ }));
40
+ vi.mock('fs', async () => {
41
+ const actual = await vi.importActual('fs');
42
+ return {
43
+ ...actual,
44
+ createWriteStream: vi.fn(() => ({
45
+ write: vi.fn(),
46
+ end: vi.fn(),
47
+ })),
48
+ existsSync: vi.fn(() => false),
49
+ };
50
+ });
51
+ vi.mock('../sandbox/index.js', () => ({
52
+ ensureContainer: vi.fn(async () => 'sandbox-code-test'),
53
+ SANDBOX_DEFAULTS: { image: 'skimpyclaw-sandbox:latest', runtime: 'docker' },
54
+ getRuntime: vi.fn(() => 'docker'),
55
+ execInContainer: vi.fn(async () => ({ exitCode: 0, stdout: 'PASS', stderr: '' })),
56
+ }));
57
+ describe('buildContainerEnvArgs', () => {
58
+ it('returns -e flags for allowlisted env vars that exist', async () => {
59
+ const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
60
+ const env = {
61
+ ANTHROPIC_API_KEY: 'sk-test-123',
62
+ GH_TOKEN: 'ghp_abc',
63
+ HOME: '/home/sandbox',
64
+ SOME_OTHER_VAR: 'should-not-appear',
65
+ PATH: '/usr/bin:/usr/local/bin',
66
+ };
67
+ const result = buildContainerEnvArgs(env);
68
+ expect(result).toContain('-e');
69
+ expect(result).toContain('ANTHROPIC_API_KEY=sk-test-123');
70
+ expect(result).toContain('GH_TOKEN=ghp_abc');
71
+ expect(result).toContain('HOME=/home/sandbox');
72
+ expect(result).toContain('PATH=/usr/bin:/usr/local/bin');
73
+ // Non-allowlisted vars should not appear
74
+ expect(result.join(' ')).not.toContain('SOME_OTHER_VAR');
75
+ });
76
+ it('excludes empty and undefined values', async () => {
77
+ const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
78
+ const env = {
79
+ ANTHROPIC_API_KEY: '',
80
+ GH_TOKEN: undefined,
81
+ GITHUB_TOKEN: 'ghp_123',
82
+ };
83
+ const result = buildContainerEnvArgs(env);
84
+ expect(result).toContain('GITHUB_TOKEN=ghp_123');
85
+ // No git signing key passed, so no GIT_CONFIG overrides
86
+ expect(result.join(' ')).not.toContain('GIT_CONFIG');
87
+ });
88
+ it('includes git signing config overrides when gitSigningKey is provided', async () => {
89
+ const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
90
+ const result = buildContainerEnvArgs({ ANTHROPIC_API_KEY: 'sk-test' }, { gitSigningKey: '/home/sandbox/.ssh/git_signing_ed25519' });
91
+ expect(result).toContain('GIT_CONFIG_COUNT=1');
92
+ expect(result).toContain('GIT_CONFIG_KEY_0=user.signingkey');
93
+ expect(result).toContain('GIT_CONFIG_VALUE_0=/home/sandbox/.ssh/git_signing_ed25519');
94
+ });
95
+ it('omits git signing config when gitSigningKey is not provided', async () => {
96
+ const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
97
+ const result = buildContainerEnvArgs({ FOO: 'bar', BAZ: 'qux' });
98
+ expect(result).not.toContain('FOO=bar');
99
+ expect(result.join(' ')).not.toContain('GIT_CONFIG');
100
+ expect(result).toEqual([]);
101
+ });
102
+ });
103
+ describe('runValidation with sandbox', () => {
104
+ beforeEach(() => {
105
+ vi.clearAllMocks();
106
+ });
107
+ it('uses execInContainer when sandboxContainer is provided', async () => {
108
+ const { execInContainer } = await import('../sandbox/index.js');
109
+ const { runValidation } = await import('../code-agents/executor.js');
110
+ const result = await runValidation('/workspace', 'sandbox-test');
111
+ expect(execInContainer).toHaveBeenCalledWith('sandbox-test', expect.any(Array), expect.objectContaining({ timeout: expect.any(Number) }));
112
+ expect(result.passed).toBe(true);
113
+ expect(result.output).toBe('PASS');
114
+ });
115
+ it('returns failure when execInContainer returns non-zero exit', async () => {
116
+ const { execInContainer } = await import('../sandbox/index.js');
117
+ vi.mocked(execInContainer).mockResolvedValueOnce({
118
+ exitCode: 1,
119
+ stdout: 'Error: test failed',
120
+ stderr: 'FAIL',
121
+ });
122
+ const { runValidation } = await import('../code-agents/executor.js');
123
+ const result = await runValidation('/workspace', 'sandbox-fail');
124
+ expect(result.passed).toBe(false);
125
+ expect(result.output).toContain('VALIDATION FAILED');
126
+ });
127
+ it('returns failure when execInContainer throws', async () => {
128
+ const { execInContainer } = await import('../sandbox/index.js');
129
+ vi.mocked(execInContainer).mockRejectedValueOnce(new Error('container not found'));
130
+ const { runValidation } = await import('../code-agents/executor.js');
131
+ const result = await runValidation('/workspace', 'sandbox-crash');
132
+ expect(result.passed).toBe(false);
133
+ expect(result.output).toContain('sandbox exec error');
134
+ expect(result.output).toContain('container not found');
135
+ });
136
+ });
137
+ describe('sandbox config resolution in executeCodeWithAgent', () => {
138
+ it('resolves sandboxConfig from context.sandboxConfig', async () => {
139
+ // Test the resolution logic directly — the config should be picked up
140
+ // from context.sandboxConfig or context.fullConfig.sandbox
141
+ const contextWithSandbox = {
142
+ sandboxConfig: { enabled: true, image: 'test-image' },
143
+ fullConfig: { sandbox: { enabled: false } },
144
+ };
145
+ // sandboxConfig takes precedence over fullConfig.sandbox
146
+ const resolved = contextWithSandbox.sandboxConfig ?? contextWithSandbox.fullConfig?.sandbox;
147
+ expect(resolved).toEqual({ enabled: true, image: 'test-image' });
148
+ });
149
+ it('falls back to fullConfig.sandbox when context.sandboxConfig is absent', () => {
150
+ const contextFallback = {
151
+ fullConfig: { sandbox: { enabled: true, image: 'fallback-image' } },
152
+ };
153
+ const resolved = contextFallback.sandboxConfig ?? contextFallback.fullConfig?.sandbox;
154
+ expect(resolved).toEqual({ enabled: true, image: 'fallback-image' });
155
+ });
156
+ it('resolves to undefined when no sandbox config present', () => {
157
+ const contextNone = {
158
+ fullConfig: {},
159
+ };
160
+ const resolved = contextNone.sandboxConfig ?? contextNone.fullConfig?.sandbox;
161
+ expect(resolved).toBeUndefined();
162
+ });
163
+ });
@@ -1,5 +1,5 @@
1
1
  import { describe, expect, it } from 'vitest';
2
- import { normalizeCodeAgent, resolveSelectedCodeAgent } from '../code-agents/utils.js';
2
+ import { normalizeCodeAgent, resolveSelectedCodeAgent, getAvailableCodingCliTools, getCodingCliPreflightError, } from '../code-agents/utils.js';
3
3
  describe('normalizeCodeAgent', () => {
4
4
  it('accepts strict ids', () => {
5
5
  expect(normalizeCodeAgent('claude')).toBe('claude');
@@ -39,3 +39,14 @@ describe('resolveSelectedCodeAgent', () => {
39
39
  expect(resolveSelectedCodeAgent('claude', 'claude', 'gpt-4.1')).toBe('claude');
40
40
  });
41
41
  });
42
+ describe('coding CLI preflight', () => {
43
+ it('returns a clear error when no supported CLI is found on PATH', () => {
44
+ expect(getAvailableCodingCliTools(() => false)).toEqual([]);
45
+ expect(getCodingCliPreflightError(() => false)).toBe('Error: No supported coding CLI found on PATH. Install Codex CLI (`codex`), Claude Code CLI (`claude` or `claude-code`), or Kimi CLI (`kimi`).');
46
+ });
47
+ it('accepts claude-code binary as claude support', () => {
48
+ const hasCommand = (name) => name === 'claude-code';
49
+ expect(getAvailableCodingCliTools(hasCommand)).toEqual(['claude']);
50
+ expect(getCodingCliPreflightError(hasCommand)).toBeNull();
51
+ });
52
+ });
@@ -0,0 +1 @@
1
+ export {};