skimpyclaw 0.3.5 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -6
- package/dist/__tests__/api.test.js +1 -19
- package/dist/__tests__/channels.test.js +1 -1
- package/dist/__tests__/code-agents-orchestrator.test.js +74 -7
- package/dist/__tests__/code-agents-sandbox.test.d.ts +1 -0
- package/dist/__tests__/code-agents-sandbox.test.js +163 -0
- package/dist/__tests__/context-manager.test.d.ts +1 -0
- package/dist/__tests__/context-manager.test.js +236 -0
- package/dist/__tests__/package-manager-detection.test.js +5 -5
- package/dist/__tests__/setup.test.js +10 -7
- package/dist/__tests__/skills.test.js +2 -2
- package/dist/__tests__/structured-context.test.d.ts +1 -0
- package/dist/__tests__/structured-context.test.js +100 -0
- package/dist/__tests__/tools.test.js +65 -3
- package/dist/agent.js +4 -5
- package/dist/api.js +10 -85
- package/dist/audit.js +5 -51
- package/dist/channels/telegram/handlers.js +2 -60
- package/dist/channels/telegram/index.js +0 -7
- package/dist/channels.js +1 -1
- package/dist/cli.js +186 -17
- package/dist/code-agents/executor.d.ts +9 -4
- package/dist/code-agents/executor.js +187 -13
- package/dist/code-agents/index.d.ts +1 -1
- package/dist/code-agents/index.js +23 -21
- package/dist/code-agents/orchestrator.d.ts +8 -2
- package/dist/code-agents/orchestrator.js +297 -27
- package/dist/code-agents/structured-context.d.ts +7 -0
- package/dist/code-agents/structured-context.js +54 -0
- package/dist/code-agents/types.d.ts +2 -0
- package/dist/code-agents/utils.js +12 -2
- package/dist/code-agents/worktree.d.ts +40 -0
- package/dist/code-agents/worktree.js +215 -0
- package/dist/config.d.ts +1 -0
- package/dist/config.js +5 -3
- package/dist/cron.js +18 -4
- package/dist/dashboard/assets/index-BoTHPby4.js +65 -0
- package/dist/dashboard/assets/{index-EAg6lqF5.css → index-D4mufvBg.css} +1 -1
- package/dist/dashboard/index.html +2 -2
- package/dist/discord.js +4 -40
- package/dist/exec-approval.js +1 -1
- package/dist/file-lock.js +1 -1
- package/dist/gateway.js +3 -10
- package/dist/providers/anthropic.js +9 -5
- package/dist/providers/codex.js +10 -6
- package/dist/providers/context-manager.d.ts +22 -0
- package/dist/providers/context-manager.js +100 -0
- package/dist/providers/openai.js +9 -5
- package/dist/providers/types.d.ts +1 -0
- package/dist/security.js +9 -0
- package/dist/setup.d.ts +2 -1
- package/dist/setup.js +156 -34
- package/dist/skills.js +9 -2
- package/dist/subagent.js +33 -2
- package/dist/tools/bash-tool.js +8 -0
- package/dist/tools/browser-tool.js +3 -2
- package/dist/tools/definitions.d.ts +0 -27
- package/dist/tools/definitions.js +0 -18
- package/dist/tools/execute-context.d.ts +4 -4
- package/dist/tools/file-tools.d.ts +1 -1
- package/dist/tools/file-tools.js +1 -1
- package/dist/tools.d.ts +5 -5
- package/dist/tools.js +87 -98
- package/dist/types.d.ts +14 -22
- package/dist/usage.d.ts +1 -0
- package/dist/usage.js +30 -46
- package/dist/utils.d.ts +18 -0
- package/dist/utils.js +71 -0
- package/dist/voice.js +9 -7
- package/package.json +1 -1
- package/dist/dashboard/assets/index-UVAjSXCG.js +0 -107
package/README.md
CHANGED
|
@@ -21,7 +21,7 @@ Use SkimpyClaw if you live in Telegram or Discord, want to read and own every li
|
|
|
21
21
|
- **Chat interface** — Telegram and Discord bots with persistent conversation history
|
|
22
22
|
- **Tool-enabled agent** — file read/write, bash, browser (Playwright), MCP tools via mcporter
|
|
23
23
|
- **Multi-modal support** — voice messages (STT/TTS), image analysis
|
|
24
|
-
- **
|
|
24
|
+
- **Coding agents** — delegate to Claude Code / Codex CLI; `code_with_team` for parallel multi-agent work
|
|
25
25
|
- **Code agents** — delegate coding tasks to Claude Code, Codex, or Kimi CLI with `code_with_agent` and `code_with_team`
|
|
26
26
|
- **Cron scheduler** — run agent prompts or shell scripts on a schedule
|
|
27
27
|
- **Web dashboard** — Preact/Vite SPA with status, cron, audit log, memory, templates, config editor, skills, approvals
|
|
@@ -49,7 +49,7 @@ flowchart LR
|
|
|
49
49
|
|
|
50
50
|
subgraph Core
|
|
51
51
|
agent["Agent Runtime"]
|
|
52
|
-
|
|
52
|
+
codeAgents["Coding Agents"]
|
|
53
53
|
cron["Cron"]
|
|
54
54
|
hb["Heartbeat"]
|
|
55
55
|
audit["Audit Log"]
|
|
@@ -61,7 +61,7 @@ flowchart LR
|
|
|
61
61
|
browser --> dash --> api --> agent
|
|
62
62
|
cron --> agent
|
|
63
63
|
hb --> agent
|
|
64
|
-
agent -->
|
|
64
|
+
agent --> codeAgents
|
|
65
65
|
agent --> audit
|
|
66
66
|
agent --> skills
|
|
67
67
|
agent --> approvals
|
|
@@ -85,6 +85,16 @@ Onboarding validates your Telegram token, provider auth, and creates:
|
|
|
85
85
|
- `~/.skimpyclaw/config.json`
|
|
86
86
|
- `~/.skimpyclaw/agents/main/*.md` (from templates)
|
|
87
87
|
|
|
88
|
+
**Sandbox setup (optional — isolates Bash commands in a container):**
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
skimpyclaw sandbox init # detect runtime, build image, update config
|
|
92
|
+
skimpyclaw sandbox doctor # verify everything works
|
|
93
|
+
skimpyclaw restart # pick up sandbox config
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Requires Apple Containers (macOS 26+) or Docker. See [docs/guide/sandbox.md](docs/guide/sandbox.md) for options.
|
|
97
|
+
|
|
88
98
|
**Stop/Restart daemon (macOS):**
|
|
89
99
|
|
|
90
100
|
```bash
|
|
@@ -136,8 +146,7 @@ src/
|
|
|
136
146
|
providers/ # Provider routing + provider implementations (anthropic/openai/codex)
|
|
137
147
|
code-agents/ # Background coding-agent runtime (executor/parser/orchestrator/registry)
|
|
138
148
|
channels/ # Channel adapters/utilities (telegram/discord)
|
|
139
|
-
|
|
140
|
-
file-lock.ts # In-memory file lock for concurrent subagent writes
|
|
149
|
+
file-lock.ts # In-memory file lock for concurrent writes
|
|
141
150
|
audit.ts # Append-only audit log (trace/event model, JSONL storage)
|
|
142
151
|
cron.ts # Job scheduling + execution + cron logging
|
|
143
152
|
heartbeat.ts # Periodic health/attention checks
|
|
@@ -196,7 +205,6 @@ dist/ # Compiled output + built dashboard assets
|
|
|
196
205
|
| [docs/architecture.md](docs/architecture.md) | Component diagram, runtime flow, startup sequence, source layout |
|
|
197
206
|
| [docs/configuration.md](docs/configuration.md) | Full config reference, all sections with examples |
|
|
198
207
|
| [docs/tools.md](docs/tools.md) | Built-in tools, browser tool, MCP integration, code agents |
|
|
199
|
-
| [docs/subagents.md](docs/subagents.md) | Subagent types, concurrency, file locking, flow diagram |
|
|
200
208
|
| [docs/dashboard.md](docs/dashboard.md) | Web dashboard, all HTTP endpoints + API routes |
|
|
201
209
|
| [docs/coding-agents.md](docs/coding-agents.md) | Coding-agent execution model and CLI backends |
|
|
202
210
|
| [docs/cli.md](docs/cli.md) | CLI commands, service management |
|
|
@@ -116,6 +116,7 @@ vi.mock('../config.js', () => ({
|
|
|
116
116
|
getSessionsDir: () => SESSIONS_DIR,
|
|
117
117
|
getLogsDir: () => LOGS_DIR,
|
|
118
118
|
getAgentDir: (agentId) => join(TEST_ROOT, 'agents', agentId),
|
|
119
|
+
isValidAgentId: (agentId) => /^[a-zA-Z0-9_-]+$/.test(agentId),
|
|
119
120
|
listMemoryFiles: (agentId) => {
|
|
120
121
|
const { existsSync, readdirSync, statSync } = require('fs');
|
|
121
122
|
const memDir = join(TEST_ROOT, 'agents', agentId, 'memory', 'logs');
|
|
@@ -1225,22 +1226,3 @@ describe('Skills endpoints', () => {
|
|
|
1225
1226
|
expect(res.json()).toHaveProperty('deleted', true);
|
|
1226
1227
|
});
|
|
1227
1228
|
});
|
|
1228
|
-
describe('Office endpoints', () => {
|
|
1229
|
-
it('GET /api/dashboard/office-status returns office status', async () => {
|
|
1230
|
-
const res = await inject({ method: 'GET', url: '/api/dashboard/office-status' });
|
|
1231
|
-
expect(res.statusCode).toBe(200);
|
|
1232
|
-
const json = res.json();
|
|
1233
|
-
expect(json).toHaveProperty('state');
|
|
1234
|
-
expect(json).toHaveProperty('currentTask');
|
|
1235
|
-
expect(json).toHaveProperty('updatedAt');
|
|
1236
|
-
expect(['working', 'thinking', 'waiting', 'offline']).toContain(json.state);
|
|
1237
|
-
});
|
|
1238
|
-
it('GET /api/dashboard/office-scene.png returns image or 404', async () => {
|
|
1239
|
-
const res = await inject({ method: 'GET', url: '/api/dashboard/office-scene.png' });
|
|
1240
|
-
// In test env the image file may not exist, so accept 200 or 404
|
|
1241
|
-
expect([200, 404]).toContain(res.statusCode);
|
|
1242
|
-
if (res.statusCode === 200) {
|
|
1243
|
-
expect(res.headers['content-type']).toBe('image/png');
|
|
1244
|
-
}
|
|
1245
|
-
});
|
|
1246
|
-
});
|
|
@@ -17,7 +17,7 @@ const discordMock = vi.hoisted(() => ({
|
|
|
17
17
|
sendDiscordProactiveVoice: vi.fn(async () => { }),
|
|
18
18
|
getDiscordDefaultTarget: vi.fn(() => '999'),
|
|
19
19
|
}));
|
|
20
|
-
vi.mock('../telegram.js', () => telegramMock);
|
|
20
|
+
vi.mock('../channels/telegram/index.js', () => telegramMock);
|
|
21
21
|
vi.mock('../discord.js', () => discordMock);
|
|
22
22
|
import { getActiveChannelId, initActiveChannel, sendActiveChannelProactiveMessage, } from '../channels.js';
|
|
23
23
|
function makeConfig(overrides = {}) {
|
|
@@ -33,7 +33,7 @@ vi.mock('../audit.js', () => ({
|
|
|
33
33
|
addEvent: vi.fn(),
|
|
34
34
|
endTrace: vi.fn(async () => { }),
|
|
35
35
|
}));
|
|
36
|
-
import { computeWaves, decomposeTask, synthesizeResults } from '../code-agents/orchestrator.js';
|
|
36
|
+
import { computeWaves, decomposeTask, synthesizeResults, gatherCodebaseContext } from '../code-agents/orchestrator.js';
|
|
37
37
|
import { runAgentTurn } from '../agent.js';
|
|
38
38
|
import { getCodeAgent, writeCodeAgentTask } from '../code-agents/registry.js';
|
|
39
39
|
const mockRunAgentTurn = vi.mocked(runAgentTurn);
|
|
@@ -98,19 +98,20 @@ describe('decomposeTask', () => {
|
|
|
98
98
|
});
|
|
99
99
|
});
|
|
100
100
|
describe('synthesizeResults', () => {
|
|
101
|
-
it('
|
|
101
|
+
it('uses structured context (summary capped at 500 chars, not raw 1000)', async () => {
|
|
102
102
|
const longOutput = 'x'.repeat(1000);
|
|
103
103
|
mockRunAgentTurn.mockResolvedValueOnce('Synthesis complete');
|
|
104
104
|
const config = { providers: {} };
|
|
105
105
|
await synthesizeResults('original task', [
|
|
106
106
|
{ subtask: 'sub1', status: 'completed', output: longOutput },
|
|
107
107
|
], config);
|
|
108
|
-
// The prompt sent to runAgentTurn should contain truncated output
|
|
109
108
|
const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
|
|
110
109
|
const prompt = call[1];
|
|
111
|
-
//
|
|
112
|
-
expect(prompt).toContain('
|
|
113
|
-
expect(prompt).not.toContain('x'.repeat(
|
|
110
|
+
// Summary should be capped at 500 chars, not the full 1000
|
|
111
|
+
expect(prompt).toContain('x'.repeat(500));
|
|
112
|
+
expect(prompt).not.toContain('x'.repeat(501));
|
|
113
|
+
// Should use the structured format (Summary: prefix)
|
|
114
|
+
expect(prompt).toContain('Summary:');
|
|
114
115
|
});
|
|
115
116
|
});
|
|
116
117
|
describe('orchestrator - cancellation after wave spawn', () => {
|
|
@@ -134,7 +135,7 @@ describe('orchestrator - spawn failure marks child as failed', () => {
|
|
|
134
135
|
expect(src).toContain("const child = getCodeAgent(childId);");
|
|
135
136
|
expect(src).toContain("if (child && child.status === 'running')");
|
|
136
137
|
expect(src).toContain("status: 'failed'");
|
|
137
|
-
expect(src).toContain("error:
|
|
138
|
+
expect(src).toContain("error: toErrorMessage(err)");
|
|
138
139
|
});
|
|
139
140
|
});
|
|
140
141
|
describe('orchestrator - skip redundant parent writes', () => {
|
|
@@ -147,3 +148,69 @@ describe('orchestrator - skip redundant parent writes', () => {
|
|
|
147
148
|
expect(src).toContain('lastLiveOutput = newLiveOutput');
|
|
148
149
|
});
|
|
149
150
|
});
|
|
151
|
+
describe('gatherCodebaseContext', () => {
|
|
152
|
+
it('returns a non-empty string for the project root', () => {
|
|
153
|
+
// Use this project's own root as the workdir
|
|
154
|
+
const { resolve } = require('path');
|
|
155
|
+
const projectRoot = resolve(__dirname, '..', '..');
|
|
156
|
+
const context = gatherCodebaseContext(projectRoot);
|
|
157
|
+
// Should contain at least scripts or source files
|
|
158
|
+
expect(context.length).toBeGreaterThan(0);
|
|
159
|
+
expect(context.length).toBeLessThanOrEqual(2000);
|
|
160
|
+
});
|
|
161
|
+
it('returns empty string for nonexistent directory', () => {
|
|
162
|
+
const context = gatherCodebaseContext('/tmp/nonexistent-dir-12345');
|
|
163
|
+
// Should not throw, just return empty or minimal context
|
|
164
|
+
expect(typeof context).toBe('string');
|
|
165
|
+
});
|
|
166
|
+
});
|
|
167
|
+
describe('decomposeTask with workdir', () => {
|
|
168
|
+
it('passes workdir context to the decomposition prompt', async () => {
|
|
169
|
+
mockRunAgentTurn.mockResolvedValueOnce('{"subtasks": [{"description": "sub1", "dependsOn": []}, {"description": "sub2", "dependsOn": []}]}');
|
|
170
|
+
const config = { providers: {} };
|
|
171
|
+
const result = await decomposeTask('test task', 2, config, '/tmp');
|
|
172
|
+
expect(result).toHaveLength(2);
|
|
173
|
+
// Check the prompt sent to the model includes the richer decomposition instructions
|
|
174
|
+
const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
|
|
175
|
+
const prompt = call[1];
|
|
176
|
+
expect(prompt).toContain('task decomposition expert');
|
|
177
|
+
expect(prompt).toContain('git worktree');
|
|
178
|
+
});
|
|
179
|
+
});
|
|
180
|
+
describe('synthesizeResults with workdir', () => {
|
|
181
|
+
it('includes git diff info when workdir is a git repo', async () => {
|
|
182
|
+
mockRunAgentTurn.mockResolvedValueOnce('Synthesis complete');
|
|
183
|
+
const { resolve } = require('path');
|
|
184
|
+
const projectRoot = resolve(__dirname, '..', '..');
|
|
185
|
+
const config = { providers: {} };
|
|
186
|
+
await synthesizeResults('original task', [
|
|
187
|
+
{ subtask: 'sub1', status: 'completed', output: 'done' },
|
|
188
|
+
], config, projectRoot);
|
|
189
|
+
const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
|
|
190
|
+
const prompt = call[1];
|
|
191
|
+
// Should include the success/failure counts
|
|
192
|
+
expect(prompt).toContain('1 succeeded, 0 failed');
|
|
193
|
+
});
|
|
194
|
+
});
|
|
195
|
+
describe('orchestrator - per-wave validation and retry', () => {
|
|
196
|
+
it('source includes per-wave validation logic', async () => {
|
|
197
|
+
const { readFileSync } = await vi.importActual('fs');
|
|
198
|
+
const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
|
|
199
|
+
// Verify per-wave validation exists
|
|
200
|
+
expect(src).toContain('Per-wave validation: run build after each wave');
|
|
201
|
+
expect(src).toContain('wave_validation');
|
|
202
|
+
// Verify retry logic
|
|
203
|
+
expect(src).toContain('wave_retry_complete');
|
|
204
|
+
expect(src).toContain('retryPrompt');
|
|
205
|
+
});
|
|
206
|
+
});
|
|
207
|
+
describe('orchestrator - timeout budgeting', () => {
|
|
208
|
+
it('computes perChildTimeout based on wave count not team size', async () => {
|
|
209
|
+
const { readFileSync } = await vi.importActual('fs');
|
|
210
|
+
const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
|
|
211
|
+
// Verify budget-aware timeout
|
|
212
|
+
expect(src).toContain('overheadMinutes');
|
|
213
|
+
expect(src).toContain('availableForChildren');
|
|
214
|
+
expect(src).toContain('Math.floor(availableForChildren / waves.length)');
|
|
215
|
+
});
|
|
216
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { describe, expect, it, vi, beforeEach } from 'vitest';
|
|
2
|
+
// Mock heavy dependencies before importing
|
|
3
|
+
vi.mock('../code-agents/registry.js', () => {
|
|
4
|
+
const tasks = new Map();
|
|
5
|
+
return {
|
|
6
|
+
getCodeAgentsDir: () => '/tmp/test-code-agents',
|
|
7
|
+
ensureCodeAgentsDir: vi.fn(),
|
|
8
|
+
writeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
|
|
9
|
+
storeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
|
|
10
|
+
getCodeAgent: vi.fn((id) => tasks.get(id) || null),
|
|
11
|
+
setCodeAgentCanceller: vi.fn(),
|
|
12
|
+
deleteCodeAgentCanceller: vi.fn(),
|
|
13
|
+
getNextCodeAgentId: vi.fn(() => 'ca-test-1'),
|
|
14
|
+
getActiveCodeAgents: vi.fn(() => []),
|
|
15
|
+
getRecentCodeAgents: vi.fn(() => []),
|
|
16
|
+
_tasks: tasks,
|
|
17
|
+
};
|
|
18
|
+
});
|
|
19
|
+
vi.mock('../code-agents/parser.js', () => ({
|
|
20
|
+
parseStreamJsonForLive: vi.fn((s) => s.slice(0, 200)),
|
|
21
|
+
parseClaudeOutput: vi.fn((s) => ({ text: s, totalCost: 0, inputTokens: 0, outputTokens: 0 })),
|
|
22
|
+
parseCodexOutput: vi.fn((s) => s),
|
|
23
|
+
}));
|
|
24
|
+
vi.mock('../code-agents/utils.js', () => ({
|
|
25
|
+
buildCodeAgentArgs: vi.fn(() => ({ cmd: 'echo', args: ['hello'] })),
|
|
26
|
+
notifyCodeAgentResult: vi.fn(async () => { }),
|
|
27
|
+
resolveModelAlias: vi.fn((m) => m || 'claude'),
|
|
28
|
+
resolveSelectedCodeAgent: vi.fn((_req, def) => def || 'claude'),
|
|
29
|
+
resolveWorkdir: vi.fn((_raw, _proj, root) => root),
|
|
30
|
+
}));
|
|
31
|
+
vi.mock('../audit.js', () => ({
|
|
32
|
+
startTrace: vi.fn(() => 'trace-1'),
|
|
33
|
+
addEvent: vi.fn(),
|
|
34
|
+
endTrace: vi.fn(async () => { }),
|
|
35
|
+
}));
|
|
36
|
+
vi.mock('../usage.js', () => ({
|
|
37
|
+
buildUsageRecord: vi.fn(() => ({})),
|
|
38
|
+
recordUsage: vi.fn(),
|
|
39
|
+
}));
|
|
40
|
+
vi.mock('fs', async () => {
|
|
41
|
+
const actual = await vi.importActual('fs');
|
|
42
|
+
return {
|
|
43
|
+
...actual,
|
|
44
|
+
createWriteStream: vi.fn(() => ({
|
|
45
|
+
write: vi.fn(),
|
|
46
|
+
end: vi.fn(),
|
|
47
|
+
})),
|
|
48
|
+
existsSync: vi.fn(() => false),
|
|
49
|
+
};
|
|
50
|
+
});
|
|
51
|
+
vi.mock('../sandbox/index.js', () => ({
|
|
52
|
+
ensureContainer: vi.fn(async () => 'sandbox-code-test'),
|
|
53
|
+
SANDBOX_DEFAULTS: { image: 'skimpyclaw-sandbox:latest', runtime: 'docker' },
|
|
54
|
+
getRuntime: vi.fn(() => 'docker'),
|
|
55
|
+
execInContainer: vi.fn(async () => ({ exitCode: 0, stdout: 'PASS', stderr: '' })),
|
|
56
|
+
}));
|
|
57
|
+
describe('buildContainerEnvArgs', () => {
|
|
58
|
+
it('returns -e flags for allowlisted env vars that exist', async () => {
|
|
59
|
+
const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
|
|
60
|
+
const env = {
|
|
61
|
+
ANTHROPIC_API_KEY: 'sk-test-123',
|
|
62
|
+
GH_TOKEN: 'ghp_abc',
|
|
63
|
+
HOME: '/home/sandbox',
|
|
64
|
+
SOME_OTHER_VAR: 'should-not-appear',
|
|
65
|
+
PATH: '/usr/bin:/usr/local/bin',
|
|
66
|
+
};
|
|
67
|
+
const result = buildContainerEnvArgs(env);
|
|
68
|
+
expect(result).toContain('-e');
|
|
69
|
+
expect(result).toContain('ANTHROPIC_API_KEY=sk-test-123');
|
|
70
|
+
expect(result).toContain('GH_TOKEN=ghp_abc');
|
|
71
|
+
expect(result).toContain('HOME=/home/sandbox');
|
|
72
|
+
expect(result).toContain('PATH=/usr/bin:/usr/local/bin');
|
|
73
|
+
// Non-allowlisted vars should not appear
|
|
74
|
+
expect(result.join(' ')).not.toContain('SOME_OTHER_VAR');
|
|
75
|
+
});
|
|
76
|
+
it('excludes empty and undefined values', async () => {
|
|
77
|
+
const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
|
|
78
|
+
const env = {
|
|
79
|
+
ANTHROPIC_API_KEY: '',
|
|
80
|
+
GH_TOKEN: undefined,
|
|
81
|
+
GITHUB_TOKEN: 'ghp_123',
|
|
82
|
+
};
|
|
83
|
+
const result = buildContainerEnvArgs(env);
|
|
84
|
+
expect(result).toContain('GITHUB_TOKEN=ghp_123');
|
|
85
|
+
// No git signing key passed, so no GIT_CONFIG overrides
|
|
86
|
+
expect(result.join(' ')).not.toContain('GIT_CONFIG');
|
|
87
|
+
});
|
|
88
|
+
it('includes git signing config overrides when gitSigningKey is provided', async () => {
|
|
89
|
+
const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
|
|
90
|
+
const result = buildContainerEnvArgs({ ANTHROPIC_API_KEY: 'sk-test' }, { gitSigningKey: '/home/sandbox/.ssh/git_signing_ed25519' });
|
|
91
|
+
expect(result).toContain('GIT_CONFIG_COUNT=1');
|
|
92
|
+
expect(result).toContain('GIT_CONFIG_KEY_0=user.signingkey');
|
|
93
|
+
expect(result).toContain('GIT_CONFIG_VALUE_0=/home/sandbox/.ssh/git_signing_ed25519');
|
|
94
|
+
});
|
|
95
|
+
it('omits git signing config when gitSigningKey is not provided', async () => {
|
|
96
|
+
const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
|
|
97
|
+
const result = buildContainerEnvArgs({ FOO: 'bar', BAZ: 'qux' });
|
|
98
|
+
expect(result).not.toContain('FOO=bar');
|
|
99
|
+
expect(result.join(' ')).not.toContain('GIT_CONFIG');
|
|
100
|
+
expect(result).toEqual([]);
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
describe('runValidation with sandbox', () => {
|
|
104
|
+
beforeEach(() => {
|
|
105
|
+
vi.clearAllMocks();
|
|
106
|
+
});
|
|
107
|
+
it('uses execInContainer when sandboxContainer is provided', async () => {
|
|
108
|
+
const { execInContainer } = await import('../sandbox/index.js');
|
|
109
|
+
const { runValidation } = await import('../code-agents/executor.js');
|
|
110
|
+
const result = await runValidation('/workspace', 'sandbox-test');
|
|
111
|
+
expect(execInContainer).toHaveBeenCalledWith('sandbox-test', expect.any(Array), expect.objectContaining({ timeout: expect.any(Number) }));
|
|
112
|
+
expect(result.passed).toBe(true);
|
|
113
|
+
expect(result.output).toBe('PASS');
|
|
114
|
+
});
|
|
115
|
+
it('returns failure when execInContainer returns non-zero exit', async () => {
|
|
116
|
+
const { execInContainer } = await import('../sandbox/index.js');
|
|
117
|
+
vi.mocked(execInContainer).mockResolvedValueOnce({
|
|
118
|
+
exitCode: 1,
|
|
119
|
+
stdout: 'Error: test failed',
|
|
120
|
+
stderr: 'FAIL',
|
|
121
|
+
});
|
|
122
|
+
const { runValidation } = await import('../code-agents/executor.js');
|
|
123
|
+
const result = await runValidation('/workspace', 'sandbox-fail');
|
|
124
|
+
expect(result.passed).toBe(false);
|
|
125
|
+
expect(result.output).toContain('VALIDATION FAILED');
|
|
126
|
+
});
|
|
127
|
+
it('returns failure when execInContainer throws', async () => {
|
|
128
|
+
const { execInContainer } = await import('../sandbox/index.js');
|
|
129
|
+
vi.mocked(execInContainer).mockRejectedValueOnce(new Error('container not found'));
|
|
130
|
+
const { runValidation } = await import('../code-agents/executor.js');
|
|
131
|
+
const result = await runValidation('/workspace', 'sandbox-crash');
|
|
132
|
+
expect(result.passed).toBe(false);
|
|
133
|
+
expect(result.output).toContain('sandbox exec error');
|
|
134
|
+
expect(result.output).toContain('container not found');
|
|
135
|
+
});
|
|
136
|
+
});
|
|
137
|
+
describe('sandbox config resolution in executeCodeWithAgent', () => {
|
|
138
|
+
it('resolves sandboxConfig from context.sandboxConfig', async () => {
|
|
139
|
+
// Test the resolution logic directly — the config should be picked up
|
|
140
|
+
// from context.sandboxConfig or context.fullConfig.sandbox
|
|
141
|
+
const contextWithSandbox = {
|
|
142
|
+
sandboxConfig: { enabled: true, image: 'test-image' },
|
|
143
|
+
fullConfig: { sandbox: { enabled: false } },
|
|
144
|
+
};
|
|
145
|
+
// sandboxConfig takes precedence over fullConfig.sandbox
|
|
146
|
+
const resolved = contextWithSandbox.sandboxConfig ?? contextWithSandbox.fullConfig?.sandbox;
|
|
147
|
+
expect(resolved).toEqual({ enabled: true, image: 'test-image' });
|
|
148
|
+
});
|
|
149
|
+
it('falls back to fullConfig.sandbox when context.sandboxConfig is absent', () => {
|
|
150
|
+
const contextFallback = {
|
|
151
|
+
fullConfig: { sandbox: { enabled: true, image: 'fallback-image' } },
|
|
152
|
+
};
|
|
153
|
+
const resolved = contextFallback.sandboxConfig ?? contextFallback.fullConfig?.sandbox;
|
|
154
|
+
expect(resolved).toEqual({ enabled: true, image: 'fallback-image' });
|
|
155
|
+
});
|
|
156
|
+
it('resolves to undefined when no sandbox config present', () => {
|
|
157
|
+
const contextNone = {
|
|
158
|
+
fullConfig: {},
|
|
159
|
+
};
|
|
160
|
+
const resolved = contextNone.sandboxConfig ?? contextNone.fullConfig?.sandbox;
|
|
161
|
+
expect(resolved).toBeUndefined();
|
|
162
|
+
});
|
|
163
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { estimateTokens, compactAnthropicMessages, compactOpenAIMessages, compactCodexMessages, } from '../providers/context-manager.js';
|
|
3
|
+
// Helper: build an Anthropic-style tool exchange (assistant + user pair)
|
|
4
|
+
function anthropicExchange(toolResult) {
|
|
5
|
+
return [
|
|
6
|
+
{
|
|
7
|
+
role: 'assistant',
|
|
8
|
+
content: [
|
|
9
|
+
{ type: 'text', text: 'Using a tool.' },
|
|
10
|
+
{ type: 'tool_use', id: 'tu_1', name: 'Bash', input: { command: 'ls' } },
|
|
11
|
+
],
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
role: 'user',
|
|
15
|
+
content: [{ type: 'tool_result', tool_use_id: 'tu_1', content: toolResult }],
|
|
16
|
+
},
|
|
17
|
+
];
|
|
18
|
+
}
|
|
19
|
+
// Helper: build a Codex-style function call exchange
|
|
20
|
+
function codexExchange(output) {
|
|
21
|
+
return [
|
|
22
|
+
{ type: 'function_call', call_id: 'fc_1', name: 'Bash', arguments: '{}' },
|
|
23
|
+
{ type: 'function_call_output', call_id: 'fc_1', output },
|
|
24
|
+
];
|
|
25
|
+
}
|
|
26
|
+
describe('estimateTokens', () => {
|
|
27
|
+
it('returns a positive number for non-empty data', () => {
|
|
28
|
+
expect(estimateTokens([{ role: 'user', content: 'hello' }])).toBeGreaterThan(0);
|
|
29
|
+
});
|
|
30
|
+
it('returns a small number for empty array', () => {
|
|
31
|
+
expect(estimateTokens([])).toBeLessThan(5);
|
|
32
|
+
});
|
|
33
|
+
it('grows with more content', () => {
|
|
34
|
+
const small = estimateTokens([{ content: 'hi' }]);
|
|
35
|
+
const large = estimateTokens([{ content: 'x'.repeat(10_000) }]);
|
|
36
|
+
expect(large).toBeGreaterThan(small);
|
|
37
|
+
});
|
|
38
|
+
});
|
|
39
|
+
describe('compactAnthropicMessages', () => {
|
|
40
|
+
it('passes through unchanged when under threshold', () => {
|
|
41
|
+
const messages = anthropicExchange('short result');
|
|
42
|
+
const result = compactAnthropicMessages(messages, { maxContextTokens: 100_000 });
|
|
43
|
+
expect(result).toEqual(messages);
|
|
44
|
+
});
|
|
45
|
+
it('returns same reference when no compaction needed', () => {
|
|
46
|
+
const messages = anthropicExchange('short result');
|
|
47
|
+
const result = compactAnthropicMessages(messages, { maxContextTokens: 100_000 });
|
|
48
|
+
expect(result).toBe(messages);
|
|
49
|
+
});
|
|
50
|
+
it('truncates old tool_result content when over threshold', () => {
|
|
51
|
+
const longResult = 'x'.repeat(10_000);
|
|
52
|
+
// Build many exchanges to exceed threshold
|
|
53
|
+
const messages = [];
|
|
54
|
+
for (let i = 0; i < 30; i++) {
|
|
55
|
+
messages.push(...anthropicExchange(longResult));
|
|
56
|
+
}
|
|
57
|
+
const result = compactAnthropicMessages(messages, { maxContextTokens: 1_000 });
|
|
58
|
+
// Head messages should have truncated tool results
|
|
59
|
+
const headMessages = result.slice(0, -8);
|
|
60
|
+
const toolResultMessages = headMessages.filter(m => Array.isArray(m.content) && m.content.some((b) => b.type === 'tool_result'));
|
|
61
|
+
for (const msg of toolResultMessages) {
|
|
62
|
+
const block = msg.content.find((b) => b.type === 'tool_result');
|
|
63
|
+
expect(block.content).toContain('[truncated]');
|
|
64
|
+
expect(block.content.length).toBeLessThan(longResult.length);
|
|
65
|
+
}
|
|
66
|
+
});
|
|
67
|
+
it('keeps last 8 messages intact when compacting', () => {
|
|
68
|
+
const longResult = 'x'.repeat(10_000);
|
|
69
|
+
const messages = [];
|
|
70
|
+
for (let i = 0; i < 30; i++) {
|
|
71
|
+
messages.push(...anthropicExchange(longResult));
|
|
72
|
+
}
|
|
73
|
+
const result = compactAnthropicMessages(messages, { maxContextTokens: 1_000 });
|
|
74
|
+
// Last 8 messages should be untouched
|
|
75
|
+
const tail = result.slice(-8);
|
|
76
|
+
const originalTail = messages.slice(-8);
|
|
77
|
+
expect(tail).toEqual(originalTail);
|
|
78
|
+
});
|
|
79
|
+
it('does not mutate the input array', () => {
|
|
80
|
+
const longResult = 'x'.repeat(10_000);
|
|
81
|
+
const messages = [];
|
|
82
|
+
for (let i = 0; i < 30; i++) {
|
|
83
|
+
messages.push(...anthropicExchange(longResult));
|
|
84
|
+
}
|
|
85
|
+
const originalJson = JSON.stringify(messages);
|
|
86
|
+
compactAnthropicMessages(messages, { maxContextTokens: 1_000 });
|
|
87
|
+
expect(JSON.stringify(messages)).toBe(originalJson);
|
|
88
|
+
});
|
|
89
|
+
it('preserves non-tool_result blocks unchanged', () => {
|
|
90
|
+
const longResult = 'x'.repeat(10_000);
|
|
91
|
+
const messages = [];
|
|
92
|
+
for (let i = 0; i < 30; i++) {
|
|
93
|
+
messages.push(...anthropicExchange(longResult));
|
|
94
|
+
}
|
|
95
|
+
const result = compactAnthropicMessages(messages, { maxContextTokens: 1_000 });
|
|
96
|
+
// Assistant messages (tool_use blocks) should be untouched
|
|
97
|
+
const assistantMessages = result.filter(m => m.role === 'assistant');
|
|
98
|
+
for (const msg of assistantMessages) {
|
|
99
|
+
const toolUse = msg.content.find((b) => b.type === 'tool_use');
|
|
100
|
+
expect(toolUse).toBeDefined();
|
|
101
|
+
expect(toolUse.name).toBe('Bash');
|
|
102
|
+
}
|
|
103
|
+
});
|
|
104
|
+
it('passes through unchanged when disabled', () => {
|
|
105
|
+
const longResult = 'x'.repeat(10_000);
|
|
106
|
+
const messages = [];
|
|
107
|
+
for (let i = 0; i < 30; i++) {
|
|
108
|
+
messages.push(...anthropicExchange(longResult));
|
|
109
|
+
}
|
|
110
|
+
const result = compactAnthropicMessages(messages, { enabled: false, maxContextTokens: 1 });
|
|
111
|
+
expect(result).toBe(messages);
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
// Helper: build an OpenAI-style tool exchange (assistant + tool result)
|
|
115
|
+
function openaiExchange(toolResult) {
|
|
116
|
+
return [
|
|
117
|
+
{
|
|
118
|
+
role: 'assistant',
|
|
119
|
+
content: null,
|
|
120
|
+
tool_calls: [{ id: 'tc_1', type: 'function', function: { name: 'Bash', arguments: '{}' } }],
|
|
121
|
+
},
|
|
122
|
+
{ role: 'tool', tool_call_id: 'tc_1', content: toolResult },
|
|
123
|
+
];
|
|
124
|
+
}
|
|
125
|
+
// Behaviour of compactOpenAIMessages on OpenAI-style chat histories.
describe('compactOpenAIMessages', () => {
    const LONG_RESULT = 'x'.repeat(10_000);
    const TIGHT_BUDGET = { maxContextTokens: 1_000 };

    // Fresh history of 30 tool exchanges, each carrying the 10k-character result.
    const buildLongHistory = () => {
        const history = [];
        for (let round = 0; round < 30; round += 1) {
            history.push(...openaiExchange(LONG_RESULT));
        }
        return history;
    };

    it('passes through unchanged when under threshold', () => {
        const history = openaiExchange('short result');
        expect(compactOpenAIMessages(history, { maxContextTokens: 100_000 })).toBe(history);
    });

    it('truncates old tool content when over threshold', () => {
        const compacted = compactOpenAIMessages(buildLongHistory(), { ...TIGHT_BUDGET });
        // Everything except the final 8 entries is checked for truncation.
        const olderToolMessages = compacted.slice(0, -8).filter((entry) => entry.role === 'tool');
        olderToolMessages.forEach((entry) => {
            expect(entry.content).toContain('[truncated]');
            expect(entry.content.length).toBeLessThan(LONG_RESULT.length);
        });
    });

    it('keeps last 8 messages intact', () => {
        const history = buildLongHistory();
        const compacted = compactOpenAIMessages(history, { ...TIGHT_BUDGET });
        expect(compacted.slice(-8)).toEqual(history.slice(-8));
    });

    it('does not mutate the input array', () => {
        const history = buildLongHistory();
        const snapshot = JSON.stringify(history);
        compactOpenAIMessages(history, { ...TIGHT_BUDGET });
        expect(JSON.stringify(history)).toBe(snapshot);
    });

    it('passes through unchanged when disabled', () => {
        const history = buildLongHistory();
        const options = { enabled: false, maxContextTokens: 1 };
        expect(compactOpenAIMessages(history, options)).toBe(history);
    });
});
|
|
174
|
+
// Behaviour of compactCodexMessages on Codex-style item lists.
describe('compactCodexMessages', () => {
    const LONG_OUTPUT = 'x'.repeat(10_000);
    const TIGHT_BUDGET = { maxContextTokens: 1_000 };

    // Fresh item list built from 30 exchanges, each carrying the 10k-character output.
    const buildLongItemList = () => {
        const list = [];
        for (let round = 0; round < 30; round += 1) {
            list.push(...codexExchange(LONG_OUTPUT));
        }
        return list;
    };

    it('passes through unchanged when under threshold', () => {
        const list = codexExchange('short result');
        const compacted = compactCodexMessages(list, { maxContextTokens: 100_000 });
        expect(compacted).toEqual(list);
    });

    it('truncates old function_call_output when over threshold', () => {
        const compacted = compactCodexMessages(buildLongItemList(), { ...TIGHT_BUDGET });
        // Everything except the final 8 entries is checked for truncation.
        const olderOutputs = compacted
            .slice(0, -8)
            .filter((entry) => entry.type === 'function_call_output');
        olderOutputs.forEach((entry) => {
            expect(entry.output).toContain('[truncated]');
            expect(entry.output.length).toBeLessThan(LONG_OUTPUT.length);
        });
    });

    it('keeps last 8 items intact when compacting', () => {
        const list = buildLongItemList();
        const compacted = compactCodexMessages(list, { ...TIGHT_BUDGET });
        expect(compacted.slice(-8)).toEqual(list.slice(-8));
    });

    it('does not mutate the input array', () => {
        const list = buildLongItemList();
        const snapshot = JSON.stringify(list);
        compactCodexMessages(list, { ...TIGHT_BUDGET });
        expect(JSON.stringify(list)).toBe(snapshot);
    });

    it('preserves function_call items (not just outputs) unchanged', () => {
        const compacted = compactCodexMessages(buildLongItemList(), { ...TIGHT_BUDGET });
        const calls = compacted.filter((entry) => entry.type === 'function_call');
        calls.forEach((entry) => {
            expect(entry.name).toBe('Bash');
        });
    });

    it('passes through unchanged when disabled', () => {
        const list = buildLongItemList();
        const options = { enabled: false, maxContextTokens: 1 };
        expect(compactCodexMessages(list, options)).toBe(list);
    });
});
|
|
@@ -147,17 +147,17 @@ describe('buildValidationCommand', () => {
|
|
|
147
147
|
writeFileSync(join(tempDir, 'yarn.lock'), '');
|
|
148
148
|
expect(buildValidationCommand(tempDir)).toBe('yarn build');
|
|
149
149
|
});
|
|
150
|
-
it('
|
|
151
|
-
// No package.json, no lockfile →
|
|
152
|
-
expect(buildValidationCommand(tempDir)).toBe('
|
|
150
|
+
it('returns empty when no package.json exists', () => {
|
|
151
|
+
// No package.json, no lockfile → nothing to validate
|
|
152
|
+
expect(buildValidationCommand(tempDir)).toBe('');
|
|
153
153
|
});
|
|
154
|
-
it('
|
|
154
|
+
it('returns empty when scripts object is empty', () => {
|
|
155
155
|
writeFileSync(join(tempDir, 'package.json'), JSON.stringify({
|
|
156
156
|
name: 'test',
|
|
157
157
|
scripts: {},
|
|
158
158
|
}));
|
|
159
159
|
writeFileSync(join(tempDir, 'yarn.lock'), '');
|
|
160
|
-
expect(buildValidationCommand(tempDir)).toBe('
|
|
160
|
+
expect(buildValidationCommand(tempDir)).toBe('');
|
|
161
161
|
});
|
|
162
162
|
// wp-calypso scenario
|
|
163
163
|
it('generates yarn commands for wp-calypso-like project', () => {
|