alvin-bot 4.18.0 → 4.18.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AEC-PLUGINS-SOURCES.md +53 -0
- package/CHANGELOG.md +37 -2
- package/DESIGN-SKILLS-SOURCES.md +81 -0
- package/bin/cli.js +1 -1
- package/dist/providers/claude-sdk-provider.js +24 -0
- package/package.json +3 -1
- package/test/allowed-users-gate.test.ts +0 -98
- package/test/alvin-dispatch.test.ts +0 -220
- package/test/async-agent-chunk-flow.test.ts +0 -244
- package/test/async-agent-parser-staleness.test.ts +0 -412
- package/test/async-agent-parser-streamjson.test.ts +0 -273
- package/test/async-agent-parser.test.ts +0 -322
- package/test/async-agent-watcher.test.ts +0 -229
- package/test/background-bypass-integration.test.ts +0 -443
- package/test/background-bypass-stress.test.ts +0 -417
- package/test/background-bypass.test.ts +0 -127
- package/test/browser-webfetch.test.ts +0 -121
- package/test/claude-sdk-provider.test.ts +0 -115
- package/test/claude-sdk-tool-use-id.test.ts +0 -180
- package/test/console-timestamps.test.ts +0 -98
- package/test/cron-progress-ticker.test.ts +0 -76
- package/test/cron-restart-resilience.test.ts +0 -191
- package/test/cron-run-resolver.test.ts +0 -133
- package/test/cron-runjobnow-throw.test.ts +0 -100
- package/test/debounce.test.ts +0 -60
- package/test/delivery-registry.test.ts +0 -71
- package/test/exec-guard-metachars.test.ts +0 -110
- package/test/file-permissions.test.ts +0 -130
- package/test/i18n.test.ts +0 -108
- package/test/list-subagents-merged.test.ts +0 -172
- package/test/memory-extractor.test.ts +0 -151
- package/test/memory-layers.test.ts +0 -169
- package/test/memory-sdk-injection.test.ts +0 -146
- package/test/memory-stress-restart.test.ts +0 -337
- package/test/multi-session-stress.test.ts +0 -255
- package/test/platform-session-key.test.ts +0 -69
- package/test/process-manager.test.ts +0 -186
- package/test/registry.test.ts +0 -201
- package/test/session-pending-background.test.ts +0 -59
- package/test/session-persistence.test.ts +0 -195
- package/test/slack-progress-ticker.test.ts +0 -123
- package/test/slack-slash-command.test.ts +0 -61
- package/test/slack-test-connection.test.ts +0 -176
- package/test/stress-scenarios.test.ts +0 -356
- package/test/stuck-timer.test.ts +0 -116
- package/test/subagent-delivery-markdown-fallback.test.ts +0 -147
- package/test/subagent-delivery-platform-routing.test.ts +0 -232
- package/test/subagent-delivery.test.ts +0 -273
- package/test/subagent-final-text.test.ts +0 -132
- package/test/subagent-stats.test.ts +0 -119
- package/test/subagent-toolset-allowlist.test.ts +0 -146
- package/test/subagents-commands.test.ts +0 -64
- package/test/subagents-config.test.ts +0 -114
- package/test/subagents-depth.test.ts +0 -58
- package/test/subagents-inheritance.test.ts +0 -67
- package/test/subagents-name-resolver.test.ts +0 -122
- package/test/subagents-priority-reject.test.ts +0 -88
- package/test/subagents-queue.test.ts +0 -127
- package/test/subagents-shutdown.test.ts +0 -126
- package/test/subagents-toolset.test.ts +0 -71
- package/test/sync-task-timeout.test.ts +0 -153
- package/test/system-prompt-background-hint.test.ts +0 -65
- package/test/telegram-error-filter.test.ts +0 -85
- package/test/telegram-workspace-command.test.ts +0 -78
- package/test/timing-safe-bearer.test.ts +0 -65
- package/test/watchdog-brake.test.ts +0 -157
- package/test/watcher-pending-count.test.ts +0 -228
- package/test/watcher-zombie-fix.test.ts +0 -252
- package/test/web-server-integration.test.ts +0 -189
- package/test/web-server-resilience.test.ts +0 -118
- package/test/web-server-shutdown.test.ts +0 -117
- package/test/whatsapp-auth-resilience.test.ts +0 -96
- package/test/workspaces.test.ts +0 -196
- package/vitest.config.ts +0 -17
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fix #5 — runSubAgent must preserve the full final text, even when the
|
|
3
|
-
* stream ends on a tool_use or is aborted mid-stream.
|
|
4
|
-
*
|
|
5
|
-
* Regressions this closes:
|
|
6
|
-
*
|
|
7
|
-
* (a) The SDK yields `text` chunks as accumulated strings, then tool
|
|
8
|
-
* calls, then more text, then finally a `done` chunk that ALSO
|
|
9
|
-
* carries the final accumulated text. The old runSubAgent read
|
|
10
|
-
* `text` from text-chunks only and ignored `done.text`. If the
|
|
11
|
-
* assistant's very last action was a tool call with no trailing
|
|
12
|
-
* text block, `finalText` kept the pre-tool text and the
|
|
13
|
-
* cron-jobs.json `lastResult` ended mid-sentence.
|
|
14
|
-
*
|
|
15
|
-
* (b) When queryWithFallback threw mid-stream (provider aborted,
|
|
16
|
-
* network error, etc.), the catch block set `output: ""` —
|
|
17
|
-
* throwing away whatever text had already streamed in before the
|
|
18
|
-
* failure. Users saw an empty "(empty output)" delivery.
|
|
19
|
-
*
|
|
20
|
-
* Contract:
|
|
21
|
-
* - Output = last non-empty value observed from (text.text | done.text)
|
|
22
|
-
* - On error / abort: output = whatever we'd buffered so far (never "")
|
|
23
|
-
*/
|
|
24
|
-
import { describe, it, expect, beforeEach, vi } from "vitest";
|
|
25
|
-
import fs from "fs";
|
|
26
|
-
import os from "os";
|
|
27
|
-
import { resolve } from "path";
|
|
28
|
-
import type { StreamChunk } from "../src/providers/types.js";
|
|
29
|
-
|
|
30
|
-
// Per-run scratch directory; pid + timestamp keep concurrent runs apart.
const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-bot-finaltext-${process.pid}-${Date.now()}`);

// Give every test an empty data directory, a clean MAX_SUBAGENTS env and a
// fresh module cache so each dynamic import() re-evaluates its module.
beforeEach(() => {
  // `force: true` makes rmSync a no-op when the directory does not exist yet,
  // so no separate existence check is needed.
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });

  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
  delete process.env.MAX_SUBAGENTS;

  vi.resetModules();
});
|
|
39
|
-
|
|
40
|
-
/**
 * Register module mocks for one test scenario.
 *
 * - `../src/engine.js`: `getRegistry()` returns an object whose
 *   `queryWithFallback` is either the supplied generator factory (used as-is)
 *   or an async generator that replays the supplied chunk array in order.
 * - `../src/services/subagent-delivery.js`: every export is a no-op.
 *
 * @param chunks scripted chunks, or a factory producing the stream itself
 */
function mockStream(chunks: StreamChunk[] | (() => AsyncIterable<StreamChunk>)) {
  let queryWithFallback: () => AsyncIterable<StreamChunk>;
  if (typeof chunks === "function") {
    queryWithFallback = chunks;
  } else {
    const scripted = chunks;
    queryWithFallback = async function* () {
      for (const chunk of scripted) yield chunk;
    };
  }

  vi.doMock("../src/engine.js", () => {
    return {
      getRegistry: () => ({ queryWithFallback }),
    };
  });

  vi.doMock("../src/services/subagent-delivery.js", () => {
    return {
      deliverSubAgentResult: async () => { /* no-op */ },
      attachBotApi: () => {},
      __setBotApiForTest: () => {},
    };
  });
}
|
|
54
|
-
|
|
55
|
-
/**
 * Spawn a cron-sourced sub-agent named "test-agent" and resolve with the
 * `output`, `status` and `tokensUsed` fields passed to its `onComplete`
 * callback.
 *
 * The module is imported dynamically so the mocks registered by the test are
 * in place first. A rejection from `spawnSubAgent` is deliberately swallowed;
 * in that case the returned promise only settles if `onComplete` still fires.
 *
 * @param prompt prompt forwarded to the sub-agent (defaults to "test")
 */
async function runAndGetResult(prompt = "test") {
  type Outcome = { output: string; status: string; tokensUsed: { input: number; output: number } };

  const { spawnSubAgent } = await import("../src/services/subagents.js");

  return new Promise<Outcome>((settle) => {
    const spawned = spawnSubAgent({
      name: "test-agent",
      prompt,
      source: "cron",
      parentChatId: 1,
      onComplete: (r) => {
        const { output, status, tokensUsed } = r;
        settle({ output, status, tokensUsed });
      },
    });
    spawned.catch(() => { /* spawn errors handled elsewhere */ });
  });
}
|
|
71
|
-
|
|
72
|
-
// Contract under test: the sub-agent's output is the last non-empty text seen
// on the stream (text chunks or the done chunk), and text buffered before a
// failure is kept.
describe("runSubAgent finalText (Fix #5)", () => {
  it("uses done.text as the authoritative final output", async () => {
    // The scripted stream ends on a tool call with no trailing text chunk;
    // only the done chunk carries the final text.
    const script: StreamChunk[] = [
      { type: "text", text: "Working on it…" },
      { type: "tool_use", toolName: "Bash" },
      { type: "text", text: "Intermediate finding: 5 results." },
      { type: "tool_use", toolName: "Write" },
      { type: "done", text: "Job complete. Report at /tmp/out.html", inputTokens: 100, outputTokens: 50 },
    ];
    mockStream(script);

    const { status, output, tokensUsed } = await runAndGetResult();

    expect(status).toBe("completed");
    expect(output).toBe("Job complete. Report at /tmp/out.html");
    expect(tokensUsed).toEqual({ input: 100, output: 50 });
  });

  it("falls back to last text chunk when done has no text", async () => {
    const script: StreamChunk[] = [
      { type: "text", text: "First sentence." },
      { type: "text", text: "Second sentence." },
      { type: "done", inputTokens: 10, outputTokens: 5 },
    ];
    mockStream(script);

    const { output } = await runAndGetResult();

    expect(output).toBe("Second sentence.");
  });

  it("preserves buffered text when stream errors mid-way", async () => {
    async function* failingStream(): AsyncGenerator<StreamChunk> {
      yield { type: "text", text: "Partial progress so far…" };
      yield { type: "tool_use", toolName: "Bash" };
      throw new Error("network: socket hang up");
    }
    mockStream(failingStream);

    const { output } = await runAndGetResult();

    // The status is intentionally not asserted here ("error" and "cancelled"
    // are both acceptable); the regression is an empty output string.
    expect(output.length).toBeGreaterThan(0);
    expect(output).toContain("Partial progress");
  });

  it("preserves buffered text when the provider yields an error chunk", async () => {
    const script: StreamChunk[] = [
      { type: "text", text: "Started the task." },
      { type: "text", text: "Started the task. More detail here." },
      { type: "error", error: "Provider 'claude-sdk' failed: Request aborted" },
    ];
    mockStream(script);

    const { output } = await runAndGetResult();

    expect(output).toContain("More detail");
  });

  it("returns empty output gracefully when nothing was buffered", async () => {
    async function* deadOnArrival(): AsyncGenerator<StreamChunk> {
      throw new Error("immediate failure");
    }
    mockStream(deadOnArrival);

    const { output, status } = await runAndGetResult();

    // Nothing streamed, so there is nothing to preserve: empty output is
    // fine, but the status has to report the failure.
    expect(output).toBe("");
    expect(["error", "cancelled", "timeout"]).toContain(status);
  });
});
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, vi } from "vitest";
|
|
2
|
-
import fs from "fs";
|
|
3
|
-
import os from "os";
|
|
4
|
-
import { resolve } from "path";
|
|
5
|
-
import type { SubAgentInfo, SubAgentResult } from "../src/services/subagents.js";
|
|
6
|
-
|
|
7
|
-
// Per-run scratch directory; pid + timestamp keep concurrent runs apart.
const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-bot-stats-${process.pid}-${Date.now()}`);

// Start every test from an empty data directory and a fresh module cache.
beforeEach(() => {
  // `force: true` already tolerates a missing directory.
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });

  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;

  vi.resetModules();
});
|
|
15
|
-
|
|
16
|
-
/**
 * Build a SubAgentInfo fixture: a completed, user-sourced, depth-0 agent that
 * started one second ago. Any field in `overrides` replaces the default.
 */
function makeInfo(overrides: Partial<SubAgentInfo> = {}): SubAgentInfo {
  const oneSecondAgo = Date.now() - 1000;
  const info: SubAgentInfo = {
    id: "x",
    name: "test",
    status: "completed",
    startedAt: oneSecondAgo,
    source: "user",
    depth: 0,
    ...overrides,
  };
  return info;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Build a SubAgentResult fixture: a completed run with output "ok",
 * 100 input / 50 output tokens and a duration of 1000. Any field in
 * `overrides` replaces the default.
 */
function makeResult(overrides: Partial<SubAgentResult> = {}): SubAgentResult {
  const result: SubAgentResult = {
    id: "x",
    name: "test",
    status: "completed",
    output: "ok",
    tokensUsed: { input: 100, output: 50 },
    duration: 1000,
    ...overrides,
  };
  return result;
}
|
|
39
|
-
|
|
40
|
-
// Exercises getSubAgentStats / recordSubAgentRun against the temp data dir.
describe("subagent-stats (H3)", () => {
  it("getSubAgentStats returns zeros on a fresh install", async () => {
    const { getSubAgentStats } = await import("../src/services/subagent-stats.js");

    const { total, bySource, byStatus } = getSubAgentStats();

    expect(total.runs).toBe(0);
    expect(bySource.user.runs).toBe(0);
    expect(byStatus.completed).toBe(0);
  });

  it("recordSubAgentRun appends and updates totals", async () => {
    const { recordSubAgentRun, getSubAgentStats } = await import("../src/services/subagent-stats.js");

    // Two user runs and one cron run with distinct token counts.
    const runs = [
      { source: "user", input: 100, output: 50 },
      { source: "cron", input: 200, output: 75 },
      { source: "user", input: 50, output: 25 },
    ] as const;
    for (const { source, input, output } of runs) {
      recordSubAgentRun(makeInfo({ source }), makeResult({ tokensUsed: { input, output } }));
    }

    const { total, bySource, byStatus } = getSubAgentStats();
    expect(total.runs).toBe(3);
    expect(total.inputTokens).toBe(350);
    expect(total.outputTokens).toBe(150);
    expect(bySource.user.runs).toBe(2);
    expect(bySource.user.inputTokens).toBe(150);
    expect(bySource.cron.runs).toBe(1);
    expect(bySource.cron.inputTokens).toBe(200);
    expect(byStatus.completed).toBe(3);
  });

  it("persists to disk and round-trips through reload", async () => {
    const firstLoad = await import("../src/services/subagent-stats.js");
    firstLoad.recordSubAgentRun(makeInfo({ source: "cron" }), makeResult());

    // Clearing the module cache makes the next import() evaluate the module
    // again instead of reusing the first instance.
    vi.resetModules();
    const secondLoad = await import("../src/services/subagent-stats.js");

    const { total, bySource } = secondLoad.getSubAgentStats();
    expect(total.runs).toBe(1);
    expect(bySource.cron.runs).toBe(1);
  });

  it("prunes entries older than 24h", async () => {
    const mod = await import("../src/services/subagent-stats.js");

    // Seed the stats file with a single entry completed 25 hours ago.
    const TWENTY_FIVE_HOURS_MS = 25 * 60 * 60 * 1000;
    const staleEntry = {
      completedAt: Date.now() - TWENTY_FIVE_HOURS_MS,
      name: "ancient",
      source: "user",
      status: "completed",
      durationMs: 100,
      inputTokens: 999,
      outputTokens: 999,
    };
    const statsFile = resolve(TEST_DATA_DIR, "subagent-stats.json");
    fs.writeFileSync(statsFile, JSON.stringify([staleEntry]));
    mod.__resetStatsCacheForTest();

    // A fresh read must not count the stale entry.
    const { total } = mod.getSubAgentStats();
    expect(total.runs).toBe(0);
    expect(total.inputTokens).toBe(0);
  });

  it("tracks byStatus separately for cancelled/error/timeout", async () => {
    const { recordSubAgentRun, getSubAgentStats } = await import("../src/services/subagent-stats.js");

    const statuses = ["completed", "cancelled", "error", "timeout", "completed"] as const;
    for (const status of statuses) {
      recordSubAgentRun(makeInfo(), makeResult({ status }));
    }

    const { byStatus } = getSubAgentStats();
    expect(byStatus.completed).toBe(2);
    expect(byStatus.cancelled).toBe(1);
    expect(byStatus.error).toBe(1);
    expect(byStatus.timeout).toBe(1);
  });
});
|
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* v4.12.2 — Sub-agent toolset allowlist (Task G).
|
|
3
|
-
*
|
|
4
|
-
* Sub-agents can now be spawned with a toolset preset that restricts which
|
|
5
|
-
* tools Claude has access to:
|
|
6
|
-
* - "full" — all tools (default, matches pre-v4.12.2 behavior)
|
|
7
|
-
* - "readonly" — Read, Glob, Grep (analyze, no write, no shell, no net)
|
|
8
|
-
* - "research" — Read, Glob, Grep, WebSearch, WebFetch (no write, no shell)
|
|
9
|
-
*
|
|
10
|
-
* This test verifies that the preset → allowedTools mapping is correct
|
|
11
|
-
* and that the provider honors the override. The integration path
|
|
12
|
-
* (spawnSubAgent → registry.queryWithFallback → claude-sdk-provider) is
|
|
13
|
-
* exercised via mocked SDK.
|
|
14
|
-
*/
|
|
15
|
-
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
16
|
-
import type { StreamChunk } from "../src/providers/types.js";
|
|
17
|
-
|
|
18
|
-
// Clear the module cache so each test's dynamic import() picks up its own mocks.
beforeEach(() => {
  vi.resetModules();
});
|
|
19
|
-
|
|
20
|
-
describe("claude-sdk-provider honors options.allowedTools (v4.12.2)", () => {
  /**
   * Run one provider query against a mocked Claude Agent SDK and return the
   * `options` object the provider passed to the SDK's `query()`.
   *
   * The mocks mirror what every test in this suite needs:
   * - `findClaudeBinary` returns a fixed path ("/usr/bin/false").
   * - The SDK `query()` records its `options` and yields one `system/init`
   *   message followed by one `result` message.
   *
   * @param extraQueryOptions fields merged into the `{ prompt, systemPrompt }`
   *   query; omit it to leave `allowedTools` unset.
   * @returns the captured SDK options, or undefined if `query()` was never called
   */
  async function captureSdkOptions(
    extraQueryOptions: { allowedTools?: string[] } = {},
  ): Promise<Record<string, unknown> | undefined> {
    let capturedOpts: Record<string, unknown> | undefined;

    vi.doMock("../src/find-claude-binary.js", () => ({
      findClaudeBinary: () => "/usr/bin/false",
    }));
    vi.doMock("@anthropic-ai/claude-agent-sdk", () => ({
      query: (opts: { options: Record<string, unknown> }) => {
        capturedOpts = opts.options;
        return (async function* () {
          yield { type: "system", subtype: "init", session_id: "s1" };
          yield { type: "result", session_id: "s1", total_cost_usd: 0, usage: null };
        })();
      },
    }));

    // Import after vi.doMock so the provider module resolves the mocks.
    const { ClaudeSDKProvider } = await import("../src/providers/claude-sdk-provider.js");
    const provider = new ClaudeSDKProvider();

    // Drain the stream; only the options captured by the mock matter here.
    for await (const _c of provider.query({
      prompt: "test",
      systemPrompt: "test",
      ...extraQueryOptions,
    })) {
      void _c;
    }

    return capturedOpts;
  }

  it("uses the default full toolset when options.allowedTools is undefined", async () => {
    const capturedOpts = await captureSdkOptions();

    expect(capturedOpts).toBeDefined();
    expect(capturedOpts!.allowedTools).toEqual([
      "Read", "Write", "Edit", "Bash", "Glob", "Grep",
      "WebSearch", "WebFetch", "Task",
    ]);
  });

  it("overrides allowedTools when caller passes a restricted list (readonly preset)", async () => {
    const readonlyTools = ["Read", "Glob", "Grep"];

    const capturedOpts = await captureSdkOptions({ allowedTools: readonlyTools });

    expect(capturedOpts!.allowedTools).toEqual(readonlyTools);
    // Critically: Bash, Write, Edit are NOT in the list
    expect(capturedOpts!.allowedTools).not.toContain("Bash");
    expect(capturedOpts!.allowedTools).not.toContain("Write");
    expect(capturedOpts!.allowedTools).not.toContain("Edit");
  });

  it("overrides allowedTools with research preset (adds web tools)", async () => {
    const researchTools = ["Read", "Glob", "Grep", "WebSearch", "WebFetch"];

    const capturedOpts = await captureSdkOptions({ allowedTools: researchTools });

    expect(capturedOpts!.allowedTools).toEqual(researchTools);
    expect(capturedOpts!.allowedTools).toContain("WebSearch");
    expect(capturedOpts!.allowedTools).not.toContain("Bash");
  });

  it("empty allowedTools array is honored as such (no tools at all)", async () => {
    const capturedOpts = await captureSdkOptions({ allowedTools: [] });

    // Empty array → no tools. The JS `??` operator only falls back when its
    // left side is null or undefined; [] is neither, so a `??`-based default
    // would keep it as an "empty allowlist" rather than "use default".
    expect(capturedOpts!.allowedTools).toEqual([]);
  });
});
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, vi } from "vitest";
|
|
2
|
-
import fs from "fs";
|
|
3
|
-
import os from "os";
|
|
4
|
-
import { resolve } from "path";
|
|
5
|
-
|
|
6
|
-
// Per-run scratch directory; pid + timestamp keep concurrent runs apart.
const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-bot-cmds-${process.pid}-${Date.now()}`);

// Give every test an empty data directory, a clean MAX_SUBAGENTS env and a
// fresh module cache.
beforeEach(() => {
  // `force: true` already tolerates a missing directory.
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });

  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
  delete process.env.MAX_SUBAGENTS;

  vi.resetModules();
});
|
|
15
|
-
|
|
16
|
-
// Engine stub: the stream waits 500 ms before emitting its single done chunk,
// so the cancel and lookup calls in this suite run before the stream has
// emitted anything.
vi.mock("../src/engine.js", () => {
  async function* queryWithFallback() {
    await new Promise((wake) => setTimeout(wake, 500));
    yield { type: "done", text: "ok", inputTokens: 0, outputTokens: 0 };
  }

  return {
    getRegistry: () => ({ queryWithFallback }),
  };
});
|
|
24
|
-
|
|
25
|
-
// Name-based helpers: cancelSubAgentByName and getSubAgentResultByName.
describe("cancelSubAgentByName / getSubAgentResultByName (B2 helpers)", () => {
  it("cancels an agent by its exact name", async () => {
    const { spawnSubAgent, cancelSubAgentByName, listSubAgents } =
      await import("../src/services/subagents.js");

    const id = await spawnSubAgent({ name: "foo", prompt: "a" });
    const ok = cancelSubAgentByName("foo");
    expect(ok).toBe(true);

    const spawned = listSubAgents().find((agent) => agent.id === id);
    expect(spawned?.status).toBe("cancelled");
  });

  it("cancels the base-name when unambiguous", async () => {
    const { spawnSubAgent, cancelSubAgentByName } = await import("../src/services/subagents.js");

    await spawnSubAgent({ name: "bar", prompt: "a" });

    expect(cancelSubAgentByName("bar")).toBe(true);
  });

  it("returns false for unknown name", async () => {
    const { cancelSubAgentByName } = await import("../src/services/subagents.js");

    expect(cancelSubAgentByName("ghost")).toBe(false);
  });

  it("cancels the #N variant when addressed directly", async () => {
    const { spawnSubAgent, cancelSubAgentByName, listSubAgents } =
      await import("../src/services/subagents.js");

    // Two agents share the base name; the assertions below expect the second
    // one to be addressable as "baz#2".
    await spawnSubAgent({ name: "baz", prompt: "a" });
    await spawnSubAgent({ name: "baz", prompt: "b" });

    const ok = cancelSubAgentByName("baz#2");
    expect(ok).toBe(true);

    const canceledNames = listSubAgents()
      .filter((agent) => agent.status === "cancelled")
      .map((agent) => agent.name);
    expect(canceledNames).toEqual(["baz#2"]);
  });

  it("getSubAgentResultByName returns null when still running", async () => {
    const { spawnSubAgent, getSubAgentResultByName } = await import("../src/services/subagents.js");

    await spawnSubAgent({ name: "running", prompt: "a" });

    expect(getSubAgentResultByName("running")).toBeNull();
  });
});
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
2
|
-
import fs from "fs";
|
|
3
|
-
import { resolve } from "path";
|
|
4
|
-
import os from "os";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Tests for the file-backed sub-agents config.
|
|
8
|
-
*
|
|
9
|
-
* We isolate via ALVIN_DATA_DIR pointing at a temp directory, so the test
|
|
10
|
-
* never touches the real ~/.alvin-bot/sub-agents.json. vi.resetModules()
|
|
11
|
-
* clears Vitest's module cache between tests so each import() gets a
|
|
12
|
-
* fresh module with a fresh configCache.
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
// Per-run scratch directory; pid + timestamp keep concurrent runs apart.
const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-bot-test-${process.pid}-${Date.now()}`);

// Recreate the data directory, clear MAX_SUBAGENTS and drop the module cache
// before each test.
beforeEach(() => {
  // `force: true` already tolerates a missing directory.
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });

  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
  delete process.env.MAX_SUBAGENTS;

  // Forces the next import() of subagents.js to evaluate the module again.
  vi.resetModules();
});

// Remove the scratch directory once the test is done.
afterEach(() => {
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
});
|
|
32
|
-
|
|
33
|
-
// maxParallel setting: default, auto-resolution, persistence and clamping.
describe("sub-agents config", () => {
  it("returns 0 as the configured value on a fresh install", async () => {
    const { getConfiguredMaxParallel } = await import("../src/services/subagents.js");

    expect(getConfiguredMaxParallel()).toBe(0);
  });

  it("resolves 0 to min(cpuCount, 16) in getMaxParallelAgents", async () => {
    const { getMaxParallelAgents } = await import("../src/services/subagents.js");

    const effective = getMaxParallelAgents();

    const expected = Math.min(os.cpus().length, 16);
    expect(effective).toBe(expected);
  });

  it("setMaxParallelAgents persists the value to disk", async () => {
    const { setMaxParallelAgents, getConfiguredMaxParallel, getMaxParallelAgents } =
      await import("../src/services/subagents.js");

    setMaxParallelAgents(5);
    expect(getConfiguredMaxParallel()).toBe(5);
    expect(getMaxParallelAgents()).toBe(5);

    // The value must also be present in the JSON file inside the data dir.
    const configPath = resolve(TEST_DATA_DIR, "sub-agents.json");
    expect(fs.existsSync(configPath)).toBe(true);
    const { maxParallel } = JSON.parse(fs.readFileSync(configPath, "utf-8"));
    expect(maxParallel).toBe(5);
  });

  it("clamps values above ABSOLUTE_MAX (16) down to 16", async () => {
    const { setMaxParallelAgents, getConfiguredMaxParallel } =
      await import("../src/services/subagents.js");

    const effective = setMaxParallelAgents(500);

    expect(effective).toBe(16);
    expect(getConfiguredMaxParallel()).toBe(16);
  });

  it("clamps negative values to 0 (which then resolves to auto)", async () => {
    const { setMaxParallelAgents, getConfiguredMaxParallel } =
      await import("../src/services/subagents.js");

    const effective = setMaxParallelAgents(-5);

    expect(getConfiguredMaxParallel()).toBe(0);
    const autoValue = Math.min(os.cpus().length, 16);
    expect(effective).toBe(autoValue);
  });

  it("floors fractional values", async () => {
    const { setMaxParallelAgents, getConfiguredMaxParallel } =
      await import("../src/services/subagents.js");

    setMaxParallelAgents(7.8);

    expect(getConfiguredMaxParallel()).toBe(7);
  });
});
|
|
79
|
-
|
|
80
|
-
// visibility setting: default, persistence, validation and accepted modes.
describe("sub-agents visibility config (A4)", () => {
  it("defaults visibility to 'auto' on a fresh install", async () => {
    const { getVisibility } = await import("../src/services/subagents.js");

    expect(getVisibility()).toBe("auto");
  });

  it("setVisibility persists the value to disk", async () => {
    const { setVisibility, getVisibility } = await import("../src/services/subagents.js");

    setVisibility("banner");
    expect(getVisibility()).toBe("banner");

    // The value must also be present in the JSON file inside the data dir.
    const configPath = resolve(TEST_DATA_DIR, "sub-agents.json");
    expect(fs.existsSync(configPath)).toBe(true);
    const { visibility } = JSON.parse(fs.readFileSync(configPath, "utf-8"));
    expect(visibility).toBe("banner");
  });

  it("rejects invalid visibility values", async () => {
    const { setVisibility } = await import("../src/services/subagents.js");

    // The cast only silences the type checker; the runtime value is invalid.
    const attempt = () => setVisibility("bogus" as "auto");

    expect(attempt).toThrow(/invalid/i);
  });

  it("accepts 'live' as a valid visibility mode (A4 Stufe 2)", async () => {
    const { setVisibility, getVisibility } = await import("../src/services/subagents.js");

    setVisibility("live");

    expect(getVisibility()).toBe("live");
  });

  it("setVisibility('auto') round-trips through disk", async () => {
    const { setVisibility, getVisibility } = await import("../src/services/subagents.js");

    setVisibility("banner");
    setVisibility("auto");

    expect(getVisibility()).toBe("auto");
  });
});
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, vi } from "vitest";
|
|
2
|
-
import fs from "fs";
|
|
3
|
-
import os from "os";
|
|
4
|
-
import { resolve } from "path";
|
|
5
|
-
|
|
6
|
-
// Per-run scratch directory; pid + timestamp keep concurrent runs apart.
const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-bot-depth-${process.pid}-${Date.now()}`);

// Give every test an empty data directory, a clean MAX_SUBAGENTS env and a
// fresh module cache.
beforeEach(() => {
  // `force: true` already tolerates a missing directory.
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });

  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
  delete process.env.MAX_SUBAGENTS;

  vi.resetModules();
});
|
|
15
|
-
|
|
16
|
-
// Replace the engine with a canned stream (one text chunk, then done) so that
// spawnSubAgent never reaches a real LLM.
vi.mock("../src/engine.js", () => {
  async function* queryWithFallback() {
    yield { type: "text", text: "ok" };
    yield { type: "done", text: "ok", inputTokens: 1, outputTokens: 1 };
  }

  return {
    getRegistry: () => ({ queryWithFallback }),
  };
});
|
|
25
|
-
|
|
26
|
-
// Depth handling in spawnSubAgent: depths 0-2 are accepted, depth 3 is
// rejected, and an omitted depth is recorded as 0.
describe("sub-agents depth-cap (F2)", () => {
  // Shape of the id a successful spawn resolves to (36 hex-or-dash characters).
  const SPAWN_ID_PATTERN = /^[0-9a-f-]{36}$/;

  const acceptedDepths = [
    { title: "accepts depth 0 (root)", name: "d0", depth: 0 },
    { title: "accepts depth 1", name: "d1", depth: 1 },
    { title: "accepts depth 2 (the cap)", name: "d2", depth: 2 },
  ];

  for (const { title, name, depth } of acceptedDepths) {
    it(title, async () => {
      const { spawnSubAgent } = await import("../src/services/subagents.js");

      const id = await spawnSubAgent({ name, prompt: "hi", depth });

      expect(id).toMatch(SPAWN_ID_PATTERN);
    });
  }

  it("rejects depth 3 with a clear error", async () => {
    const { spawnSubAgent } = await import("../src/services/subagents.js");

    const attempt = spawnSubAgent({ name: "d3", prompt: "hi", depth: 3 });

    await expect(attempt).rejects.toThrow(/depth limit/i);
  });

  it("defaults depth to 0 when omitted", async () => {
    const { spawnSubAgent, listSubAgents } = await import("../src/services/subagents.js");

    const id = await spawnSubAgent({ name: "nodepth", prompt: "hi" });

    const spawned = listSubAgents().find((agent) => agent.id === id);
    expect(spawned?.depth).toBe(0);
  });
});
|