joonecli 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +4 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/commands/builtinCommands.js +6 -6
- package/dist/commands/builtinCommands.js.map +1 -1
- package/dist/commands/commandRegistry.d.ts +3 -1
- package/dist/commands/commandRegistry.js.map +1 -1
- package/dist/core/agentLoop.d.ts +3 -1
- package/dist/core/agentLoop.js +17 -7
- package/dist/core/agentLoop.js.map +1 -1
- package/dist/core/compactor.js +2 -2
- package/dist/core/compactor.js.map +1 -1
- package/dist/core/contextGuard.d.ts +5 -0
- package/dist/core/contextGuard.js +30 -3
- package/dist/core/contextGuard.js.map +1 -1
- package/dist/core/events.d.ts +45 -0
- package/dist/core/events.js +8 -0
- package/dist/core/events.js.map +1 -0
- package/dist/core/sessionStore.js +3 -2
- package/dist/core/sessionStore.js.map +1 -1
- package/dist/core/subAgent.js +2 -2
- package/dist/core/subAgent.js.map +1 -1
- package/dist/core/tokenCounter.d.ts +8 -1
- package/dist/core/tokenCounter.js +28 -0
- package/dist/core/tokenCounter.js.map +1 -1
- package/dist/middleware/permission.js +1 -0
- package/dist/middleware/permission.js.map +1 -1
- package/dist/tools/browser.js +4 -1
- package/dist/tools/browser.js.map +1 -1
- package/dist/tools/index.d.ts +2 -1
- package/dist/tools/index.js +11 -3
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/installHostDeps.d.ts +2 -0
- package/dist/tools/installHostDeps.js +37 -0
- package/dist/tools/installHostDeps.js.map +1 -0
- package/dist/tools/router.js +1 -0
- package/dist/tools/router.js.map +1 -1
- package/dist/tools/spawnAgent.js +3 -1
- package/dist/tools/spawnAgent.js.map +1 -1
- package/dist/tracing/sessionTracer.d.ts +1 -0
- package/dist/tracing/sessionTracer.js +4 -1
- package/dist/tracing/sessionTracer.js.map +1 -1
- package/dist/ui/App.js +6 -1
- package/dist/ui/App.js.map +1 -1
- package/dist/ui/components/ActionLog.d.ts +7 -0
- package/dist/ui/components/ActionLog.js +63 -0
- package/dist/ui/components/ActionLog.js.map +1 -0
- package/dist/ui/components/FileBrowser.d.ts +2 -0
- package/dist/ui/components/FileBrowser.js +41 -0
- package/dist/ui/components/FileBrowser.js.map +1 -0
- package/package.json +3 -5
- package/AGENTS.md +0 -56
- package/Handover.md +0 -115
- package/PROGRESS.md +0 -160
- package/docs/01_insights_and_patterns.md +0 -27
- package/docs/02_edge_cases_and_mitigations.md +0 -143
- package/docs/03_initial_implementation_plan.md +0 -66
- package/docs/04_tech_stack_proposal.md +0 -20
- package/docs/05_prd.md +0 -87
- package/docs/06_user_stories.md +0 -72
- package/docs/07_system_architecture.md +0 -138
- package/docs/08_roadmap.md +0 -200
- package/e2b/Dockerfile +0 -26
- package/src/__tests__/bootstrap.test.ts +0 -111
- package/src/__tests__/config.test.ts +0 -97
- package/src/__tests__/m55.test.ts +0 -238
- package/src/__tests__/middleware.test.ts +0 -219
- package/src/__tests__/modelFactory.test.ts +0 -63
- package/src/__tests__/optimizations.test.ts +0 -201
- package/src/__tests__/promptBuilder.test.ts +0 -141
- package/src/__tests__/sandbox.test.ts +0 -102
- package/src/__tests__/security.test.ts +0 -122
- package/src/__tests__/streaming.test.ts +0 -82
- package/src/__tests__/toolRouter.test.ts +0 -52
- package/src/__tests__/tools.test.ts +0 -146
- package/src/__tests__/tracing.test.ts +0 -196
- package/src/agents/agentRegistry.ts +0 -69
- package/src/agents/agentSpec.ts +0 -67
- package/src/agents/builtinAgents.ts +0 -142
- package/src/cli/config.ts +0 -124
- package/src/cli/index.ts +0 -742
- package/src/cli/modelFactory.ts +0 -174
- package/src/cli/postinstall.ts +0 -28
- package/src/cli/providers.ts +0 -107
- package/src/commands/builtinCommands.ts +0 -293
- package/src/commands/commandRegistry.ts +0 -194
- package/src/core/agentLoop.d.ts.map +0 -1
- package/src/core/agentLoop.ts +0 -312
- package/src/core/autoSave.ts +0 -95
- package/src/core/compactor.ts +0 -252
- package/src/core/contextGuard.ts +0 -129
- package/src/core/errors.ts +0 -202
- package/src/core/promptBuilder.d.ts.map +0 -1
- package/src/core/promptBuilder.ts +0 -139
- package/src/core/reasoningRouter.ts +0 -121
- package/src/core/retry.ts +0 -75
- package/src/core/sessionResumer.ts +0 -90
- package/src/core/sessionStore.ts +0 -216
- package/src/core/subAgent.ts +0 -339
- package/src/core/tokenCounter.ts +0 -64
- package/src/evals/dataset.ts +0 -67
- package/src/evals/evaluator.ts +0 -81
- package/src/hitl/bridge.ts +0 -160
- package/src/middleware/commandSanitizer.ts +0 -60
- package/src/middleware/loopDetection.ts +0 -63
- package/src/middleware/permission.ts +0 -72
- package/src/middleware/pipeline.ts +0 -75
- package/src/middleware/preCompletion.ts +0 -94
- package/src/middleware/types.ts +0 -45
- package/src/sandbox/bootstrap.ts +0 -121
- package/src/sandbox/manager.ts +0 -239
- package/src/sandbox/sync.ts +0 -157
- package/src/skills/loader.ts +0 -143
- package/src/skills/tools.ts +0 -99
- package/src/skills/types.ts +0 -13
- package/src/test_cache.ts +0 -72
- package/src/tools/askUser.ts +0 -47
- package/src/tools/browser.ts +0 -137
- package/src/tools/index.d.ts.map +0 -1
- package/src/tools/index.ts +0 -237
- package/src/tools/registry.ts +0 -198
- package/src/tools/router.ts +0 -78
- package/src/tools/security.ts +0 -220
- package/src/tools/spawnAgent.ts +0 -158
- package/src/tools/webSearch.ts +0 -142
- package/src/tracing/analyzer.ts +0 -265
- package/src/tracing/langsmith.ts +0 -63
- package/src/tracing/sessionTracer.ts +0 -202
- package/src/tracing/types.ts +0 -49
- package/src/types/valyu.d.ts +0 -37
- package/src/ui/App.tsx +0 -404
- package/src/ui/components/HITLPrompt.tsx +0 -119
- package/src/ui/components/Header.tsx +0 -51
- package/src/ui/components/MessageBubble.tsx +0 -46
- package/src/ui/components/StatusBar.tsx +0 -138
- package/src/ui/components/StreamingText.tsx +0 -48
- package/src/ui/components/ToolCallPanel.tsx +0 -80
- package/tests/commands/commands.test.ts +0 -356
- package/tests/core/compactor.test.ts +0 -217
- package/tests/core/retryAndErrors.test.ts +0 -164
- package/tests/core/sessionResumer.test.ts +0 -95
- package/tests/core/sessionStore.test.ts +0 -84
- package/tests/core/stability.test.ts +0 -165
- package/tests/core/subAgent.test.ts +0 -238
- package/tests/hitl/hitlBridge.test.ts +0 -115
- package/tsconfig.json +0 -16
- package/vitest.config.ts +0 -10
- package/vitest.out +0 -48
|
@@ -1,219 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from "vitest";
|
|
2
|
-
import { MiddlewarePipeline } from "../middleware/pipeline.js";
|
|
3
|
-
import { ToolCallContext, ToolMiddleware } from "../middleware/types.js";
|
|
4
|
-
import { LoopDetectionMiddleware } from "../middleware/loopDetection.js";
|
|
5
|
-
import { CommandSanitizerMiddleware } from "../middleware/commandSanitizer.js";
|
|
6
|
-
import { PreCompletionMiddleware } from "../middleware/preCompletion.js";
|
|
7
|
-
|
|
8
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
9
|
-
// Pipeline Core
|
|
10
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
11
|
-
|
|
12
|
-
describe("MiddlewarePipeline", () => {
|
|
13
|
-
const makeCtx = (overrides?: Partial<ToolCallContext>): ToolCallContext => ({
|
|
14
|
-
toolName: "bash",
|
|
15
|
-
args: { command: "echo hello" },
|
|
16
|
-
callId: "call-1",
|
|
17
|
-
...overrides,
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
// ─── Test #44: Runs before/after hooks in order ───
|
|
21
|
-
|
|
22
|
-
it("runs before hooks in registration order and after hooks in reverse", async () => {
|
|
23
|
-
const order: string[] = [];
|
|
24
|
-
|
|
25
|
-
const pipeline = new MiddlewarePipeline();
|
|
26
|
-
pipeline.use({
|
|
27
|
-
name: "A",
|
|
28
|
-
before: (ctx) => { order.push("A:before"); return ctx; },
|
|
29
|
-
after: (_ctx, r) => { order.push("A:after"); return r; },
|
|
30
|
-
});
|
|
31
|
-
pipeline.use({
|
|
32
|
-
name: "B",
|
|
33
|
-
before: (ctx) => { order.push("B:before"); return ctx; },
|
|
34
|
-
after: (_ctx, r) => { order.push("B:after"); return r; },
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
const executeFn = vi.fn(async () => ({ content: "result" }));
|
|
38
|
-
await pipeline.run(makeCtx(), executeFn);
|
|
39
|
-
|
|
40
|
-
expect(order).toEqual(["A:before", "B:before", "B:after", "A:after"]);
|
|
41
|
-
expect(executeFn).toHaveBeenCalledOnce();
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
// ─── Test #45: Short-circuits when before returns string ───
|
|
45
|
-
|
|
46
|
-
it("short-circuits and does NOT execute the tool when before returns a string", async () => {
|
|
47
|
-
const pipeline = new MiddlewarePipeline();
|
|
48
|
-
pipeline.use({
|
|
49
|
-
name: "Blocker",
|
|
50
|
-
before: () => "⚠ Blocked!",
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
const executeFn = vi.fn(async () => ({ content: "should not reach this" }));
|
|
54
|
-
const result = await pipeline.run(makeCtx(), executeFn);
|
|
55
|
-
|
|
56
|
-
expect(result).toBe("⚠ Blocked!");
|
|
57
|
-
expect(executeFn).not.toHaveBeenCalled();
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// ─── Test #46: After hooks can transform the result ───
|
|
61
|
-
|
|
62
|
-
it("after hooks can transform the tool result", async () => {
|
|
63
|
-
const pipeline = new MiddlewarePipeline();
|
|
64
|
-
pipeline.use({
|
|
65
|
-
name: "Uppercaser",
|
|
66
|
-
after: (_ctx, result) => { result.content = result.content.toUpperCase(); return result; },
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
const result = await pipeline.run(makeCtx(), async () => ({ content: "hello" }));
|
|
70
|
-
|
|
71
|
-
expect(result).toBe("HELLO");
|
|
72
|
-
});
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
76
|
-
// LoopDetectionMiddleware
|
|
77
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
78
|
-
|
|
79
|
-
describe("LoopDetectionMiddleware", () => {
|
|
80
|
-
const makeCtx = (cmd = "echo hello"): ToolCallContext => ({
|
|
81
|
-
toolName: "bash",
|
|
82
|
-
args: { command: cmd },
|
|
83
|
-
callId: "call-x",
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
// ─── Test #47: Allows first 2 identical calls ───
|
|
87
|
-
|
|
88
|
-
it("allows calls below the threshold", () => {
|
|
89
|
-
const mw = new LoopDetectionMiddleware(3);
|
|
90
|
-
|
|
91
|
-
expect(mw.before(makeCtx())).toEqual(makeCtx());
|
|
92
|
-
expect(mw.before(makeCtx())).toEqual(makeCtx());
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
// ─── Test #48: Blocks on 3rd identical call ───
|
|
96
|
-
|
|
97
|
-
it("blocks on the Nth identical consecutive call", () => {
|
|
98
|
-
const mw = new LoopDetectionMiddleware(3);
|
|
99
|
-
|
|
100
|
-
mw.before(makeCtx());
|
|
101
|
-
mw.before(makeCtx());
|
|
102
|
-
const result = mw.before(makeCtx());
|
|
103
|
-
|
|
104
|
-
expect(typeof result).toBe("string");
|
|
105
|
-
expect(result as string).toMatch(/loop detected/i);
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
// ─── Test #49: Resets when args change ───
|
|
109
|
-
|
|
110
|
-
it("resets the count when a different call is made", () => {
|
|
111
|
-
const mw = new LoopDetectionMiddleware(3);
|
|
112
|
-
|
|
113
|
-
mw.before(makeCtx("echo a"));
|
|
114
|
-
mw.before(makeCtx("echo a"));
|
|
115
|
-
// Different call breaks the streak
|
|
116
|
-
mw.before(makeCtx("echo b"));
|
|
117
|
-
// Back to "echo a" — only 1 in a row now
|
|
118
|
-
const result = mw.before(makeCtx("echo a"));
|
|
119
|
-
|
|
120
|
-
expect(typeof result).not.toBe("string");
|
|
121
|
-
});
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
125
|
-
// CommandSanitizerMiddleware
|
|
126
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
127
|
-
|
|
128
|
-
describe("CommandSanitizerMiddleware", () => {
|
|
129
|
-
const mw = new CommandSanitizerMiddleware();
|
|
130
|
-
|
|
131
|
-
const makeCtx = (cmd: string): ToolCallContext => ({
|
|
132
|
-
toolName: "bash",
|
|
133
|
-
args: { command: cmd },
|
|
134
|
-
callId: "call-x",
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
// ─── Test #50: Blocks rm -rf / ───
|
|
138
|
-
|
|
139
|
-
it("blocks rm -rf /", () => {
|
|
140
|
-
const result = mw.before(makeCtx("rm -rf /"));
|
|
141
|
-
expect(typeof result).toBe("string");
|
|
142
|
-
expect(result as string).toMatch(/blocked/i);
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
// ─── Test #51: Blocks interactive commands ───
|
|
146
|
-
|
|
147
|
-
it("blocks interactive commands like vim", () => {
|
|
148
|
-
const result = mw.before(makeCtx("vim src/index.ts"));
|
|
149
|
-
expect(typeof result).toBe("string");
|
|
150
|
-
expect(result as string).toMatch(/interactive/i);
|
|
151
|
-
});
|
|
152
|
-
|
|
153
|
-
// ─── Test #52: Allows safe commands ───
|
|
154
|
-
|
|
155
|
-
it("allows safe commands through", () => {
|
|
156
|
-
const result = mw.before(makeCtx("npm test"));
|
|
157
|
-
expect(result).toEqual(makeCtx("npm test"));
|
|
158
|
-
});
|
|
159
|
-
|
|
160
|
-
// ─── Test #53: Ignores non-bash tools ───
|
|
161
|
-
|
|
162
|
-
it("ignores non-bash tool calls entirely", () => {
|
|
163
|
-
const ctx: ToolCallContext = {
|
|
164
|
-
toolName: "read_file",
|
|
165
|
-
args: { path: "/etc/passwd" },
|
|
166
|
-
callId: "call-x",
|
|
167
|
-
};
|
|
168
|
-
expect(mw.before(ctx)).toEqual(ctx);
|
|
169
|
-
});
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
173
|
-
// PreCompletionMiddleware
|
|
174
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
175
|
-
|
|
176
|
-
describe("PreCompletionMiddleware", () => {
|
|
177
|
-
// ─── Test #54: Blocks completion without tests ───
|
|
178
|
-
|
|
179
|
-
it("blocks task_complete when no tests have been run", () => {
|
|
180
|
-
const mw = new PreCompletionMiddleware();
|
|
181
|
-
|
|
182
|
-
const ctx: ToolCallContext = {
|
|
183
|
-
toolName: "task_complete",
|
|
184
|
-
args: {},
|
|
185
|
-
callId: "call-x",
|
|
186
|
-
};
|
|
187
|
-
const result = mw.before(ctx);
|
|
188
|
-
|
|
189
|
-
expect(typeof result).toBe("string");
|
|
190
|
-
expect(result as string).toMatch(/must run tests/i);
|
|
191
|
-
});
|
|
192
|
-
|
|
193
|
-
// ─── Test #55: Allows completion after tests ───
|
|
194
|
-
|
|
195
|
-
it("allows task_complete after a test command has been run", () => {
|
|
196
|
-
const mw = new PreCompletionMiddleware();
|
|
197
|
-
|
|
198
|
-
// Simulate running tests
|
|
199
|
-
const testCtx: ToolCallContext = {
|
|
200
|
-
toolName: "bash",
|
|
201
|
-
args: { command: "npm test" },
|
|
202
|
-
callId: "call-1",
|
|
203
|
-
};
|
|
204
|
-
mw.before(testCtx);
|
|
205
|
-
mw.after(testCtx, { content: "tests passed", metadata: { exitCode: 0 }, isError: false });
|
|
206
|
-
|
|
207
|
-
expect(mw.hasPassedTests()).toBe(true);
|
|
208
|
-
|
|
209
|
-
// Now try completion
|
|
210
|
-
const completeCtx: ToolCallContext = {
|
|
211
|
-
toolName: "task_complete",
|
|
212
|
-
args: {},
|
|
213
|
-
callId: "call-2",
|
|
214
|
-
};
|
|
215
|
-
const result = mw.before(completeCtx);
|
|
216
|
-
|
|
217
|
-
expect(result).toEqual(completeCtx);
|
|
218
|
-
});
|
|
219
|
-
});
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { JooneConfig, DEFAULT_CONFIG } from "../cli/config.js";
|
|
3
|
-
import { createModel } from "../cli/modelFactory.js";
|
|
4
|
-
|
|
5
|
-
describe("Model Factory", () => {
|
|
6
|
-
// ─── RED Test #4: createModel returns a ChatAnthropic for "anthropic" ───
|
|
7
|
-
|
|
8
|
-
it("creates an Anthropic model when provider is 'anthropic'", async () => {
|
|
9
|
-
const config: JooneConfig = {
|
|
10
|
-
...DEFAULT_CONFIG,
|
|
11
|
-
provider: "anthropic",
|
|
12
|
-
model: "claude-sonnet-4-20250514",
|
|
13
|
-
apiKey: "sk-ant-test-key",
|
|
14
|
-
};
|
|
15
|
-
|
|
16
|
-
const model = await createModel(config);
|
|
17
|
-
|
|
18
|
-
// The model should have the correct type identifier
|
|
19
|
-
expect(model).toBeDefined();
|
|
20
|
-
expect(model.constructor.name).toContain("ChatAnthropic");
|
|
21
|
-
}, 15000);
|
|
22
|
-
|
|
23
|
-
// ─── RED Test #5: createModel returns a ChatOpenAI for "openai" ───
|
|
24
|
-
|
|
25
|
-
it("creates an OpenAI model when provider is 'openai'", async () => {
|
|
26
|
-
const config: JooneConfig = {
|
|
27
|
-
...DEFAULT_CONFIG,
|
|
28
|
-
provider: "openai",
|
|
29
|
-
model: "gpt-4o",
|
|
30
|
-
apiKey: "sk-openai-test-key",
|
|
31
|
-
};
|
|
32
|
-
|
|
33
|
-
const model = await createModel(config);
|
|
34
|
-
|
|
35
|
-
expect(model).toBeDefined();
|
|
36
|
-
expect(model.constructor.name).toContain("ChatOpenAI");
|
|
37
|
-
}, 15000);
|
|
38
|
-
|
|
39
|
-
// ─── RED Test #6: createModel throws if API key is missing ───
|
|
40
|
-
|
|
41
|
-
it("throws a descriptive error when API key is missing for a cloud provider", async () => {
|
|
42
|
-
const config: JooneConfig = {
|
|
43
|
-
...DEFAULT_CONFIG,
|
|
44
|
-
provider: "anthropic",
|
|
45
|
-
model: "claude-sonnet-4-20250514",
|
|
46
|
-
apiKey: undefined,
|
|
47
|
-
};
|
|
48
|
-
|
|
49
|
-
await expect(createModel(config)).rejects.toThrow(/API key/i);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
// ─── RED Test #7: createModel throws with install command for missing package ───
|
|
53
|
-
|
|
54
|
-
it("throws an error with install instructions for unsupported/missing provider", async () => {
|
|
55
|
-
const config: JooneConfig = {
|
|
56
|
-
...DEFAULT_CONFIG,
|
|
57
|
-
provider: "unknown-provider",
|
|
58
|
-
apiKey: "some-key",
|
|
59
|
-
};
|
|
60
|
-
|
|
61
|
-
await expect(createModel(config)).rejects.toThrow(/unsupported provider/i);
|
|
62
|
-
});
|
|
63
|
-
});
|
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach } from "vitest";
|
|
2
|
-
import { HumanMessage, AIMessage, SystemMessage } from "@langchain/core/messages";
|
|
3
|
-
import {
|
|
4
|
-
SearchToolsTool,
|
|
5
|
-
ActivateToolTool,
|
|
6
|
-
activateTool,
|
|
7
|
-
getActivatedTools,
|
|
8
|
-
resetActivatedTools,
|
|
9
|
-
} from "../tools/registry.js";
|
|
10
|
-
import {
|
|
11
|
-
estimateTokens,
|
|
12
|
-
countMessageTokens,
|
|
13
|
-
isNearCapacity,
|
|
14
|
-
} from "../core/tokenCounter.js";
|
|
15
|
-
import { CacheOptimizedPromptBuilder } from "../core/promptBuilder.js";
|
|
16
|
-
import {
|
|
17
|
-
ReasoningRouter,
|
|
18
|
-
ReasoningLevel,
|
|
19
|
-
} from "../core/reasoningRouter.js";
|
|
20
|
-
|
|
21
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
22
|
-
// 5a: Enhanced Tool Registry
|
|
23
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
24
|
-
|
|
25
|
-
describe("Enhanced Tool Registry", () => {
|
|
26
|
-
beforeEach(() => {
|
|
27
|
-
resetActivatedTools();
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
// ─── Test #56: Fuzzy search matches by description keyword ───
|
|
31
|
-
|
|
32
|
-
it("fuzzy search matches tools by description keyword", async () => {
|
|
33
|
-
const result = await SearchToolsTool.execute({ query: "commit" });
|
|
34
|
-
|
|
35
|
-
expect(result.content).toContain("git_commit");
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
// ─── Test #57: Fuzzy search matches by name ───
|
|
39
|
-
|
|
40
|
-
it("fuzzy search matches tools by name", async () => {
|
|
41
|
-
const result = await SearchToolsTool.execute({ query: "grep" });
|
|
42
|
-
|
|
43
|
-
expect(result.content).toContain("grep_search");
|
|
44
|
-
});
|
|
45
|
-
|
|
46
|
-
// ─── Test #58: activateTool adds tool to the active set ───
|
|
47
|
-
|
|
48
|
-
it("activateTool adds a tool to the active set", () => {
|
|
49
|
-
expect(getActivatedTools()).toHaveLength(0);
|
|
50
|
-
|
|
51
|
-
const tool = activateTool("git_commit");
|
|
52
|
-
|
|
53
|
-
expect(tool).toBeDefined();
|
|
54
|
-
expect(tool!.name).toBe("git_commit");
|
|
55
|
-
expect(getActivatedTools()).toHaveLength(1);
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
// ─── Test #59: ActivateToolTool returns schema on activation ───
|
|
59
|
-
|
|
60
|
-
it("ActivateToolTool returns the schema on successful activation", async () => {
|
|
61
|
-
const result = await ActivateToolTool.execute({ name: "git_diff" });
|
|
62
|
-
|
|
63
|
-
expect(result.content).toContain("activated");
|
|
64
|
-
expect(result.content).toContain("Schema");
|
|
65
|
-
expect(getActivatedTools()).toHaveLength(1);
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
// ─── Test #60: ActivateToolTool returns error for unknown tool ───
|
|
69
|
-
|
|
70
|
-
it("ActivateToolTool returns error for unknown tool", async () => {
|
|
71
|
-
const result = await ActivateToolTool.execute({ name: "nonexistent" });
|
|
72
|
-
|
|
73
|
-
expect(result.content).toMatch(/not found/i);
|
|
74
|
-
});
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
78
|
-
// 5b: Token Counter & Context Compaction
|
|
79
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
80
|
-
|
|
81
|
-
describe("Token Counter", () => {
|
|
82
|
-
// ─── Test #61: Estimates tokens for short string ───
|
|
83
|
-
|
|
84
|
-
it("estimates tokens using ~4 chars per token", () => {
|
|
85
|
-
const tokens = estimateTokens("Hello world!"); // 12 chars → 3 tokens
|
|
86
|
-
expect(tokens).toBe(3);
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
// ─── Test #62: Counts tokens across messages ───
|
|
90
|
-
|
|
91
|
-
it("counts tokens across multiple messages", () => {
|
|
92
|
-
const messages = [
|
|
93
|
-
new HumanMessage("Hello"), // 5 chars → 2 tokens + 4 overhead = 6
|
|
94
|
-
new AIMessage("Hi there"), // 8 chars → 2 tokens + 4 overhead = 6
|
|
95
|
-
];
|
|
96
|
-
const total = countMessageTokens(messages);
|
|
97
|
-
|
|
98
|
-
expect(total).toBeGreaterThan(0);
|
|
99
|
-
expect(total).toBe(12); // (2+4) + (2+4)
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
// ─── Test #63: isNearCapacity detects threshold ───
|
|
103
|
-
|
|
104
|
-
it("returns true when messages exceed 80% of capacity", () => {
|
|
105
|
-
// Create a big message ~320 chars → ~80 tokens
|
|
106
|
-
const bigMsg = new HumanMessage("x".repeat(320));
|
|
107
|
-
const messages = [bigMsg];
|
|
108
|
-
|
|
109
|
-
// maxTokens=100, threshold=0.8 → trigger at 80 tokens
|
|
110
|
-
// 320/4=80 + 4 overhead = 84 > 80
|
|
111
|
-
expect(isNearCapacity(messages, 100, 0.8)).toBe(true);
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
// ─── Test #64: isNearCapacity returns false below threshold ───
|
|
115
|
-
|
|
116
|
-
it("returns false when well below capacity", () => {
|
|
117
|
-
const messages = [new HumanMessage("short")];
|
|
118
|
-
|
|
119
|
-
expect(isNearCapacity(messages, 100000, 0.8)).toBe(false);
|
|
120
|
-
});
|
|
121
|
-
});
|
|
122
|
-
|
|
123
|
-
describe("Context Compaction", () => {
|
|
124
|
-
// ─── Test #65: compactHistory preserves last N messages ───
|
|
125
|
-
|
|
126
|
-
it("preserves the last N messages and prepends summary", () => {
|
|
127
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
128
|
-
const history = [
|
|
129
|
-
new HumanMessage("msg 1"),
|
|
130
|
-
new AIMessage("response 1"),
|
|
131
|
-
new HumanMessage("msg 2"),
|
|
132
|
-
new AIMessage("response 2"),
|
|
133
|
-
new HumanMessage("msg 3"),
|
|
134
|
-
new AIMessage("response 3"),
|
|
135
|
-
];
|
|
136
|
-
|
|
137
|
-
const compacted = builder.compactHistory(history, "Summary of turns 1-2.", 4);
|
|
138
|
-
|
|
139
|
-
// Should have: 1 summary + 4 preserved
|
|
140
|
-
expect(compacted).toHaveLength(5);
|
|
141
|
-
expect((compacted[0] as HumanMessage).content).toContain("compacted");
|
|
142
|
-
expect((compacted[0] as HumanMessage).content).toContain("Summary of turns 1-2.");
|
|
143
|
-
});
|
|
144
|
-
});
|
|
145
|
-
|
|
146
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
147
|
-
// 5c: Reasoning Sandwich
|
|
148
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
149
|
-
|
|
150
|
-
describe("ReasoningRouter", () => {
|
|
151
|
-
// ─── Test #66: First turns are HIGH (planning) ───
|
|
152
|
-
|
|
153
|
-
it("returns HIGH for the first turn (planning phase)", () => {
|
|
154
|
-
const router = new ReasoningRouter();
|
|
155
|
-
|
|
156
|
-
router.advanceTurn(false, false);
|
|
157
|
-
const level = router.getLevel();
|
|
158
|
-
|
|
159
|
-
expect(level).toBe(ReasoningLevel.HIGH);
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
// ─── Test #67: Tool-heavy turns are MEDIUM ───
|
|
163
|
-
|
|
164
|
-
it("returns MEDIUM for tool-heavy turns after planning", () => {
|
|
165
|
-
const router = new ReasoningRouter({ planningTurns: 1 });
|
|
166
|
-
|
|
167
|
-
router.advanceTurn(false, false); // turn 1
|
|
168
|
-
router.getLevel(); // HIGH (planning)
|
|
169
|
-
|
|
170
|
-
router.advanceTurn(true, false); // turn 2
|
|
171
|
-
const level = router.getLevel(); // tool call shouldn't be high
|
|
172
|
-
|
|
173
|
-
expect(level).toBe(ReasoningLevel.MEDIUM);
|
|
174
|
-
});
|
|
175
|
-
|
|
176
|
-
// ─── Test #68: Post-error turns are HIGH (recovery) ───
|
|
177
|
-
|
|
178
|
-
it("returns HIGH for recovery after an error", () => {
|
|
179
|
-
const router = new ReasoningRouter({ planningTurns: 1 });
|
|
180
|
-
|
|
181
|
-
router.advanceTurn(false, false); // turn 1
|
|
182
|
-
router.getLevel(); // planning
|
|
183
|
-
|
|
184
|
-
router.advanceTurn(true, false); // turn 2
|
|
185
|
-
router.getLevel(); // tool call (MEDIUM)
|
|
186
|
-
|
|
187
|
-
router.advanceTurn(false, true); // turn 3
|
|
188
|
-
const level = router.getLevel(); // error!
|
|
189
|
-
|
|
190
|
-
expect(level).toBe(ReasoningLevel.HIGH);
|
|
191
|
-
});
|
|
192
|
-
|
|
193
|
-
// ─── Test #69: Temperature mapping ───
|
|
194
|
-
|
|
195
|
-
it("maps reasoning levels to correct temperatures", () => {
|
|
196
|
-
const router = new ReasoningRouter({ highTemp: 0, mediumTemp: 0.3 });
|
|
197
|
-
|
|
198
|
-
expect(router.getTemperature(ReasoningLevel.HIGH)).toBe(0);
|
|
199
|
-
expect(router.getTemperature(ReasoningLevel.MEDIUM)).toBe(0.3);
|
|
200
|
-
});
|
|
201
|
-
});
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
CacheOptimizedPromptBuilder,
|
|
4
|
-
ContextState,
|
|
5
|
-
} from "../core/promptBuilder.js";
|
|
6
|
-
import {
|
|
7
|
-
SystemMessage,
|
|
8
|
-
HumanMessage,
|
|
9
|
-
AIMessage,
|
|
10
|
-
} from "@langchain/core/messages";
|
|
11
|
-
|
|
12
|
-
describe("CacheOptimizedPromptBuilder", () => {
|
|
13
|
-
// ─── Behavior 1: Static prefix ordering ───
|
|
14
|
-
// The most critical behavior: the first 3 messages must ALWAYS be
|
|
15
|
-
// SystemMessages in the order: global → project → session.
|
|
16
|
-
// This is the foundation of prompt cache validity.
|
|
17
|
-
|
|
18
|
-
it("builds prompt with static prefix in strict order: global, project, session", () => {
|
|
19
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
20
|
-
const state: ContextState = {
|
|
21
|
-
globalSystemInstructions: "You are a coding assistant.",
|
|
22
|
-
projectMemory: "Use TypeScript.",
|
|
23
|
-
sessionContext: "OS: Windows",
|
|
24
|
-
conversationHistory: [],
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
const messages = builder.buildPrompt(state);
|
|
28
|
-
|
|
29
|
-
// exactly 1 static message when history is empty
|
|
30
|
-
expect(messages).toHaveLength(1);
|
|
31
|
-
|
|
32
|
-
// must be system-type message
|
|
33
|
-
expect(messages[0]._getType()).toBe("system");
|
|
34
|
-
|
|
35
|
-
// Order must be: global → project → session
|
|
36
|
-
expect(messages[0].content).toContain("You are a coding assistant.");
|
|
37
|
-
expect(messages[0].content).toContain("Use TypeScript.");
|
|
38
|
-
expect(messages[0].content).toContain("OS: Windows");
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
// ─── Behavior 2: Conversation history appended AFTER the static prefix ───
|
|
42
|
-
// Dynamic messages must never appear before the static prefix.
|
|
43
|
-
|
|
44
|
-
it("appends conversation history after the static prefix", () => {
|
|
45
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
46
|
-
const state: ContextState = {
|
|
47
|
-
globalSystemInstructions: "System prompt.",
|
|
48
|
-
projectMemory: "Project rules.",
|
|
49
|
-
sessionContext: "Session info.",
|
|
50
|
-
conversationHistory: [
|
|
51
|
-
new HumanMessage("Hello"),
|
|
52
|
-
new AIMessage("Hi there!"),
|
|
53
|
-
],
|
|
54
|
-
};
|
|
55
|
-
|
|
56
|
-
const messages = builder.buildPrompt(state);
|
|
57
|
-
|
|
58
|
-
// 1 static + 2 conversation = 3
|
|
59
|
-
expect(messages).toHaveLength(3);
|
|
60
|
-
|
|
61
|
-
// First is system messages (static prefix)
|
|
62
|
-
expect(messages[0]._getType()).toBe("system");
|
|
63
|
-
|
|
64
|
-
// Last 2 are conversation messages
|
|
65
|
-
expect(messages[1]._getType()).toBe("human");
|
|
66
|
-
expect(messages[2]._getType()).toBe("ai");
|
|
67
|
-
expect(messages[1].content).toBe("Hello");
|
|
68
|
-
expect(messages[2].content).toBe("Hi there!");
|
|
69
|
-
});
|
|
70
|
-
|
|
71
|
-
// ─── Behavior 3: Static prefix is identical across calls ───
|
|
72
|
-
// If we call buildPrompt twice with the same state (but more history),
|
|
73
|
-
// the first 3 messages must be byte-identical to preserve the cache.
|
|
74
|
-
|
|
75
|
-
it("produces identical static prefix across multiple calls with growing history", () => {
|
|
76
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
77
|
-
const state: ContextState = {
|
|
78
|
-
globalSystemInstructions: "Be helpful.",
|
|
79
|
-
projectMemory: "Use strict types.",
|
|
80
|
-
sessionContext: "Env: Node",
|
|
81
|
-
conversationHistory: [],
|
|
82
|
-
};
|
|
83
|
-
|
|
84
|
-
const firstCall = builder.buildPrompt(state);
|
|
85
|
-
|
|
86
|
-
// Simulate a conversation turn
|
|
87
|
-
state.conversationHistory.push(new HumanMessage("What is 2+2?"));
|
|
88
|
-
state.conversationHistory.push(new AIMessage("4"));
|
|
89
|
-
|
|
90
|
-
const secondCall = builder.buildPrompt(state);
|
|
91
|
-
|
|
92
|
-
// Static prefix (first message) must be identical
|
|
93
|
-
expect(secondCall[0].content).toBe(firstCall[0].content);
|
|
94
|
-
});
|
|
95
|
-
|
|
96
|
-
// ─── Behavior 4: System reminder is injected as a HumanMessage ───
|
|
97
|
-
|
|
98
|
-
it("injects a system reminder as a HumanMessage with <system-reminder> tags", () => {
|
|
99
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
100
|
-
const history = [new HumanMessage("Start task")];
|
|
101
|
-
|
|
102
|
-
const updated = builder.injectSystemReminder(
|
|
103
|
-
history,
|
|
104
|
-
"File auth.ts was deleted."
|
|
105
|
-
);
|
|
106
|
-
|
|
107
|
-
// Original history is not mutated
|
|
108
|
-
expect(history).toHaveLength(1);
|
|
109
|
-
|
|
110
|
-
// Updated history has the reminder appended
|
|
111
|
-
expect(updated).toHaveLength(2);
|
|
112
|
-
expect(updated[1]._getType()).toBe("human");
|
|
113
|
-
expect(updated[1].content).toContain("<system-reminder>");
|
|
114
|
-
expect(updated[1].content).toContain("File auth.ts was deleted.");
|
|
115
|
-
expect(updated[1].content).toContain("</system-reminder>");
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
// ─── Behavior 5: Compaction preserves recent messages with summary ───
|
|
119
|
-
|
|
120
|
-
it("compacts history into summary + preserved recent messages", () => {
|
|
121
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
122
|
-
const longHistory = [
|
|
123
|
-
new HumanMessage("Step 1"),
|
|
124
|
-
new AIMessage("Done 1"),
|
|
125
|
-
new HumanMessage("Step 2"),
|
|
126
|
-
new AIMessage("Done 2"),
|
|
127
|
-
];
|
|
128
|
-
|
|
129
|
-
const compacted = builder.compactHistory(
|
|
130
|
-
longHistory,
|
|
131
|
-
"Completed steps 1 and 2."
|
|
132
|
-
);
|
|
133
|
-
|
|
134
|
-
// Default keepLastN=6, history has 4 → summary + all 4 preserved
|
|
135
|
-
expect(compacted).toHaveLength(5);
|
|
136
|
-
expect(compacted[0]._getType()).toBe("human");
|
|
137
|
-
expect(compacted[0].content).toContain("Completed steps 1 and 2.");
|
|
138
|
-
// Recent messages are preserved after the summary
|
|
139
|
-
expect(compacted[1].content).toBe("Step 1");
|
|
140
|
-
});
|
|
141
|
-
});
|