joonecli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -12
- package/dist/__tests__/optimizations.test.js.map +1 -1
- package/dist/__tests__/promptBuilder.test.js +14 -20
- package/dist/__tests__/promptBuilder.test.js.map +1 -1
- package/dist/agents/agentRegistry.d.ts +37 -0
- package/dist/agents/agentRegistry.js +58 -0
- package/dist/agents/agentRegistry.js.map +1 -0
- package/dist/agents/agentSpec.d.ts +54 -0
- package/dist/agents/agentSpec.js +9 -0
- package/dist/agents/agentSpec.js.map +1 -0
- package/dist/agents/builtinAgents.d.ts +20 -0
- package/{src/agents/builtinAgents.ts → dist/agents/builtinAgents.js} +84 -101
- package/dist/agents/builtinAgents.js.map +1 -0
- package/dist/cli/config.d.ts +4 -0
- package/dist/cli/config.js.map +1 -1
- package/dist/cli/index.js +29 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/postinstall.d.ts +2 -0
- package/dist/cli/postinstall.js +25 -0
- package/dist/cli/postinstall.js.map +1 -0
- package/dist/commands/builtinCommands.d.ts +21 -0
- package/dist/commands/builtinCommands.js +241 -0
- package/dist/commands/builtinCommands.js.map +1 -0
- package/dist/commands/commandRegistry.d.ts +92 -0
- package/dist/commands/commandRegistry.js +128 -0
- package/dist/commands/commandRegistry.js.map +1 -0
- package/dist/core/agentLoop.d.ts +7 -2
- package/dist/core/agentLoop.js +35 -13
- package/dist/core/agentLoop.js.map +1 -1
- package/dist/core/autoSave.d.ts +41 -0
- package/dist/core/autoSave.js +69 -0
- package/dist/core/autoSave.js.map +1 -0
- package/dist/core/compactor.d.ts +66 -0
- package/dist/core/compactor.js +170 -0
- package/dist/core/compactor.js.map +1 -0
- package/dist/core/contextGuard.d.ts +38 -0
- package/dist/core/contextGuard.js +122 -0
- package/dist/core/contextGuard.js.map +1 -0
- package/dist/core/events.d.ts +45 -0
- package/dist/core/events.js +8 -0
- package/dist/core/events.js.map +1 -0
- package/dist/core/promptBuilder.d.ts +16 -1
- package/dist/core/promptBuilder.js +27 -14
- package/dist/core/promptBuilder.js.map +1 -1
- package/dist/core/sessionResumer.js +3 -3
- package/dist/core/sessionResumer.js.map +1 -1
- package/dist/core/sessionStore.js +3 -2
- package/dist/core/sessionStore.js.map +1 -1
- package/dist/core/subAgent.d.ts +56 -0
- package/dist/core/subAgent.js +240 -0
- package/dist/core/subAgent.js.map +1 -0
- package/dist/core/tokenCounter.d.ts +8 -1
- package/dist/core/tokenCounter.js +28 -0
- package/dist/core/tokenCounter.js.map +1 -1
- package/dist/debug_google.d.ts +1 -0
- package/dist/debug_google.js +23 -0
- package/dist/debug_google.js.map +1 -0
- package/dist/middleware/permission.js +1 -0
- package/dist/middleware/permission.js.map +1 -1
- package/dist/test_google.d.ts +1 -0
- package/dist/test_google.js +32 -89
- package/dist/test_google.js.map +1 -0
- package/dist/tools/browser.js +4 -1
- package/dist/tools/browser.js.map +1 -1
- package/dist/tools/index.d.ts +2 -1
- package/dist/tools/index.js +11 -3
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/installHostDeps.d.ts +2 -0
- package/dist/tools/installHostDeps.js +37 -0
- package/dist/tools/installHostDeps.js.map +1 -0
- package/dist/tools/router.js +3 -0
- package/dist/tools/router.js.map +1 -1
- package/dist/tools/spawnAgent.d.ts +19 -0
- package/dist/tools/spawnAgent.js +132 -0
- package/dist/tools/spawnAgent.js.map +1 -0
- package/dist/tracing/sessionTracer.d.ts +1 -0
- package/dist/tracing/sessionTracer.js +4 -1
- package/dist/tracing/sessionTracer.js.map +1 -1
- package/dist/ui/App.js +94 -6
- package/dist/ui/App.js.map +1 -1
- package/dist/ui/components/ActionLog.d.ts +7 -0
- package/dist/ui/components/ActionLog.js +63 -0
- package/dist/ui/components/ActionLog.js.map +1 -0
- package/dist/ui/components/FileBrowser.d.ts +2 -0
- package/dist/ui/components/FileBrowser.js +41 -0
- package/dist/ui/components/FileBrowser.js.map +1 -0
- package/package.json +5 -6
- package/AGENTS.md +0 -56
- package/Handover.md +0 -115
- package/PROGRESS.md +0 -160
- package/docs/01_insights_and_patterns.md +0 -27
- package/docs/02_edge_cases_and_mitigations.md +0 -143
- package/docs/03_initial_implementation_plan.md +0 -66
- package/docs/04_tech_stack_proposal.md +0 -20
- package/docs/05_prd.md +0 -87
- package/docs/06_user_stories.md +0 -72
- package/docs/07_system_architecture.md +0 -138
- package/docs/08_roadmap.md +0 -200
- package/e2b/Dockerfile +0 -26
- package/src/__tests__/bootstrap.test.ts +0 -111
- package/src/__tests__/config.test.ts +0 -97
- package/src/__tests__/m55.test.ts +0 -238
- package/src/__tests__/middleware.test.ts +0 -219
- package/src/__tests__/modelFactory.test.ts +0 -63
- package/src/__tests__/optimizations.test.ts +0 -201
- package/src/__tests__/promptBuilder.test.ts +0 -141
- package/src/__tests__/sandbox.test.ts +0 -102
- package/src/__tests__/security.test.ts +0 -122
- package/src/__tests__/streaming.test.ts +0 -82
- package/src/__tests__/toolRouter.test.ts +0 -52
- package/src/__tests__/tools.test.ts +0 -146
- package/src/__tests__/tracing.test.ts +0 -196
- package/src/agents/agentRegistry.ts +0 -69
- package/src/agents/agentSpec.ts +0 -67
- package/src/cli/config.ts +0 -124
- package/src/cli/index.ts +0 -730
- package/src/cli/modelFactory.ts +0 -174
- package/src/cli/providers.ts +0 -107
- package/src/commands/builtinCommands.ts +0 -293
- package/src/commands/commandRegistry.ts +0 -194
- package/src/core/agentLoop.d.ts.map +0 -1
- package/src/core/agentLoop.ts +0 -312
- package/src/core/autoSave.ts +0 -95
- package/src/core/compactor.ts +0 -252
- package/src/core/contextGuard.ts +0 -129
- package/src/core/errors.ts +0 -202
- package/src/core/promptBuilder.d.ts.map +0 -1
- package/src/core/promptBuilder.ts +0 -139
- package/src/core/reasoningRouter.ts +0 -121
- package/src/core/retry.ts +0 -75
- package/src/core/sessionResumer.ts +0 -90
- package/src/core/sessionStore.ts +0 -215
- package/src/core/subAgent.ts +0 -339
- package/src/core/tokenCounter.ts +0 -64
- package/src/evals/dataset.ts +0 -67
- package/src/evals/evaluator.ts +0 -81
- package/src/hitl/bridge.ts +0 -160
- package/src/middleware/commandSanitizer.ts +0 -60
- package/src/middleware/loopDetection.ts +0 -63
- package/src/middleware/permission.ts +0 -72
- package/src/middleware/pipeline.ts +0 -75
- package/src/middleware/preCompletion.ts +0 -94
- package/src/middleware/types.ts +0 -45
- package/src/sandbox/bootstrap.ts +0 -121
- package/src/sandbox/manager.ts +0 -239
- package/src/sandbox/sync.ts +0 -157
- package/src/skills/loader.ts +0 -143
- package/src/skills/tools.ts +0 -99
- package/src/skills/types.ts +0 -13
- package/src/test_cache.ts +0 -72
- package/src/test_google.js +0 -40
- package/src/test_google.ts +0 -40
- package/src/tools/askUser.ts +0 -47
- package/src/tools/browser.ts +0 -137
- package/src/tools/index.d.ts.map +0 -1
- package/src/tools/index.ts +0 -237
- package/src/tools/registry.ts +0 -198
- package/src/tools/router.ts +0 -78
- package/src/tools/security.ts +0 -220
- package/src/tools/spawnAgent.ts +0 -158
- package/src/tools/webSearch.ts +0 -142
- package/src/tracing/analyzer.ts +0 -265
- package/src/tracing/langsmith.ts +0 -63
- package/src/tracing/sessionTracer.ts +0 -202
- package/src/tracing/types.ts +0 -49
- package/src/types/valyu.d.ts +0 -37
- package/src/ui/App.tsx +0 -404
- package/src/ui/components/HITLPrompt.tsx +0 -119
- package/src/ui/components/Header.tsx +0 -51
- package/src/ui/components/MessageBubble.tsx +0 -46
- package/src/ui/components/StatusBar.tsx +0 -138
- package/src/ui/components/StreamingText.tsx +0 -48
- package/src/ui/components/ToolCallPanel.tsx +0 -80
- package/tests/commands/commands.test.ts +0 -356
- package/tests/core/compactor.test.ts +0 -217
- package/tests/core/retryAndErrors.test.ts +0 -164
- package/tests/core/sessionResumer.test.ts +0 -95
- package/tests/core/sessionStore.test.ts +0 -84
- package/tests/core/stability.test.ts +0 -165
- package/tests/core/subAgent.test.ts +0 -238
- package/tests/hitl/hitlBridge.test.ts +0 -115
- package/tsconfig.json +0 -16
- package/vitest.config.ts +0 -10
- package/vitest.out +0 -48
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach } from "vitest";
|
|
2
|
-
import { HumanMessage, AIMessage, SystemMessage } from "@langchain/core/messages";
|
|
3
|
-
import {
|
|
4
|
-
SearchToolsTool,
|
|
5
|
-
ActivateToolTool,
|
|
6
|
-
activateTool,
|
|
7
|
-
getActivatedTools,
|
|
8
|
-
resetActivatedTools,
|
|
9
|
-
} from "../tools/registry.js";
|
|
10
|
-
import {
|
|
11
|
-
estimateTokens,
|
|
12
|
-
countMessageTokens,
|
|
13
|
-
isNearCapacity,
|
|
14
|
-
} from "../core/tokenCounter.js";
|
|
15
|
-
import { CacheOptimizedPromptBuilder } from "../core/promptBuilder.js";
|
|
16
|
-
import {
|
|
17
|
-
ReasoningRouter,
|
|
18
|
-
ReasoningLevel,
|
|
19
|
-
} from "../core/reasoningRouter.js";
|
|
20
|
-
|
|
21
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
22
|
-
// 5a: Enhanced Tool Registry
|
|
23
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
24
|
-
|
|
25
|
-
describe("Enhanced Tool Registry", () => {
|
|
26
|
-
beforeEach(() => {
|
|
27
|
-
resetActivatedTools();
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
// ─── Test #56: Fuzzy search matches by description keyword ───
|
|
31
|
-
|
|
32
|
-
it("fuzzy search matches tools by description keyword", async () => {
|
|
33
|
-
const result = await SearchToolsTool.execute({ query: "commit" });
|
|
34
|
-
|
|
35
|
-
expect(result.content).toContain("git_commit");
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
// ─── Test #57: Fuzzy search matches by name ───
|
|
39
|
-
|
|
40
|
-
it("fuzzy search matches tools by name", async () => {
|
|
41
|
-
const result = await SearchToolsTool.execute({ query: "grep" });
|
|
42
|
-
|
|
43
|
-
expect(result.content).toContain("grep_search");
|
|
44
|
-
});
|
|
45
|
-
|
|
46
|
-
// ─── Test #58: activateTool adds tool to the active set ───
|
|
47
|
-
|
|
48
|
-
it("activateTool adds a tool to the active set", () => {
|
|
49
|
-
expect(getActivatedTools()).toHaveLength(0);
|
|
50
|
-
|
|
51
|
-
const tool = activateTool("git_commit");
|
|
52
|
-
|
|
53
|
-
expect(tool).toBeDefined();
|
|
54
|
-
expect(tool!.name).toBe("git_commit");
|
|
55
|
-
expect(getActivatedTools()).toHaveLength(1);
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
// ─── Test #59: ActivateToolTool returns schema on activation ───
|
|
59
|
-
|
|
60
|
-
it("ActivateToolTool returns the schema on successful activation", async () => {
|
|
61
|
-
const result = await ActivateToolTool.execute({ name: "git_diff" });
|
|
62
|
-
|
|
63
|
-
expect(result.content).toContain("activated");
|
|
64
|
-
expect(result.content).toContain("Schema");
|
|
65
|
-
expect(getActivatedTools()).toHaveLength(1);
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
// ─── Test #60: ActivateToolTool returns error for unknown tool ───
|
|
69
|
-
|
|
70
|
-
it("ActivateToolTool returns error for unknown tool", async () => {
|
|
71
|
-
const result = await ActivateToolTool.execute({ name: "nonexistent" });
|
|
72
|
-
|
|
73
|
-
expect(result.content).toMatch(/not found/i);
|
|
74
|
-
});
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
78
|
-
// 5b: Token Counter & Context Compaction
|
|
79
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
80
|
-
|
|
81
|
-
describe("Token Counter", () => {
|
|
82
|
-
// ─── Test #61: Estimates tokens for short string ───
|
|
83
|
-
|
|
84
|
-
it("estimates tokens using ~4 chars per token", () => {
|
|
85
|
-
const tokens = estimateTokens("Hello world!"); // 12 chars → 3 tokens
|
|
86
|
-
expect(tokens).toBe(3);
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
// ─── Test #62: Counts tokens across messages ───
|
|
90
|
-
|
|
91
|
-
it("counts tokens across multiple messages", () => {
|
|
92
|
-
const messages = [
|
|
93
|
-
new HumanMessage("Hello"), // 5 chars → 2 tokens + 4 overhead = 6
|
|
94
|
-
new AIMessage("Hi there"), // 8 chars → 2 tokens + 4 overhead = 6
|
|
95
|
-
];
|
|
96
|
-
const total = countMessageTokens(messages);
|
|
97
|
-
|
|
98
|
-
expect(total).toBeGreaterThan(0);
|
|
99
|
-
expect(total).toBe(12); // (2+4) + (2+4)
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
// ─── Test #63: isNearCapacity detects threshold ───
|
|
103
|
-
|
|
104
|
-
it("returns true when messages exceed 80% of capacity", () => {
|
|
105
|
-
// Create a big message ~320 chars → ~80 tokens
|
|
106
|
-
const bigMsg = new HumanMessage("x".repeat(320));
|
|
107
|
-
const messages = [bigMsg];
|
|
108
|
-
|
|
109
|
-
// maxTokens=100, threshold=0.8 → trigger at 80 tokens
|
|
110
|
-
// 320/4=80 + 4 overhead = 84 > 80
|
|
111
|
-
expect(isNearCapacity(messages, 100, 0.8)).toBe(true);
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
// ─── Test #64: isNearCapacity returns false below threshold ───
|
|
115
|
-
|
|
116
|
-
it("returns false when well below capacity", () => {
|
|
117
|
-
const messages = [new HumanMessage("short")];
|
|
118
|
-
|
|
119
|
-
expect(isNearCapacity(messages, 100000, 0.8)).toBe(false);
|
|
120
|
-
});
|
|
121
|
-
});
|
|
122
|
-
|
|
123
|
-
describe("Context Compaction", () => {
|
|
124
|
-
// ─── Test #65: compactHistory preserves last N messages ───
|
|
125
|
-
|
|
126
|
-
it("preserves the last N messages and prepends summary", () => {
|
|
127
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
128
|
-
const history = [
|
|
129
|
-
new HumanMessage("msg 1"),
|
|
130
|
-
new AIMessage("response 1"),
|
|
131
|
-
new HumanMessage("msg 2"),
|
|
132
|
-
new AIMessage("response 2"),
|
|
133
|
-
new HumanMessage("msg 3"),
|
|
134
|
-
new AIMessage("response 3"),
|
|
135
|
-
];
|
|
136
|
-
|
|
137
|
-
const compacted = builder.compactHistory(history, "Summary of turns 1-2.", 4);
|
|
138
|
-
|
|
139
|
-
// Should have: 1 summary + 4 preserved
|
|
140
|
-
expect(compacted).toHaveLength(5);
|
|
141
|
-
expect((compacted[0] as HumanMessage).content).toContain("compacted");
|
|
142
|
-
expect((compacted[0] as HumanMessage).content).toContain("Summary of turns 1-2.");
|
|
143
|
-
});
|
|
144
|
-
});
|
|
145
|
-
|
|
146
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
147
|
-
// 5c: Reasoning Sandwich
|
|
148
|
-
// ═══════════════════════════════════════════════════════════════════════════════
|
|
149
|
-
|
|
150
|
-
describe("ReasoningRouter", () => {
|
|
151
|
-
// ─── Test #66: First turns are HIGH (planning) ───
|
|
152
|
-
|
|
153
|
-
it("returns HIGH for the first turn (planning phase)", () => {
|
|
154
|
-
const router = new ReasoningRouter();
|
|
155
|
-
|
|
156
|
-
router.advanceTurn(false, false);
|
|
157
|
-
const level = router.getLevel();
|
|
158
|
-
|
|
159
|
-
expect(level).toBe(ReasoningLevel.HIGH);
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
// ─── Test #67: Tool-heavy turns are MEDIUM ───
|
|
163
|
-
|
|
164
|
-
it("returns MEDIUM for tool-heavy turns after planning", () => {
|
|
165
|
-
const router = new ReasoningRouter({ planningTurns: 1 });
|
|
166
|
-
|
|
167
|
-
router.advanceTurn(false, false); // turn 1
|
|
168
|
-
router.getLevel(); // HIGH (planning)
|
|
169
|
-
|
|
170
|
-
router.advanceTurn(true, false); // turn 2
|
|
171
|
-
const level = router.getLevel(); // tool call shouldn't be high
|
|
172
|
-
|
|
173
|
-
expect(level).toBe(ReasoningLevel.MEDIUM);
|
|
174
|
-
});
|
|
175
|
-
|
|
176
|
-
// ─── Test #68: Post-error turns are HIGH (recovery) ───
|
|
177
|
-
|
|
178
|
-
it("returns HIGH for recovery after an error", () => {
|
|
179
|
-
const router = new ReasoningRouter({ planningTurns: 1 });
|
|
180
|
-
|
|
181
|
-
router.advanceTurn(false, false); // turn 1
|
|
182
|
-
router.getLevel(); // planning
|
|
183
|
-
|
|
184
|
-
router.advanceTurn(true, false); // turn 2
|
|
185
|
-
router.getLevel(); // tool call (MEDIUM)
|
|
186
|
-
|
|
187
|
-
router.advanceTurn(false, true); // turn 3
|
|
188
|
-
const level = router.getLevel(); // error!
|
|
189
|
-
|
|
190
|
-
expect(level).toBe(ReasoningLevel.HIGH);
|
|
191
|
-
});
|
|
192
|
-
|
|
193
|
-
// ─── Test #69: Temperature mapping ───
|
|
194
|
-
|
|
195
|
-
it("maps reasoning levels to correct temperatures", () => {
|
|
196
|
-
const router = new ReasoningRouter({ highTemp: 0, mediumTemp: 0.3 });
|
|
197
|
-
|
|
198
|
-
expect(router.getTemperature(ReasoningLevel.HIGH)).toBe(0);
|
|
199
|
-
expect(router.getTemperature(ReasoningLevel.MEDIUM)).toBe(0.3);
|
|
200
|
-
});
|
|
201
|
-
});
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
CacheOptimizedPromptBuilder,
|
|
4
|
-
ContextState,
|
|
5
|
-
} from "../core/promptBuilder.js";
|
|
6
|
-
import {
|
|
7
|
-
SystemMessage,
|
|
8
|
-
HumanMessage,
|
|
9
|
-
AIMessage,
|
|
10
|
-
} from "@langchain/core/messages";
|
|
11
|
-
|
|
12
|
-
describe("CacheOptimizedPromptBuilder", () => {
|
|
13
|
-
// ─── Behavior 1: Static prefix ordering ───
|
|
14
|
-
// The most critical behavior: the first 3 messages must ALWAYS be
|
|
15
|
-
// SystemMessages in the order: global → project → session.
|
|
16
|
-
// This is the foundation of prompt cache validity.
|
|
17
|
-
|
|
18
|
-
it("builds prompt with static prefix in strict order: global, project, session", () => {
|
|
19
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
20
|
-
const state: ContextState = {
|
|
21
|
-
globalSystemInstructions: "You are a coding assistant.",
|
|
22
|
-
projectMemory: "Use TypeScript.",
|
|
23
|
-
sessionContext: "OS: Windows",
|
|
24
|
-
conversationHistory: [],
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
const messages = builder.buildPrompt(state);
|
|
28
|
-
|
|
29
|
-
// exactly 1 static message when history is empty
|
|
30
|
-
expect(messages).toHaveLength(1);
|
|
31
|
-
|
|
32
|
-
// must be system-type message
|
|
33
|
-
expect(messages[0]._getType()).toBe("system");
|
|
34
|
-
|
|
35
|
-
// Order must be: global → project → session
|
|
36
|
-
expect(messages[0].content).toContain("You are a coding assistant.");
|
|
37
|
-
expect(messages[0].content).toContain("Use TypeScript.");
|
|
38
|
-
expect(messages[0].content).toContain("OS: Windows");
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
// ─── Behavior 2: Conversation history appended AFTER the static prefix ───
|
|
42
|
-
// Dynamic messages must never appear before the static prefix.
|
|
43
|
-
|
|
44
|
-
it("appends conversation history after the static prefix", () => {
|
|
45
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
46
|
-
const state: ContextState = {
|
|
47
|
-
globalSystemInstructions: "System prompt.",
|
|
48
|
-
projectMemory: "Project rules.",
|
|
49
|
-
sessionContext: "Session info.",
|
|
50
|
-
conversationHistory: [
|
|
51
|
-
new HumanMessage("Hello"),
|
|
52
|
-
new AIMessage("Hi there!"),
|
|
53
|
-
],
|
|
54
|
-
};
|
|
55
|
-
|
|
56
|
-
const messages = builder.buildPrompt(state);
|
|
57
|
-
|
|
58
|
-
// 1 static + 2 conversation = 3
|
|
59
|
-
expect(messages).toHaveLength(3);
|
|
60
|
-
|
|
61
|
-
// First is system messages (static prefix)
|
|
62
|
-
expect(messages[0]._getType()).toBe("system");
|
|
63
|
-
|
|
64
|
-
// Last 2 are conversation messages
|
|
65
|
-
expect(messages[1]._getType()).toBe("human");
|
|
66
|
-
expect(messages[2]._getType()).toBe("ai");
|
|
67
|
-
expect(messages[1].content).toBe("Hello");
|
|
68
|
-
expect(messages[2].content).toBe("Hi there!");
|
|
69
|
-
});
|
|
70
|
-
|
|
71
|
-
// ─── Behavior 3: Static prefix is identical across calls ───
|
|
72
|
-
// If we call buildPrompt twice with the same state (but more history),
|
|
73
|
-
// the first 3 messages must be byte-identical to preserve the cache.
|
|
74
|
-
|
|
75
|
-
it("produces identical static prefix across multiple calls with growing history", () => {
|
|
76
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
77
|
-
const state: ContextState = {
|
|
78
|
-
globalSystemInstructions: "Be helpful.",
|
|
79
|
-
projectMemory: "Use strict types.",
|
|
80
|
-
sessionContext: "Env: Node",
|
|
81
|
-
conversationHistory: [],
|
|
82
|
-
};
|
|
83
|
-
|
|
84
|
-
const firstCall = builder.buildPrompt(state);
|
|
85
|
-
|
|
86
|
-
// Simulate a conversation turn
|
|
87
|
-
state.conversationHistory.push(new HumanMessage("What is 2+2?"));
|
|
88
|
-
state.conversationHistory.push(new AIMessage("4"));
|
|
89
|
-
|
|
90
|
-
const secondCall = builder.buildPrompt(state);
|
|
91
|
-
|
|
92
|
-
// Static prefix (first message) must be identical
|
|
93
|
-
expect(secondCall[0].content).toBe(firstCall[0].content);
|
|
94
|
-
});
|
|
95
|
-
|
|
96
|
-
// ─── Behavior 4: System reminder is injected as a HumanMessage ───
|
|
97
|
-
|
|
98
|
-
it("injects a system reminder as a HumanMessage with <system-reminder> tags", () => {
|
|
99
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
100
|
-
const history = [new HumanMessage("Start task")];
|
|
101
|
-
|
|
102
|
-
const updated = builder.injectSystemReminder(
|
|
103
|
-
history,
|
|
104
|
-
"File auth.ts was deleted."
|
|
105
|
-
);
|
|
106
|
-
|
|
107
|
-
// Original history is not mutated
|
|
108
|
-
expect(history).toHaveLength(1);
|
|
109
|
-
|
|
110
|
-
// Updated history has the reminder appended
|
|
111
|
-
expect(updated).toHaveLength(2);
|
|
112
|
-
expect(updated[1]._getType()).toBe("human");
|
|
113
|
-
expect(updated[1].content).toContain("<system-reminder>");
|
|
114
|
-
expect(updated[1].content).toContain("File auth.ts was deleted.");
|
|
115
|
-
expect(updated[1].content).toContain("</system-reminder>");
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
// ─── Behavior 5: Compaction preserves recent messages with summary ───
|
|
119
|
-
|
|
120
|
-
it("compacts history into summary + preserved recent messages", () => {
|
|
121
|
-
const builder = new CacheOptimizedPromptBuilder();
|
|
122
|
-
const longHistory = [
|
|
123
|
-
new HumanMessage("Step 1"),
|
|
124
|
-
new AIMessage("Done 1"),
|
|
125
|
-
new HumanMessage("Step 2"),
|
|
126
|
-
new AIMessage("Done 2"),
|
|
127
|
-
];
|
|
128
|
-
|
|
129
|
-
const compacted = builder.compactHistory(
|
|
130
|
-
longHistory,
|
|
131
|
-
"Completed steps 1 and 2."
|
|
132
|
-
);
|
|
133
|
-
|
|
134
|
-
// Default keepLastN=6, history has 4 → summary + all 4 preserved
|
|
135
|
-
expect(compacted).toHaveLength(5);
|
|
136
|
-
expect(compacted[0]._getType()).toBe("human");
|
|
137
|
-
expect(compacted[0].content).toContain("Completed steps 1 and 2.");
|
|
138
|
-
// Recent messages are preserved after the summary
|
|
139
|
-
expect(compacted[1].content).toBe("Step 1");
|
|
140
|
-
});
|
|
141
|
-
});
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
2
|
-
import { SandboxManager } from "../sandbox/manager.js";
|
|
3
|
-
|
|
4
|
-
// Mock the e2b SDK since we don't want real sandbox creation in tests
|
|
5
|
-
vi.mock("e2b", () => {
|
|
6
|
-
const mockSandbox = {
|
|
7
|
-
sandboxId: "test-sandbox-123",
|
|
8
|
-
commands: {
|
|
9
|
-
run: vi.fn().mockResolvedValue({
|
|
10
|
-
stdout: "mock output",
|
|
11
|
-
stderr: "",
|
|
12
|
-
exitCode: 0,
|
|
13
|
-
}),
|
|
14
|
-
},
|
|
15
|
-
files: {
|
|
16
|
-
write: vi.fn().mockResolvedValue(undefined),
|
|
17
|
-
read: vi.fn().mockResolvedValue("file content"),
|
|
18
|
-
list: vi.fn().mockResolvedValue([]),
|
|
19
|
-
},
|
|
20
|
-
kill: vi.fn().mockResolvedValue(undefined),
|
|
21
|
-
isRunning: vi.fn().mockResolvedValue(true),
|
|
22
|
-
setTimeout: vi.fn().mockResolvedValue(undefined),
|
|
23
|
-
};
|
|
24
|
-
|
|
25
|
-
return {
|
|
26
|
-
Sandbox: {
|
|
27
|
-
create: vi.fn().mockResolvedValue(mockSandbox),
|
|
28
|
-
},
|
|
29
|
-
};
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
describe("SandboxManager", () => {
|
|
33
|
-
let manager: SandboxManager;
|
|
34
|
-
|
|
35
|
-
beforeEach(() => {
|
|
36
|
-
vi.clearAllMocks();
|
|
37
|
-
manager = new SandboxManager({ apiKey: "test-e2b-key" });
|
|
38
|
-
});
|
|
39
|
-
|
|
40
|
-
afterEach(async () => {
|
|
41
|
-
// Ensure sandbox is cleaned up after each test
|
|
42
|
-
try {
|
|
43
|
-
await manager.destroy();
|
|
44
|
-
} catch {
|
|
45
|
-
// Already destroyed or never created
|
|
46
|
-
}
|
|
47
|
-
});
|
|
48
|
-
|
|
49
|
-
// ─── Test #15: SandboxManager.create() initializes a sandbox ───
|
|
50
|
-
|
|
51
|
-
it("creates a sandbox and returns the sandbox ID", async () => {
|
|
52
|
-
const sandboxId = await manager.create();
|
|
53
|
-
|
|
54
|
-
expect(sandboxId).toBe("test-sandbox-123");
|
|
55
|
-
expect(manager.isActive()).toBe(true);
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
// ─── Test #16: SandboxManager.destroy() cleans up the sandbox ───
|
|
59
|
-
|
|
60
|
-
it("destroys the sandbox and marks it as inactive", async () => {
|
|
61
|
-
await manager.create();
|
|
62
|
-
expect(manager.isActive()).toBe(true);
|
|
63
|
-
|
|
64
|
-
await manager.destroy();
|
|
65
|
-
expect(manager.isActive()).toBe(false);
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
// ─── Test #17: SandboxManager.exec() runs a command in the sandbox ───
|
|
69
|
-
|
|
70
|
-
it("executes a command in the sandbox and returns output", async () => {
|
|
71
|
-
await manager.create();
|
|
72
|
-
|
|
73
|
-
const result = await manager.exec("echo hello");
|
|
74
|
-
|
|
75
|
-
expect(result.stdout).toBe("mock output");
|
|
76
|
-
expect(result.exitCode).toBe(0);
|
|
77
|
-
});
|
|
78
|
-
|
|
79
|
-
// ─── Test #18: SandboxManager.exec() throws if sandbox not active ───
|
|
80
|
-
|
|
81
|
-
it("throws an error if exec is called before create", async () => {
|
|
82
|
-
await expect(manager.exec("echo hello")).rejects.toThrow(
|
|
83
|
-
/sandbox is not active/i
|
|
84
|
-
);
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
// ─── Test #19: SandboxManager.uploadFile() writes a file to the sandbox ───
|
|
88
|
-
|
|
89
|
-
it("uploads a file to the sandbox filesystem", async () => {
|
|
90
|
-
await manager.create();
|
|
91
|
-
|
|
92
|
-
await manager.uploadFile("/workspace/src/foo.ts", "const x = 1;");
|
|
93
|
-
|
|
94
|
-
// Verify the E2B files.write was called
|
|
95
|
-
const { Sandbox } = await import("e2b");
|
|
96
|
-
const mockSandbox = await Sandbox.create();
|
|
97
|
-
expect(mockSandbox.files.write).toHaveBeenCalledWith(
|
|
98
|
-
"/workspace/src/foo.ts",
|
|
99
|
-
"const x = 1;"
|
|
100
|
-
);
|
|
101
|
-
});
|
|
102
|
-
});
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
SecurityScanTool,
|
|
4
|
-
DepScanTool,
|
|
5
|
-
bindSecuritySandbox,
|
|
6
|
-
} from "../tools/security.js";
|
|
7
|
-
import { SandboxManager } from "../sandbox/manager.js";
|
|
8
|
-
import { LazyInstaller } from "../sandbox/bootstrap.js";
|
|
9
|
-
|
|
10
|
-
// Helpers
|
|
11
|
-
const createMockSandbox = (active = true) => ({
|
|
12
|
-
exec: vi.fn(),
|
|
13
|
-
isActive: vi.fn().mockReturnValue(active),
|
|
14
|
-
create: vi.fn(),
|
|
15
|
-
destroy: vi.fn(),
|
|
16
|
-
uploadFile: vi.fn(),
|
|
17
|
-
getSandbox: vi.fn(),
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
describe("SecurityScanTool", () => {
|
|
21
|
-
let mockSandbox: ReturnType<typeof createMockSandbox>;
|
|
22
|
-
let installer: LazyInstaller;
|
|
23
|
-
|
|
24
|
-
beforeEach(() => {
|
|
25
|
-
vi.clearAllMocks();
|
|
26
|
-
mockSandbox = createMockSandbox();
|
|
27
|
-
// Use custom template mode so ensureGeminiCli is instant
|
|
28
|
-
installer = new LazyInstaller(true);
|
|
29
|
-
bindSecuritySandbox(
|
|
30
|
-
mockSandbox as unknown as SandboxManager,
|
|
31
|
-
installer
|
|
32
|
-
);
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
// ─── Test #39: Runs security:analyze and returns report ───
|
|
36
|
-
|
|
37
|
-
it("runs gemini security:analyze and returns the report", async () => {
|
|
38
|
-
mockSandbox.exec.mockResolvedValueOnce({
|
|
39
|
-
exitCode: 0,
|
|
40
|
-
stdout: "## Security Report\n\nNo critical vulnerabilities found.",
|
|
41
|
-
stderr: "",
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
const result = await SecurityScanTool.execute({ target: "changes" });
|
|
45
|
-
|
|
46
|
-
expect(result.content).toContain("Security Report");
|
|
47
|
-
expect(mockSandbox.exec).toHaveBeenCalledWith(
|
|
48
|
-
expect.stringContaining("security:analyze")
|
|
49
|
-
);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
// ─── Test #40: Returns error for file scan without path ───
|
|
53
|
-
|
|
54
|
-
it("returns error when target is 'file' but no path provided", async () => {
|
|
55
|
-
const result = await SecurityScanTool.execute({ target: "file" });
|
|
56
|
-
|
|
57
|
-
expect(result.content).toMatch(/path.*required/i);
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// ─── Test #41: Handles failed scans gracefully ───
|
|
61
|
-
|
|
62
|
-
it("returns failure info when scan exits with non-zero code", async () => {
|
|
63
|
-
mockSandbox.exec.mockResolvedValueOnce({
|
|
64
|
-
exitCode: 1,
|
|
65
|
-
stdout: "",
|
|
66
|
-
stderr: "Some error occurred",
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
const result = await SecurityScanTool.execute({ target: "changes" });
|
|
70
|
-
|
|
71
|
-
expect(result.content).toContain("failed");
|
|
72
|
-
expect(result.content).toContain("Some error occurred");
|
|
73
|
-
});
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
describe("DepScanTool", () => {
|
|
77
|
-
let mockSandbox: ReturnType<typeof createMockSandbox>;
|
|
78
|
-
let installer: LazyInstaller;
|
|
79
|
-
|
|
80
|
-
beforeEach(() => {
|
|
81
|
-
vi.clearAllMocks();
|
|
82
|
-
mockSandbox = createMockSandbox();
|
|
83
|
-
installer = new LazyInstaller(true); // pre-baked template
|
|
84
|
-
bindSecuritySandbox(
|
|
85
|
-
mockSandbox as unknown as SandboxManager,
|
|
86
|
-
installer
|
|
87
|
-
);
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
// ─── Test #42: OSV-Scanner returns vulnerability report ───
|
|
91
|
-
|
|
92
|
-
it("runs osv-scanner and returns the report", async () => {
|
|
93
|
-
mockSandbox.exec.mockResolvedValueOnce({
|
|
94
|
-
exitCode: 0,
|
|
95
|
-
stdout: "Found 2 vulnerabilities:\n- CVE-2024-1234\n- CVE-2024-5678",
|
|
96
|
-
stderr: "",
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
const result = await DepScanTool.execute({ format: "summary" });
|
|
100
|
-
|
|
101
|
-
expect(result.content).toContain("CVE-2024-1234");
|
|
102
|
-
expect(result.content).toContain("CVE-2024-5678");
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
// ─── Test #43: Falls back to npm audit when OSV-Scanner fails ───
|
|
106
|
-
|
|
107
|
-
it("falls back to npm audit if osv-scanner returns empty output", async () => {
|
|
108
|
-
// OSV-Scanner: empty output
|
|
109
|
-
mockSandbox.exec
|
|
110
|
-
.mockResolvedValueOnce({ exitCode: 1, stdout: "", stderr: "error" })
|
|
111
|
-
// npm audit fallback
|
|
112
|
-
.mockResolvedValueOnce({
|
|
113
|
-
exitCode: 0,
|
|
114
|
-
stdout: "found 0 vulnerabilities",
|
|
115
|
-
stderr: "",
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
const result = await DepScanTool.execute({ format: "summary" });
|
|
119
|
-
|
|
120
|
-
expect(result.content).toContain("0 vulnerabilities");
|
|
121
|
-
});
|
|
122
|
-
});
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from "vitest";
|
|
2
|
-
import { AIMessageChunk } from "@langchain/core/messages";
|
|
3
|
-
import { ExecutionHarness } from "../core/agentLoop.js";
|
|
4
|
-
import { ContextState } from "../core/promptBuilder.js";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Creates a mock LLM that yields predefined chunks when .stream() is called.
|
|
8
|
-
* This avoids real API calls while testing streaming behavior.
|
|
9
|
-
*/
|
|
10
|
-
function createMockStreamingLlm(chunks: AIMessageChunk[]) {
|
|
11
|
-
return {
|
|
12
|
-
invoke: vi.fn(),
|
|
13
|
-
stream: vi.fn().mockResolvedValue({
|
|
14
|
-
async *[Symbol.asyncIterator]() {
|
|
15
|
-
for (const chunk of chunks) {
|
|
16
|
-
yield chunk;
|
|
17
|
-
}
|
|
18
|
-
},
|
|
19
|
-
}),
|
|
20
|
-
};
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
describe("ExecutionHarness Streaming", () => {
|
|
24
|
-
const baseState: ContextState = {
|
|
25
|
-
globalSystemInstructions: "You are a helpful assistant.",
|
|
26
|
-
projectMemory: "",
|
|
27
|
-
sessionContext: "",
|
|
28
|
-
conversationHistory: [],
|
|
29
|
-
};
|
|
30
|
-
|
|
31
|
-
// ─── RED Test #8: streamStep emits text chunks to a callback ───
|
|
32
|
-
|
|
33
|
-
it("emits text content chunks to an onToken callback", async () => {
|
|
34
|
-
const chunks = [
|
|
35
|
-
new AIMessageChunk({ content: "Hello" }),
|
|
36
|
-
new AIMessageChunk({ content: " world" }),
|
|
37
|
-
new AIMessageChunk({ content: "!" }),
|
|
38
|
-
];
|
|
39
|
-
const mockLlm = createMockStreamingLlm(chunks);
|
|
40
|
-
const harness = new ExecutionHarness(mockLlm as any);
|
|
41
|
-
|
|
42
|
-
const receivedTokens: string[] = [];
|
|
43
|
-
const result = await harness.streamStep(baseState, {
|
|
44
|
-
onToken: (token: string) => receivedTokens.push(token),
|
|
45
|
-
});
|
|
46
|
-
|
|
47
|
-
// Callback should have received each text chunk
|
|
48
|
-
expect(receivedTokens).toEqual(["Hello", " world", "!"]);
|
|
49
|
-
|
|
50
|
-
// The returned message should contain the full concatenated content
|
|
51
|
-
expect(result.content).toBe("Hello world!");
|
|
52
|
-
});
|
|
53
|
-
|
|
54
|
-
// ─── RED Test #9: streamStep buffers tool calls and returns complete AIMessage ───
|
|
55
|
-
|
|
56
|
-
it("buffers tool call chunks and returns a complete AIMessage with tool_calls", async () => {
|
|
57
|
-
const chunks = [
|
|
58
|
-
new AIMessageChunk({
|
|
59
|
-
content: "",
|
|
60
|
-
tool_call_chunks: [
|
|
61
|
-
{ name: "read_file", args: '{"path": "', index: 0, id: "tc_1", type: "tool_call_chunk" },
|
|
62
|
-
],
|
|
63
|
-
}),
|
|
64
|
-
new AIMessageChunk({
|
|
65
|
-
content: "",
|
|
66
|
-
tool_call_chunks: [
|
|
67
|
-
{ name: undefined, args: 'src/index.ts"}', index: 0, id: undefined, type: "tool_call_chunk" },
|
|
68
|
-
],
|
|
69
|
-
}),
|
|
70
|
-
];
|
|
71
|
-
const mockLlm = createMockStreamingLlm(chunks);
|
|
72
|
-
const harness = new ExecutionHarness(mockLlm as any);
|
|
73
|
-
|
|
74
|
-
const result = await harness.streamStep(baseState, {});
|
|
75
|
-
|
|
76
|
-
// The result should have tool_calls populated
|
|
77
|
-
expect(result.tool_calls).toBeDefined();
|
|
78
|
-
expect(result.tool_calls!.length).toBe(1);
|
|
79
|
-
expect(result.tool_calls![0].name).toBe("read_file");
|
|
80
|
-
expect(result.tool_calls![0].args).toEqual({ path: "src/index.ts" });
|
|
81
|
-
});
|
|
82
|
-
});
|