joonecli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/AGENTS.md +56 -0
  2. package/Handover.md +115 -0
  3. package/LICENSE +201 -0
  4. package/PROGRESS.md +160 -0
  5. package/README.md +114 -0
  6. package/dist/__tests__/bootstrap.test.d.ts +1 -0
  7. package/dist/__tests__/bootstrap.test.js +76 -0
  8. package/dist/__tests__/bootstrap.test.js.map +1 -0
  9. package/dist/__tests__/config.test.d.ts +1 -0
  10. package/dist/__tests__/config.test.js +84 -0
  11. package/dist/__tests__/config.test.js.map +1 -0
  12. package/dist/__tests__/m55.test.d.ts +1 -0
  13. package/dist/__tests__/m55.test.js +160 -0
  14. package/dist/__tests__/m55.test.js.map +1 -0
  15. package/dist/__tests__/middleware.test.d.ts +1 -0
  16. package/dist/__tests__/middleware.test.js +169 -0
  17. package/dist/__tests__/middleware.test.js.map +1 -0
  18. package/dist/__tests__/modelFactory.test.d.ts +1 -0
  19. package/dist/__tests__/modelFactory.test.js +50 -0
  20. package/dist/__tests__/modelFactory.test.js.map +1 -0
  21. package/dist/__tests__/optimizations.test.d.ts +1 -0
  22. package/dist/__tests__/optimizations.test.js +136 -0
  23. package/dist/__tests__/optimizations.test.js.map +1 -0
  24. package/dist/__tests__/promptBuilder.test.d.ts +1 -0
  25. package/dist/__tests__/promptBuilder.test.js +108 -0
  26. package/dist/__tests__/promptBuilder.test.js.map +1 -0
  27. package/dist/__tests__/sandbox.test.d.ts +1 -0
  28. package/dist/__tests__/sandbox.test.js +78 -0
  29. package/dist/__tests__/sandbox.test.js.map +1 -0
  30. package/dist/__tests__/security.test.d.ts +1 -0
  31. package/dist/__tests__/security.test.js +86 -0
  32. package/dist/__tests__/security.test.js.map +1 -0
  33. package/dist/__tests__/streaming.test.d.ts +1 -0
  34. package/dist/__tests__/streaming.test.js +71 -0
  35. package/dist/__tests__/streaming.test.js.map +1 -0
  36. package/dist/__tests__/toolRouter.test.d.ts +1 -0
  37. package/dist/__tests__/toolRouter.test.js +37 -0
  38. package/dist/__tests__/toolRouter.test.js.map +1 -0
  39. package/dist/__tests__/tools.test.d.ts +1 -0
  40. package/dist/__tests__/tools.test.js +112 -0
  41. package/dist/__tests__/tools.test.js.map +1 -0
  42. package/dist/__tests__/tracing.test.d.ts +1 -0
  43. package/dist/__tests__/tracing.test.js +147 -0
  44. package/dist/__tests__/tracing.test.js.map +1 -0
  45. package/dist/cli/config.d.ts +49 -0
  46. package/dist/cli/config.js +86 -0
  47. package/dist/cli/config.js.map +1 -0
  48. package/dist/cli/index.d.ts +2 -0
  49. package/dist/cli/index.js +625 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/modelFactory.d.ts +9 -0
  52. package/dist/cli/modelFactory.js +154 -0
  53. package/dist/cli/modelFactory.js.map +1 -0
  54. package/dist/cli/providers.d.ts +18 -0
  55. package/dist/cli/providers.js +94 -0
  56. package/dist/cli/providers.js.map +1 -0
  57. package/dist/core/agentLoop.d.ts +43 -0
  58. package/dist/core/agentLoop.js +245 -0
  59. package/dist/core/agentLoop.js.map +1 -0
  60. package/dist/core/errors.d.ts +62 -0
  61. package/dist/core/errors.js +139 -0
  62. package/dist/core/errors.js.map +1 -0
  63. package/dist/core/promptBuilder.d.ts +49 -0
  64. package/dist/core/promptBuilder.js +84 -0
  65. package/dist/core/promptBuilder.js.map +1 -0
  66. package/dist/core/reasoningRouter.d.ts +62 -0
  67. package/dist/core/reasoningRouter.js +102 -0
  68. package/dist/core/reasoningRouter.js.map +1 -0
  69. package/dist/core/retry.d.ts +25 -0
  70. package/dist/core/retry.js +49 -0
  71. package/dist/core/retry.js.map +1 -0
  72. package/dist/core/sessionResumer.d.ts +17 -0
  73. package/dist/core/sessionResumer.js +78 -0
  74. package/dist/core/sessionResumer.js.map +1 -0
  75. package/dist/core/sessionStore.d.ts +45 -0
  76. package/dist/core/sessionStore.js +167 -0
  77. package/dist/core/sessionStore.js.map +1 -0
  78. package/dist/core/tokenCounter.d.ts +17 -0
  79. package/dist/core/tokenCounter.js +54 -0
  80. package/dist/core/tokenCounter.js.map +1 -0
  81. package/dist/evals/dataset.d.ts +4 -0
  82. package/dist/evals/dataset.js +61 -0
  83. package/dist/evals/dataset.js.map +1 -0
  84. package/dist/evals/evaluator.d.ts +21 -0
  85. package/dist/evals/evaluator.js +68 -0
  86. package/dist/evals/evaluator.js.map +1 -0
  87. package/dist/hitl/bridge.d.ts +65 -0
  88. package/dist/hitl/bridge.js +120 -0
  89. package/dist/hitl/bridge.js.map +1 -0
  90. package/dist/middleware/commandSanitizer.d.ts +18 -0
  91. package/dist/middleware/commandSanitizer.js +50 -0
  92. package/dist/middleware/commandSanitizer.js.map +1 -0
  93. package/dist/middleware/loopDetection.d.ts +28 -0
  94. package/dist/middleware/loopDetection.js +49 -0
  95. package/dist/middleware/loopDetection.js.map +1 -0
  96. package/dist/middleware/permission.d.ts +17 -0
  97. package/dist/middleware/permission.js +59 -0
  98. package/dist/middleware/permission.js.map +1 -0
  99. package/dist/middleware/pipeline.d.ts +31 -0
  100. package/dist/middleware/pipeline.js +62 -0
  101. package/dist/middleware/pipeline.js.map +1 -0
  102. package/dist/middleware/preCompletion.d.ts +29 -0
  103. package/dist/middleware/preCompletion.js +82 -0
  104. package/dist/middleware/preCompletion.js.map +1 -0
  105. package/dist/middleware/types.d.ts +40 -0
  106. package/dist/middleware/types.js +8 -0
  107. package/dist/middleware/types.js.map +1 -0
  108. package/dist/sandbox/bootstrap.d.ts +38 -0
  109. package/dist/sandbox/bootstrap.js +107 -0
  110. package/dist/sandbox/bootstrap.js.map +1 -0
  111. package/dist/sandbox/manager.d.ts +72 -0
  112. package/dist/sandbox/manager.js +180 -0
  113. package/dist/sandbox/manager.js.map +1 -0
  114. package/dist/sandbox/sync.d.ts +55 -0
  115. package/dist/sandbox/sync.js +135 -0
  116. package/dist/sandbox/sync.js.map +1 -0
  117. package/dist/skills/loader.d.ts +55 -0
  118. package/dist/skills/loader.js +132 -0
  119. package/dist/skills/loader.js.map +1 -0
  120. package/dist/skills/tools.d.ts +5 -0
  121. package/dist/skills/tools.js +78 -0
  122. package/dist/skills/tools.js.map +1 -0
  123. package/dist/skills/types.d.ts +13 -0
  124. package/dist/skills/types.js +2 -0
  125. package/dist/skills/types.js.map +1 -0
  126. package/dist/test_cache.d.ts +1 -0
  127. package/dist/test_cache.js +55 -0
  128. package/dist/test_cache.js.map +1 -0
  129. package/dist/test_google.js +93 -0
  130. package/dist/tools/askUser.d.ts +10 -0
  131. package/dist/tools/askUser.js +42 -0
  132. package/dist/tools/askUser.js.map +1 -0
  133. package/dist/tools/browser.d.ts +19 -0
  134. package/dist/tools/browser.js +111 -0
  135. package/dist/tools/browser.js.map +1 -0
  136. package/dist/tools/index.d.ts +27 -0
  137. package/dist/tools/index.js +184 -0
  138. package/dist/tools/index.js.map +1 -0
  139. package/dist/tools/registry.d.ts +31 -0
  140. package/dist/tools/registry.js +168 -0
  141. package/dist/tools/registry.js.map +1 -0
  142. package/dist/tools/router.d.ts +34 -0
  143. package/dist/tools/router.js +73 -0
  144. package/dist/tools/router.js.map +1 -0
  145. package/dist/tools/security.d.ts +28 -0
  146. package/dist/tools/security.js +183 -0
  147. package/dist/tools/security.js.map +1 -0
  148. package/dist/tools/webSearch.d.ts +6 -0
  149. package/dist/tools/webSearch.js +120 -0
  150. package/dist/tools/webSearch.js.map +1 -0
  151. package/dist/tracing/analyzer.d.ts +58 -0
  152. package/dist/tracing/analyzer.js +190 -0
  153. package/dist/tracing/analyzer.js.map +1 -0
  154. package/dist/tracing/langsmith.d.ts +38 -0
  155. package/dist/tracing/langsmith.js +50 -0
  156. package/dist/tracing/langsmith.js.map +1 -0
  157. package/dist/tracing/sessionTracer.d.ts +73 -0
  158. package/dist/tracing/sessionTracer.js +157 -0
  159. package/dist/tracing/sessionTracer.js.map +1 -0
  160. package/dist/tracing/types.d.ts +46 -0
  161. package/dist/tracing/types.js +5 -0
  162. package/dist/tracing/types.js.map +1 -0
  163. package/dist/ui/App.d.ts +24 -0
  164. package/dist/ui/App.js +172 -0
  165. package/dist/ui/App.js.map +1 -0
  166. package/dist/ui/components/HITLPrompt.d.ts +15 -0
  167. package/dist/ui/components/HITLPrompt.js +35 -0
  168. package/dist/ui/components/HITLPrompt.js.map +1 -0
  169. package/dist/ui/components/Header.d.ts +8 -0
  170. package/dist/ui/components/Header.js +6 -0
  171. package/dist/ui/components/Header.js.map +1 -0
  172. package/dist/ui/components/MessageBubble.d.ts +13 -0
  173. package/dist/ui/components/MessageBubble.js +17 -0
  174. package/dist/ui/components/MessageBubble.js.map +1 -0
  175. package/dist/ui/components/StatusBar.d.ts +21 -0
  176. package/dist/ui/components/StatusBar.js +34 -0
  177. package/dist/ui/components/StatusBar.js.map +1 -0
  178. package/dist/ui/components/StreamingText.d.ts +13 -0
  179. package/dist/ui/components/StreamingText.js +24 -0
  180. package/dist/ui/components/StreamingText.js.map +1 -0
  181. package/dist/ui/components/ToolCallPanel.d.ts +15 -0
  182. package/dist/ui/components/ToolCallPanel.js +18 -0
  183. package/dist/ui/components/ToolCallPanel.js.map +1 -0
  184. package/docs/01_insights_and_patterns.md +27 -0
  185. package/docs/02_edge_cases_and_mitigations.md +143 -0
  186. package/docs/03_initial_implementation_plan.md +66 -0
  187. package/docs/04_tech_stack_proposal.md +20 -0
  188. package/docs/05_prd.md +87 -0
  189. package/docs/06_user_stories.md +72 -0
  190. package/docs/07_system_architecture.md +138 -0
  191. package/docs/08_roadmap.md +200 -0
  192. package/e2b/Dockerfile +26 -0
  193. package/package.json +57 -0
  194. package/src/__tests__/bootstrap.test.ts +111 -0
  195. package/src/__tests__/config.test.ts +97 -0
  196. package/src/__tests__/m55.test.ts +238 -0
  197. package/src/__tests__/middleware.test.ts +219 -0
  198. package/src/__tests__/modelFactory.test.ts +63 -0
  199. package/src/__tests__/optimizations.test.ts +201 -0
  200. package/src/__tests__/promptBuilder.test.ts +141 -0
  201. package/src/__tests__/sandbox.test.ts +102 -0
  202. package/src/__tests__/security.test.ts +122 -0
  203. package/src/__tests__/streaming.test.ts +82 -0
  204. package/src/__tests__/toolRouter.test.ts +52 -0
  205. package/src/__tests__/tools.test.ts +146 -0
  206. package/src/__tests__/tracing.test.ts +196 -0
  207. package/src/agents/agentRegistry.ts +69 -0
  208. package/src/agents/agentSpec.ts +67 -0
  209. package/src/agents/builtinAgents.ts +142 -0
  210. package/src/cli/config.ts +124 -0
  211. package/src/cli/index.ts +730 -0
  212. package/src/cli/modelFactory.ts +174 -0
  213. package/src/cli/providers.ts +107 -0
  214. package/src/commands/builtinCommands.ts +293 -0
  215. package/src/commands/commandRegistry.ts +194 -0
  216. package/src/core/agentLoop.d.ts.map +1 -0
  217. package/src/core/agentLoop.ts +312 -0
  218. package/src/core/autoSave.ts +95 -0
  219. package/src/core/compactor.ts +252 -0
  220. package/src/core/contextGuard.ts +129 -0
  221. package/src/core/errors.ts +202 -0
  222. package/src/core/promptBuilder.d.ts.map +1 -0
  223. package/src/core/promptBuilder.ts +139 -0
  224. package/src/core/reasoningRouter.ts +121 -0
  225. package/src/core/retry.ts +75 -0
  226. package/src/core/sessionResumer.ts +90 -0
  227. package/src/core/sessionStore.ts +215 -0
  228. package/src/core/subAgent.ts +339 -0
  229. package/src/core/tokenCounter.ts +64 -0
  230. package/src/evals/dataset.ts +67 -0
  231. package/src/evals/evaluator.ts +81 -0
  232. package/src/hitl/bridge.ts +160 -0
  233. package/src/middleware/commandSanitizer.ts +60 -0
  234. package/src/middleware/loopDetection.ts +63 -0
  235. package/src/middleware/permission.ts +72 -0
  236. package/src/middleware/pipeline.ts +75 -0
  237. package/src/middleware/preCompletion.ts +94 -0
  238. package/src/middleware/types.ts +45 -0
  239. package/src/sandbox/bootstrap.ts +121 -0
  240. package/src/sandbox/manager.ts +239 -0
  241. package/src/sandbox/sync.ts +157 -0
  242. package/src/skills/loader.ts +143 -0
  243. package/src/skills/tools.ts +99 -0
  244. package/src/skills/types.ts +13 -0
  245. package/src/test_cache.ts +72 -0
  246. package/src/test_google.js +40 -0
  247. package/src/test_google.ts +40 -0
  248. package/src/tools/askUser.ts +47 -0
  249. package/src/tools/browser.ts +137 -0
  250. package/src/tools/index.d.ts.map +1 -0
  251. package/src/tools/index.ts +237 -0
  252. package/src/tools/registry.ts +198 -0
  253. package/src/tools/router.ts +78 -0
  254. package/src/tools/security.ts +220 -0
  255. package/src/tools/spawnAgent.ts +158 -0
  256. package/src/tools/webSearch.ts +142 -0
  257. package/src/tracing/analyzer.ts +265 -0
  258. package/src/tracing/langsmith.ts +63 -0
  259. package/src/tracing/sessionTracer.ts +202 -0
  260. package/src/tracing/types.ts +49 -0
  261. package/src/types/valyu.d.ts +37 -0
  262. package/src/ui/App.tsx +404 -0
  263. package/src/ui/components/HITLPrompt.tsx +119 -0
  264. package/src/ui/components/Header.tsx +51 -0
  265. package/src/ui/components/MessageBubble.tsx +46 -0
  266. package/src/ui/components/StatusBar.tsx +138 -0
  267. package/src/ui/components/StreamingText.tsx +48 -0
  268. package/src/ui/components/ToolCallPanel.tsx +80 -0
  269. package/tests/commands/commands.test.ts +356 -0
  270. package/tests/core/compactor.test.ts +217 -0
  271. package/tests/core/retryAndErrors.test.ts +164 -0
  272. package/tests/core/sessionResumer.test.ts +95 -0
  273. package/tests/core/sessionStore.test.ts +84 -0
  274. package/tests/core/stability.test.ts +165 -0
  275. package/tests/core/subAgent.test.ts +238 -0
  276. package/tests/hitl/hitlBridge.test.ts +115 -0
  277. package/tsconfig.json +16 -0
  278. package/vitest.config.ts +10 -0
  279. package/vitest.out +48 -0
@@ -0,0 +1,201 @@
1
+ import { describe, it, expect, beforeEach } from "vitest";
2
+ import { HumanMessage, AIMessage, SystemMessage } from "@langchain/core/messages";
3
+ import {
4
+ SearchToolsTool,
5
+ ActivateToolTool,
6
+ activateTool,
7
+ getActivatedTools,
8
+ resetActivatedTools,
9
+ } from "../tools/registry.js";
10
+ import {
11
+ estimateTokens,
12
+ countMessageTokens,
13
+ isNearCapacity,
14
+ } from "../core/tokenCounter.js";
15
+ import { CacheOptimizedPromptBuilder } from "../core/promptBuilder.js";
16
+ import {
17
+ ReasoningRouter,
18
+ ReasoningLevel,
19
+ } from "../core/reasoningRouter.js";
20
+
21
+ // ═══════════════════════════════════════════════════════════════════════════════
22
+ // 5a: Enhanced Tool Registry
23
+ // ═══════════════════════════════════════════════════════════════════════════════
24
+
25
+ describe("Enhanced Tool Registry", () => {
26
+ beforeEach(() => {
27
+ resetActivatedTools();
28
+ });
29
+
30
+ // ─── Test #56: Fuzzy search matches by description keyword ───
31
+
32
+ it("fuzzy search matches tools by description keyword", async () => {
33
+ const result = await SearchToolsTool.execute({ query: "commit" });
34
+
35
+ expect(result.content).toContain("git_commit");
36
+ });
37
+
38
+ // ─── Test #57: Fuzzy search matches by name ───
39
+
40
+ it("fuzzy search matches tools by name", async () => {
41
+ const result = await SearchToolsTool.execute({ query: "grep" });
42
+
43
+ expect(result.content).toContain("grep_search");
44
+ });
45
+
46
+ // ─── Test #58: activateTool adds tool to the active set ───
47
+
48
+ it("activateTool adds a tool to the active set", () => {
49
+ expect(getActivatedTools()).toHaveLength(0);
50
+
51
+ const tool = activateTool("git_commit");
52
+
53
+ expect(tool).toBeDefined();
54
+ expect(tool!.name).toBe("git_commit");
55
+ expect(getActivatedTools()).toHaveLength(1);
56
+ });
57
+
58
+ // ─── Test #59: ActivateToolTool returns schema on activation ───
59
+
60
+ it("ActivateToolTool returns the schema on successful activation", async () => {
61
+ const result = await ActivateToolTool.execute({ name: "git_diff" });
62
+
63
+ expect(result.content).toContain("activated");
64
+ expect(result.content).toContain("Schema");
65
+ expect(getActivatedTools()).toHaveLength(1);
66
+ });
67
+
68
+ // ─── Test #60: ActivateToolTool returns error for unknown tool ───
69
+
70
+ it("ActivateToolTool returns error for unknown tool", async () => {
71
+ const result = await ActivateToolTool.execute({ name: "nonexistent" });
72
+
73
+ expect(result.content).toMatch(/not found/i);
74
+ });
75
+ });
76
+
77
+ // ═══════════════════════════════════════════════════════════════════════════════
78
+ // 5b: Token Counter & Context Compaction
79
+ // ═══════════════════════════════════════════════════════════════════════════════
80
+
81
+ describe("Token Counter", () => {
82
+ // ─── Test #61: Estimates tokens for short string ───
83
+
84
+ it("estimates tokens using ~4 chars per token", () => {
85
+ const tokens = estimateTokens("Hello world!"); // 12 chars → 3 tokens
86
+ expect(tokens).toBe(3);
87
+ });
88
+
89
+ // ─── Test #62: Counts tokens across messages ───
90
+
91
+ it("counts tokens across multiple messages", () => {
92
+ const messages = [
93
+ new HumanMessage("Hello"), // 5 chars → 2 tokens + 4 overhead = 6
94
+ new AIMessage("Hi there"), // 8 chars → 2 tokens + 4 overhead = 6
95
+ ];
96
+ const total = countMessageTokens(messages);
97
+
98
+ expect(total).toBeGreaterThan(0);
99
+ expect(total).toBe(12); // (2+4) + (2+4)
100
+ });
101
+
102
+ // ─── Test #63: isNearCapacity detects threshold ───
103
+
104
+ it("returns true when messages exceed 80% of capacity", () => {
105
+ // Create a big message ~320 chars → ~80 tokens
106
+ const bigMsg = new HumanMessage("x".repeat(320));
107
+ const messages = [bigMsg];
108
+
109
+ // maxTokens=100, threshold=0.8 → trigger at 80 tokens
110
+ // 320/4=80 + 4 overhead = 84 > 80
111
+ expect(isNearCapacity(messages, 100, 0.8)).toBe(true);
112
+ });
113
+
114
+ // ─── Test #64: isNearCapacity returns false below threshold ───
115
+
116
+ it("returns false when well below capacity", () => {
117
+ const messages = [new HumanMessage("short")];
118
+
119
+ expect(isNearCapacity(messages, 100000, 0.8)).toBe(false);
120
+ });
121
+ });
122
+
123
+ describe("Context Compaction", () => {
124
+ // ─── Test #65: compactHistory preserves last N messages ───
125
+
126
+ it("preserves the last N messages and prepends summary", () => {
127
+ const builder = new CacheOptimizedPromptBuilder();
128
+ const history = [
129
+ new HumanMessage("msg 1"),
130
+ new AIMessage("response 1"),
131
+ new HumanMessage("msg 2"),
132
+ new AIMessage("response 2"),
133
+ new HumanMessage("msg 3"),
134
+ new AIMessage("response 3"),
135
+ ];
136
+
137
+ const compacted = builder.compactHistory(history, "Summary of turns 1-2.", 4);
138
+
139
+ // Should have: 1 summary + 4 preserved
140
+ expect(compacted).toHaveLength(5);
141
+ expect((compacted[0] as HumanMessage).content).toContain("compacted");
142
+ expect((compacted[0] as HumanMessage).content).toContain("Summary of turns 1-2.");
143
+ });
144
+ });
145
+
146
+ // ═══════════════════════════════════════════════════════════════════════════════
147
+ // 5c: Reasoning Sandwich
148
+ // ═══════════════════════════════════════════════════════════════════════════════
149
+
150
+ describe("ReasoningRouter", () => {
151
+ // ─── Test #66: First turns are HIGH (planning) ───
152
+
153
+ it("returns HIGH for the first turn (planning phase)", () => {
154
+ const router = new ReasoningRouter();
155
+
156
+ router.advanceTurn(false, false);
157
+ const level = router.getLevel();
158
+
159
+ expect(level).toBe(ReasoningLevel.HIGH);
160
+ });
161
+
162
+ // ─── Test #67: Tool-heavy turns are MEDIUM ───
163
+
164
+ it("returns MEDIUM for tool-heavy turns after planning", () => {
165
+ const router = new ReasoningRouter({ planningTurns: 1 });
166
+
167
+ router.advanceTurn(false, false); // turn 1
168
+ router.getLevel(); // HIGH (planning)
169
+
170
+ router.advanceTurn(true, false); // turn 2
171
+ const level = router.getLevel(); // tool call shouldn't be high
172
+
173
+ expect(level).toBe(ReasoningLevel.MEDIUM);
174
+ });
175
+
176
+ // ─── Test #68: Post-error turns are HIGH (recovery) ───
177
+
178
+ it("returns HIGH for recovery after an error", () => {
179
+ const router = new ReasoningRouter({ planningTurns: 1 });
180
+
181
+ router.advanceTurn(false, false); // turn 1
182
+ router.getLevel(); // planning
183
+
184
+ router.advanceTurn(true, false); // turn 2
185
+ router.getLevel(); // tool call (MEDIUM)
186
+
187
+ router.advanceTurn(false, true); // turn 3
188
+ const level = router.getLevel(); // error!
189
+
190
+ expect(level).toBe(ReasoningLevel.HIGH);
191
+ });
192
+
193
+ // ─── Test #69: Temperature mapping ───
194
+
195
+ it("maps reasoning levels to correct temperatures", () => {
196
+ const router = new ReasoningRouter({ highTemp: 0, mediumTemp: 0.3 });
197
+
198
+ expect(router.getTemperature(ReasoningLevel.HIGH)).toBe(0);
199
+ expect(router.getTemperature(ReasoningLevel.MEDIUM)).toBe(0.3);
200
+ });
201
+ });
@@ -0,0 +1,141 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import {
3
+ CacheOptimizedPromptBuilder,
4
+ ContextState,
5
+ } from "../core/promptBuilder.js";
6
+ import {
7
+ SystemMessage,
8
+ HumanMessage,
9
+ AIMessage,
10
+ } from "@langchain/core/messages";
11
+
12
+ describe("CacheOptimizedPromptBuilder", () => {
13
+ // ─── Behavior 1: Static prefix ordering ───
14
+ // The most critical behavior: the prompt must ALWAYS begin with a single
15
+ // SystemMessage combining the sections in order: global → project → session.
16
+ // This is the foundation of prompt cache validity.
17
+
18
+ it("builds prompt with static prefix in strict order: global, project, session", () => {
19
+ const builder = new CacheOptimizedPromptBuilder();
20
+ const state: ContextState = {
21
+ globalSystemInstructions: "You are a coding assistant.",
22
+ projectMemory: "Use TypeScript.",
23
+ sessionContext: "OS: Windows",
24
+ conversationHistory: [],
25
+ };
26
+
27
+ const messages = builder.buildPrompt(state);
28
+
29
+ // exactly 1 static message when history is empty
30
+ expect(messages).toHaveLength(1);
31
+
32
+ // must be system-type message
33
+ expect(messages[0]._getType()).toBe("system");
34
+
35
+ // All three sections must be present (intended order: global → project → session; only containment is asserted)
36
+ expect(messages[0].content).toContain("You are a coding assistant.");
37
+ expect(messages[0].content).toContain("Use TypeScript.");
38
+ expect(messages[0].content).toContain("OS: Windows");
39
+ });
40
+
41
+ // ─── Behavior 2: Conversation history appended AFTER the static prefix ───
42
+ // Dynamic messages must never appear before the static prefix.
43
+
44
+ it("appends conversation history after the static prefix", () => {
45
+ const builder = new CacheOptimizedPromptBuilder();
46
+ const state: ContextState = {
47
+ globalSystemInstructions: "System prompt.",
48
+ projectMemory: "Project rules.",
49
+ sessionContext: "Session info.",
50
+ conversationHistory: [
51
+ new HumanMessage("Hello"),
52
+ new AIMessage("Hi there!"),
53
+ ],
54
+ };
55
+
56
+ const messages = builder.buildPrompt(state);
57
+
58
+ // 1 static + 2 conversation = 3
59
+ expect(messages).toHaveLength(3);
60
+
61
+ // First message is the system message (static prefix)
62
+ expect(messages[0]._getType()).toBe("system");
63
+
64
+ // Last 2 are conversation messages
65
+ expect(messages[1]._getType()).toBe("human");
66
+ expect(messages[2]._getType()).toBe("ai");
67
+ expect(messages[1].content).toBe("Hello");
68
+ expect(messages[2].content).toBe("Hi there!");
69
+ });
70
+
71
+ // ─── Behavior 3: Static prefix is identical across calls ───
72
+ // If we call buildPrompt twice with the same state (but more history),
73
+ // the first message (the static prefix) must be byte-identical to preserve the cache.
74
+
75
+ it("produces identical static prefix across multiple calls with growing history", () => {
76
+ const builder = new CacheOptimizedPromptBuilder();
77
+ const state: ContextState = {
78
+ globalSystemInstructions: "Be helpful.",
79
+ projectMemory: "Use strict types.",
80
+ sessionContext: "Env: Node",
81
+ conversationHistory: [],
82
+ };
83
+
84
+ const firstCall = builder.buildPrompt(state);
85
+
86
+ // Simulate a conversation turn
87
+ state.conversationHistory.push(new HumanMessage("What is 2+2?"));
88
+ state.conversationHistory.push(new AIMessage("4"));
89
+
90
+ const secondCall = builder.buildPrompt(state);
91
+
92
+ // Static prefix (first message) must be identical
93
+ expect(secondCall[0].content).toBe(firstCall[0].content);
94
+ });
95
+
96
+ // ─── Behavior 4: System reminder is injected as a HumanMessage ───
97
+
98
+ it("injects a system reminder as a HumanMessage with <system-reminder> tags", () => {
99
+ const builder = new CacheOptimizedPromptBuilder();
100
+ const history = [new HumanMessage("Start task")];
101
+
102
+ const updated = builder.injectSystemReminder(
103
+ history,
104
+ "File auth.ts was deleted."
105
+ );
106
+
107
+ // Original history is not mutated
108
+ expect(history).toHaveLength(1);
109
+
110
+ // Updated history has the reminder appended
111
+ expect(updated).toHaveLength(2);
112
+ expect(updated[1]._getType()).toBe("human");
113
+ expect(updated[1].content).toContain("<system-reminder>");
114
+ expect(updated[1].content).toContain("File auth.ts was deleted.");
115
+ expect(updated[1].content).toContain("</system-reminder>");
116
+ });
117
+
118
+ // ─── Behavior 5: Compaction preserves recent messages with summary ───
119
+
120
+ it("compacts history into summary + preserved recent messages", () => {
121
+ const builder = new CacheOptimizedPromptBuilder();
122
+ const longHistory = [
123
+ new HumanMessage("Step 1"),
124
+ new AIMessage("Done 1"),
125
+ new HumanMessage("Step 2"),
126
+ new AIMessage("Done 2"),
127
+ ];
128
+
129
+ const compacted = builder.compactHistory(
130
+ longHistory,
131
+ "Completed steps 1 and 2."
132
+ );
133
+
134
+ // Default keepLastN=6, history has 4 → summary + all 4 preserved
135
+ expect(compacted).toHaveLength(5);
136
+ expect(compacted[0]._getType()).toBe("human");
137
+ expect(compacted[0].content).toContain("Completed steps 1 and 2.");
138
+ // Recent messages are preserved after the summary
139
+ expect(compacted[1].content).toBe("Step 1");
140
+ });
141
+ });
@@ -0,0 +1,102 @@
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
2
+ import { SandboxManager } from "../sandbox/manager.js";
3
+
4
+ // Mock the e2b SDK since we don't want real sandbox creation in tests
5
+ vi.mock("e2b", () => {
6
+ const mockSandbox = {
7
+ sandboxId: "test-sandbox-123",
8
+ commands: {
9
+ run: vi.fn().mockResolvedValue({
10
+ stdout: "mock output",
11
+ stderr: "",
12
+ exitCode: 0,
13
+ }),
14
+ },
15
+ files: {
16
+ write: vi.fn().mockResolvedValue(undefined),
17
+ read: vi.fn().mockResolvedValue("file content"),
18
+ list: vi.fn().mockResolvedValue([]),
19
+ },
20
+ kill: vi.fn().mockResolvedValue(undefined),
21
+ isRunning: vi.fn().mockResolvedValue(true),
22
+ setTimeout: vi.fn().mockResolvedValue(undefined),
23
+ };
24
+
25
+ return {
26
+ Sandbox: {
27
+ create: vi.fn().mockResolvedValue(mockSandbox),
28
+ },
29
+ };
30
+ });
31
+
32
+ describe("SandboxManager", () => {
33
+ let manager: SandboxManager;
34
+
35
+ beforeEach(() => {
36
+ vi.clearAllMocks();
37
+ manager = new SandboxManager({ apiKey: "test-e2b-key" });
38
+ });
39
+
40
+ afterEach(async () => {
41
+ // Ensure sandbox is cleaned up after each test
42
+ try {
43
+ await manager.destroy();
44
+ } catch {
45
+ // Already destroyed or never created
46
+ }
47
+ });
48
+
49
+ // ─── Test #15: SandboxManager.create() initializes a sandbox ───
50
+
51
+ it("creates a sandbox and returns the sandbox ID", async () => {
52
+ const sandboxId = await manager.create();
53
+
54
+ expect(sandboxId).toBe("test-sandbox-123");
55
+ expect(manager.isActive()).toBe(true);
56
+ });
57
+
58
+ // ─── Test #16: SandboxManager.destroy() cleans up the sandbox ───
59
+
60
+ it("destroys the sandbox and marks it as inactive", async () => {
61
+ await manager.create();
62
+ expect(manager.isActive()).toBe(true);
63
+
64
+ await manager.destroy();
65
+ expect(manager.isActive()).toBe(false);
66
+ });
67
+
68
+ // ─── Test #17: SandboxManager.exec() runs a command in the sandbox ───
69
+
70
+ it("executes a command in the sandbox and returns output", async () => {
71
+ await manager.create();
72
+
73
+ const result = await manager.exec("echo hello");
74
+
75
+ expect(result.stdout).toBe("mock output");
76
+ expect(result.exitCode).toBe(0);
77
+ });
78
+
79
+ // ─── Test #18: SandboxManager.exec() throws if sandbox not active ───
80
+
81
+ it("throws an error if exec is called before create", async () => {
82
+ await expect(manager.exec("echo hello")).rejects.toThrow(
83
+ /sandbox is not active/i
84
+ );
85
+ });
86
+
87
+ // ─── Test #19: SandboxManager.uploadFile() writes a file to the sandbox ───
88
+
89
+ it("uploads a file to the sandbox filesystem", async () => {
90
+ await manager.create();
91
+
92
+ await manager.uploadFile("/workspace/src/foo.ts", "const x = 1;");
93
+
94
+ // Verify the E2B files.write was called (the mocked Sandbox.create always resolves to the same shared mock sandbox)
95
+ const { Sandbox } = await import("e2b");
96
+ const mockSandbox = await Sandbox.create();
97
+ expect(mockSandbox.files.write).toHaveBeenCalledWith(
98
+ "/workspace/src/foo.ts",
99
+ "const x = 1;"
100
+ );
101
+ });
102
+ });
@@ -0,0 +1,122 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+ import {
3
+ SecurityScanTool,
4
+ DepScanTool,
5
+ bindSecuritySandbox,
6
+ } from "../tools/security.js";
7
+ import { SandboxManager } from "../sandbox/manager.js";
8
+ import { LazyInstaller } from "../sandbox/bootstrap.js";
9
+
10
+ // Helpers
11
+ const createMockSandbox = (active = true) => ({
12
+ exec: vi.fn(),
13
+ isActive: vi.fn().mockReturnValue(active),
14
+ create: vi.fn(),
15
+ destroy: vi.fn(),
16
+ uploadFile: vi.fn(),
17
+ getSandbox: vi.fn(),
18
+ });
19
+
20
+ describe("SecurityScanTool", () => {
21
+ let mockSandbox: ReturnType<typeof createMockSandbox>;
22
+ let installer: LazyInstaller;
23
+
24
+ beforeEach(() => {
25
+ vi.clearAllMocks();
26
+ mockSandbox = createMockSandbox();
27
+ // Use custom template mode so ensureGeminiCli is instant
28
+ installer = new LazyInstaller(true);
29
+ bindSecuritySandbox(
30
+ mockSandbox as unknown as SandboxManager,
31
+ installer
32
+ );
33
+ });
34
+
35
+ // ─── Test #39: Runs security:analyze and returns report ───
36
+
37
+ it("runs gemini security:analyze and returns the report", async () => {
38
+ mockSandbox.exec.mockResolvedValueOnce({
39
+ exitCode: 0,
40
+ stdout: "## Security Report\n\nNo critical vulnerabilities found.",
41
+ stderr: "",
42
+ });
43
+
44
+ const result = await SecurityScanTool.execute({ target: "changes" });
45
+
46
+ expect(result.content).toContain("Security Report");
47
+ expect(mockSandbox.exec).toHaveBeenCalledWith(
48
+ expect.stringContaining("security:analyze")
49
+ );
50
+ });
51
+
52
+ // ─── Test #40: Returns error for file scan without path ───
53
+
54
+ it("returns error when target is 'file' but no path provided", async () => {
55
+ const result = await SecurityScanTool.execute({ target: "file" });
56
+
57
+ expect(result.content).toMatch(/path.*required/i);
58
+ });
59
+
60
+ // ─── Test #41: Handles failed scans gracefully ───
61
+
62
+ it("returns failure info when scan exits with non-zero code", async () => {
63
+ mockSandbox.exec.mockResolvedValueOnce({
64
+ exitCode: 1,
65
+ stdout: "",
66
+ stderr: "Some error occurred",
67
+ });
68
+
69
+ const result = await SecurityScanTool.execute({ target: "changes" });
70
+
71
+ expect(result.content).toContain("failed");
72
+ expect(result.content).toContain("Some error occurred");
73
+ });
74
+ });
75
+
76
+ describe("DepScanTool", () => {
77
+ let mockSandbox: ReturnType<typeof createMockSandbox>;
78
+ let installer: LazyInstaller;
79
+
80
+ beforeEach(() => {
81
+ vi.clearAllMocks();
82
+ mockSandbox = createMockSandbox();
83
+ installer = new LazyInstaller(true); // pre-baked template
84
+ bindSecuritySandbox(
85
+ mockSandbox as unknown as SandboxManager,
86
+ installer
87
+ );
88
+ });
89
+
90
+ // ─── Test #42: OSV-Scanner returns vulnerability report ───
91
+
92
+ it("runs osv-scanner and returns the report", async () => {
93
+ mockSandbox.exec.mockResolvedValueOnce({
94
+ exitCode: 0,
95
+ stdout: "Found 2 vulnerabilities:\n- CVE-2024-1234\n- CVE-2024-5678",
96
+ stderr: "",
97
+ });
98
+
99
+ const result = await DepScanTool.execute({ format: "summary" });
100
+
101
+ expect(result.content).toContain("CVE-2024-1234");
102
+ expect(result.content).toContain("CVE-2024-5678");
103
+ });
104
+
105
+ // ─── Test #43: Falls back to npm audit when OSV-Scanner fails ───
106
+
107
+ it("falls back to npm audit if osv-scanner returns empty output", async () => {
108
+ // OSV-Scanner: non-zero exit with empty stdout
109
+ mockSandbox.exec
110
+ .mockResolvedValueOnce({ exitCode: 1, stdout: "", stderr: "error" })
111
+ // npm audit fallback
112
+ .mockResolvedValueOnce({
113
+ exitCode: 0,
114
+ stdout: "found 0 vulnerabilities",
115
+ stderr: "",
116
+ });
117
+
118
+ const result = await DepScanTool.execute({ format: "summary" });
119
+
120
+ expect(result.content).toContain("0 vulnerabilities");
121
+ });
122
+ });
@@ -0,0 +1,82 @@
1
+ import { describe, it, expect, vi } from "vitest";
2
+ import { AIMessageChunk } from "@langchain/core/messages";
3
+ import { ExecutionHarness } from "../core/agentLoop.js";
4
+ import { ContextState } from "../core/promptBuilder.js";
5
+
6
/**
 * Builds a stand-in LLM for streaming tests, so no real API call is made.
 *
 * @param chunks - Message chunks to replay, in order, to whoever iterates
 *   the value that `.stream()` resolves to.
 * @returns An object with a bare `invoke` mock and a `stream` mock that
 *   resolves to an async iterable over `chunks`.
 */
function createMockStreamingLlm(chunks: AIMessageChunk[]) {
  // Async-iterable wrapper: each `for await` pass replays the chunks from the start.
  const replayedStream = {
    async *[Symbol.asyncIterator]() {
      yield* chunks;
    },
  };

  const invoke = vi.fn();
  const stream = vi.fn().mockResolvedValue(replayedStream);

  return { invoke, stream };
}
22
+
23
describe("ExecutionHarness Streaming", () => {
  // Minimal context shared by every test: a system prompt and nothing else.
  const baseState: ContextState = {
    globalSystemInstructions: "You are a helpful assistant.",
    projectMemory: "",
    sessionContext: "",
    conversationHistory: [],
  };

  // ─── RED Test #8: streamStep emits text chunks to a callback ───

  it("emits text content chunks to an onToken callback", async () => {
    const chunks = ["Hello", " world", "!"].map(
      (content) => new AIMessageChunk({ content })
    );
    const harness = new ExecutionHarness(createMockStreamingLlm(chunks) as any);

    const receivedTokens: string[] = [];
    const result = await harness.streamStep(baseState, {
      onToken: (token: string) => receivedTokens.push(token),
    });

    // Callback should have received each text chunk
    expect(receivedTokens).toEqual(["Hello", " world", "!"]);

    // The returned message should contain the full concatenated content
    expect(result.content).toBe("Hello world!");
  });

  // ─── RED Test #9: streamStep buffers tool calls and returns complete AIMessage ───

  it("buffers tool call chunks and returns a complete AIMessage with tool_calls", async () => {
    // One tool call split across two chunks: the first carries the name, id and
    // the opening of the JSON args; the second carries only the rest of the args.
    const openingChunk = new AIMessageChunk({
      content: "",
      tool_call_chunks: [
        { name: "read_file", args: '{"path": "', index: 0, id: "tc_1", type: "tool_call_chunk" },
      ],
    });
    const closingChunk = new AIMessageChunk({
      content: "",
      tool_call_chunks: [
        { name: undefined, args: 'src/index.ts"}', index: 0, id: undefined, type: "tool_call_chunk" },
      ],
    });
    const harness = new ExecutionHarness(
      createMockStreamingLlm([openingChunk, closingChunk]) as any
    );

    const result = await harness.streamStep(baseState, {});

    // The result should have tool_calls populated
    expect(result.tool_calls).toBeDefined();
    const toolCalls = result.tool_calls!;
    expect(toolCalls.length).toBe(1);
    expect(toolCalls[0].name).toBe("read_file");
    expect(toolCalls[0].args).toEqual({ path: "src/index.ts" });
  });
});