joonecli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/AGENTS.md +56 -0
  2. package/Handover.md +115 -0
  3. package/LICENSE +201 -0
  4. package/PROGRESS.md +160 -0
  5. package/README.md +114 -0
  6. package/dist/__tests__/bootstrap.test.d.ts +1 -0
  7. package/dist/__tests__/bootstrap.test.js +76 -0
  8. package/dist/__tests__/bootstrap.test.js.map +1 -0
  9. package/dist/__tests__/config.test.d.ts +1 -0
  10. package/dist/__tests__/config.test.js +84 -0
  11. package/dist/__tests__/config.test.js.map +1 -0
  12. package/dist/__tests__/m55.test.d.ts +1 -0
  13. package/dist/__tests__/m55.test.js +160 -0
  14. package/dist/__tests__/m55.test.js.map +1 -0
  15. package/dist/__tests__/middleware.test.d.ts +1 -0
  16. package/dist/__tests__/middleware.test.js +169 -0
  17. package/dist/__tests__/middleware.test.js.map +1 -0
  18. package/dist/__tests__/modelFactory.test.d.ts +1 -0
  19. package/dist/__tests__/modelFactory.test.js +50 -0
  20. package/dist/__tests__/modelFactory.test.js.map +1 -0
  21. package/dist/__tests__/optimizations.test.d.ts +1 -0
  22. package/dist/__tests__/optimizations.test.js +136 -0
  23. package/dist/__tests__/optimizations.test.js.map +1 -0
  24. package/dist/__tests__/promptBuilder.test.d.ts +1 -0
  25. package/dist/__tests__/promptBuilder.test.js +108 -0
  26. package/dist/__tests__/promptBuilder.test.js.map +1 -0
  27. package/dist/__tests__/sandbox.test.d.ts +1 -0
  28. package/dist/__tests__/sandbox.test.js +78 -0
  29. package/dist/__tests__/sandbox.test.js.map +1 -0
  30. package/dist/__tests__/security.test.d.ts +1 -0
  31. package/dist/__tests__/security.test.js +86 -0
  32. package/dist/__tests__/security.test.js.map +1 -0
  33. package/dist/__tests__/streaming.test.d.ts +1 -0
  34. package/dist/__tests__/streaming.test.js +71 -0
  35. package/dist/__tests__/streaming.test.js.map +1 -0
  36. package/dist/__tests__/toolRouter.test.d.ts +1 -0
  37. package/dist/__tests__/toolRouter.test.js +37 -0
  38. package/dist/__tests__/toolRouter.test.js.map +1 -0
  39. package/dist/__tests__/tools.test.d.ts +1 -0
  40. package/dist/__tests__/tools.test.js +112 -0
  41. package/dist/__tests__/tools.test.js.map +1 -0
  42. package/dist/__tests__/tracing.test.d.ts +1 -0
  43. package/dist/__tests__/tracing.test.js +147 -0
  44. package/dist/__tests__/tracing.test.js.map +1 -0
  45. package/dist/cli/config.d.ts +49 -0
  46. package/dist/cli/config.js +86 -0
  47. package/dist/cli/config.js.map +1 -0
  48. package/dist/cli/index.d.ts +2 -0
  49. package/dist/cli/index.js +625 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/modelFactory.d.ts +9 -0
  52. package/dist/cli/modelFactory.js +154 -0
  53. package/dist/cli/modelFactory.js.map +1 -0
  54. package/dist/cli/providers.d.ts +18 -0
  55. package/dist/cli/providers.js +94 -0
  56. package/dist/cli/providers.js.map +1 -0
  57. package/dist/core/agentLoop.d.ts +43 -0
  58. package/dist/core/agentLoop.js +245 -0
  59. package/dist/core/agentLoop.js.map +1 -0
  60. package/dist/core/errors.d.ts +62 -0
  61. package/dist/core/errors.js +139 -0
  62. package/dist/core/errors.js.map +1 -0
  63. package/dist/core/promptBuilder.d.ts +49 -0
  64. package/dist/core/promptBuilder.js +84 -0
  65. package/dist/core/promptBuilder.js.map +1 -0
  66. package/dist/core/reasoningRouter.d.ts +62 -0
  67. package/dist/core/reasoningRouter.js +102 -0
  68. package/dist/core/reasoningRouter.js.map +1 -0
  69. package/dist/core/retry.d.ts +25 -0
  70. package/dist/core/retry.js +49 -0
  71. package/dist/core/retry.js.map +1 -0
  72. package/dist/core/sessionResumer.d.ts +17 -0
  73. package/dist/core/sessionResumer.js +78 -0
  74. package/dist/core/sessionResumer.js.map +1 -0
  75. package/dist/core/sessionStore.d.ts +45 -0
  76. package/dist/core/sessionStore.js +167 -0
  77. package/dist/core/sessionStore.js.map +1 -0
  78. package/dist/core/tokenCounter.d.ts +17 -0
  79. package/dist/core/tokenCounter.js +54 -0
  80. package/dist/core/tokenCounter.js.map +1 -0
  81. package/dist/evals/dataset.d.ts +4 -0
  82. package/dist/evals/dataset.js +61 -0
  83. package/dist/evals/dataset.js.map +1 -0
  84. package/dist/evals/evaluator.d.ts +21 -0
  85. package/dist/evals/evaluator.js +68 -0
  86. package/dist/evals/evaluator.js.map +1 -0
  87. package/dist/hitl/bridge.d.ts +65 -0
  88. package/dist/hitl/bridge.js +120 -0
  89. package/dist/hitl/bridge.js.map +1 -0
  90. package/dist/middleware/commandSanitizer.d.ts +18 -0
  91. package/dist/middleware/commandSanitizer.js +50 -0
  92. package/dist/middleware/commandSanitizer.js.map +1 -0
  93. package/dist/middleware/loopDetection.d.ts +28 -0
  94. package/dist/middleware/loopDetection.js +49 -0
  95. package/dist/middleware/loopDetection.js.map +1 -0
  96. package/dist/middleware/permission.d.ts +17 -0
  97. package/dist/middleware/permission.js +59 -0
  98. package/dist/middleware/permission.js.map +1 -0
  99. package/dist/middleware/pipeline.d.ts +31 -0
  100. package/dist/middleware/pipeline.js +62 -0
  101. package/dist/middleware/pipeline.js.map +1 -0
  102. package/dist/middleware/preCompletion.d.ts +29 -0
  103. package/dist/middleware/preCompletion.js +82 -0
  104. package/dist/middleware/preCompletion.js.map +1 -0
  105. package/dist/middleware/types.d.ts +40 -0
  106. package/dist/middleware/types.js +8 -0
  107. package/dist/middleware/types.js.map +1 -0
  108. package/dist/sandbox/bootstrap.d.ts +38 -0
  109. package/dist/sandbox/bootstrap.js +107 -0
  110. package/dist/sandbox/bootstrap.js.map +1 -0
  111. package/dist/sandbox/manager.d.ts +72 -0
  112. package/dist/sandbox/manager.js +180 -0
  113. package/dist/sandbox/manager.js.map +1 -0
  114. package/dist/sandbox/sync.d.ts +55 -0
  115. package/dist/sandbox/sync.js +135 -0
  116. package/dist/sandbox/sync.js.map +1 -0
  117. package/dist/skills/loader.d.ts +55 -0
  118. package/dist/skills/loader.js +132 -0
  119. package/dist/skills/loader.js.map +1 -0
  120. package/dist/skills/tools.d.ts +5 -0
  121. package/dist/skills/tools.js +78 -0
  122. package/dist/skills/tools.js.map +1 -0
  123. package/dist/skills/types.d.ts +13 -0
  124. package/dist/skills/types.js +2 -0
  125. package/dist/skills/types.js.map +1 -0
  126. package/dist/test_cache.d.ts +1 -0
  127. package/dist/test_cache.js +55 -0
  128. package/dist/test_cache.js.map +1 -0
  129. package/dist/test_google.js +93 -0
  130. package/dist/tools/askUser.d.ts +10 -0
  131. package/dist/tools/askUser.js +42 -0
  132. package/dist/tools/askUser.js.map +1 -0
  133. package/dist/tools/browser.d.ts +19 -0
  134. package/dist/tools/browser.js +111 -0
  135. package/dist/tools/browser.js.map +1 -0
  136. package/dist/tools/index.d.ts +27 -0
  137. package/dist/tools/index.js +184 -0
  138. package/dist/tools/index.js.map +1 -0
  139. package/dist/tools/registry.d.ts +31 -0
  140. package/dist/tools/registry.js +168 -0
  141. package/dist/tools/registry.js.map +1 -0
  142. package/dist/tools/router.d.ts +34 -0
  143. package/dist/tools/router.js +73 -0
  144. package/dist/tools/router.js.map +1 -0
  145. package/dist/tools/security.d.ts +28 -0
  146. package/dist/tools/security.js +183 -0
  147. package/dist/tools/security.js.map +1 -0
  148. package/dist/tools/webSearch.d.ts +6 -0
  149. package/dist/tools/webSearch.js +120 -0
  150. package/dist/tools/webSearch.js.map +1 -0
  151. package/dist/tracing/analyzer.d.ts +58 -0
  152. package/dist/tracing/analyzer.js +190 -0
  153. package/dist/tracing/analyzer.js.map +1 -0
  154. package/dist/tracing/langsmith.d.ts +38 -0
  155. package/dist/tracing/langsmith.js +50 -0
  156. package/dist/tracing/langsmith.js.map +1 -0
  157. package/dist/tracing/sessionTracer.d.ts +73 -0
  158. package/dist/tracing/sessionTracer.js +157 -0
  159. package/dist/tracing/sessionTracer.js.map +1 -0
  160. package/dist/tracing/types.d.ts +46 -0
  161. package/dist/tracing/types.js +5 -0
  162. package/dist/tracing/types.js.map +1 -0
  163. package/dist/ui/App.d.ts +24 -0
  164. package/dist/ui/App.js +172 -0
  165. package/dist/ui/App.js.map +1 -0
  166. package/dist/ui/components/HITLPrompt.d.ts +15 -0
  167. package/dist/ui/components/HITLPrompt.js +35 -0
  168. package/dist/ui/components/HITLPrompt.js.map +1 -0
  169. package/dist/ui/components/Header.d.ts +8 -0
  170. package/dist/ui/components/Header.js +6 -0
  171. package/dist/ui/components/Header.js.map +1 -0
  172. package/dist/ui/components/MessageBubble.d.ts +13 -0
  173. package/dist/ui/components/MessageBubble.js +17 -0
  174. package/dist/ui/components/MessageBubble.js.map +1 -0
  175. package/dist/ui/components/StatusBar.d.ts +21 -0
  176. package/dist/ui/components/StatusBar.js +34 -0
  177. package/dist/ui/components/StatusBar.js.map +1 -0
  178. package/dist/ui/components/StreamingText.d.ts +13 -0
  179. package/dist/ui/components/StreamingText.js +24 -0
  180. package/dist/ui/components/StreamingText.js.map +1 -0
  181. package/dist/ui/components/ToolCallPanel.d.ts +15 -0
  182. package/dist/ui/components/ToolCallPanel.js +18 -0
  183. package/dist/ui/components/ToolCallPanel.js.map +1 -0
  184. package/docs/01_insights_and_patterns.md +27 -0
  185. package/docs/02_edge_cases_and_mitigations.md +143 -0
  186. package/docs/03_initial_implementation_plan.md +66 -0
  187. package/docs/04_tech_stack_proposal.md +20 -0
  188. package/docs/05_prd.md +87 -0
  189. package/docs/06_user_stories.md +72 -0
  190. package/docs/07_system_architecture.md +138 -0
  191. package/docs/08_roadmap.md +200 -0
  192. package/e2b/Dockerfile +26 -0
  193. package/package.json +57 -0
  194. package/src/__tests__/bootstrap.test.ts +111 -0
  195. package/src/__tests__/config.test.ts +97 -0
  196. package/src/__tests__/m55.test.ts +238 -0
  197. package/src/__tests__/middleware.test.ts +219 -0
  198. package/src/__tests__/modelFactory.test.ts +63 -0
  199. package/src/__tests__/optimizations.test.ts +201 -0
  200. package/src/__tests__/promptBuilder.test.ts +141 -0
  201. package/src/__tests__/sandbox.test.ts +102 -0
  202. package/src/__tests__/security.test.ts +122 -0
  203. package/src/__tests__/streaming.test.ts +82 -0
  204. package/src/__tests__/toolRouter.test.ts +52 -0
  205. package/src/__tests__/tools.test.ts +146 -0
  206. package/src/__tests__/tracing.test.ts +196 -0
  207. package/src/agents/agentRegistry.ts +69 -0
  208. package/src/agents/agentSpec.ts +67 -0
  209. package/src/agents/builtinAgents.ts +142 -0
  210. package/src/cli/config.ts +124 -0
  211. package/src/cli/index.ts +730 -0
  212. package/src/cli/modelFactory.ts +174 -0
  213. package/src/cli/providers.ts +107 -0
  214. package/src/commands/builtinCommands.ts +293 -0
  215. package/src/commands/commandRegistry.ts +194 -0
  216. package/src/core/agentLoop.d.ts.map +1 -0
  217. package/src/core/agentLoop.ts +312 -0
  218. package/src/core/autoSave.ts +95 -0
  219. package/src/core/compactor.ts +252 -0
  220. package/src/core/contextGuard.ts +129 -0
  221. package/src/core/errors.ts +202 -0
  222. package/src/core/promptBuilder.d.ts.map +1 -0
  223. package/src/core/promptBuilder.ts +139 -0
  224. package/src/core/reasoningRouter.ts +121 -0
  225. package/src/core/retry.ts +75 -0
  226. package/src/core/sessionResumer.ts +90 -0
  227. package/src/core/sessionStore.ts +215 -0
  228. package/src/core/subAgent.ts +339 -0
  229. package/src/core/tokenCounter.ts +64 -0
  230. package/src/evals/dataset.ts +67 -0
  231. package/src/evals/evaluator.ts +81 -0
  232. package/src/hitl/bridge.ts +160 -0
  233. package/src/middleware/commandSanitizer.ts +60 -0
  234. package/src/middleware/loopDetection.ts +63 -0
  235. package/src/middleware/permission.ts +72 -0
  236. package/src/middleware/pipeline.ts +75 -0
  237. package/src/middleware/preCompletion.ts +94 -0
  238. package/src/middleware/types.ts +45 -0
  239. package/src/sandbox/bootstrap.ts +121 -0
  240. package/src/sandbox/manager.ts +239 -0
  241. package/src/sandbox/sync.ts +157 -0
  242. package/src/skills/loader.ts +143 -0
  243. package/src/skills/tools.ts +99 -0
  244. package/src/skills/types.ts +13 -0
  245. package/src/test_cache.ts +72 -0
  246. package/src/test_google.js +40 -0
  247. package/src/test_google.ts +40 -0
  248. package/src/tools/askUser.ts +47 -0
  249. package/src/tools/browser.ts +137 -0
  250. package/src/tools/index.d.ts.map +1 -0
  251. package/src/tools/index.ts +237 -0
  252. package/src/tools/registry.ts +198 -0
  253. package/src/tools/router.ts +78 -0
  254. package/src/tools/security.ts +220 -0
  255. package/src/tools/spawnAgent.ts +158 -0
  256. package/src/tools/webSearch.ts +142 -0
  257. package/src/tracing/analyzer.ts +265 -0
  258. package/src/tracing/langsmith.ts +63 -0
  259. package/src/tracing/sessionTracer.ts +202 -0
  260. package/src/tracing/types.ts +49 -0
  261. package/src/types/valyu.d.ts +37 -0
  262. package/src/ui/App.tsx +404 -0
  263. package/src/ui/components/HITLPrompt.tsx +119 -0
  264. package/src/ui/components/Header.tsx +51 -0
  265. package/src/ui/components/MessageBubble.tsx +46 -0
  266. package/src/ui/components/StatusBar.tsx +138 -0
  267. package/src/ui/components/StreamingText.tsx +48 -0
  268. package/src/ui/components/ToolCallPanel.tsx +80 -0
  269. package/tests/commands/commands.test.ts +356 -0
  270. package/tests/core/compactor.test.ts +217 -0
  271. package/tests/core/retryAndErrors.test.ts +164 -0
  272. package/tests/core/sessionResumer.test.ts +95 -0
  273. package/tests/core/sessionStore.test.ts +84 -0
  274. package/tests/core/stability.test.ts +165 -0
  275. package/tests/core/subAgent.test.ts +238 -0
  276. package/tests/hitl/hitlBridge.test.ts +115 -0
  277. package/tsconfig.json +16 -0
  278. package/vitest.config.ts +10 -0
  279. package/vitest.out +48 -0
@@ -0,0 +1,72 @@
1
+ # User Stories
2
+
3
+ This document contains the foundational user stories for the Joone agent, organized by Epic. It does not include exhaustive acceptance criteria, but rather serves as a high-level requirements tracker for the core features.
4
+
5
+ ## Epic 1: CLI & Configuration
6
+
7
+ - **US 1.1**: As a user, I want to install joone globally via `npm i -g joone` and run it with `joone` in any project directory.
8
+ - **US 1.2**: As a user, I want to select my preferred LLM provider and model on first run or via `joone config`, choosing from at least 9 providers (Anthropic, OpenAI, Google, Mistral, Groq, DeepSeek, Fireworks, Together AI, Ollama).
9
+ - **US 1.3**: As a user, I want my API key collected via masked interactive input during `joone config`, so I never have to manually create `.env` files.
10
+ - **US 1.4**: As a user, I want my preferences stored at `~/.joone/config.json` with restrictive file permissions, so I don't re-enter them every session.
11
+ - **US 1.5**: As a user, I want the CLI to tell me which provider package to install if it's missing (e.g., `Run: npm install @langchain/groq`).
12
+ - **US 1.6** _(Planned)_: As a security-conscious user, I want to choose during onboarding whether to store my API key in a plain config file, OS Keychain, or encrypted config.
13
+
14
+ ## Epic 2: Streaming & Output
15
+
16
+ - **US 2.1**: As a user, I want to see the agent's response stream token-by-token in my terminal, not wait for the entire response to finish.
17
+ - **US 2.2**: As the system, I want to buffer tool call JSON during streaming until the full call is received, then execute it.
18
+ - **US 2.3**: As a user, I want the option to disable streaming via `joone config` or a CLI flag (`--no-stream`).
19
+
20
+ ## Epic 3: The Context & Prompt Layer
21
+
22
+ - **US 3.1**: As a developer, I want the system prompt to be strictly divided into static and dynamic sections, so that I maximize prompt caching and reduce costs.
23
+ - **US 3.2**: As the system, I need to inject state updates (like time or file changes) into the conversation history as simulated messages (`<system-reminder>`), so I avoid invalidating the static prefix cache.
24
+ - **US 3.3**: As the system, when the context window reaches 90% capacity, I want to execute a cache-safe compaction that summarizes early history while keeping the system prompt matching the parent thread.
25
+
26
+ ## Epic 4: Hybrid Sandbox Execution
27
+
28
+ - **US 4.1**: As a user, I want `write_file` and `read_file` to operate on my host filesystem, so I can see the agent's code changes in my IDE in real-time.
29
+ - **US 4.4**: As the system, I want to create a new E2B sandbox at the start of each agent session and destroy it when the session ends or times out, so that each session has a clean isolated environment and resources are properly released.
30
+ - **US 4.5**: As a developer, I want the tool router to automatically determine whether a tool runs on the host or in the sandbox based on tool type.
31
+
32
+ ## Epic 5: Tooling & Lazy Loading
33
+
34
+ - **US 5.1**: As an agent, I want access to core tools (`read_file`, `write_file`, `run_bash_command`) defined statically at the beginning of the session.
35
+ - **US 5.2**: As an agent, I want to use a "Search Tools" endpoint to learn about complex or specific tools, rather than having all 50+ tool schemas loaded simultaneously into my context window.
36
+ - **US 5.3**: As a developer, I want guardrails on `read_file` so the agent cannot accidentally load a 10MB file into the context window and blind itself.
37
+
38
+ ## Epic 6: Middleware Guards & Execution Loops
39
+
40
+ - **US 6.1**: As a developer, I want a `LoopDetectionMiddleware` that counts how many consecutive times an agent has failed a specific action.
41
+ - **US 6.2**: As an agent stuck in a loop, I want the system to interrupt me and tell me to reconsider my approach, so I don't waste tokens repeating a failure.
42
+ - **US 6.3**: As an agent trying to finish a task, I want a `PreCompletionMiddleware` to ask me if I have run tests. If I haven't, it should block completion and ask me to run verifications.
43
+ - **US 6.4**: As the system, I want to parse test exit codes; if a test fails (`exit 1`), I want to block the agent from declaring the task "Done" unless a max retry limit is reached.
44
+
45
+ - **US 7.1**: As an operator, I want every agent decision, tool call, and token metric logged to a standard trace format so I can monitor cache hit rates.
46
+ - **US 7.2**: As an operator, I want a script that can read failed traces and use an LLM to automatically summarize _why_ the agent failed tasks, allowing me to refine the harness.
47
+
48
+ ## Epic 8: TUI Slash Commands (M11)
49
+
50
+ - **US 8.1**: As a user, I want to type `/help` or `/?` to see a list of all available commands without making an LLM call.
51
+ - **US 8.2**: As a user, I want to switch models mid-session securely by typing `/model <name>`.
52
+ - **US 8.3**: As a user with a bloated history context, I want to type `/compact` to manually force a context summarization.
53
+ - **US 8.4**: As an error-prone user, if I type `/cls` instead of `/clear`, I want the UI to suggest `/clear` via Levenshtein-distance matching instead of sending garbage tokens to the API.
54
+
55
+ ## Epic 9: LLM-Powered Compaction (M12)
56
+
57
+ - **US 9.1**: As an agent managing a huge conversation history, I want to delegate summarization of my older messages to an LLM, so the resulting summary is precise, preserving file paths and tool outcomes perfectly.
58
+ - **US 9.2**: As the system, I want to automatically select a cheaper, faster LLM model (like `gpt-4o-mini` instead of `gpt-4o`) to perform the background compaction, saving the user money.
59
+ - **US 9.3**: As a resumed agent, I want a seamless Handoff Prompt injected directly beneath the compaction summary, so I instantly understand that my persona and context are still intact.
60
+
61
+ ## Epic 10: Sub-Agent Orchestration (M13)
62
+
63
+ - **US 10.1**: As the main reasoning agent, I want the ability to spawn named "sub-agents" to handle specialized tasks (e.g., executing scripts, analyzing directories) so I don't clutter my own context window.
64
+ - **US 10.2**: As the main agent, I want to spawn certain sub-agents asynchronously, allowing me to continue reasoning or writing files while the sub-agent scans tests in the background.
65
+ - **US 10.3**: As an orchestrator, I want a hard depth limit (Depth-1) that strictly prevents a sub-agent from spawning further sub-agents ad infinitum.
66
+
67
+ ## Epic 11: Stability & Reliability (M14)
68
+
69
+ - **US 11.1**: As the core engine, I want a proactive `ContextGuard` that estimates API token payloads before sending the request to the provider, automatically triggering compaction at 80% usage.
70
+ - **US 11.2**: As the core engine, I want an absolute Emergency Truncation trap door at 95% capacity to prevent immediate process death when compaction isn't fast enough.
71
+ - **US 11.3**: As a user working on a long-running complex task, I want the `AutoSave` feature to quietly save my `.jsonl` session file atomically in the background every few turns.
72
+ - **US 11.4**: As a user, when I hit `Ctrl+C` in my terminal, I want the CLI to intercept the shutdown signal, force a final instantaneous save, and clean up the sandbox before exiting.
@@ -0,0 +1,138 @@
1
+ # System Architecture
2
+
3
+ ## High-Level Architecture Overview
4
+
5
+ The system operates as a CLI-based REPL (Read-Eval-Print Loop) Agent Wrapper. The user runs `joone` in their project directory. The LLM is nested within an "Execution Harness" that mediates all inputs, actions, and memory. Responses are **streamed** token-by-token.
6
+
7
+ ### Hybrid Sandbox Model
8
+
9
+ Joone uses a **Hybrid** architecture for safety and developer experience:
10
+
11
+ - **File operations** (`write_file`, `read_file`) run on the **host machine**, so the user sees changes in real-time in their IDE.
12
+ - **Code execution** (`bash`, `npm test`, scripts) runs inside an **E2B sandboxed microVM**, protecting the host from destructive commands.
13
+ - A **File Sync** layer mirrors changed files from host → sandbox before each execution.
14
+
15
+ ```
16
+ ┌─────────────────────────┐ ┌──────────────────────────┐
17
+ │ HOST MACHINE │ sync │ E2B SANDBOX │
18
+ │ │ ───────► │ │
19
+ │ write_file ──► disk │ │ /workspace/ (mirror) │
20
+ │ read_file ◄── disk │ │ │
21
+ │ │ │ bash, npm test, scripts │
22
+ │ User sees changes │ │ run here (isolated) │
23
+ │ live in their IDE │ │ │
24
+ └─────────────────────────┘ └──────────────────────────┘
25
+ ```
26
+
27
+ ## System Diagram
28
+
29
+ ```mermaid
30
+ graph TD
31
+ Client["User CLI (joone)"] -->|Task Input| Config
32
+ Config["Config Manager (~/.joone/config.json)"] -->|Provider + Key| Factory
33
+ Factory[Model Factory] -->|BaseChatModel| MainLoop
34
+
35
+ subgraph Agent Execution Harness
36
+ MainLoop[Execution Engine]
37
+ State[Conversation State Manager]
38
+ PromptBuilder[Cache-Oriented Prompt Builder]
39
+ StreamHandler[Stream Handler]
40
+
41
+ State --> PromptBuilder
42
+ MainLoop --> PromptBuilder
43
+ PromptBuilder --> LLM((LLM API))
44
+ LLM -->|Streamed Chunks| StreamHandler
45
+ StreamHandler -->|Complete Tool Call| Middlewares
46
+ StreamHandler -->|Text Tokens| Terminal[Terminal Output]
47
+ end
48
+
49
+ subgraph Middleware Pipeline
50
+ Middlewares{Middleware Orchestrator}
51
+ LoopDet[Loop Detection]
52
+ PreComp[Pre-Completion Check]
53
+ Guard[File Size Guardrails]
54
+
55
+ Middlewares --> LoopDet
56
+ Middlewares --> PreComp
57
+ Middlewares --> Guard
58
+ end
59
+
60
+ subgraph "Tool Routing (Hybrid)"
61
+ Middlewares -->|Approved Tool Call| Router{Tool Router}
62
+ Router -->|"write_file, read_file"| HostFS["Host Filesystem (Node.js fs)"]
63
+ Router -->|"bash, test, install"| Sync[File Sync Layer]
64
+ Sync -->|Upload changed files| Sandbox["E2B MicroVM (Ubuntu)"]
65
+ Sandbox -->|stdout/stderr| MainLoop
66
+ HostFS -->|File content| MainLoop
67
+ end
68
+ ```
69
+
70
+ ## Component Breakdown
71
+
72
+ 1. **CLI & Config Layer** (`src/cli/`):
73
+ - `index.ts`: Parses user commands (`joone`, `joone config`) via Commander.js.
74
+ - `config.ts`: Reads/writes `~/.joone/config.json`. Stores provider, model, API key (plain text + `chmod 600`), streaming preference, and temperature.
75
+ - `modelFactory.ts`: Factory that dynamically imports the correct LangChain provider package and returns a `BaseChatModel`. Supports 9+ providers.
76
+
77
+ 2. **State Manager & Prompt Builder** (`src/core/promptBuilder.ts`):
78
+ - Maintains the "Prefix Match": compiles the static system prompt, appends project variables once, and from then on only appends new messages (never rewriting earlier ones), so the cached prefix stays valid.
79
+
80
+ 3. **Execution Engine** (`src/core/agentLoop.ts`):
81
+ - Calls the LLM via `.stream()` (default) or `.invoke()`.
82
+ - The **Stream Handler** prints text tokens to stdout in real-time and buffers tool call JSON chunks until complete.
83
+ - Routes completed tool calls to the Middleware pipeline.
84
+
85
+ 4. **Middleware Orchestrator** (`src/middleware/`):
86
+ - Implements the Observer pattern over the `on_tool_call` and `on_submit` events.
87
+ - Operates on a structured `ToolResult` interface (`{ content, metadata, isError }`) to robustly pass execution metadata (like process exit codes) through the pipeline without brittle string parsing.
88
+ - Can _intercept_ or _modify_ a tool request before it hits the tools.
89
+ - Can _inject_ `<system-reminder>` messages back to the Execution Engine.
90
+
91
+ 5. **Tool Router & Hybrid Execution**:
92
+ - **Host tools** (`write_file`, `read_file`): Execute directly on the host via Node.js `fs`. Changes appear instantly in the user's IDE.
93
+ - **Sandbox tools** (`run_bash_command`, `run_tests`, `install_dependencies`): Route through the File Sync layer → E2B sandbox.
94
+ - The split is determined by tool type, not configuration.
95
+
96
+ 6. **File Sync Layer** (`src/sandbox/sync.ts`):
97
+ - Tracks which files have changed on the host since the last sandbox sync.
98
+ - Before each sandbox execution, uploads only the changed files to the sandbox's `/workspace/` directory.
99
+ - Strategies: **upload-on-execute** (default) or **watch & mirror** (future).
100
+
101
+ 7. **E2B Sandbox** (`src/sandbox/`):
102
+ - Each agent session initializes an E2B cloud sandbox via the `e2b` TypeScript SDK.
103
+ - All bash commands and code execution run via `sandbox.commands.run()`.
104
+ - The sandbox is destroyed on session end or timeout.
105
+ - The host machine is **never** exposed to agent-executed commands.
106
+
107
+ ## Tool Routing Table
108
+
109
+ | Tool | Runs On | Why |
110
+ | ---------------------- | ----------- | ----------------------------------------- |
111
+ | `write_file` | **Host** | User sees changes in IDE instantly |
112
+ | `read_file` | **Host** | Reads the real project files |
113
+ | `run_bash_command` | **Sandbox** | Protects host from destructive commands |
114
+ | `run_tests` | **Sandbox** | Tests may have side-effects |
115
+ | `install_dependencies` | **Sandbox** | npm install can execute arbitrary scripts |
116
+ | `search_tools` | **Host** | Registry lookup, no execution |
117
+
118
+ ## Supported LLM Providers
119
+
120
+ | Provider | Package | Dynamic Import |
121
+ | -------------- | ------------------------- | -------------------------- |
122
+ | Anthropic | `@langchain/anthropic` | `ChatAnthropic` |
123
+ | OpenAI | `@langchain/openai` | `ChatOpenAI` |
124
+ | Google | `@langchain/google-genai` | `ChatGoogleGenerativeAI` |
125
+ | Mistral | `@langchain/mistralai` | `ChatMistralAI` |
126
+ | Groq | `@langchain/groq` | `ChatGroq` |
127
+ | DeepSeek | OpenAI-compatible | `ChatOpenAI` with base URL |
128
+ | Fireworks | `@langchain/community` | `ChatFireworks` |
129
+ | Together AI | `@langchain/community` | `ChatTogetherAI` |
130
+ | Ollama (Local) | `@langchain/ollama` | `ChatOllama` |
131
+
132
+ ## Security Roadmap
133
+
134
+ | Tier | Method | Status |
135
+ | ---- | -------------------------- | -------------------- |
136
+ | 1 | Plain config + `chmod 600` | **Active (Default)** |
137
+ | 2 | OS Keychain (`keytar`) | Planned |
138
+ | 3 | AES-256 encrypted config | Planned |
@@ -0,0 +1,200 @@
1
+ # Implementation Roadmap
2
+
3
+ We will tackle this project moving from the foundation outward.
4
+
5
+ ## Milestone 1: The Foundation (Core Execution & Caching) ✅
6
+
7
+ **Goal:** Build a basic agent that successfully executes simple loops while maintaining 100% cache-prefix validity across turns.
8
+
9
+ 1. ~~**Setup Project**: Initialize the repository based on the chosen Tech Stack.~~
10
+ 2. ~~**The Prompt Builder Engine**: Build the class responsible for layering static instruction strings, tools, and message arrays cleanly.~~
11
+ 3. **Core Tooling**: Implement `bash_executor` and `file_reader` / `file_writer`.
12
+ 4. **Basic Event Loop**: Implement a while loop that queries the LLM and executes exactly the tool call it requests.
13
+
14
+ ## Milestone 2: CLI Packaging & Provider Selection
15
+
16
+ **Goal:** Package joone as an installable CLI tool with dynamic LLM provider configuration, streaming output, and secure API key management.
17
+
18
+ ### 2a. Config Manager (`src/cli/config.ts`)
19
+
20
+ 1. **`JooneConfig` interface**: Define shape: `provider`, `model`, `apiKey`, `maxTokens`, `temperature`, `streaming`.
21
+ 2. **`loadConfig()`**: Reads `~/.joone/config.json`. Returns sensible defaults if file doesn't exist.
22
+ 3. **`saveConfig(config)`**: Writes JSON to `~/.joone/config.json`. Sets file permissions to `600` (owner-only).
23
+ 4. **Env var fallback**: If `apiKey` is missing from config, check `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, etc.
24
+
25
+ ### 2b. Model Factory (`src/cli/modelFactory.ts`)
26
+
27
+ 1. **`createModel(config)`**: Factory function that switches on `config.provider`.
28
+ 2. **Dynamic imports**: Uses `await import("@langchain/anthropic")` etc. to avoid bundling all providers.
29
+ 3. **Missing package detection**: If the import fails, print `"Provider X requires @langchain/X. Run: npm install @langchain/X"`.
30
+ 4. **API key validation**: Throws a descriptive error if the API key is missing for the selected provider.
31
+ 5. **Supported providers** (9+): Anthropic, OpenAI, Google, Mistral, Groq, DeepSeek, Fireworks, Together AI, Ollama.
32
+
33
+ ### 2c. CLI Entry Point (`src/cli/index.ts`)
34
+
35
+ 1. **Commander.js** for command parsing.
36
+ 2. **`joone` (default command)**: Loads config → creates model → starts execution harness REPL.
37
+ 3. **`joone config`**: Interactive prompts (via `@inquirer/prompts`) for provider, model, API key (masked), streaming toggle.
38
+ 4. **`package.json` `"bin"` field**: Maps `joone` → `./dist/cli/index.js`.
39
+
40
+ ### 2d. Streaming Support
41
+
42
+ 1. **`ExecutionHarness.streamStep()`**: New method using `this.llm.stream(messages)`.
43
+ 2. **Text chunks**: Printed to `process.stdout` in real-time.
44
+ 3. **Tool call chunks**: Buffered until the full tool call JSON is received, then executed via the middleware pipeline.
45
+ 4. **Config flag**: `streaming: true` (default). Disable via `joone config` or `--no-stream` flag.
46
+
47
+ ### 2e. Security Tiers (Phased)
48
+
49
+ 1. **Tier 1 (Now)**: Plain `config.json` + `chmod 600` + masked input.
50
+ 2. **Tier 2 (Planned)**: OS Keychain via `keytar` — user selects during onboarding.
51
+ 3. **Tier 3 (Planned)**: AES-256 encrypted config with machine-derived key — user selects during onboarding.
52
+
53
+ ### TDD Test Plan (Vertical Slices)
54
+
55
+ | # | RED Test | GREEN Implementation |
56
+ | --- | -------------------------------------------------------------------------- | ------------------------------------ |
57
+ | 1 | `loadConfig` returns defaults when no file exists | `loadConfig()` with default fallback |
58
+ | 2 | `saveConfig` writes JSON and `loadConfig` reads it back | `saveConfig()` implementation |
59
+ | 3 | `loadConfig` falls back to env var if `apiKey` is missing | Env var fallback logic |
60
+ | 4 | `createModel` returns `ChatAnthropic` when provider is `"anthropic"` | Factory Anthropic branch |
61
+ | 5 | `createModel` returns `ChatOpenAI` when provider is `"openai"` | Factory OpenAI branch |
62
+ | 6 | `createModel` throws descriptive error if API key missing | Key validation |
63
+ | 7 | `createModel` throws with install instructions if provider package missing | Dynamic import error handling |
64
+ | 8 | `streamStep` emits text chunks to provided callback | Stream handler implementation |
65
+ | 9 | `streamStep` buffers tool call JSON and returns complete `AIMessage` | Tool call buffering |
66
+
67
+ ---
68
+
69
+ ## Milestone 3: Hybrid Sandbox Integration
70
+
71
+ **Goal:** Route all agent code execution through isolated E2B cloud microVMs while keeping file I/O on the host for real-time IDE visibility.
72
+
73
+ ### 3a. E2B Sandbox Lifecycle (`src/sandbox/manager.ts`)
74
+
75
+ 1. **Install E2B SDK**: Add `e2b` to dependencies.
76
+ 2. **`SandboxManager`**: Class that creates/destroys an E2B sandbox per session.
77
+ 3. **Timeout & Cleanup**: Auto-destroy sandbox after configurable idle timeout.
78
+
79
+ ### 3b. File Sync Layer (`src/sandbox/sync.ts`)
80
+
81
+ 1. **Change Tracker**: Track which host files have been modified since last sync using file mtimes or a dirty set.
82
+ 2. **`syncToSandbox()`**: Upload only changed files to `/workspace/` in the sandbox before each execution.
83
+ 3. **Initial Sync**: On session start, upload the full project directory.
84
+
85
+ ### 3c. Tool Router (`src/tools/router.ts`)
86
+
87
+ 1. **Host tools**: `write_file`, `read_file` → execute via Node.js `fs` on the host.
88
+ 2. **Sandbox tools**: `bash`, `run_tests`, `install_deps` → sync files, then execute via `sandbox.commands.run()`.
89
+ 3. **Automatic routing**: Tool router determines target based on tool type.
90
+
91
+ ### 3d. Rewire Existing Tools
92
+
93
+ 1. **`BashTool`**: Remove stub, connect to `sandbox.commands.run()`.
94
+ 2. **`ReadFileTool`**: Keep on host, add size guardrail.
95
+ 3. **`WriteFileTool`**: Keep on host, mark file as dirty for next sync.
96
+
97
+ ### TDD Test Plan
98
+
99
+ | # | Test | Behavior |
100
+ | --- | ------------------------------------------------ | ----------------- |
101
+ | 1 | `SandboxManager.create()` initializes a sandbox | Session lifecycle |
102
+ | 2 | `SandboxManager.destroy()` cleans up the sandbox | Teardown |
103
+ | 3 | `syncToSandbox()` uploads dirty files | Change tracking |
104
+ | 4 | Tool router sends `write_file` to host | Host routing |
105
+ | 5 | Tool router sends `bash` to sandbox | Sandbox routing |
106
+
107
+ ---
108
+
109
+ ## Milestone 3.5: Security Scanning Tool
110
+
111
+ **Goal:** Give the agent the ability to scan code for security vulnerabilities using the Gemini CLI Security Extension, with a native LLM-powered fallback.
112
+
113
+ ### 3.5a. SecurityScanTool (`src/tools/security.ts`)
114
+
115
+ 1. **Gemini CLI path** (preferred): Shell out to `gemini -x security:analyze` via sandbox.
116
+ 2. **Native LLM fallback**: Use the agent's configured LLM with a security-focused prompt to analyze code diffs.
117
+ 3. Accepts `target`: `"changes"` | `"file"` | `"deps"`, optional `path`.
118
+
119
+ ### 3.5b. DepScanTool (`src/tools/depScan.ts`)
120
+
121
+ 1. **OSV-Scanner**: Run `osv-scanner --json .` in sandbox, parse JSON results.
122
+ 2. **Fallback**: Run `npm audit --json` if OSV-Scanner is not installed.
123
+
124
+ ### 3.5c. Tool Registration
125
+
126
+ 1. Add both tools to `CORE_TOOLS` in `tools/index.ts`.
127
+ 2. Add `security_scan` and `dep_scan` to `SANDBOX_TOOLS` in `tools/router.ts`.
128
+ 3. Add security scan stubs to `DeferredToolsDB` in `tools/registry.ts`.
129
+
130
+ ### TDD Test Plan
131
+
132
+ | # | Test | Behavior |
133
+ | --- | --------------------------------------------------------- | ----------- |
134
+ | 1 | SecurityScanTool returns report when Gemini CLI available | Shell path |
135
+ | 2 | SecurityScanTool falls back to LLM analysis | Native path |
136
+ | 3 | DepScanTool parses OSV-Scanner JSON output | Dep scan |
137
+ | 4 | DepScanTool falls back to `npm audit` | Fallback |
138
+ | 5 | ToolRouter routes both to sandbox | Routing |
139
+
140
+ ## Milestone 4: Harness Engineering & Middlewares
141
+
142
+ **Goal:** Make the agent resilient. Stop it from breaking itself.
143
+
144
+ 1. **Middleware Pipeline Pattern**: Implement a generic pre/post execution hook system for tool calls.
145
+ 2. **Build `LoopDetectionMiddleware`**: Track hashes or signatures of tool calls. Raise an error or warning when an identical call is repeated.
146
+ 3. **Build `SafeguardMiddleware`**: Prevent massive file reads.
147
+ 4. **Build `PreCompletionMiddleware`**: Intercept task completion and require proof of verification (e.g. running tests).
148
+
149
+ ## Milestone 5: Advanced Optimizations
150
+
151
+ **Goal:** Scale the agent for complex workspaces and heavy memory.
152
+
153
+ 1. **Tool Lazy Loading**: Implement the "Tool Search" mechanism for dynamic capabilities.
154
+ 2. **Context Compaction**: Implement Cache-Safe Forking. When tokens hit 80% capacity, summarize earlier messages, retaining the static prefix format.
155
+ 3. **Reasoning Sandwich**: Implement dynamic logic routing. Allow the agent to use `high-reasoning` mode for planning, and drop to `medium-reasoning` for mechanical typing.
156
+
157
+ ## Milestone 5.5: Browser, Web Search & Skills
158
+
159
+ **Goal:** Give the agent internet access, browser interaction, and extensible skill loading.
160
+
161
+ ### 5.5a. Browser Tool (`agent-browser`)
162
+
163
+ - Wrap Vercel Labs' `agent-browser` CLI via sandbox shell calls
164
+ - Commands: `navigate`, `snapshot` (accessibility tree), `click`, `type`, `screenshot`, `scroll`
165
+ - Lazy-installed in dev, pre-baked in prod template
166
+
167
+ ### 5.5b. Web Search Tool (`@valyu/ai-sdk`)
168
+
169
+ - AI-native search via Valyu API (runs on Host)
170
+ - Sources: web, papers (arXiv/PubMed), finance, patents, SEC filings, companies
171
+ - API key stored in config (`valyuApiKey`)
172
+
173
+ ### 5.5c. Skills System
174
+
175
+ - Discovery paths: `./skills/`, `./.agents/skills/`, `~/.joone/skills/`, `~/.agents/skills/`
176
+ - SKILL.md format: YAML frontmatter (name, description) + markdown instructions
177
+ - Tools: `search_skills`, `load_skill` (injects into conversation as system-reminder)
178
+ - Project skills override user skills with the same name
179
+
180
+ ## Milestone 6: Tracing & Refinement
181
+
182
+ **Goal:** Monitor performance and improve via feedback.
183
+
184
+ 1. **Integrate Tracing**: (LangSmith / LangFuse / OpenTelemetry) to track exact costs, cache hit rates, and execution paths.
185
+ 2. **Trace Analyzer Subagent**: Build the offline script that reads failed traces and outputs summaries for human harness engineers.
186
+
187
+ ## Milestone 7: Testing & Evaluations (TDD - Ongoing)
188
+
189
+ **Goal:** Ensure the context boundaries, middlewares, and tools function flawlessly before production. This milestone runs **in parallel** with all others via TDD.
190
+
191
+ 1. ~~**Setup Vitest**~~
192
+ 2. **Unit Testing (Red-Green-Refactor)**:
193
+ - ~~`CacheOptimizedPromptBuilder` (5/5 GREEN)~~
194
+ - `ConfigManager`: loadConfig, saveConfig, env fallback
195
+ - `ModelFactory`: provider switching, error handling
196
+ - `MiddlewarePipeline`: Loop detection, pre-completion interception
197
+ - `SandboxManager`: create/destroy lifecycle hooks
198
+ 3. **E2E Evaluations (Evals)**:
199
+ - Hook LangSmith datasets up to the `ExecutionHarness` to run regression tests against known code tasks.
200
+ - Add **Cache Hit Rate** assertions (e.g., assert CacheHit > 90% over a 10-turn conversation).
package/e2b/Dockerfile ADDED
@@ -0,0 +1,26 @@
1
+ # joone-base: Pre-baked E2B sandbox template for production.
2
+ # All security and development tools are pre-installed for zero startup cost.
3
+ #
4
+ # Build command:
5
+ # e2b template create --name joone-base --dockerfile ./e2b/Dockerfile
6
+ #
7
+ # Usage in config (~/.joone/config.json):
8
+ # { "sandboxTemplate": "joone-base" }
9
+
10
+ FROM e2b/base
11
+
12
+ # Install the Gemini CLI (global npm package)
13
+ RUN npm install -g @google/gemini-cli
14
+
15
+ # Install Gemini CLI security extension
16
+ RUN gemini extensions install https://github.com/gemini-cli-extensions/security
17
+
18
+ # Install OSV-Scanner for dependency vulnerability scanning
19
+ RUN curl -sSfL https://github.com/google/osv-scanner/releases/latest/download/osv-scanner_linux_amd64 \
20
+ -o /usr/local/bin/osv-scanner && \
21
+ chmod +x /usr/local/bin/osv-scanner
22
+
23
+ # Verify installations
24
+ RUN gemini --version && osv-scanner --version
25
+
26
+ WORKDIR /workspace
package/package.json ADDED
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "joonecli",
3
+ "version": "0.1.0",
4
+ "description": "An autonomous coding agent",
5
+ "main": "dist/cli/index.js",
6
+ "bin": {
7
+ "joone": "./dist/cli/index.js"
8
+ },
9
+ "directories": {
10
+ "doc": "docs"
11
+ },
12
+ "scripts": {
13
+ "build": "tsc",
14
+ "test": "vitest run",
15
+ "test:watch": "vitest"
16
+ },
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "git+https://github.com/tuzzy08/joone.git"
20
+ },
21
+ "keywords": [],
22
+ "author": "",
23
+ "license": "ISC",
24
+ "type": "module",
25
+ "bugs": {
26
+ "url": "https://github.com/tuzzy08/joone/issues"
27
+ },
28
+ "homepage": "https://github.com/tuzzy08/joone#readme",
29
+ "devDependencies": {
30
+ "@types/cross-spawn": "^6.0.6",
31
+ "@types/node": "^25.3.0",
32
+ "@types/react": "^19.2.14",
33
+ "tsx": "^4.21.0",
34
+ "typescript": "^5.9.3",
35
+ "vitest": "^4.0.18"
36
+ },
37
+ "dependencies": {
38
+ "@alibaba-group/opensandbox": "^0.1.4",
39
+ "@alibaba-group/opensandbox-code-interpreter": "^0.1.3-dev1",
40
+ "@clack/prompts": "^1.0.1",
41
+ "@langchain/anthropic": "^1.3.19",
42
+ "@langchain/core": "^1.1.27",
43
+ "@langchain/openai": "^1.2.11",
44
+ "chalk": "^5.6.2",
45
+ "commander": "^14.0.3",
46
+ "cross-spawn": "^7.0.6",
47
+ "dotenv": "^17.3.1",
48
+ "e2b": "^2.13.0",
49
+ "ink": "^6.8.0",
50
+ "ink-spinner": "^5.0.0",
51
+ "ink-text-input": "^6.0.0",
52
+ "langchain": "^1.2.25",
53
+ "langsmith": "^0.5.7",
54
+ "react": "^19.2.4",
55
+ "zod": "^4.3.6"
56
+ }
57
+ }
@@ -0,0 +1,111 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+ import { LazyInstaller } from "../sandbox/bootstrap.js";
3
+ import { SandboxManager } from "../sandbox/manager.js";
4
+
5
+ // Mock SandboxManager
6
+ const createMockSandbox = () => ({
7
+ exec: vi.fn(),
8
+ isActive: vi.fn().mockReturnValue(true),
9
+ create: vi.fn(),
10
+ destroy: vi.fn(),
11
+ uploadFile: vi.fn(),
12
+ getSandbox: vi.fn(),
13
+ });
14
+
15
+ describe("LazyInstaller", () => {
16
+ let mockSandbox: ReturnType<typeof createMockSandbox>;
17
+
18
+ beforeEach(() => {
19
+ vi.clearAllMocks();
20
+ mockSandbox = createMockSandbox();
21
+ });
22
+
23
+ // ─── Test #34: Custom template skips all installs ───
24
+
25
+ it("skips installation when using a custom template", async () => {
26
+ const installer = new LazyInstaller(true);
27
+
28
+ expect(installer.isGeminiCliReady()).toBe(true);
29
+ expect(installer.isOsvScannerReady()).toBe(true);
30
+
31
+ // Should not call exec at all
32
+ const result = await installer.ensureGeminiCli(
33
+ mockSandbox as unknown as SandboxManager
34
+ );
35
+ expect(result).toBe(true);
36
+ expect(mockSandbox.exec).not.toHaveBeenCalled();
37
+ });
38
+
39
+ // ─── Test #35: Dev mode installs Gemini CLI on first use ───
40
+
41
+ it("installs Gemini CLI on first call in dev mode", async () => {
42
+ const installer = new LazyInstaller(false);
43
+
44
+ // First check fails (not installed), then install succeeds, then extension succeeds
45
+ mockSandbox.exec
46
+ .mockRejectedValueOnce(new Error("not found")) // version check
47
+ .mockResolvedValueOnce({ exitCode: 0, stdout: "installed", stderr: "" }) // npm install
48
+ .mockResolvedValueOnce({ exitCode: 0, stdout: "ok", stderr: "" }); // extension install
49
+
50
+ const result = await installer.ensureGeminiCli(
51
+ mockSandbox as unknown as SandboxManager
52
+ );
53
+
54
+ expect(result).toBe(true);
55
+ expect(installer.isGeminiCliReady()).toBe(true);
56
+ });
57
+
58
+ // ─── Test #36: Caches install state — second call is a no-op ───
59
+
60
+ it("does not re-install on second call (cached)", async () => {
61
+ const installer = new LazyInstaller(false);
62
+
63
+ // First: fails check, succeeds install + extension
64
+ mockSandbox.exec
65
+ .mockRejectedValueOnce(new Error("not found"))
66
+ .mockResolvedValueOnce({ exitCode: 0, stdout: "", stderr: "" })
67
+ .mockResolvedValueOnce({ exitCode: 0, stdout: "", stderr: "" });
68
+
69
+ await installer.ensureGeminiCli(mockSandbox as unknown as SandboxManager);
70
+ mockSandbox.exec.mockClear();
71
+
72
+ // Second call — should return immediately
73
+ const result = await installer.ensureGeminiCli(
74
+ mockSandbox as unknown as SandboxManager
75
+ );
76
+ expect(result).toBe(true);
77
+ expect(mockSandbox.exec).not.toHaveBeenCalled();
78
+ });
79
+
80
+ // ─── Test #37: Returns false if install fails ───
81
+
82
+ it("returns false if Gemini CLI installation fails", async () => {
83
+ const installer = new LazyInstaller(false);
84
+
85
+ mockSandbox.exec
86
+ .mockRejectedValueOnce(new Error("not found")) // version check
87
+ .mockResolvedValueOnce({ exitCode: 1, stdout: "", stderr: "error" }); // install fails
88
+
89
+ const result = await installer.ensureGeminiCli(
90
+ mockSandbox as unknown as SandboxManager
91
+ );
92
+ expect(result).toBe(false);
93
+ expect(installer.isGeminiCliReady()).toBe(false);
94
+ });
95
+
96
+ // ─── Test #38: OSV-Scanner install attempt ───
97
+
98
+ it("installs OSV-Scanner via curl when not available", async () => {
99
+ const installer = new LazyInstaller(false);
100
+
101
+ mockSandbox.exec
102
+ .mockRejectedValueOnce(new Error("not found")) // version check
103
+ .mockResolvedValueOnce({ exitCode: 0, stdout: "", stderr: "" }); // curl install
104
+
105
+ const result = await installer.ensureOsvScanner(
106
+ mockSandbox as unknown as SandboxManager
107
+ );
108
+ expect(result).toBe(true);
109
+ expect(installer.isOsvScannerReady()).toBe(true);
110
+ });
111
+ });