joonecli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/AGENTS.md +56 -0
  2. package/Handover.md +115 -0
  3. package/LICENSE +201 -0
  4. package/PROGRESS.md +160 -0
  5. package/README.md +114 -0
  6. package/dist/__tests__/bootstrap.test.d.ts +1 -0
  7. package/dist/__tests__/bootstrap.test.js +76 -0
  8. package/dist/__tests__/bootstrap.test.js.map +1 -0
  9. package/dist/__tests__/config.test.d.ts +1 -0
  10. package/dist/__tests__/config.test.js +84 -0
  11. package/dist/__tests__/config.test.js.map +1 -0
  12. package/dist/__tests__/m55.test.d.ts +1 -0
  13. package/dist/__tests__/m55.test.js +160 -0
  14. package/dist/__tests__/m55.test.js.map +1 -0
  15. package/dist/__tests__/middleware.test.d.ts +1 -0
  16. package/dist/__tests__/middleware.test.js +169 -0
  17. package/dist/__tests__/middleware.test.js.map +1 -0
  18. package/dist/__tests__/modelFactory.test.d.ts +1 -0
  19. package/dist/__tests__/modelFactory.test.js +50 -0
  20. package/dist/__tests__/modelFactory.test.js.map +1 -0
  21. package/dist/__tests__/optimizations.test.d.ts +1 -0
  22. package/dist/__tests__/optimizations.test.js +136 -0
  23. package/dist/__tests__/optimizations.test.js.map +1 -0
  24. package/dist/__tests__/promptBuilder.test.d.ts +1 -0
  25. package/dist/__tests__/promptBuilder.test.js +108 -0
  26. package/dist/__tests__/promptBuilder.test.js.map +1 -0
  27. package/dist/__tests__/sandbox.test.d.ts +1 -0
  28. package/dist/__tests__/sandbox.test.js +78 -0
  29. package/dist/__tests__/sandbox.test.js.map +1 -0
  30. package/dist/__tests__/security.test.d.ts +1 -0
  31. package/dist/__tests__/security.test.js +86 -0
  32. package/dist/__tests__/security.test.js.map +1 -0
  33. package/dist/__tests__/streaming.test.d.ts +1 -0
  34. package/dist/__tests__/streaming.test.js +71 -0
  35. package/dist/__tests__/streaming.test.js.map +1 -0
  36. package/dist/__tests__/toolRouter.test.d.ts +1 -0
  37. package/dist/__tests__/toolRouter.test.js +37 -0
  38. package/dist/__tests__/toolRouter.test.js.map +1 -0
  39. package/dist/__tests__/tools.test.d.ts +1 -0
  40. package/dist/__tests__/tools.test.js +112 -0
  41. package/dist/__tests__/tools.test.js.map +1 -0
  42. package/dist/__tests__/tracing.test.d.ts +1 -0
  43. package/dist/__tests__/tracing.test.js +147 -0
  44. package/dist/__tests__/tracing.test.js.map +1 -0
  45. package/dist/cli/config.d.ts +49 -0
  46. package/dist/cli/config.js +86 -0
  47. package/dist/cli/config.js.map +1 -0
  48. package/dist/cli/index.d.ts +2 -0
  49. package/dist/cli/index.js +625 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/modelFactory.d.ts +9 -0
  52. package/dist/cli/modelFactory.js +154 -0
  53. package/dist/cli/modelFactory.js.map +1 -0
  54. package/dist/cli/providers.d.ts +18 -0
  55. package/dist/cli/providers.js +94 -0
  56. package/dist/cli/providers.js.map +1 -0
  57. package/dist/core/agentLoop.d.ts +43 -0
  58. package/dist/core/agentLoop.js +245 -0
  59. package/dist/core/agentLoop.js.map +1 -0
  60. package/dist/core/errors.d.ts +62 -0
  61. package/dist/core/errors.js +139 -0
  62. package/dist/core/errors.js.map +1 -0
  63. package/dist/core/promptBuilder.d.ts +49 -0
  64. package/dist/core/promptBuilder.js +84 -0
  65. package/dist/core/promptBuilder.js.map +1 -0
  66. package/dist/core/reasoningRouter.d.ts +62 -0
  67. package/dist/core/reasoningRouter.js +102 -0
  68. package/dist/core/reasoningRouter.js.map +1 -0
  69. package/dist/core/retry.d.ts +25 -0
  70. package/dist/core/retry.js +49 -0
  71. package/dist/core/retry.js.map +1 -0
  72. package/dist/core/sessionResumer.d.ts +17 -0
  73. package/dist/core/sessionResumer.js +78 -0
  74. package/dist/core/sessionResumer.js.map +1 -0
  75. package/dist/core/sessionStore.d.ts +45 -0
  76. package/dist/core/sessionStore.js +167 -0
  77. package/dist/core/sessionStore.js.map +1 -0
  78. package/dist/core/tokenCounter.d.ts +17 -0
  79. package/dist/core/tokenCounter.js +54 -0
  80. package/dist/core/tokenCounter.js.map +1 -0
  81. package/dist/evals/dataset.d.ts +4 -0
  82. package/dist/evals/dataset.js +61 -0
  83. package/dist/evals/dataset.js.map +1 -0
  84. package/dist/evals/evaluator.d.ts +21 -0
  85. package/dist/evals/evaluator.js +68 -0
  86. package/dist/evals/evaluator.js.map +1 -0
  87. package/dist/hitl/bridge.d.ts +65 -0
  88. package/dist/hitl/bridge.js +120 -0
  89. package/dist/hitl/bridge.js.map +1 -0
  90. package/dist/middleware/commandSanitizer.d.ts +18 -0
  91. package/dist/middleware/commandSanitizer.js +50 -0
  92. package/dist/middleware/commandSanitizer.js.map +1 -0
  93. package/dist/middleware/loopDetection.d.ts +28 -0
  94. package/dist/middleware/loopDetection.js +49 -0
  95. package/dist/middleware/loopDetection.js.map +1 -0
  96. package/dist/middleware/permission.d.ts +17 -0
  97. package/dist/middleware/permission.js +59 -0
  98. package/dist/middleware/permission.js.map +1 -0
  99. package/dist/middleware/pipeline.d.ts +31 -0
  100. package/dist/middleware/pipeline.js +62 -0
  101. package/dist/middleware/pipeline.js.map +1 -0
  102. package/dist/middleware/preCompletion.d.ts +29 -0
  103. package/dist/middleware/preCompletion.js +82 -0
  104. package/dist/middleware/preCompletion.js.map +1 -0
  105. package/dist/middleware/types.d.ts +40 -0
  106. package/dist/middleware/types.js +8 -0
  107. package/dist/middleware/types.js.map +1 -0
  108. package/dist/sandbox/bootstrap.d.ts +38 -0
  109. package/dist/sandbox/bootstrap.js +107 -0
  110. package/dist/sandbox/bootstrap.js.map +1 -0
  111. package/dist/sandbox/manager.d.ts +72 -0
  112. package/dist/sandbox/manager.js +180 -0
  113. package/dist/sandbox/manager.js.map +1 -0
  114. package/dist/sandbox/sync.d.ts +55 -0
  115. package/dist/sandbox/sync.js +135 -0
  116. package/dist/sandbox/sync.js.map +1 -0
  117. package/dist/skills/loader.d.ts +55 -0
  118. package/dist/skills/loader.js +132 -0
  119. package/dist/skills/loader.js.map +1 -0
  120. package/dist/skills/tools.d.ts +5 -0
  121. package/dist/skills/tools.js +78 -0
  122. package/dist/skills/tools.js.map +1 -0
  123. package/dist/skills/types.d.ts +13 -0
  124. package/dist/skills/types.js +2 -0
  125. package/dist/skills/types.js.map +1 -0
  126. package/dist/test_cache.d.ts +1 -0
  127. package/dist/test_cache.js +55 -0
  128. package/dist/test_cache.js.map +1 -0
  129. package/dist/test_google.js +93 -0
  130. package/dist/tools/askUser.d.ts +10 -0
  131. package/dist/tools/askUser.js +42 -0
  132. package/dist/tools/askUser.js.map +1 -0
  133. package/dist/tools/browser.d.ts +19 -0
  134. package/dist/tools/browser.js +111 -0
  135. package/dist/tools/browser.js.map +1 -0
  136. package/dist/tools/index.d.ts +27 -0
  137. package/dist/tools/index.js +184 -0
  138. package/dist/tools/index.js.map +1 -0
  139. package/dist/tools/registry.d.ts +31 -0
  140. package/dist/tools/registry.js +168 -0
  141. package/dist/tools/registry.js.map +1 -0
  142. package/dist/tools/router.d.ts +34 -0
  143. package/dist/tools/router.js +73 -0
  144. package/dist/tools/router.js.map +1 -0
  145. package/dist/tools/security.d.ts +28 -0
  146. package/dist/tools/security.js +183 -0
  147. package/dist/tools/security.js.map +1 -0
  148. package/dist/tools/webSearch.d.ts +6 -0
  149. package/dist/tools/webSearch.js +120 -0
  150. package/dist/tools/webSearch.js.map +1 -0
  151. package/dist/tracing/analyzer.d.ts +58 -0
  152. package/dist/tracing/analyzer.js +190 -0
  153. package/dist/tracing/analyzer.js.map +1 -0
  154. package/dist/tracing/langsmith.d.ts +38 -0
  155. package/dist/tracing/langsmith.js +50 -0
  156. package/dist/tracing/langsmith.js.map +1 -0
  157. package/dist/tracing/sessionTracer.d.ts +73 -0
  158. package/dist/tracing/sessionTracer.js +157 -0
  159. package/dist/tracing/sessionTracer.js.map +1 -0
  160. package/dist/tracing/types.d.ts +46 -0
  161. package/dist/tracing/types.js +5 -0
  162. package/dist/tracing/types.js.map +1 -0
  163. package/dist/ui/App.d.ts +24 -0
  164. package/dist/ui/App.js +172 -0
  165. package/dist/ui/App.js.map +1 -0
  166. package/dist/ui/components/HITLPrompt.d.ts +15 -0
  167. package/dist/ui/components/HITLPrompt.js +35 -0
  168. package/dist/ui/components/HITLPrompt.js.map +1 -0
  169. package/dist/ui/components/Header.d.ts +8 -0
  170. package/dist/ui/components/Header.js +6 -0
  171. package/dist/ui/components/Header.js.map +1 -0
  172. package/dist/ui/components/MessageBubble.d.ts +13 -0
  173. package/dist/ui/components/MessageBubble.js +17 -0
  174. package/dist/ui/components/MessageBubble.js.map +1 -0
  175. package/dist/ui/components/StatusBar.d.ts +21 -0
  176. package/dist/ui/components/StatusBar.js +34 -0
  177. package/dist/ui/components/StatusBar.js.map +1 -0
  178. package/dist/ui/components/StreamingText.d.ts +13 -0
  179. package/dist/ui/components/StreamingText.js +24 -0
  180. package/dist/ui/components/StreamingText.js.map +1 -0
  181. package/dist/ui/components/ToolCallPanel.d.ts +15 -0
  182. package/dist/ui/components/ToolCallPanel.js +18 -0
  183. package/dist/ui/components/ToolCallPanel.js.map +1 -0
  184. package/docs/01_insights_and_patterns.md +27 -0
  185. package/docs/02_edge_cases_and_mitigations.md +143 -0
  186. package/docs/03_initial_implementation_plan.md +66 -0
  187. package/docs/04_tech_stack_proposal.md +20 -0
  188. package/docs/05_prd.md +87 -0
  189. package/docs/06_user_stories.md +72 -0
  190. package/docs/07_system_architecture.md +138 -0
  191. package/docs/08_roadmap.md +200 -0
  192. package/e2b/Dockerfile +26 -0
  193. package/package.json +57 -0
  194. package/src/__tests__/bootstrap.test.ts +111 -0
  195. package/src/__tests__/config.test.ts +97 -0
  196. package/src/__tests__/m55.test.ts +238 -0
  197. package/src/__tests__/middleware.test.ts +219 -0
  198. package/src/__tests__/modelFactory.test.ts +63 -0
  199. package/src/__tests__/optimizations.test.ts +201 -0
  200. package/src/__tests__/promptBuilder.test.ts +141 -0
  201. package/src/__tests__/sandbox.test.ts +102 -0
  202. package/src/__tests__/security.test.ts +122 -0
  203. package/src/__tests__/streaming.test.ts +82 -0
  204. package/src/__tests__/toolRouter.test.ts +52 -0
  205. package/src/__tests__/tools.test.ts +146 -0
  206. package/src/__tests__/tracing.test.ts +196 -0
  207. package/src/agents/agentRegistry.ts +69 -0
  208. package/src/agents/agentSpec.ts +67 -0
  209. package/src/agents/builtinAgents.ts +142 -0
  210. package/src/cli/config.ts +124 -0
  211. package/src/cli/index.ts +730 -0
  212. package/src/cli/modelFactory.ts +174 -0
  213. package/src/cli/providers.ts +107 -0
  214. package/src/commands/builtinCommands.ts +293 -0
  215. package/src/commands/commandRegistry.ts +194 -0
  216. package/src/core/agentLoop.d.ts.map +1 -0
  217. package/src/core/agentLoop.ts +312 -0
  218. package/src/core/autoSave.ts +95 -0
  219. package/src/core/compactor.ts +252 -0
  220. package/src/core/contextGuard.ts +129 -0
  221. package/src/core/errors.ts +202 -0
  222. package/src/core/promptBuilder.d.ts.map +1 -0
  223. package/src/core/promptBuilder.ts +139 -0
  224. package/src/core/reasoningRouter.ts +121 -0
  225. package/src/core/retry.ts +75 -0
  226. package/src/core/sessionResumer.ts +90 -0
  227. package/src/core/sessionStore.ts +215 -0
  228. package/src/core/subAgent.ts +339 -0
  229. package/src/core/tokenCounter.ts +64 -0
  230. package/src/evals/dataset.ts +67 -0
  231. package/src/evals/evaluator.ts +81 -0
  232. package/src/hitl/bridge.ts +160 -0
  233. package/src/middleware/commandSanitizer.ts +60 -0
  234. package/src/middleware/loopDetection.ts +63 -0
  235. package/src/middleware/permission.ts +72 -0
  236. package/src/middleware/pipeline.ts +75 -0
  237. package/src/middleware/preCompletion.ts +94 -0
  238. package/src/middleware/types.ts +45 -0
  239. package/src/sandbox/bootstrap.ts +121 -0
  240. package/src/sandbox/manager.ts +239 -0
  241. package/src/sandbox/sync.ts +157 -0
  242. package/src/skills/loader.ts +143 -0
  243. package/src/skills/tools.ts +99 -0
  244. package/src/skills/types.ts +13 -0
  245. package/src/test_cache.ts +72 -0
  246. package/src/test_google.js +40 -0
  247. package/src/test_google.ts +40 -0
  248. package/src/tools/askUser.ts +47 -0
  249. package/src/tools/browser.ts +137 -0
  250. package/src/tools/index.d.ts.map +1 -0
  251. package/src/tools/index.ts +237 -0
  252. package/src/tools/registry.ts +198 -0
  253. package/src/tools/router.ts +78 -0
  254. package/src/tools/security.ts +220 -0
  255. package/src/tools/spawnAgent.ts +158 -0
  256. package/src/tools/webSearch.ts +142 -0
  257. package/src/tracing/analyzer.ts +265 -0
  258. package/src/tracing/langsmith.ts +63 -0
  259. package/src/tracing/sessionTracer.ts +202 -0
  260. package/src/tracing/types.ts +49 -0
  261. package/src/types/valyu.d.ts +37 -0
  262. package/src/ui/App.tsx +404 -0
  263. package/src/ui/components/HITLPrompt.tsx +119 -0
  264. package/src/ui/components/Header.tsx +51 -0
  265. package/src/ui/components/MessageBubble.tsx +46 -0
  266. package/src/ui/components/StatusBar.tsx +138 -0
  267. package/src/ui/components/StreamingText.tsx +48 -0
  268. package/src/ui/components/ToolCallPanel.tsx +80 -0
  269. package/tests/commands/commands.test.ts +356 -0
  270. package/tests/core/compactor.test.ts +217 -0
  271. package/tests/core/retryAndErrors.test.ts +164 -0
  272. package/tests/core/sessionResumer.test.ts +95 -0
  273. package/tests/core/sessionStore.test.ts +84 -0
  274. package/tests/core/stability.test.ts +165 -0
  275. package/tests/core/subAgent.test.ts +238 -0
  276. package/tests/hitl/hitlBridge.test.ts +115 -0
  277. package/tsconfig.json +16 -0
  278. package/vitest.config.ts +10 -0
  279. package/vitest.out +48 -0
@@ -0,0 +1,160 @@
1
+ import { EventEmitter } from "node:events";
2
+
3
/**
 * Payload describing a free-form question posed to the user.
 */
export interface HITLQuestion {
  /** Identifier used to match the eventual answer to this question. */
  id: string;

  /** Text shown to the user. */
  question: string;

  /** Predefined answer choices, when the question offers any. */
  options?: string[];

  /** Time the question was created, as returned by `Date.now()`. */
  createdAt: number;
}
13
+
14
/**
 * Payload describing a request for the user to approve a tool call.
 */
export interface HITLPermissionRequest {
  /** Identifier used to match the eventual decision to this request. */
  id: string;

  /** Name of the tool that wants to run. */
  toolName: string;

  /** Arguments the tool was invoked with. */
  args: Record<string, unknown>;

  /** Time the request was created, as returned by `Date.now()`. */
  createdAt: number;
}
24
+
25
/**
 * HITLBridge — Human-in-the-Loop communication bridge.
 *
 * Provides an event-based interface between the tool execution layer and
 * the TUI rendering layer. A caller emits a `"question"` or `"permission"`
 * event and awaits the response; a listener renders the prompt and hands the
 * answer back through {@link HITLBridge.resolveAnswer}.
 *
 * Singleton pattern: `getInstance()` hands out one shared bridge.
 */
export class HITLBridge extends EventEmitter {
  private static instance: HITLBridge | null = null;

  /** Resolvers for requests that have been emitted but not yet answered. */
  private pendingResolvers = new Map<string, (answer: string) => void>();
  private timeoutMs: number;
  private questionCounter = 0;

  /**
   * @param timeoutMs - How long to wait for an answer before falling back
   *   to the timeout result (default: five minutes).
   */
  constructor(timeoutMs: number = 5 * 60 * 1000) {
    super();
    this.timeoutMs = timeoutMs;
  }

  /**
   * Returns the shared bridge, creating it on first use.
   *
   * @param timeoutMs - Only honoured when the shared instance is created by
   *   this call; ignored if an instance already exists.
   */
  static getInstance(timeoutMs?: number): HITLBridge {
    if (!HITLBridge.instance) {
      HITLBridge.instance = new HITLBridge(timeoutMs);
    }
    return HITLBridge.instance;
  }

  /**
   * Drops the shared instance so the next `getInstance()` creates a new one.
   * Requests still pending on the previous instance are left untouched.
   */
  static resetInstance(): void {
    HITLBridge.instance = null;
  }

  /**
   * Called by a tool to ask the user a free-form question.
   * Settles when the user responds or the timeout elapses.
   *
   * @param question - Text to show to the user.
   * @param options - Optional predefined answer choices.
   * @returns The user's answer, or a bracketed "no response" notice when the
   *   timeout elapses first.
   */
  async askUser(question: string, options?: string[]): Promise<string> {
    const id = `hitl-q-${++this.questionCounter}-${Date.now()}`;

    const payload: HITLQuestion = {
      id,
      question,
      options,
      createdAt: Date.now(),
    };

    return this.awaitAnswer(
      id,
      "question",
      payload,
      (answer) => answer,
      () => "[No response — the user did not answer within the timeout period.]",
    );
  }

  /**
   * Called by the PermissionMiddleware to request tool execution approval.
   * Settles when the user responds or the timeout elapses.
   *
   * @param toolName - The tool requesting permission.
   * @param args - The arguments the tool was called with.
   * @returns true when the answer is "y", "yes" or "approve" (case and
   *   surrounding whitespace ignored); false for any other answer or on
   *   timeout.
   */
  async requestPermission(toolName: string, args: Record<string, unknown>): Promise<boolean> {
    const id = `hitl-perm-${++this.questionCounter}-${Date.now()}`;

    const payload: HITLPermissionRequest = {
      id,
      toolName,
      args,
      createdAt: Date.now(),
    };

    return this.awaitAnswer(
      id,
      "permission",
      payload,
      (answer) => {
        const normalized = answer.trim().toLowerCase();
        return normalized === "y" || normalized === "yes" || normalized === "approve";
      },
      () => false, // Denied by timeout
    );
  }

  /**
   * Called by the TUI when the user submits an answer.
   * Unknown or already-settled IDs are ignored.
   *
   * @param id - The question/permission request ID.
   * @param answer - The user's text response.
   */
  resolveAnswer(id: string, answer: string): void {
    const resolver = this.pendingResolvers.get(id);
    if (resolver) {
      this.pendingResolvers.delete(id);
      resolver(answer);
    }
  }

  /**
   * Returns true if there is an outstanding question or permission request
   * awaiting an answer.
   */
  hasPendingQuestion(): boolean {
    return this.pendingResolvers.size > 0;
  }

  /**
   * Registers a pending request, emits it, and settles with either the
   * mapped answer or the timeout fallback.
   *
   * The timer and resolver are installed BEFORE the event is emitted:
   * `emit` invokes listeners synchronously, so a listener may call
   * `resolveAnswer` before `emit` returns. Registering afterwards would
   * leave a stale resolver and a live timer behind for an already-answered
   * request.
   */
  private awaitAnswer<T>(
    id: string,
    event: "question" | "permission",
    payload: HITLQuestion | HITLPermissionRequest,
    onAnswer: (answer: string) => T,
    onTimeout: () => T,
  ): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const timer = setTimeout(() => {
        // Only fall back if nobody answered in the meantime.
        if (this.pendingResolvers.delete(id)) {
          resolve(onTimeout());
        }
      }, this.timeoutMs);

      this.pendingResolvers.set(id, (answer: string) => {
        clearTimeout(timer);
        resolve(onAnswer(answer));
      });

      try {
        this.emit(event, payload);
      } catch (error: unknown) {
        // A listener threw: do not leave the request dangling.
        clearTimeout(timer);
        this.pendingResolvers.delete(id);
        reject(error);
      }
    });
  }
}
@@ -0,0 +1,60 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+
3
/**
 * Intercepts bash tool calls to block dangerous or interactive commands.
 *
 * Categories of blocked commands:
 *   1. Destructive: `rm -rf /`, `mkfs`, `dd ... of=/dev/...`, `chmod 777 /...`, fork bombs
 *   2. Interactive/hanging: `vim`, `nano`, `less`, `top`, `htop`, `man`
 *   3. Network abuse: `curl | sh`, `wget | bash`
 */
export class CommandSanitizerMiddleware implements ToolMiddleware {
  readonly name = "CommandSanitizer";

  /**
   * Builds a pattern that matches one of `names` only where a shell would
   * treat it as the program being run: at the start of the command line or
   * right after `;`, `&`, `|`, a newline, `(` or a backtick. Leading
   * `VAR=value` assignments, `sudo`, and a directory prefix are allowed.
   *
   * Matching at command position keeps words that merely appear as arguments
   * or file names (`cat top.txt`, `npm install less`, `grep more`) from being
   * rejected.
   *
   * @param names - Regex alternation of program names, e.g. `"less|more"`.
   * @param tail - Regex source that must follow the name. The default
   *   requires the name to end there (whitespace, end of input, or a shell
   *   separator/redirection).
   */
  private static commandPattern(names: string, tail = "(?=$|[\\s;&|)`<>])"): RegExp {
    return new RegExp(
      "(?:^|[;&|\\n(`])\\s*(?:\\w+=\\S*\\s+)*(?:sudo\\s+)?(?:[\\w./-]*\\/)?(?:" + names + ")" + tail,
    );
  }

  /**
   * Patterns that will cause a command to be blocked.
   * Each entry is [regex, human-readable reason]. The first match wins.
   */
  private readonly blockedPatterns: [RegExp, string][] = [
    // Destructive
    [/rm\s+(-\w*r\w*f\w*|-\w*f\w*r\w*)\s+\/(\*)?(?:\s|$)/, "destructive: rm -rf /"],
    [/mkfs\b/, "destructive: filesystem format"],
    [/\bdd\s+.*of=\/dev\//, "destructive: raw disk write"],
    [/chmod\s+(-\w+\s+)*777\s+\//, "dangerous: chmod 777 on root"],
    // Classic bash fork bomb, e.g. `:(){ :|:& };:`
    [/:\s*\(\s*\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:/, "destructive: fork bomb"],

    // Interactive / hanging
    [
      CommandSanitizerMiddleware.commandPattern("vim|vi|nano|emacs|pico"),
      "interactive: text editor (hangs the sandbox)",
    ],
    [
      CommandSanitizerMiddleware.commandPattern("less|more"),
      "interactive: pager (hangs the sandbox)",
    ],
    [
      CommandSanitizerMiddleware.commandPattern("top|htop|glances"),
      "interactive: process monitor (hangs the sandbox)",
    ],
    [
      // Only `man <page>` is blocked.
      CommandSanitizerMiddleware.commandPattern("man", "\\s+\\w+"),
      "interactive: man page (hangs the sandbox)",
    ],

    // Network abuse: pipe-to-shell
    [/curl\s+.*\|\s*(sh|bash|zsh)/, "unsafe: pipe remote script to shell"],
    [/wget\s+.*\|\s*(sh|bash|zsh)/, "unsafe: pipe remote script to shell"],
  ];

  /**
   * Pre-execution hook.
   *
   * @param ctx - The tool call about to run.
   * @returns `ctx` unchanged when the call is not a bash call, has no string
   *   `command` argument, or matches no blocked pattern; otherwise a
   *   rejection message naming the reason and the offending command.
   */
  before(ctx: ToolCallContext): ToolCallContext | string {
    // Only applies to bash/shell tool calls
    if (ctx.toolName !== "bash") {
      return ctx;
    }

    const command = ctx.args.command;
    if (typeof command !== "string") {
      return ctx;
    }

    const hit = this.blockedPatterns.find(([pattern]) => pattern.test(command));
    if (hit === undefined) {
      return ctx;
    }

    const [, reason] = hit;
    return (
      `⚠ Blocked: Command rejected by sanitizer.\n` +
      `Reason: ${reason}\n` +
      `Command: ${command}\n` +
      `Use a safer alternative or refine your approach.`
    );
  }
}
@@ -0,0 +1,63 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+
3
/**
 * Prevents the "Blind Retry" doom loop.
 *
 * Tracks a rolling window of recent tool call signatures. If the same
 * tool + args combination appears N times consecutively, the call is
 * rejected with an instruction to try a different approach.
 *
 * Reference: docs/02_edge_cases_and_mitigations.md — "The Blind Retry Doom Loop"
 */
export class LoopDetectionMiddleware implements ToolMiddleware {
  readonly name = "LoopDetection";

  /** Signatures of the most recent calls, oldest first. */
  private history: string[] = [];
  private readonly threshold: number;

  /**
   * @param threshold - Number of identical consecutive calls before blocking (default: 3).
   */
  constructor(threshold = 3) {
    this.threshold = threshold;
  }

  /**
   * Returns a copy of `value` in which every plain object has its keys in
   * sorted order, recursively, so that serialising it does not depend on
   * property insertion order.
   */
  private static canonicalize(value: unknown): unknown {
    if (Array.isArray(value)) {
      return value.map((item) => LoopDetectionMiddleware.canonicalize(item));
    }
    if (value === null || typeof value !== "object") {
      return value;
    }
    // Leave objects with their own JSON representation (e.g. Date) to JSON.stringify.
    if (typeof (value as { toJSON?: unknown }).toJSON === "function") {
      return value;
    }

    const source = value as Record<string, unknown>;
    const sorted: Record<string, unknown> = {};
    for (const key of Object.keys(source).sort()) {
      sorted[key] = LoopDetectionMiddleware.canonicalize(source[key]);
    }
    return sorted;
  }

  /**
   * Creates a signature string for a tool call (name + key-sorted args JSON).
   *
   * The args are canonicalised recursively instead of passing the sorted
   * top-level keys as the `JSON.stringify` replacer: an array replacer acts
   * as a property allowlist at every nesting level, which would drop nested
   * fields and make calls with different nested arguments look identical.
   */
  private signature(ctx: ToolCallContext): string {
    return `${ctx.toolName}:${JSON.stringify(LoopDetectionMiddleware.canonicalize(ctx.args))}`;
  }

  /**
   * Pre-execution hook: records the call and rejects it when it is the
   * `threshold`-th identical call in a row.
   *
   * @param ctx - The tool call about to run.
   * @returns `ctx` unchanged, or a rejection message when a loop is detected.
   */
  before(ctx: ToolCallContext): ToolCallContext | string {
    const sig = this.signature(ctx);

    this.history.push(sig);

    // Keep only the most recent entries to avoid unbounded growth
    if (this.history.length > this.threshold * 2) {
      this.history = this.history.slice(-this.threshold * 2);
    }

    // Check if the last `threshold` entries are all identical
    const tail = this.history.slice(-this.threshold);
    if (tail.length >= this.threshold && tail.every((s) => s === sig)) {
      return (
        `⚠ Loop detected: You have called "${ctx.toolName}" with identical arguments ` +
        `${this.threshold} times consecutively. Stop this approach and try a different strategy.`
      );
    }

    return ctx;
  }

  /**
   * Resets the history. Useful for testing or session boundaries.
   */
  reset(): void {
    this.history = [];
  }
}
@@ -0,0 +1,72 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ import { HITLBridge } from "../hitl/bridge.js";
3
+ import { ToolResult } from "../tools/index.js";
4
+
5
/** Approval policy applied to tool calls. */
export type PermissionMode = "auto" | "ask_dangerous" | "ask_all";

/** Tools that never need user approval, whatever the mode. */
const SAFE_TOOLS = new Set<string>([
  "read_file",
  "view_file_outline",
  "search_skills",
  "load_skill",
  "search_tools",
  // Meta: the ask tool itself is always safe
  "ask_user_question",
]);

/** Tools treated as destructive or side-effect-heavy. */
const DANGEROUS_TOOLS = new Set<string>([
  "bash",
  "write_file",
  "replace_file_content",
  "multi_replace_file_content",
  "install_deps",
]);
25
+
26
/**
 * PermissionMiddleware — gates tool calls behind user approval.
 *
 * Behavior per mode:
 * - `auto`: nothing is gated; every call passes straight through.
 * - `ask_dangerous`: only tools listed in DANGEROUS_TOOLS need approval.
 * - `ask_all`: every tool that is not in SAFE_TOOLS needs approval.
 */
export class PermissionMiddleware implements ToolMiddleware {
  name = "PermissionMiddleware";
  private mode: PermissionMode;

  /**
   * @param mode - Approval policy to enforce (default: `"auto"`).
   */
  constructor(mode: PermissionMode = "auto") {
    this.mode = mode;
  }

  /**
   * Pre-execution hook. Asks the shared HITLBridge for approval when the
   * current mode requires it for this tool.
   *
   * @param ctx - The tool call about to run.
   * @returns `ctx` when the call may proceed, or a denial message (which
   *   short-circuits the call) when the user does not approve.
   */
  async before(ctx: ToolCallContext): Promise<ToolCallContext | string | void> {
    const { toolName } = ctx;

    if (this.mode === "auto" || !this.requiresApproval(toolName)) {
      return ctx;
    }

    const granted = await HITLBridge.getInstance().requestPermission(toolName, ctx.args);
    if (granted) {
      return ctx;
    }

    // Short-circuit: a string result denies the tool call
    return (
      `Permission denied: The user declined to approve the execution of "${toolName}". ` +
      `Try an alternative approach or ask the user for guidance using the ask_user_question tool.`
    );
  }

  /**
   * Decides whether `toolName` needs approval under the current mode.
   * SAFE_TOOLS are exempt in every mode.
   */
  private requiresApproval(toolName: string): boolean {
    if (SAFE_TOOLS.has(toolName)) {
      return false;
    }

    switch (this.mode) {
      case "ask_all":
        return true;
      case "ask_dangerous":
        return DANGEROUS_TOOLS.has(toolName);
      default:
        return false;
    }
  }
}
@@ -0,0 +1,75 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ import { ToolResult } from "../tools/index.js";
3
+
4
/**
 * Runs tool calls through a chain of middleware hooks.
 *
 * Execution flow:
 * 1. `before()` hooks run in registration order.
 *    - A hook that returns a string short-circuits: the tool is NOT executed
 *      and that string becomes the result.
 * 2. The actual tool function runs.
 * 3. `after()` hooks run in reverse registration order.
 *    - Each may replace the result before it is returned.
 */
export class MiddlewarePipeline {
  private middlewares: ToolMiddleware[] = [];

  /**
   * Appends a middleware; hooks run in the order middlewares were added.
   */
  use(middleware: ToolMiddleware): void {
    this.middlewares.push(middleware);
  }

  /**
   * Number of registered middlewares.
   */
  get length(): number {
    return this.middlewares.length;
  }

  /**
   * Executes a tool call through the middleware pipeline.
   *
   * @param ctx - The tool call context (name, args, callId).
   * @param executeFn - The actual tool execution function.
   * @returns The rejecting hook's message, or the `content` of the final
   *   (possibly transformed) tool result.
   */
  async run(
    ctx: ToolCallContext,
    executeFn: (ctx: ToolCallContext) => Promise<ToolResult> | ToolResult
  ): Promise<string> {
    // ── Before phase: registration order ──
    let activeCtx = ctx;
    for (const mw of this.middlewares) {
      if (!mw.before) continue;

      const verdict = await mw.before(activeCtx);
      if (typeof verdict === "string") {
        // A string means the middleware rejected the call
        return verdict;
      }
      if (verdict !== undefined) {
        activeCtx = verdict;
      }
    }

    // ── Execute the tool ──
    let result: ToolResult = await executeFn(activeCtx);

    // ── After phase: reverse registration order ──
    for (const mw of [...this.middlewares].reverse()) {
      if (!mw.after) continue;

      const replacement = await mw.after(activeCtx, result);
      if (replacement !== undefined) {
        result = replacement;
      }
    }

    return result.content;
  }
}
@@ -0,0 +1,94 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ import { ToolResult } from "../tools/index.js";
3
+
4
/**
 * Stops the agent from signalling completion before a test run has succeeded.
 *
 * The middleware watches bash commands that look like test invocations and
 * remembers whether the most recent one exited with code 0. Completion
 * tools are rejected until that is the case.
 *
 * Reference: docs/02_edge_cases_and_mitigations.md — "The Fake Success Verification"
 */
export class PreCompletionMiddleware implements ToolMiddleware {
  readonly name = "PreCompletion";

  /** True only while the latest recognised test command finished with exit code 0. */
  private testsPassed = false;

  /** Patterns in bash commands that count as "running tests". */
  private readonly testPatterns = [
    /\bvitest\b/,
    /\bjest\b/,
    /\bpytest\b/,
    /\bmocha\b/,
    /\bnpm\s+test\b/,
    /\bnpm\s+run\s+test\b/,
    /\byarn\s+test\b/,
    /\bpnpm\s+test\b/,
    /\bgo\s+test\b/,
    /\bcargo\s+test\b/,
  ];

  /** Tool names that signal the agent is trying to complete the task. */
  private readonly completionSignals = new Set([
    "task_complete",
    "attempt_completion",
    "finish_task",
    "submit_result",
  ]);

  /**
   * Pre-execution hook.
   *
   * @param ctx - The tool call about to run.
   * @returns A warning string when a completion tool is called without a
   *   passing test run on record; otherwise `ctx` unchanged.
   */
  before(ctx: ToolCallContext): ToolCallContext | string {
    // A test run that is just starting has not passed yet
    if (this.isTestCommand(ctx)) {
      this.testsPassed = false;
    }

    const completing = this.completionSignals.has(ctx.toolName);
    if (completing && !this.testsPassed) {
      return (
        "⚠ You must run tests before completing the task, AND they must pass.\n" +
        "Use the bash tool to execute your test suite (e.g., `npm test`, `vitest`, `pytest`).\n" +
        "If tests fail, fix the issues. Once tests pass cleanly, you may attempt completion again."
      );
    }

    return ctx;
  }

  /**
   * Post-execution hook: records the outcome of a recognised test command.
   * Results of any other call leave the state untouched.
   *
   * @param ctx - The tool call that just ran.
   * @param result - Its result; `metadata.exitCode` decides pass/fail.
   */
  after(ctx: ToolCallContext, result: ToolResult): void {
    if (!this.isTestCommand(ctx)) {
      return;
    }
    // Passing means exactly exit code 0; a missing exit code counts as not passed
    this.testsPassed = result.metadata?.exitCode === 0;
  }

  /**
   * Returns whether the latest recognised test run passed.
   */
  hasPassedTests(): boolean {
    return this.testsPassed;
  }

  /**
   * Resets state. Useful for testing or session boundaries.
   */
  reset(): void {
    this.testsPassed = false;
  }

  /** True when `ctx` is a bash call whose command matches a test pattern. */
  private isTestCommand(ctx: ToolCallContext): boolean {
    if (ctx.toolName !== "bash") {
      return false;
    }
    const command = ctx.args.command;
    return typeof command === "string" && this.testPatterns.some((pattern) => pattern.test(command));
  }
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Middleware types for the tool execution pipeline.
3
+ *
4
+ * Each middleware can hook into both the "before" and "after" phases
5
+ * of a tool call. The pipeline chains them in order.
6
+ */
7
+
8
+ import { ToolResult } from "../tools/index.js";
9
+
10
/**
 * Per-call context handed from one middleware to the next.
 */
export interface ToolCallContext {
  /** Tool being invoked, e.g. "bash" or "read_file". */
  toolName: string;

  /** Arguments supplied for the call. */
  args: Record<string, any>;

  /** Identifier of this tool call (from the LLM response). */
  callId: string;
}
21
+
22
/**
 * Contract for a middleware that can intercept a tool call on either side
 * of its execution.
 *
 * - `before()` runs ahead of the tool. Returning a context continues the
 *   chain with that context; returning a `string` short-circuits the call
 *   with that text as the error/warning.
 * - `after()` runs once the tool has finished and may replace the result
 *   before it enters the conversation history.
 */
export interface ToolMiddleware {
  /** Human-readable name for logging and debugging. */
  name: string;

  /**
   * Pre-execution hook.
   * @returns A ToolCallContext to continue with, a string to short-circuit,
   *   or nothing to pass the current context through unmodified.
   */
  before?(
    ctx: ToolCallContext
  ): Promise<ToolCallContext | string | void> | ToolCallContext | string | void;

  /**
   * Post-execution hook.
   * @returns The replacement tool result, or nothing to keep the current one.
   */
  after?(
    ctx: ToolCallContext,
    result: ToolResult
  ): Promise<ToolResult | void> | ToolResult | void;
}
@@ -0,0 +1,121 @@
1
+ import { SandboxManager } from "./manager.js";
2
+
3
/**
 * LazyInstaller handles on-demand tool installation inside the sandbox.
 *
 * - In DEV mode (no custom template), tools are installed lazily on first use.
 * - In PROD mode (custom template like "joone-base"), tools are assumed to be
 *   pre-baked, so every `ensure*` call is a no-op that returns true.
 *
 * Install state is cached on the instance, so once a tool has been found or
 * installed no further sandbox commands are issued for it.
 */
export class LazyInstaller {
  private geminiCliInstalled: boolean;
  private osvScannerInstalled: boolean;
  private readonly usingCustomTemplate: boolean;

  /**
   * @param usingCustomTemplate When true, all tools are treated as already
   *   installed and no probing or installation is ever attempted.
   */
  constructor(usingCustomTemplate: boolean) {
    this.usingCustomTemplate = usingCustomTemplate;

    // A custom template is assumed to ship with every tool pre-installed.
    this.geminiCliInstalled = this.usingCustomTemplate;
    this.osvScannerInstalled = this.usingCustomTemplate;
  }

  /**
   * Ensures Gemini CLI is available in the sandbox, installing it (plus the
   * security extension) when it is missing. No-op if using a custom template.
   *
   * The security extension is best-effort: once the CLI itself is installed
   * this method reports success whether the extension install exits non-zero
   * or throws, because the CLI is still partially useful without it.
   *
   * @param sandbox Sandbox in which commands are executed.
   * @returns true if Gemini CLI is now available.
   */
  async ensureGeminiCli(sandbox: SandboxManager): Promise<boolean> {
    if (this.geminiCliInstalled) return true;

    // Already on the PATH? Then there is nothing to install.
    if (await this.succeeds(sandbox, "gemini --version")) {
      this.geminiCliInstalled = true;
      return true;
    }

    // Install Gemini CLI globally; without it nothing else can work.
    const cliInstalled = await this.succeeds(
      sandbox,
      "npm install -g @google/gemini-cli 2>&1"
    );
    if (!cliInstalled) return false;

    // Record success before touching the extension so that a failure of the
    // optional step (non-zero exit OR thrown error) cannot mask the fact that
    // the CLI itself is ready.
    this.geminiCliInstalled = true;

    // Install security extension (best-effort; outcome intentionally ignored).
    await this.succeeds(
      sandbox,
      "gemini extensions install https://github.com/gemini-cli-extensions/security 2>&1"
    );

    return true;
  }

  /**
   * Ensures OSV-Scanner is available in the sandbox.
   * Falls back gracefully — callers should use npm audit if this returns false.
   *
   * @param sandbox Sandbox in which commands are executed.
   * @returns true if osv-scanner is now available.
   */
  async ensureOsvScanner(sandbox: SandboxManager): Promise<boolean> {
    if (this.osvScannerInstalled) return true;

    if (await this.succeeds(sandbox, "osv-scanner --version")) {
      this.osvScannerInstalled = true;
      return true;
    }

    // Download the prebuilt release binary and mark it executable.
    // NOTE(review): the URL hard-codes the linux_amd64 asset, and the trailing
    // `2>&1` only applies to `chmod`; confirm both are intended.
    const downloaded = await this.succeeds(
      sandbox,
      "curl -sSfL https://github.com/google/osv-scanner/releases/latest/download/osv-scanner_linux_amd64 -o /usr/local/bin/osv-scanner && chmod +x /usr/local/bin/osv-scanner 2>&1"
    );
    if (downloaded) {
      this.osvScannerInstalled = true;
    }
    return downloaded;
  }

  /**
   * Returns whether Gemini CLI is installed (cached state).
   */
  isGeminiCliReady(): boolean {
    return this.geminiCliInstalled;
  }

  /**
   * Returns whether OSV-Scanner is installed (cached state).
   */
  isOsvScannerReady(): boolean {
    return this.osvScannerInstalled;
  }

  /**
   * Runs a command in the sandbox and reports whether it exited with code 0.
   * A thrown error from `exec` is treated the same as a failing command.
   */
  private async succeeds(
    sandbox: SandboxManager,
    command: string
  ): Promise<boolean> {
    try {
      const outcome = await sandbox.exec(command);
      return outcome.exitCode === 0;
    } catch {
      return false;
    }
  }
}