joonecli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/AGENTS.md +56 -0
  2. package/Handover.md +115 -0
  3. package/LICENSE +201 -0
  4. package/PROGRESS.md +160 -0
  5. package/README.md +114 -0
  6. package/dist/__tests__/bootstrap.test.d.ts +1 -0
  7. package/dist/__tests__/bootstrap.test.js +76 -0
  8. package/dist/__tests__/bootstrap.test.js.map +1 -0
  9. package/dist/__tests__/config.test.d.ts +1 -0
  10. package/dist/__tests__/config.test.js +84 -0
  11. package/dist/__tests__/config.test.js.map +1 -0
  12. package/dist/__tests__/m55.test.d.ts +1 -0
  13. package/dist/__tests__/m55.test.js +160 -0
  14. package/dist/__tests__/m55.test.js.map +1 -0
  15. package/dist/__tests__/middleware.test.d.ts +1 -0
  16. package/dist/__tests__/middleware.test.js +169 -0
  17. package/dist/__tests__/middleware.test.js.map +1 -0
  18. package/dist/__tests__/modelFactory.test.d.ts +1 -0
  19. package/dist/__tests__/modelFactory.test.js +50 -0
  20. package/dist/__tests__/modelFactory.test.js.map +1 -0
  21. package/dist/__tests__/optimizations.test.d.ts +1 -0
  22. package/dist/__tests__/optimizations.test.js +136 -0
  23. package/dist/__tests__/optimizations.test.js.map +1 -0
  24. package/dist/__tests__/promptBuilder.test.d.ts +1 -0
  25. package/dist/__tests__/promptBuilder.test.js +108 -0
  26. package/dist/__tests__/promptBuilder.test.js.map +1 -0
  27. package/dist/__tests__/sandbox.test.d.ts +1 -0
  28. package/dist/__tests__/sandbox.test.js +78 -0
  29. package/dist/__tests__/sandbox.test.js.map +1 -0
  30. package/dist/__tests__/security.test.d.ts +1 -0
  31. package/dist/__tests__/security.test.js +86 -0
  32. package/dist/__tests__/security.test.js.map +1 -0
  33. package/dist/__tests__/streaming.test.d.ts +1 -0
  34. package/dist/__tests__/streaming.test.js +71 -0
  35. package/dist/__tests__/streaming.test.js.map +1 -0
  36. package/dist/__tests__/toolRouter.test.d.ts +1 -0
  37. package/dist/__tests__/toolRouter.test.js +37 -0
  38. package/dist/__tests__/toolRouter.test.js.map +1 -0
  39. package/dist/__tests__/tools.test.d.ts +1 -0
  40. package/dist/__tests__/tools.test.js +112 -0
  41. package/dist/__tests__/tools.test.js.map +1 -0
  42. package/dist/__tests__/tracing.test.d.ts +1 -0
  43. package/dist/__tests__/tracing.test.js +147 -0
  44. package/dist/__tests__/tracing.test.js.map +1 -0
  45. package/dist/cli/config.d.ts +49 -0
  46. package/dist/cli/config.js +86 -0
  47. package/dist/cli/config.js.map +1 -0
  48. package/dist/cli/index.d.ts +2 -0
  49. package/dist/cli/index.js +625 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/modelFactory.d.ts +9 -0
  52. package/dist/cli/modelFactory.js +154 -0
  53. package/dist/cli/modelFactory.js.map +1 -0
  54. package/dist/cli/providers.d.ts +18 -0
  55. package/dist/cli/providers.js +94 -0
  56. package/dist/cli/providers.js.map +1 -0
  57. package/dist/core/agentLoop.d.ts +43 -0
  58. package/dist/core/agentLoop.js +245 -0
  59. package/dist/core/agentLoop.js.map +1 -0
  60. package/dist/core/errors.d.ts +62 -0
  61. package/dist/core/errors.js +139 -0
  62. package/dist/core/errors.js.map +1 -0
  63. package/dist/core/promptBuilder.d.ts +49 -0
  64. package/dist/core/promptBuilder.js +84 -0
  65. package/dist/core/promptBuilder.js.map +1 -0
  66. package/dist/core/reasoningRouter.d.ts +62 -0
  67. package/dist/core/reasoningRouter.js +102 -0
  68. package/dist/core/reasoningRouter.js.map +1 -0
  69. package/dist/core/retry.d.ts +25 -0
  70. package/dist/core/retry.js +49 -0
  71. package/dist/core/retry.js.map +1 -0
  72. package/dist/core/sessionResumer.d.ts +17 -0
  73. package/dist/core/sessionResumer.js +78 -0
  74. package/dist/core/sessionResumer.js.map +1 -0
  75. package/dist/core/sessionStore.d.ts +45 -0
  76. package/dist/core/sessionStore.js +167 -0
  77. package/dist/core/sessionStore.js.map +1 -0
  78. package/dist/core/tokenCounter.d.ts +17 -0
  79. package/dist/core/tokenCounter.js +54 -0
  80. package/dist/core/tokenCounter.js.map +1 -0
  81. package/dist/evals/dataset.d.ts +4 -0
  82. package/dist/evals/dataset.js +61 -0
  83. package/dist/evals/dataset.js.map +1 -0
  84. package/dist/evals/evaluator.d.ts +21 -0
  85. package/dist/evals/evaluator.js +68 -0
  86. package/dist/evals/evaluator.js.map +1 -0
  87. package/dist/hitl/bridge.d.ts +65 -0
  88. package/dist/hitl/bridge.js +120 -0
  89. package/dist/hitl/bridge.js.map +1 -0
  90. package/dist/middleware/commandSanitizer.d.ts +18 -0
  91. package/dist/middleware/commandSanitizer.js +50 -0
  92. package/dist/middleware/commandSanitizer.js.map +1 -0
  93. package/dist/middleware/loopDetection.d.ts +28 -0
  94. package/dist/middleware/loopDetection.js +49 -0
  95. package/dist/middleware/loopDetection.js.map +1 -0
  96. package/dist/middleware/permission.d.ts +17 -0
  97. package/dist/middleware/permission.js +59 -0
  98. package/dist/middleware/permission.js.map +1 -0
  99. package/dist/middleware/pipeline.d.ts +31 -0
  100. package/dist/middleware/pipeline.js +62 -0
  101. package/dist/middleware/pipeline.js.map +1 -0
  102. package/dist/middleware/preCompletion.d.ts +29 -0
  103. package/dist/middleware/preCompletion.js +82 -0
  104. package/dist/middleware/preCompletion.js.map +1 -0
  105. package/dist/middleware/types.d.ts +40 -0
  106. package/dist/middleware/types.js +8 -0
  107. package/dist/middleware/types.js.map +1 -0
  108. package/dist/sandbox/bootstrap.d.ts +38 -0
  109. package/dist/sandbox/bootstrap.js +107 -0
  110. package/dist/sandbox/bootstrap.js.map +1 -0
  111. package/dist/sandbox/manager.d.ts +72 -0
  112. package/dist/sandbox/manager.js +180 -0
  113. package/dist/sandbox/manager.js.map +1 -0
  114. package/dist/sandbox/sync.d.ts +55 -0
  115. package/dist/sandbox/sync.js +135 -0
  116. package/dist/sandbox/sync.js.map +1 -0
  117. package/dist/skills/loader.d.ts +55 -0
  118. package/dist/skills/loader.js +132 -0
  119. package/dist/skills/loader.js.map +1 -0
  120. package/dist/skills/tools.d.ts +5 -0
  121. package/dist/skills/tools.js +78 -0
  122. package/dist/skills/tools.js.map +1 -0
  123. package/dist/skills/types.d.ts +13 -0
  124. package/dist/skills/types.js +2 -0
  125. package/dist/skills/types.js.map +1 -0
  126. package/dist/test_cache.d.ts +1 -0
  127. package/dist/test_cache.js +55 -0
  128. package/dist/test_cache.js.map +1 -0
  129. package/dist/test_google.js +93 -0
  130. package/dist/tools/askUser.d.ts +10 -0
  131. package/dist/tools/askUser.js +42 -0
  132. package/dist/tools/askUser.js.map +1 -0
  133. package/dist/tools/browser.d.ts +19 -0
  134. package/dist/tools/browser.js +111 -0
  135. package/dist/tools/browser.js.map +1 -0
  136. package/dist/tools/index.d.ts +27 -0
  137. package/dist/tools/index.js +184 -0
  138. package/dist/tools/index.js.map +1 -0
  139. package/dist/tools/registry.d.ts +31 -0
  140. package/dist/tools/registry.js +168 -0
  141. package/dist/tools/registry.js.map +1 -0
  142. package/dist/tools/router.d.ts +34 -0
  143. package/dist/tools/router.js +73 -0
  144. package/dist/tools/router.js.map +1 -0
  145. package/dist/tools/security.d.ts +28 -0
  146. package/dist/tools/security.js +183 -0
  147. package/dist/tools/security.js.map +1 -0
  148. package/dist/tools/webSearch.d.ts +6 -0
  149. package/dist/tools/webSearch.js +120 -0
  150. package/dist/tools/webSearch.js.map +1 -0
  151. package/dist/tracing/analyzer.d.ts +58 -0
  152. package/dist/tracing/analyzer.js +190 -0
  153. package/dist/tracing/analyzer.js.map +1 -0
  154. package/dist/tracing/langsmith.d.ts +38 -0
  155. package/dist/tracing/langsmith.js +50 -0
  156. package/dist/tracing/langsmith.js.map +1 -0
  157. package/dist/tracing/sessionTracer.d.ts +73 -0
  158. package/dist/tracing/sessionTracer.js +157 -0
  159. package/dist/tracing/sessionTracer.js.map +1 -0
  160. package/dist/tracing/types.d.ts +46 -0
  161. package/dist/tracing/types.js +5 -0
  162. package/dist/tracing/types.js.map +1 -0
  163. package/dist/ui/App.d.ts +24 -0
  164. package/dist/ui/App.js +172 -0
  165. package/dist/ui/App.js.map +1 -0
  166. package/dist/ui/components/HITLPrompt.d.ts +15 -0
  167. package/dist/ui/components/HITLPrompt.js +35 -0
  168. package/dist/ui/components/HITLPrompt.js.map +1 -0
  169. package/dist/ui/components/Header.d.ts +8 -0
  170. package/dist/ui/components/Header.js +6 -0
  171. package/dist/ui/components/Header.js.map +1 -0
  172. package/dist/ui/components/MessageBubble.d.ts +13 -0
  173. package/dist/ui/components/MessageBubble.js +17 -0
  174. package/dist/ui/components/MessageBubble.js.map +1 -0
  175. package/dist/ui/components/StatusBar.d.ts +21 -0
  176. package/dist/ui/components/StatusBar.js +34 -0
  177. package/dist/ui/components/StatusBar.js.map +1 -0
  178. package/dist/ui/components/StreamingText.d.ts +13 -0
  179. package/dist/ui/components/StreamingText.js +24 -0
  180. package/dist/ui/components/StreamingText.js.map +1 -0
  181. package/dist/ui/components/ToolCallPanel.d.ts +15 -0
  182. package/dist/ui/components/ToolCallPanel.js +18 -0
  183. package/dist/ui/components/ToolCallPanel.js.map +1 -0
  184. package/docs/01_insights_and_patterns.md +27 -0
  185. package/docs/02_edge_cases_and_mitigations.md +143 -0
  186. package/docs/03_initial_implementation_plan.md +66 -0
  187. package/docs/04_tech_stack_proposal.md +20 -0
  188. package/docs/05_prd.md +87 -0
  189. package/docs/06_user_stories.md +72 -0
  190. package/docs/07_system_architecture.md +138 -0
  191. package/docs/08_roadmap.md +200 -0
  192. package/e2b/Dockerfile +26 -0
  193. package/package.json +57 -0
  194. package/src/__tests__/bootstrap.test.ts +111 -0
  195. package/src/__tests__/config.test.ts +97 -0
  196. package/src/__tests__/m55.test.ts +238 -0
  197. package/src/__tests__/middleware.test.ts +219 -0
  198. package/src/__tests__/modelFactory.test.ts +63 -0
  199. package/src/__tests__/optimizations.test.ts +201 -0
  200. package/src/__tests__/promptBuilder.test.ts +141 -0
  201. package/src/__tests__/sandbox.test.ts +102 -0
  202. package/src/__tests__/security.test.ts +122 -0
  203. package/src/__tests__/streaming.test.ts +82 -0
  204. package/src/__tests__/toolRouter.test.ts +52 -0
  205. package/src/__tests__/tools.test.ts +146 -0
  206. package/src/__tests__/tracing.test.ts +196 -0
  207. package/src/agents/agentRegistry.ts +69 -0
  208. package/src/agents/agentSpec.ts +67 -0
  209. package/src/agents/builtinAgents.ts +142 -0
  210. package/src/cli/config.ts +124 -0
  211. package/src/cli/index.ts +730 -0
  212. package/src/cli/modelFactory.ts +174 -0
  213. package/src/cli/providers.ts +107 -0
  214. package/src/commands/builtinCommands.ts +293 -0
  215. package/src/commands/commandRegistry.ts +194 -0
  216. package/src/core/agentLoop.d.ts.map +1 -0
  217. package/src/core/agentLoop.ts +312 -0
  218. package/src/core/autoSave.ts +95 -0
  219. package/src/core/compactor.ts +252 -0
  220. package/src/core/contextGuard.ts +129 -0
  221. package/src/core/errors.ts +202 -0
  222. package/src/core/promptBuilder.d.ts.map +1 -0
  223. package/src/core/promptBuilder.ts +139 -0
  224. package/src/core/reasoningRouter.ts +121 -0
  225. package/src/core/retry.ts +75 -0
  226. package/src/core/sessionResumer.ts +90 -0
  227. package/src/core/sessionStore.ts +215 -0
  228. package/src/core/subAgent.ts +339 -0
  229. package/src/core/tokenCounter.ts +64 -0
  230. package/src/evals/dataset.ts +67 -0
  231. package/src/evals/evaluator.ts +81 -0
  232. package/src/hitl/bridge.ts +160 -0
  233. package/src/middleware/commandSanitizer.ts +60 -0
  234. package/src/middleware/loopDetection.ts +63 -0
  235. package/src/middleware/permission.ts +72 -0
  236. package/src/middleware/pipeline.ts +75 -0
  237. package/src/middleware/preCompletion.ts +94 -0
  238. package/src/middleware/types.ts +45 -0
  239. package/src/sandbox/bootstrap.ts +121 -0
  240. package/src/sandbox/manager.ts +239 -0
  241. package/src/sandbox/sync.ts +157 -0
  242. package/src/skills/loader.ts +143 -0
  243. package/src/skills/tools.ts +99 -0
  244. package/src/skills/types.ts +13 -0
  245. package/src/test_cache.ts +72 -0
  246. package/src/test_google.js +40 -0
  247. package/src/test_google.ts +40 -0
  248. package/src/tools/askUser.ts +47 -0
  249. package/src/tools/browser.ts +137 -0
  250. package/src/tools/index.d.ts.map +1 -0
  251. package/src/tools/index.ts +237 -0
  252. package/src/tools/registry.ts +198 -0
  253. package/src/tools/router.ts +78 -0
  254. package/src/tools/security.ts +220 -0
  255. package/src/tools/spawnAgent.ts +158 -0
  256. package/src/tools/webSearch.ts +142 -0
  257. package/src/tracing/analyzer.ts +265 -0
  258. package/src/tracing/langsmith.ts +63 -0
  259. package/src/tracing/sessionTracer.ts +202 -0
  260. package/src/tracing/types.ts +49 -0
  261. package/src/types/valyu.d.ts +37 -0
  262. package/src/ui/App.tsx +404 -0
  263. package/src/ui/components/HITLPrompt.tsx +119 -0
  264. package/src/ui/components/Header.tsx +51 -0
  265. package/src/ui/components/MessageBubble.tsx +46 -0
  266. package/src/ui/components/StatusBar.tsx +138 -0
  267. package/src/ui/components/StreamingText.tsx +48 -0
  268. package/src/ui/components/ToolCallPanel.tsx +80 -0
  269. package/tests/commands/commands.test.ts +356 -0
  270. package/tests/core/compactor.test.ts +217 -0
  271. package/tests/core/retryAndErrors.test.ts +164 -0
  272. package/tests/core/sessionResumer.test.ts +95 -0
  273. package/tests/core/sessionStore.test.ts +84 -0
  274. package/tests/core/stability.test.ts +165 -0
  275. package/tests/core/subAgent.test.ts +238 -0
  276. package/tests/hitl/hitlBridge.test.ts +115 -0
  277. package/tsconfig.json +16 -0
  278. package/vitest.config.ts +10 -0
  279. package/vitest.out +48 -0
@@ -0,0 +1,120 @@
1
+ import { EventEmitter } from "node:events";
2
/**
 * HITLBridge — Human-in-the-Loop communication bridge.
 *
 * Provides an event-based interface between the tool execution layer and the
 * TUI rendering layer. A tool (or the permission middleware) calls
 * `askUser()` / `requestPermission()`, which emits a `"question"` /
 * `"permission"` event carrying a payload with a unique `id`. The TUI renders
 * the prompt and hands the user's reply back through `resolveAnswer(id, text)`.
 *
 * Every request is bounded by `timeoutMs`; an unanswered request settles with
 * a fallback value instead of hanging forever.
 *
 * Singleton pattern: `getInstance()` returns one shared bridge per process
 * until `resetInstance()` is called.
 */
export class HITLBridge extends EventEmitter {
    private static instance: HITLBridge | null = null;

    /** Resolver callbacks for requests that are still waiting, keyed by request id. */
    private readonly pendingResolvers = new Map<string, (answer: string) => void>();
    private readonly timeoutMs: number;
    /** Monotonic counter that keeps ids unique even within the same millisecond. */
    private questionCounter = 0;

    /**
     * @param timeoutMs - How long a request waits for an answer before the
     *   fallback value is used (default: 5 minutes).
     */
    constructor(timeoutMs: number = 5 * 60 * 1000) {
        super();
        this.timeoutMs = timeoutMs;
    }

    /**
     * Returns the shared bridge, creating it on first use.
     *
     * @param timeoutMs - Only honoured by the call that actually creates the
     *   instance; it is ignored once an instance exists.
     */
    static getInstance(timeoutMs?: number): HITLBridge {
        if (!HITLBridge.instance) {
            HITLBridge.instance = new HITLBridge(timeoutMs);
        }
        return HITLBridge.instance;
    }

    /**
     * Drops the shared instance so the next `getInstance()` builds a new one.
     * Requests still pending on the old instance are left untouched.
     */
    static resetInstance(): void {
        HITLBridge.instance = null;
    }

    /**
     * Called by a tool to ask the user a free-form question.
     * Emits a `"question"` event and waits for `resolveAnswer()`.
     *
     * @param question - The text to show to the user.
     * @param options - Optional list of suggested answers, forwarded as-is in the payload.
     * @returns The user's answer, or a fixed "[No response …]" notice after the timeout.
     */
    async askUser(question: string, options?: string[]): Promise<string> {
        const id = `hitl-q-${++this.questionCounter}-${Date.now()}`;
        const payload = {
            id,
            question,
            options,
            createdAt: Date.now(),
        };
        return this.awaitAnswer<string>(
            id,
            "question",
            payload,
            (answer) => answer,
            "[No response — the user did not answer within the timeout period.]",
        );
    }

    /**
     * Called by the PermissionMiddleware to request tool execution approval.
     * Emits a `"permission"` event and waits for `resolveAnswer()`.
     *
     * @param toolName - Name of the tool that wants to run.
     * @param args - The tool call arguments, forwarded as-is in the payload.
     * @returns true only when the answer is "y", "yes" or "approve"
     *   (case-insensitive, surrounding whitespace ignored); false for any other
     *   answer, a non-string answer, or a timeout.
     */
    async requestPermission(toolName: string, args: Record<string, unknown>): Promise<boolean> {
        const id = `hitl-perm-${++this.questionCounter}-${Date.now()}`;
        const payload = {
            id,
            toolName,
            args,
            createdAt: Date.now(),
        };
        return this.awaitAnswer<boolean>(
            id,
            "permission",
            payload,
            (answer) => {
                // Untyped callers may hand over a non-string; treat that as a denial
                // rather than throwing inside the resolver.
                const normalized = typeof answer === "string" ? answer.trim().toLowerCase() : "";
                return normalized === "y" || normalized === "yes" || normalized === "approve";
            },
            false, // Denied by timeout
        );
    }

    /**
     * Called by the TUI when the user submits an answer.
     * Unknown or already-settled ids are ignored.
     *
     * @param id - The question/permission request ID.
     * @param answer - The user's text response.
     */
    resolveAnswer(id: string, answer: string): void {
        const resolver = this.pendingResolvers.get(id);
        if (resolver) {
            this.pendingResolvers.delete(id);
            resolver(answer);
        }
    }

    /**
     * Returns true if there is an outstanding question or permission request
     * awaiting an answer.
     */
    hasPendingQuestion(): boolean {
        return this.pendingResolvers.size > 0;
    }

    /**
     * Registers a pending request, arms its timeout and only then emits the
     * event. The order matters: `emit()` runs listeners synchronously, so a
     * listener may call `resolveAnswer()` before `emit()` returns. With the
     * timer and final resolver already in place, such an early answer clears
     * the timer and leaves no stale entry behind.
     */
    private awaitAnswer<T>(
        id: string,
        event: "question" | "permission",
        payload: object,
        onAnswer: (answer: string) => T,
        onTimeout: T,
    ): Promise<T> {
        return new Promise<T>((resolve) => {
            const timer = setTimeout(() => {
                // delete() reports whether the request was still pending.
                if (this.pendingResolvers.delete(id)) {
                    resolve(onTimeout);
                }
            }, this.timeoutMs);

            this.pendingResolvers.set(id, (answer) => {
                clearTimeout(timer);
                resolve(onAnswer(answer));
            });

            try {
                this.emit(event, payload);
            } catch (err) {
                // A throwing listener must not leave a dangling timer or resolver.
                clearTimeout(timer);
                this.pendingResolvers.delete(id);
                throw err;
            }
        });
    }
}
120
+ //# sourceMappingURL=bridge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bridge.js","sourceRoot":"","sources":["../../src/hitl/bridge.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAwB3C;;;;;;;;;GASG;AACH,MAAM,OAAO,UAAW,SAAQ,YAAY;IAChC,MAAM,CAAC,QAAQ,GAAsB,IAAI,CAAC;IAC1C,gBAAgB,GAAG,IAAI,GAAG,EAAoC,CAAC;IAC/D,SAAS,CAAS;IAClB,eAAe,GAAG,CAAC,CAAC;IAE5B,YAAY,YAAoB,CAAC,GAAG,EAAE,GAAG,IAAI;QACzC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC/B,CAAC;IAED,MAAM,CAAC,WAAW,CAAC,SAAkB;QACjC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;YACvB,UAAU,CAAC,QAAQ,GAAG,IAAI,UAAU,CAAC,SAAS,CAAC,CAAC;QACpD,CAAC;QACD,OAAO,UAAU,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAED,MAAM,CAAC,aAAa;QAChB,UAAU,CAAC,QAAQ,GAAG,IAAI,CAAC;IAC/B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,OAAO,CAAC,QAAgB,EAAE,OAAkB;QAC9C,MAAM,EAAE,GAAG,UAAU,EAAE,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QAE5D,MAAM,OAAO,GAAiB;YAC1B,EAAE;YACF,QAAQ;YACR,OAAO;YACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACxB,CAAC;QAEF,OAAO,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC3C,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;YAEvC,6CAA6C;YAC7C,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YAE/B,+CAA+C;YAC/C,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;gBAC1B,IAAI,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAChC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBACjC,OAAO,CAAC,oEAAoE,CAAC,CAAC;gBAClF,CAAC;YACL,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YAEnB,4CAA4C;YAC5C,MAAM,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;YACvD,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,MAAc,EAAE,EAAE;gBAC7C,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,eAAe,CAAC,MAAM,CAAC,CAAC;YAC5B,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,iBAAiB,CAAC,QAAgB,EAAE,IAA6B;QACnE,MAAM,EAAE,GAAG,aAAa,EAAE,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QAE/D,MAAM,OAAO,GAA0B;YACnC,EAAE;YACF,QAAQ;YACR,IAAI;YACJ,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACxB,CAAC;QAEF,OAAO,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,EAAE;YACpC,MAAM,cAAc,GAAG,CAAC,MAAc,EAAE,EAAE;gBACtC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAA
C;gBAC/C,OAAO,CAAC,UAAU,KAAK,GAAG,IAAI,UAAU,KAAK,KAAK,IAAI,UAAU,KAAK,SAAS,CAAC,CAAC;YACpF,CAAC,CAAC;YAEF,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,EAAE,cAAqB,CAAC,CAAC;YAErD,mDAAmD;YACnD,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YAEjC,qBAAqB;YACrB,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;gBAC1B,IAAI,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAChC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBACjC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,oBAAoB;gBACxC,CAAC;YACL,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YAEnB,4BAA4B;YAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAE,CAAC;YAC/C,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,MAAc,EAAE,EAAE;gBAC7C,YAAY,CAAC,KAAK,CAAC,CAAC;gBACnB,OAAe,CAAC,MAAM,CAAC,CAAC;YAC7B,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;IAED;;;;;OAKG;IACH,aAAa,CAAC,EAAU,EAAE,MAAc;QACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC/C,IAAI,QAAQ,EAAE,CAAC;YACX,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACjC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACrB,CAAC;IACL,CAAC;IAED;;OAEG;IACH,kBAAkB;QACd,OAAO,IAAI,CAAC,gBAAgB,CAAC,IAAI,GAAG,CAAC,CAAC;IAC1C,CAAC"}
@@ -0,0 +1,18 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ /**
3
+ * Intercepts bash tool calls to block dangerous or interactive commands.
4
+ *
5
+ * Categories of blocked commands (examples):
6
+ * 1. Destructive: `rm -rf /`, `mkfs`, `dd ... of=/dev/...`, `chmod 777 /`
7
+ * 2. Interactive/hanging: `vim`, `nano`, `less`, `more`, `top`, `htop`, `man`
8
+ * 3. Network abuse: `curl | sh`, `wget | bash`
9
+ */
10
+ export declare class CommandSanitizerMiddleware implements ToolMiddleware {
11
+ readonly name = "CommandSanitizer";
12
+ /**
13
+ * Patterns that will cause a command to be blocked.
14
+ * Each entry is [regex, human-readable reason]; entries are tested in order and the first match wins.
15
+ */
16
+ private readonly blockedPatterns;
17
+ before(ctx: ToolCallContext): ToolCallContext | string; // Only toolName "bash" with a string args.command is inspected; returns ctx unchanged to allow the call, or a "Blocked" message string to reject it.
18
+ }
@@ -0,0 +1,50 @@
1
/**
 * Intercepts bash tool calls to block dangerous or interactive commands.
 *
 * Categories of blocked commands:
 *   1. Destructive: `rm -rf /`, `mkfs`, `dd ... of=/dev/...`, `chmod 777 /`,
 *      self-piping fork bombs such as `:(){ :|:& };:`
 *   2. Interactive/hanging: `vim`, `nano`, `less`, `more`, `top`, `htop`, `man`
 *   3. Network abuse: `curl | sh`, `wget | bash`
 *
 * Matching is purely textual: a pattern that appears anywhere in the command
 * string (including inside quoted arguments) causes a rejection.
 */
export class CommandSanitizerMiddleware {
  readonly name = "CommandSanitizer";

  /**
   * Patterns that will cause a command to be blocked.
   * Each entry is [regex, human-readable reason]; the first match wins.
   */
  private readonly blockedPatterns: ReadonlyArray<readonly [RegExp, string]> = [
    // Destructive
    [/rm\s+(-\w*r\w*f\w*|-\w*f\w*r\w*)\s+\/(\*)?(?:\s|$)/, "destructive: rm -rf /"],
    [/mkfs\b/, "destructive: filesystem format"],
    [/\bdd\s+.*of=\/dev\//, "destructive: raw disk write"],
    [/chmod\s+(-\w+\s+)*777\s+\//, "dangerous: chmod 777 on root"],
    // A function whose body pipes itself into itself in the background,
    // e.g. `:(){ :|:& };:` or `bomb() { bomb | bomb & }; bomb`.
    [/([\w:]+)\s*\(\s*\)\s*\{[^}]*\1\s*\|\s*\1\s*&/, "destructive: fork bomb"],
    // Interactive / hanging
    [/\b(vim|vi|nano|emacs|pico)\b/, "interactive: text editor (hangs the sandbox)"],
    [/\b(less|more)\b/, "interactive: pager (hangs the sandbox)"],
    [/\b(top|htop|glances)\b/, "interactive: process monitor (hangs the sandbox)"],
    [/\bman\s+\w+/, "interactive: man page (hangs the sandbox)"],
    // Network abuse: pipe-to-shell
    [/curl\s+.*\|\s*(sh|bash|zsh)/, "unsafe: pipe remote script to shell"],
    [/wget\s+.*\|\s*(sh|bash|zsh)/, "unsafe: pipe remote script to shell"],
  ];

  /**
   * Pre-execution hook.
   *
   * @param ctx - The tool call about to run; only `toolName` and `args.command` are read.
   * @returns `ctx` unchanged when the call may proceed, or a message string
   *   (reason + offending command) when it is rejected.
   */
  before<T extends { toolName: string; args: Record<string, unknown> }>(ctx: T): T | string {
    // Only applies to bash/shell tool calls
    if (ctx.toolName !== "bash") {
      return ctx;
    }

    const command = ctx.args.command;
    if (typeof command !== "string") {
      return ctx;
    }

    for (const [pattern, reason] of this.blockedPatterns) {
      if (pattern.test(command)) {
        return (
          `⚠ Blocked: Command rejected by sanitizer.\n` +
          `Reason: ${reason}\n` +
          `Command: ${command}\n` +
          `Use a safer alternative or refine your approach.`
        );
      }
    }

    return ctx;
  }
}
50
+ //# sourceMappingURL=commandSanitizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"commandSanitizer.js","sourceRoot":"","sources":["../../src/middleware/commandSanitizer.ts"],"names":[],"mappings":"AAEA;;;;;;;GAOG;AACH,MAAM,OAAO,0BAA0B;IAC5B,IAAI,GAAG,kBAAkB,CAAC;IAEnC;;;OAGG;IACc,eAAe,GAAuB;QACrD,cAAc;QACd,CAAC,oDAAoD,EAAE,uBAAuB,CAAC;QAC/E,CAAC,QAAQ,EAAE,gCAAgC,CAAC;QAC5C,CAAC,qBAAqB,EAAE,6BAA6B,CAAC;QACtD,CAAC,4BAA4B,EAAE,8BAA8B,CAAC;QAE9D,wBAAwB;QACxB,CAAC,8BAA8B,EAAE,8CAA8C,CAAC;QAChF,CAAC,iBAAiB,EAAE,wCAAwC,CAAC;QAC7D,CAAC,wBAAwB,EAAE,kDAAkD,CAAC;QAC9E,CAAC,aAAa,EAAE,2CAA2C,CAAC;QAE5D,+BAA+B;QAC/B,CAAC,6BAA6B,EAAE,qCAAqC,CAAC;QACtE,CAAC,6BAA6B,EAAE,qCAAqC,CAAC;KACvE,CAAC;IAEF,MAAM,CAAC,GAAoB;QACzB,wCAAwC;QACxC,IAAI,GAAG,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YAC5B,OAAO,GAAG,CAAC;QACb,CAAC;QAED,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC;QACjC,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;YAChC,OAAO,GAAG,CAAC;QACb,CAAC;QAED,KAAK,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACrD,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC1B,OAAO,CACL,6CAA6C;oBAC7C,WAAW,MAAM,IAAI;oBACrB,YAAY,OAAO,IAAI;oBACvB,kDAAkD,CACnD,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC;CACF"}
@@ -0,0 +1,28 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ /**
3
+ * Prevents the "Blind Retry" doom loop.
4
+ *
5
+ * Tracks a rolling window of recent tool call signatures. If the same
6
+ * tool + args combination appears `threshold` times consecutively, the call is
7
+ * rejected with an instruction to try a different approach.
8
+ *
9
+ * Reference: docs/02_edge_cases_and_mitigations.md — "The Blind Retry Doom Loop"
10
+ */
11
+ export declare class LoopDetectionMiddleware implements ToolMiddleware {
12
+ readonly name = "LoopDetection";
13
+ private history; // Recent call signatures, trimmed to the last threshold * 2 entries.
14
+ private readonly threshold;
15
+ /**
16
+ * @param threshold - Number of identical consecutive calls before blocking (default: 3).
17
+ */
18
+ constructor(threshold?: number);
19
+ /**
20
+ * Creates a signature string for a tool call (name + sorted args JSON).
21
+ */
22
+ private signature;
23
+ before(ctx: ToolCallContext): ToolCallContext | string; // Records the call first, then returns a "Loop detected" message string if the last `threshold` recorded calls share its signature; otherwise returns ctx.
24
+ /**
25
+ * Resets the history. Useful for testing or session boundaries.
26
+ */
27
+ reset(): void;
28
+ }
@@ -0,0 +1,49 @@
1
/**
 * Prevents the "Blind Retry" doom loop.
 *
 * Tracks a rolling window of recent tool call signatures. If the same
 * tool + args combination appears `threshold` times consecutively, the call
 * is rejected with an instruction to try a different approach.
 *
 * Reference: docs/02_edge_cases_and_mitigations.md — "The Blind Retry Doom Loop"
 */
export class LoopDetectionMiddleware {
  readonly name = "LoopDetection";

  /** Signatures of the most recent calls, oldest first (at most `threshold * 2`). */
  private history: string[] = [];
  private readonly threshold: number;

  /**
   * @param threshold - Number of identical consecutive calls before blocking (default: 3).
   */
  constructor(threshold = 3) {
    this.threshold = threshold;
  }

  /**
   * Creates a signature string for a tool call: the tool name plus the JSON of
   * its arguments with object keys sorted at every nesting level.
   *
   * A replacer *function* is used on purpose. An array replacer acts as a
   * property allow-list at every depth, so nested keys that are not also
   * top-level keys would be silently dropped and distinct calls would collide.
   */
  private signature(ctx: { toolName: string; args: Record<string, unknown> }): string {
    const canonicalArgs = JSON.stringify(ctx.args, (_key: string, value: unknown) => {
      if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return value;
      }
      const source = value as Record<string, unknown>;
      // Null-prototype target so an own "__proto__" key is kept as plain data.
      const sorted: Record<string, unknown> = Object.create(null);
      for (const key of Object.keys(source).sort()) {
        sorted[key] = source[key];
      }
      return sorted;
    });
    return `${ctx.toolName}:${canonicalArgs}`;
  }

  /**
   * Pre-execution hook. The call is recorded before the check, so a rejected
   * call still counts towards the streak.
   *
   * @param ctx - The tool call about to run; `toolName` and `args` are read.
   * @returns `ctx` unchanged when the call may proceed, or a "Loop detected"
   *   message string once the last `threshold` calls are identical.
   */
  before<T extends { toolName: string; args: Record<string, unknown> }>(ctx: T): T | string {
    const sig = this.signature(ctx);

    this.history.push(sig);

    // Keep only the last N entries to avoid unbounded growth
    if (this.history.length > this.threshold * 2) {
      this.history = this.history.slice(-this.threshold * 2);
    }

    // Check if the last `threshold` entries are all identical
    const tail = this.history.slice(-this.threshold);
    if (tail.length >= this.threshold && tail.every((s) => s === sig)) {
      return (
        `⚠ Loop detected: You have called "${ctx.toolName}" with identical arguments ` +
        `${this.threshold} times consecutively. Stop this approach and try a different strategy.`
      );
    }

    return ctx;
  }

  /**
   * Resets the history. Useful for testing or session boundaries.
   */
  reset(): void {
    this.history = [];
  }
}
49
+ //# sourceMappingURL=loopDetection.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loopDetection.js","sourceRoot":"","sources":["../../src/middleware/loopDetection.ts"],"names":[],"mappings":"AAEA;;;;;;;;GAQG;AACH,MAAM,OAAO,uBAAuB;IACzB,IAAI,GAAG,eAAe,CAAC;IAExB,OAAO,GAAa,EAAE,CAAC;IACd,SAAS,CAAS;IAEnC;;OAEG;IACH,YAAY,SAAS,GAAG,CAAC;QACvB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED;;OAEG;IACK,SAAS,CAAC,GAAoB;QACpC,OAAO,GAAG,GAAG,CAAC,QAAQ,IAAI,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;IACrF,CAAC;IAED,MAAM,CAAC,GAAoB;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAEhC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEvB,yDAAyD;QACzD,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;YAC7C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;QACzD,CAAC;QAED,0DAA0D;QAC1D,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjD,IACE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS;YAC7B,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,EAC5B,CAAC;YACD,OAAO,CACL,qCAAqC,GAAG,CAAC,QAAQ,6BAA6B;gBAC9E,GAAG,IAAI,CAAC,SAAS,wEAAwE,CAC1F,CAAC;QACJ,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;IACpB,CAAC;CACF"}
@@ -0,0 +1,17 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ export type PermissionMode = "auto" | "ask_dangerous" | "ask_all"; // Approval policy handed to the PermissionMiddleware constructor.
3
+ /**
4
+ * PermissionMiddleware — gates dangerous tool calls behind user approval.
5
+ *
6
+ * Behavior per mode:
7
+ * - `auto`: All tools execute without asking. (Used when no mode is passed to the constructor.)
8
+ * - `ask_dangerous`: Only tools in DANGEROUS_TOOLS require approval.
9
+ * - `ask_all`: Every tool except SAFE_TOOLS requires approval.
10
+ */
11
+ export declare class PermissionMiddleware implements ToolMiddleware {
12
+ name: string;
13
+ private mode;
14
+ constructor(mode?: PermissionMode);
15
+ before(ctx: ToolCallContext): Promise<ToolCallContext | string | void>; // Resolves to ctx when the call may proceed, or to a "Permission denied" message string when the user declines the HITLBridge permission request.
16
+ private requiresApproval;
17
+ }
@@ -0,0 +1,59 @@
1
+ import { HITLBridge } from "../hitl/bridge.js";
2
/** Read-only and meta tools: never gated, whatever the mode. */
const SAFE_TOOLS = new Set<string>([
  "read_file",
  "view_file_outline",
  "search_skills",
  "load_skill",
  "search_tools",
  "ask_user_question", // Meta: the ask tool itself is always safe
]);

/** Tools with destructive or side-effect-heavy behaviour; gated in `ask_dangerous` mode. */
const DANGEROUS_TOOLS = new Set<string>([
  "bash",
  "write_file",
  "replace_file_content",
  "multi_replace_file_content",
  "install_deps",
]);

/**
 * PermissionMiddleware — puts tool calls behind an interactive approval step.
 *
 * Modes:
 * - `auto`: nothing is gated (constructor default).
 * - `ask_dangerous`: tools listed in DANGEROUS_TOOLS are gated.
 * - `ask_all`: everything outside SAFE_TOOLS is gated.
 */
export class PermissionMiddleware {
  name = "PermissionMiddleware";
  private mode: "auto" | "ask_dangerous" | "ask_all";

  constructor(mode: "auto" | "ask_dangerous" | "ask_all" = "auto") {
    this.mode = mode;
  }

  /**
   * Pre-execution hook. Gated calls wait for the user's decision through the
   * shared HITLBridge.
   *
   * @param ctx - The tool call about to run; `toolName` and `args` are read.
   * @returns `ctx` when the call may proceed, or a denial message string when
   *   the permission request is not approved.
   */
  async before<T extends { toolName: string; args: Record<string, unknown> }>(
    ctx: T,
  ): Promise<T | string> {
    if (this.mode === "auto" || !this.requiresApproval(ctx.toolName)) {
      return ctx;
    }

    const { toolName } = ctx;
    const approved = await HITLBridge.getInstance().requestPermission(toolName, ctx.args);
    if (approved) {
      return ctx;
    }

    // A string result short-circuits the pipeline: the tool is not executed.
    return (
      `Permission denied: The user declined to approve the execution of "${toolName}". ` +
      `Try an alternative approach or ask the user for guidance using the ask_user_question tool.`
    );
  }

  /** Decides whether `toolName` is gated under the current mode. */
  private requiresApproval(toolName: string): boolean {
    if (SAFE_TOOLS.has(toolName)) {
      return false;
    }
    switch (this.mode) {
      case "ask_all":
        return true;
      case "ask_dangerous":
        return DANGEROUS_TOOLS.has(toolName);
      default:
        return false;
    }
  }
}
59
+ //# sourceMappingURL=permission.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"permission.js","sourceRoot":"","sources":["../../src/middleware/permission.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAK/C,+DAA+D;AAC/D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACvB,WAAW;IACX,mBAAmB;IACnB,eAAe;IACf,YAAY;IACZ,cAAc;IACd,mBAAmB,EAAE,2CAA2C;CACnE,CAAC,CAAC;AAEH,sEAAsE;AACtE,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC5B,MAAM;IACN,YAAY;IACZ,sBAAsB;IACtB,4BAA4B;IAC5B,cAAc;CACjB,CAAC,CAAC;AAEH;;;;;;;GAOG;AACH,MAAM,OAAO,oBAAoB;IAC7B,IAAI,GAAG,sBAAsB,CAAC;IACtB,IAAI,CAAiB;IAE7B,YAAY,OAAuB,MAAM;QACrC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAoB;QAC7B,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM;YAAE,OAAO,GAAG,CAAC;QAErC,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;QAC9B,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAEtD,IAAI,CAAC,aAAa;YAAE,OAAO,GAAG,CAAC;QAE/B,MAAM,MAAM,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;QAEpE,IAAI,CAAC,QAAQ,EAAE,CAAC;YACZ,uDAAuD;YACvD,OAAO,CACH,qEAAqE,QAAQ,KAAK;gBAClF,4FAA4F,CAC/F,CAAC;QACN,CAAC;QAED,OAAO,GAAG,CAAC;IACf,CAAC;IAEO,gBAAgB,CAAC,QAAgB;QACrC,IAAI,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC;YAAE,OAAO,KAAK,CAAC;QAE3C,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC;QACzC,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe;YAAE,OAAO,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAExE,OAAO,KAAK,CAAC;IACjB,CAAC;CACJ"}
@@ -0,0 +1,31 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ import { ToolResult } from "../tools/index.js";
3
+ /**
4
+ * Executes tool calls through a chain of middleware hooks.
5
+ *
6
+ * Execution flow:
7
+ * 1. Run all `before()` hooks in registration order.
8
+ * - If any returns a string → short-circuit (tool is NOT executed).
9
+ * 2. Execute the actual tool function.
10
+ * 3. Run all `after()` hooks in reverse registration order.
11
+ * - Each can transform the result before it enters conversation history.
12
+ */
13
+ export declare class MiddlewarePipeline {
14
+ private middlewares;
15
+ /**
16
+ * Register a middleware. Middlewares run in the order they are added.
17
+ */
18
+ use(middleware: ToolMiddleware): void;
19
+ /**
20
+ * Returns the number of registered middlewares.
21
+ */
22
+ get length(): number;
23
+ /**
24
+ * Execute a tool call through the middleware pipeline.
25
+ *
26
+ * @param ctx - The tool call context (name, args, callId).
27
+ * @param executeFn - The actual tool execution function.
28
+ * @returns The final result's `content` string (possibly transformed by after-hooks), or the string returned by a `before()` hook that short-circuited the call.
29
+ */
30
+ run(ctx: ToolCallContext, executeFn: (ctx: ToolCallContext) => Promise<ToolResult> | ToolResult): Promise<string>;
31
+ }
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Executes tool calls through a chain of middleware hooks.
3
+ *
4
+ * Execution flow:
5
+ * 1. Run all `before()` hooks in registration order.
6
+ * - If any returns a string → short-circuit (tool is NOT executed).
7
+ * 2. Execute the actual tool function.
8
+ * 3. Run all `after()` hooks in reverse registration order.
9
+ * - Each can transform the result before it enters conversation history.
10
+ */
11
+ export class MiddlewarePipeline {
12
+ middlewares = [];
13
+ /**
14
+ * Register a middleware. Middlewares run in the order they are added.
15
+ */
16
+ use(middleware) {
17
+ this.middlewares.push(middleware);
18
+ }
19
+ /**
20
+ * Returns the number of registered middlewares.
21
+ */
22
+ get length() {
23
+ return this.middlewares.length;
24
+ }
25
+ /**
26
+ * Execute a tool call through the middleware pipeline.
27
+ *
28
+ * @param ctx - The tool call context (name, args, callId).
29
+ * @param executeFn - The actual tool execution function.
30
+ * @returns The final result's `content` string (possibly transformed by after-hooks), or the string returned by a `before()` hook that short-circuited the call.
31
+ */
32
+ async run(ctx, executeFn) {
33
+ // ── Before phase: run hooks in order ──
34
+ let currentCtx = ctx;
35
+ for (const mw of this.middlewares) {
36
+ if (mw.before) {
37
+ const result = await mw.before(currentCtx);
38
+ if (typeof result === "string") {
39
+ // Short-circuit: middleware rejected the call
40
+ return result;
41
+ }
42
+ if (result !== undefined) {
43
+ currentCtx = result;
44
+ }
45
+ }
46
+ }
47
+ // ── Execute the tool ──
48
+ let output = await executeFn(currentCtx);
49
+ // ── After phase: run hooks in reverse order ──
50
+ for (let i = this.middlewares.length - 1; i >= 0; i--) {
51
+ const mw = this.middlewares[i];
52
+ if (mw.after) {
53
+ const transformed = await mw.after(currentCtx, output);
54
+ if (transformed !== undefined) {
55
+ output = transformed;
56
+ }
57
+ }
58
+ }
59
+ return output.content;
60
+ }
61
+ }
62
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/middleware/pipeline.ts"],"names":[],"mappings":"AAGA;;;;;;;;;GASG;AACH,MAAM,OAAO,kBAAkB;IACrB,WAAW,GAAqB,EAAE,CAAC;IAE3C;;OAEG;IACH,GAAG,CAAC,UAA0B;QAC5B,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACH,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;IACjC,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,GAAG,CACP,GAAoB,EACpB,SAAqE;QAErE,yCAAyC;QACzC,IAAI,UAAU,GAAG,GAAG,CAAC;QAErB,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAClC,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;gBACd,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;gBAE3C,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;oBAC/B,8CAA8C;oBAC9C,OAAO,MAAM,CAAC;gBAChB,CAAC;gBAED,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;oBACzB,UAAU,GAAG,MAAM,CAAC;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAED,yBAAyB;QACzB,IAAI,MAAM,GAAe,MAAM,SAAS,CAAC,UAAU,CAAC,CAAC;QAErD,gDAAgD;QAChD,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACtD,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YAC/B,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC;gBACb,MAAM,WAAW,GAAG,MAAM,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;gBACvD,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;oBAC9B,MAAM,GAAG,WAAW,CAAC;gBACvB,CAAC;YACH,CAAC;QAAI,CAAC;QAER,OAAO,MAAM,CAAC,OAAO,CAAC;IACxB,CAAC;CACF"}
@@ -0,0 +1,29 @@
1
+ import { ToolCallContext, ToolMiddleware } from "./types.js";
2
+ import { ToolResult } from "../tools/index.js";
3
+ /**
4
+ * Prevents the agent from marking a task as "done" without running tests.
5
+ *
6
+ * Tracks whether the most recent test command (a bash call matching a known test-runner pattern) exited with code 0.
7
+ * If the agent attempts to signal completion before such a test run has passed,
8
+ * the middleware intercepts and forces verification.
9
+ *
10
+ * Reference: docs/02_edge_cases_and_mitigations.md — "The Fake Success Verification"
11
+ */
12
+ export declare class PreCompletionMiddleware implements ToolMiddleware {
13
+ readonly name = "PreCompletion";
14
+ private testsPassed;
15
+ /** Patterns in bash commands that count as "running tests". */
16
+ private readonly testPatterns;
17
+ /** Tool names that signal the agent is trying to complete the task. */
18
+ private readonly completionSignals;
19
+ before(ctx: ToolCallContext): ToolCallContext | string;
20
+ after(ctx: ToolCallContext, result: ToolResult): void;
21
+ /**
22
+ * Returns whether the most recent test run in this session passed (exit code 0).
23
+ */
24
+ hasPassedTests(): boolean;
25
+ /**
26
+ * Resets state. Useful for testing or session boundaries.
27
+ */
28
+ reset(): void;
29
+ }
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Prevents the agent from marking a task as "done" without running tests.
3
+ *
4
+ * Tracks whether the most recent test command (a bash call matching a known test-runner pattern) exited with code 0.
5
+ * If the agent attempts to signal completion before such a test run has passed,
6
+ * the middleware intercepts and forces verification.
7
+ *
8
+ * Reference: docs/02_edge_cases_and_mitigations.md — "The Fake Success Verification"
9
+ */
10
+ export class PreCompletionMiddleware {
11
+ name = "PreCompletion";
12
+ testsPassed = false;
13
+ /** Patterns in bash commands that count as "running tests". */
14
+ testPatterns = [
15
+ /\bvitest\b/,
16
+ /\bjest\b/,
17
+ /\bpytest\b/,
18
+ /\bmocha\b/,
19
+ /\bnpm\s+test\b/,
20
+ /\bnpm\s+run\s+test\b/,
21
+ /\byarn\s+test\b/,
22
+ /\bpnpm\s+test\b/,
23
+ /\bgo\s+test\b/,
24
+ /\bcargo\s+test\b/,
25
+ ];
26
+ /** Tool names that signal the agent is trying to complete the task. */
27
+ completionSignals = new Set([
28
+ "task_complete",
29
+ "attempt_completion",
30
+ "finish_task",
31
+ "submit_result",
32
+ ]);
33
+ before(ctx) {
34
+ // When a test command is initiated, assume it hasn't passed yet
35
+ if (ctx.toolName === "bash" && typeof ctx.args.command === "string") {
36
+ for (const pattern of this.testPatterns) {
37
+ if (pattern.test(ctx.args.command)) {
38
+ this.testsPassed = false;
39
+ break;
40
+ }
41
+ }
42
+ }
43
+ // Intercept completion attempts
44
+ if (this.completionSignals.has(ctx.toolName)) {
45
+ if (!this.testsPassed) {
46
+ return ("⚠ You must run tests before completing the task, AND they must pass.\n" +
47
+ "Use the bash tool to execute your test suite (e.g., `npm test`, `vitest`, `pytest`).\n" +
48
+ "If tests fail, fix the issues. Once tests pass cleanly, you may attempt completion again.");
49
+ }
50
+ }
51
+ return ctx;
52
+ }
53
+ after(ctx, result) {
54
+ if (ctx.toolName === "bash" && typeof ctx.args.command === "string") {
55
+ for (const pattern of this.testPatterns) {
56
+ if (pattern.test(ctx.args.command)) {
57
+ // Robustly check the exact exit code from the tool metadata
58
+ if (result.metadata?.exitCode === 0) {
59
+ this.testsPassed = true;
60
+ }
61
+ else {
62
+ this.testsPassed = false;
63
+ }
64
+ break;
65
+ }
66
+ }
67
+ }
68
+ }
69
+ /**
70
+ * Returns whether the most recent test run in this session passed (exit code 0).
71
+ */
72
+ hasPassedTests() {
73
+ return this.testsPassed;
74
+ }
75
+ /**
76
+ * Resets state. Useful for testing or session boundaries.
77
+ */
78
+ reset() {
79
+ this.testsPassed = false;
80
+ }
81
+ }
82
+ //# sourceMappingURL=preCompletion.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"preCompletion.js","sourceRoot":"","sources":["../../src/middleware/preCompletion.ts"],"names":[],"mappings":"AAGA;;;;;;;;GAQG;AACH,MAAM,OAAO,uBAAuB;IACzB,IAAI,GAAG,eAAe,CAAC;IAExB,WAAW,GAAG,KAAK,CAAC;IAE5B,+DAA+D;IAC9C,YAAY,GAAG;QAC9B,YAAY;QACZ,UAAU;QACV,YAAY;QACZ,WAAW;QACX,gBAAgB;QAChB,sBAAsB;QACtB,iBAAiB;QACjB,iBAAiB;QACjB,eAAe;QACf,kBAAkB;KACnB,CAAC;IAEF,uEAAuE;IACtD,iBAAiB,GAAG,IAAI,GAAG,CAAC;QAC3C,eAAe;QACf,oBAAoB;QACpB,aAAa;QACb,eAAe;KAChB,CAAC,CAAC;IAEH,MAAM,CAAC,GAAoB;QACzB,gEAAgE;QAChE,IAAI,GAAG,CAAC,QAAQ,KAAK,MAAM,IAAI,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACpE,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gBACxC,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBACnC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;oBACzB,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,IAAI,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7C,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBACtB,OAAO,CACL,wEAAwE;oBACxE,wFAAwF;oBACxF,2FAA2F,CAC5F,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC;IAED,KAAK,CAAC,GAAoB,EAAE,MAAkB;QAC5C,IAAI,GAAG,CAAC,QAAQ,KAAK,MAAM,IAAI,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACpE,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gBACxC,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;oBACnC,4DAA4D;oBAC5D,IAAI,MAAM,CAAC,QAAQ,EAAE,QAAQ,KAAK,CAAC,EAAE,CAAC;wBACpC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;oBAC1B,CAAC;yBAAM,CAAC;wBACN,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;oBAC3B,CAAC;oBACD,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,cAAc;QACZ,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IAC3B,CAAC;CACF"}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Middleware types for the tool execution pipeline.
3
+ *
4
+ * Each middleware can hook into both the "before" and "after" phases
5
+ * of a tool call. The pipeline chains them in order.
6
+ */
7
+ import { ToolResult } from "../tools/index.js";
8
+ /**
9
+ * Context object passed through the middleware chain for each tool call.
10
+ */
11
+ export interface ToolCallContext {
12
+ /** Name of the tool being called (e.g., "bash", "read_file"). */
13
+ toolName: string;
14
+ /** Arguments passed to the tool. */
15
+ args: Record<string, any>;
16
+ /** Unique ID of this tool call (from the LLM response). */
17
+ callId: string;
18
+ }
19
+ /**
20
+ * A middleware that can intercept tool calls before and after execution.
21
+ *
22
+ * - `before()`: Runs before the tool executes. Return the (possibly modified)
23
+ * context to continue, or a `string` to short-circuit with an error/warning.
24
+ * - `after()`: Runs after the tool executes. Can transform the result before
25
+ * it enters the conversation history.
26
+ */
27
+ export interface ToolMiddleware {
28
+ /** Human-readable name for logging and debugging. */
29
+ name: string;
30
+ /**
31
+ * Pre-execution hook.
32
+ * @returns A ToolCallContext to continue with (it replaces the current context for later hooks and the tool), a string to short-circuit, or void to pass the context through unmodified.
33
+ */
34
+ before?(ctx: ToolCallContext): Promise<ToolCallContext | string | void> | ToolCallContext | string | void;
35
+ /**
36
+ * Post-execution hook.
37
+ * @returns The transformed tool result object, or void to pass through unmodified.
38
+ */
39
+ after?(ctx: ToolCallContext, result: ToolResult): Promise<ToolResult | void> | ToolResult | void;
40
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Middleware types for the tool execution pipeline.
3
+ *
4
+ * Each middleware can hook into both the "before" and "after" phases
5
+ * of a tool call. The pipeline chains them in order.
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/middleware/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}