joonecli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. package/AGENTS.md +56 -0
  2. package/Handover.md +115 -0
  3. package/LICENSE +201 -0
  4. package/PROGRESS.md +160 -0
  5. package/README.md +114 -0
  6. package/dist/__tests__/bootstrap.test.d.ts +1 -0
  7. package/dist/__tests__/bootstrap.test.js +76 -0
  8. package/dist/__tests__/bootstrap.test.js.map +1 -0
  9. package/dist/__tests__/config.test.d.ts +1 -0
  10. package/dist/__tests__/config.test.js +84 -0
  11. package/dist/__tests__/config.test.js.map +1 -0
  12. package/dist/__tests__/m55.test.d.ts +1 -0
  13. package/dist/__tests__/m55.test.js +160 -0
  14. package/dist/__tests__/m55.test.js.map +1 -0
  15. package/dist/__tests__/middleware.test.d.ts +1 -0
  16. package/dist/__tests__/middleware.test.js +169 -0
  17. package/dist/__tests__/middleware.test.js.map +1 -0
  18. package/dist/__tests__/modelFactory.test.d.ts +1 -0
  19. package/dist/__tests__/modelFactory.test.js +50 -0
  20. package/dist/__tests__/modelFactory.test.js.map +1 -0
  21. package/dist/__tests__/optimizations.test.d.ts +1 -0
  22. package/dist/__tests__/optimizations.test.js +136 -0
  23. package/dist/__tests__/optimizations.test.js.map +1 -0
  24. package/dist/__tests__/promptBuilder.test.d.ts +1 -0
  25. package/dist/__tests__/promptBuilder.test.js +108 -0
  26. package/dist/__tests__/promptBuilder.test.js.map +1 -0
  27. package/dist/__tests__/sandbox.test.d.ts +1 -0
  28. package/dist/__tests__/sandbox.test.js +78 -0
  29. package/dist/__tests__/sandbox.test.js.map +1 -0
  30. package/dist/__tests__/security.test.d.ts +1 -0
  31. package/dist/__tests__/security.test.js +86 -0
  32. package/dist/__tests__/security.test.js.map +1 -0
  33. package/dist/__tests__/streaming.test.d.ts +1 -0
  34. package/dist/__tests__/streaming.test.js +71 -0
  35. package/dist/__tests__/streaming.test.js.map +1 -0
  36. package/dist/__tests__/toolRouter.test.d.ts +1 -0
  37. package/dist/__tests__/toolRouter.test.js +37 -0
  38. package/dist/__tests__/toolRouter.test.js.map +1 -0
  39. package/dist/__tests__/tools.test.d.ts +1 -0
  40. package/dist/__tests__/tools.test.js +112 -0
  41. package/dist/__tests__/tools.test.js.map +1 -0
  42. package/dist/__tests__/tracing.test.d.ts +1 -0
  43. package/dist/__tests__/tracing.test.js +147 -0
  44. package/dist/__tests__/tracing.test.js.map +1 -0
  45. package/dist/cli/config.d.ts +49 -0
  46. package/dist/cli/config.js +86 -0
  47. package/dist/cli/config.js.map +1 -0
  48. package/dist/cli/index.d.ts +2 -0
  49. package/dist/cli/index.js +625 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/modelFactory.d.ts +9 -0
  52. package/dist/cli/modelFactory.js +154 -0
  53. package/dist/cli/modelFactory.js.map +1 -0
  54. package/dist/cli/providers.d.ts +18 -0
  55. package/dist/cli/providers.js +94 -0
  56. package/dist/cli/providers.js.map +1 -0
  57. package/dist/core/agentLoop.d.ts +43 -0
  58. package/dist/core/agentLoop.js +245 -0
  59. package/dist/core/agentLoop.js.map +1 -0
  60. package/dist/core/errors.d.ts +62 -0
  61. package/dist/core/errors.js +139 -0
  62. package/dist/core/errors.js.map +1 -0
  63. package/dist/core/promptBuilder.d.ts +49 -0
  64. package/dist/core/promptBuilder.js +84 -0
  65. package/dist/core/promptBuilder.js.map +1 -0
  66. package/dist/core/reasoningRouter.d.ts +62 -0
  67. package/dist/core/reasoningRouter.js +102 -0
  68. package/dist/core/reasoningRouter.js.map +1 -0
  69. package/dist/core/retry.d.ts +25 -0
  70. package/dist/core/retry.js +49 -0
  71. package/dist/core/retry.js.map +1 -0
  72. package/dist/core/sessionResumer.d.ts +17 -0
  73. package/dist/core/sessionResumer.js +78 -0
  74. package/dist/core/sessionResumer.js.map +1 -0
  75. package/dist/core/sessionStore.d.ts +45 -0
  76. package/dist/core/sessionStore.js +167 -0
  77. package/dist/core/sessionStore.js.map +1 -0
  78. package/dist/core/tokenCounter.d.ts +17 -0
  79. package/dist/core/tokenCounter.js +54 -0
  80. package/dist/core/tokenCounter.js.map +1 -0
  81. package/dist/evals/dataset.d.ts +4 -0
  82. package/dist/evals/dataset.js +61 -0
  83. package/dist/evals/dataset.js.map +1 -0
  84. package/dist/evals/evaluator.d.ts +21 -0
  85. package/dist/evals/evaluator.js +68 -0
  86. package/dist/evals/evaluator.js.map +1 -0
  87. package/dist/hitl/bridge.d.ts +65 -0
  88. package/dist/hitl/bridge.js +120 -0
  89. package/dist/hitl/bridge.js.map +1 -0
  90. package/dist/middleware/commandSanitizer.d.ts +18 -0
  91. package/dist/middleware/commandSanitizer.js +50 -0
  92. package/dist/middleware/commandSanitizer.js.map +1 -0
  93. package/dist/middleware/loopDetection.d.ts +28 -0
  94. package/dist/middleware/loopDetection.js +49 -0
  95. package/dist/middleware/loopDetection.js.map +1 -0
  96. package/dist/middleware/permission.d.ts +17 -0
  97. package/dist/middleware/permission.js +59 -0
  98. package/dist/middleware/permission.js.map +1 -0
  99. package/dist/middleware/pipeline.d.ts +31 -0
  100. package/dist/middleware/pipeline.js +62 -0
  101. package/dist/middleware/pipeline.js.map +1 -0
  102. package/dist/middleware/preCompletion.d.ts +29 -0
  103. package/dist/middleware/preCompletion.js +82 -0
  104. package/dist/middleware/preCompletion.js.map +1 -0
  105. package/dist/middleware/types.d.ts +40 -0
  106. package/dist/middleware/types.js +8 -0
  107. package/dist/middleware/types.js.map +1 -0
  108. package/dist/sandbox/bootstrap.d.ts +38 -0
  109. package/dist/sandbox/bootstrap.js +107 -0
  110. package/dist/sandbox/bootstrap.js.map +1 -0
  111. package/dist/sandbox/manager.d.ts +72 -0
  112. package/dist/sandbox/manager.js +180 -0
  113. package/dist/sandbox/manager.js.map +1 -0
  114. package/dist/sandbox/sync.d.ts +55 -0
  115. package/dist/sandbox/sync.js +135 -0
  116. package/dist/sandbox/sync.js.map +1 -0
  117. package/dist/skills/loader.d.ts +55 -0
  118. package/dist/skills/loader.js +132 -0
  119. package/dist/skills/loader.js.map +1 -0
  120. package/dist/skills/tools.d.ts +5 -0
  121. package/dist/skills/tools.js +78 -0
  122. package/dist/skills/tools.js.map +1 -0
  123. package/dist/skills/types.d.ts +13 -0
  124. package/dist/skills/types.js +2 -0
  125. package/dist/skills/types.js.map +1 -0
  126. package/dist/test_cache.d.ts +1 -0
  127. package/dist/test_cache.js +55 -0
  128. package/dist/test_cache.js.map +1 -0
  129. package/dist/test_google.js +93 -0
  130. package/dist/tools/askUser.d.ts +10 -0
  131. package/dist/tools/askUser.js +42 -0
  132. package/dist/tools/askUser.js.map +1 -0
  133. package/dist/tools/browser.d.ts +19 -0
  134. package/dist/tools/browser.js +111 -0
  135. package/dist/tools/browser.js.map +1 -0
  136. package/dist/tools/index.d.ts +27 -0
  137. package/dist/tools/index.js +184 -0
  138. package/dist/tools/index.js.map +1 -0
  139. package/dist/tools/registry.d.ts +31 -0
  140. package/dist/tools/registry.js +168 -0
  141. package/dist/tools/registry.js.map +1 -0
  142. package/dist/tools/router.d.ts +34 -0
  143. package/dist/tools/router.js +73 -0
  144. package/dist/tools/router.js.map +1 -0
  145. package/dist/tools/security.d.ts +28 -0
  146. package/dist/tools/security.js +183 -0
  147. package/dist/tools/security.js.map +1 -0
  148. package/dist/tools/webSearch.d.ts +6 -0
  149. package/dist/tools/webSearch.js +120 -0
  150. package/dist/tools/webSearch.js.map +1 -0
  151. package/dist/tracing/analyzer.d.ts +58 -0
  152. package/dist/tracing/analyzer.js +190 -0
  153. package/dist/tracing/analyzer.js.map +1 -0
  154. package/dist/tracing/langsmith.d.ts +38 -0
  155. package/dist/tracing/langsmith.js +50 -0
  156. package/dist/tracing/langsmith.js.map +1 -0
  157. package/dist/tracing/sessionTracer.d.ts +73 -0
  158. package/dist/tracing/sessionTracer.js +157 -0
  159. package/dist/tracing/sessionTracer.js.map +1 -0
  160. package/dist/tracing/types.d.ts +46 -0
  161. package/dist/tracing/types.js +5 -0
  162. package/dist/tracing/types.js.map +1 -0
  163. package/dist/ui/App.d.ts +24 -0
  164. package/dist/ui/App.js +172 -0
  165. package/dist/ui/App.js.map +1 -0
  166. package/dist/ui/components/HITLPrompt.d.ts +15 -0
  167. package/dist/ui/components/HITLPrompt.js +35 -0
  168. package/dist/ui/components/HITLPrompt.js.map +1 -0
  169. package/dist/ui/components/Header.d.ts +8 -0
  170. package/dist/ui/components/Header.js +6 -0
  171. package/dist/ui/components/Header.js.map +1 -0
  172. package/dist/ui/components/MessageBubble.d.ts +13 -0
  173. package/dist/ui/components/MessageBubble.js +17 -0
  174. package/dist/ui/components/MessageBubble.js.map +1 -0
  175. package/dist/ui/components/StatusBar.d.ts +21 -0
  176. package/dist/ui/components/StatusBar.js +34 -0
  177. package/dist/ui/components/StatusBar.js.map +1 -0
  178. package/dist/ui/components/StreamingText.d.ts +13 -0
  179. package/dist/ui/components/StreamingText.js +24 -0
  180. package/dist/ui/components/StreamingText.js.map +1 -0
  181. package/dist/ui/components/ToolCallPanel.d.ts +15 -0
  182. package/dist/ui/components/ToolCallPanel.js +18 -0
  183. package/dist/ui/components/ToolCallPanel.js.map +1 -0
  184. package/docs/01_insights_and_patterns.md +27 -0
  185. package/docs/02_edge_cases_and_mitigations.md +143 -0
  186. package/docs/03_initial_implementation_plan.md +66 -0
  187. package/docs/04_tech_stack_proposal.md +20 -0
  188. package/docs/05_prd.md +87 -0
  189. package/docs/06_user_stories.md +72 -0
  190. package/docs/07_system_architecture.md +138 -0
  191. package/docs/08_roadmap.md +200 -0
  192. package/e2b/Dockerfile +26 -0
  193. package/package.json +57 -0
  194. package/src/__tests__/bootstrap.test.ts +111 -0
  195. package/src/__tests__/config.test.ts +97 -0
  196. package/src/__tests__/m55.test.ts +238 -0
  197. package/src/__tests__/middleware.test.ts +219 -0
  198. package/src/__tests__/modelFactory.test.ts +63 -0
  199. package/src/__tests__/optimizations.test.ts +201 -0
  200. package/src/__tests__/promptBuilder.test.ts +141 -0
  201. package/src/__tests__/sandbox.test.ts +102 -0
  202. package/src/__tests__/security.test.ts +122 -0
  203. package/src/__tests__/streaming.test.ts +82 -0
  204. package/src/__tests__/toolRouter.test.ts +52 -0
  205. package/src/__tests__/tools.test.ts +146 -0
  206. package/src/__tests__/tracing.test.ts +196 -0
  207. package/src/agents/agentRegistry.ts +69 -0
  208. package/src/agents/agentSpec.ts +67 -0
  209. package/src/agents/builtinAgents.ts +142 -0
  210. package/src/cli/config.ts +124 -0
  211. package/src/cli/index.ts +730 -0
  212. package/src/cli/modelFactory.ts +174 -0
  213. package/src/cli/providers.ts +107 -0
  214. package/src/commands/builtinCommands.ts +293 -0
  215. package/src/commands/commandRegistry.ts +194 -0
  216. package/src/core/agentLoop.d.ts.map +1 -0
  217. package/src/core/agentLoop.ts +312 -0
  218. package/src/core/autoSave.ts +95 -0
  219. package/src/core/compactor.ts +252 -0
  220. package/src/core/contextGuard.ts +129 -0
  221. package/src/core/errors.ts +202 -0
  222. package/src/core/promptBuilder.d.ts.map +1 -0
  223. package/src/core/promptBuilder.ts +139 -0
  224. package/src/core/reasoningRouter.ts +121 -0
  225. package/src/core/retry.ts +75 -0
  226. package/src/core/sessionResumer.ts +90 -0
  227. package/src/core/sessionStore.ts +215 -0
  228. package/src/core/subAgent.ts +339 -0
  229. package/src/core/tokenCounter.ts +64 -0
  230. package/src/evals/dataset.ts +67 -0
  231. package/src/evals/evaluator.ts +81 -0
  232. package/src/hitl/bridge.ts +160 -0
  233. package/src/middleware/commandSanitizer.ts +60 -0
  234. package/src/middleware/loopDetection.ts +63 -0
  235. package/src/middleware/permission.ts +72 -0
  236. package/src/middleware/pipeline.ts +75 -0
  237. package/src/middleware/preCompletion.ts +94 -0
  238. package/src/middleware/types.ts +45 -0
  239. package/src/sandbox/bootstrap.ts +121 -0
  240. package/src/sandbox/manager.ts +239 -0
  241. package/src/sandbox/sync.ts +157 -0
  242. package/src/skills/loader.ts +143 -0
  243. package/src/skills/tools.ts +99 -0
  244. package/src/skills/types.ts +13 -0
  245. package/src/test_cache.ts +72 -0
  246. package/src/test_google.js +40 -0
  247. package/src/test_google.ts +40 -0
  248. package/src/tools/askUser.ts +47 -0
  249. package/src/tools/browser.ts +137 -0
  250. package/src/tools/index.d.ts.map +1 -0
  251. package/src/tools/index.ts +237 -0
  252. package/src/tools/registry.ts +198 -0
  253. package/src/tools/router.ts +78 -0
  254. package/src/tools/security.ts +220 -0
  255. package/src/tools/spawnAgent.ts +158 -0
  256. package/src/tools/webSearch.ts +142 -0
  257. package/src/tracing/analyzer.ts +265 -0
  258. package/src/tracing/langsmith.ts +63 -0
  259. package/src/tracing/sessionTracer.ts +202 -0
  260. package/src/tracing/types.ts +49 -0
  261. package/src/types/valyu.d.ts +37 -0
  262. package/src/ui/App.tsx +404 -0
  263. package/src/ui/components/HITLPrompt.tsx +119 -0
  264. package/src/ui/components/Header.tsx +51 -0
  265. package/src/ui/components/MessageBubble.tsx +46 -0
  266. package/src/ui/components/StatusBar.tsx +138 -0
  267. package/src/ui/components/StreamingText.tsx +48 -0
  268. package/src/ui/components/ToolCallPanel.tsx +80 -0
  269. package/tests/commands/commands.test.ts +356 -0
  270. package/tests/core/compactor.test.ts +217 -0
  271. package/tests/core/retryAndErrors.test.ts +164 -0
  272. package/tests/core/sessionResumer.test.ts +95 -0
  273. package/tests/core/sessionStore.test.ts +84 -0
  274. package/tests/core/stability.test.ts +165 -0
  275. package/tests/core/subAgent.test.ts +238 -0
  276. package/tests/hitl/hitlBridge.test.ts +115 -0
  277. package/tsconfig.json +16 -0
  278. package/vitest.config.ts +10 -0
  279. package/vitest.out +48 -0
@@ -0,0 +1,52 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+ import { ToolRouter, ToolTarget } from "../tools/router.js";
3
+
4
/**
 * ToolRouter routing tests (Tests #20–#26).
 *
 * Each case asserts the execution target that `getTarget` reports for a tool
 * name. The final case pins the fallback: a name the router does not know is
 * expected to resolve to the sandbox.
 */
describe("Tool Router", () => {
  let router: ToolRouter;

  // A fresh router per test keeps cases independent of one another.
  beforeEach(() => {
    router = new ToolRouter();
  });

  // One row per test: the test title, the tool name queried, the expected target.
  const routingCases = [
    // Test #20
    { title: "routes write_file to the host", tool: "write_file", expected: ToolTarget.HOST },
    // Test #21
    { title: "routes read_file to the host", tool: "read_file", expected: ToolTarget.HOST },
    // Test #22
    { title: "routes bash to the sandbox", tool: "bash", expected: ToolTarget.SANDBOX },
    // Test #23
    { title: "routes run_tests to the sandbox", tool: "run_tests", expected: ToolTarget.SANDBOX },
    // Test #24
    { title: "routes install_deps to the sandbox", tool: "install_deps", expected: ToolTarget.SANDBOX },
    // Test #25
    { title: "routes search_tools to the host", tool: "search_tools", expected: ToolTarget.HOST },
    // Test #26: unknown tools default to sandbox (safe)
    { title: "defaults unknown tools to sandbox for safety", tool: "unknown_tool", expected: ToolTarget.SANDBOX },
  ];

  for (const { title, tool, expected } of routingCases) {
    it(title, () => {
      expect(router.getTarget(tool)).toBe(expected);
    });
  }
});
@@ -0,0 +1,146 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import * as os from "node:os";
5
+ import { ReadFileTool, WriteFileTool } from "../tools/index.js";
6
+
7
/**
 * ReadFileTool tests (Tests #27–#31 plus the workspace security guardrail).
 *
 * Fixtures live in a throw-away directory created under `process.cwd()`;
 * the security test deliberately uses a directory under `os.tmpdir()` instead.
 */
describe("ReadFileTool", () => {
  let tmpDir: string;

  /** Writes a UTF-8 fixture into the per-test directory and returns its path. */
  const writeFixture = (name: string, content: string): string => {
    const target = path.join(tmpDir, name);
    fs.writeFileSync(target, content, "utf-8");
    return target;
  };

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(process.cwd(), ".joone-tools-test-"));
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  // ─── Test #27: Reads a normal file ───

  it("reads a small file and returns its content", async () => {
    const filePath = writeFixture("hello.txt", "Hello, world!");

    const { content } = await ReadFileTool.execute({ path: filePath });

    expect(content).toBe("Hello, world!");
  });

  // ─── Test #28: Returns error for non-existent file ───

  it("returns an error message for a non-existent file", async () => {
    const missingPath = path.join(tmpDir, "nope.txt");

    const result = await ReadFileTool.execute({ path: missingPath });

    expect(result.content).toMatch(/not found/i);
  });

  // ─── Test #29: File size guardrail rejects files over 512KB ───

  it("rejects files larger than 512KB with a descriptive error", async () => {
    // 600KB of filler puts the file above the 512KB limit named in the title.
    const filePath = writeFixture("big.txt", "x".repeat(600 * 1024));

    const result = await ReadFileTool.execute({ path: filePath });

    expect(result.content).toMatch(/too large/i);
    expect(result.content).toMatch(/512/);
  });

  // ─── Test #30: Line range slicing works ───

  it("returns only the requested line range", async () => {
    const body = Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`).join("\n");
    const filePath = writeFixture("lines.txt", body);

    const result = await ReadFileTool.execute({
      path: filePath,
      startLine: 5,
      endLine: 7,
    });

    // Lines inside the requested window appear with their line-number prefix…
    for (const n of [5, 6, 7]) {
      expect(result.content).toContain(`${n}: Line ${n}`);
    }
    // …and the immediate neighbours on either side do not.
    for (const n of [4, 8]) {
      expect(result.content).not.toContain(`${n}: Line ${n}`);
    }
  });

  // ─── Test #31: Line count guardrail truncates long files ───

  it("truncates files with more than 2000 lines", async () => {
    // 2500 short lines: over the line limit while staying under 512KB.
    const body = Array.from({ length: 2500 }, (_, i) => `L${i + 1}`).join("\n");
    const filePath = writeFixture("long.txt", body);

    const result = await ReadFileTool.execute({ path: filePath });

    expect(result.content).toMatch(/truncated at 2000 lines/i);
    expect(result.content).toContain("1: L1");
    expect(result.content).toContain("2000: L2000");
    expect(result.content).not.toContain("2001: L2001");
  });

  // ─── Test #X: Security Guardrail Blocks Outside Files ───

  it("blocks reading files outside the project workspace", async () => {
    // A directory under the OS temp dir is used as the "outside the workspace" location.
    const outsideDir = fs.mkdtempSync(path.join(os.tmpdir(), "joone-outside-"));
    const filePath = path.join(outsideDir, "secret.txt");
    fs.writeFileSync(filePath, "secret token", "utf-8");

    try {
      const result = await ReadFileTool.execute({ path: filePath });
      expect(result.isError).toBe(true);
      expect(result.content).toMatch(/Security Error: Access Denied/i);
      expect(result.content).toMatch(/outside the current project workspace/i);
    } finally {
      // Always remove the outside directory, even when an assertion fails.
      fs.rmSync(outsideDir, { recursive: true, force: true });
    }
  });
});
107
+
108
/**
 * WriteFileTool tests (Tests #32–#33).
 *
 * Output goes to a throw-away directory created under `process.cwd()` and
 * removed after every test.
 */
describe("WriteFileTool", () => {
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(process.cwd(), ".joone-write-test-"));
  });

  afterEach(() => {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  // ─── Test #32: Writes a file to disk ───

  it("writes content to a file and confirms", async () => {
    const target = path.join(tmpDir, "output.ts");
    const source = "const x = 42;";

    const { content } = await WriteFileTool.execute({ path: target, content: source });

    expect(content).toMatch(/file written/i);
    // Read the file back to confirm the bytes on disk match what was requested.
    expect(fs.readFileSync(target, "utf-8")).toBe("const x = 42;");
  });

  // ─── Test #33: Creates parent directories if needed ───

  it("creates parent directories if they do not exist", async () => {
    // Neither "nested" nor "nested/deep" exists before the call.
    const target = path.join(tmpDir, "nested", "deep", "file.ts");

    const { content } = await WriteFileTool.execute({ path: target, content: "export {}" });

    expect(content).toMatch(/file written/i);
    expect(fs.existsSync(target)).toBe(true);
  });
});
@@ -0,0 +1,196 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import * as os from "node:os";
5
+ import { SessionTracer } from "../tracing/sessionTracer.js";
6
+ import {
7
+ enableLangSmith,
8
+ disableLangSmith,
9
+ isLangSmithEnabled,
10
+ } from "../tracing/langsmith.js";
11
+ import { TraceAnalyzer } from "../tracing/analyzer.js";
12
+ import type { SessionTrace } from "../tracing/types.js";
13
+
14
+ // ═══════════════════════════════════════════════════════════════════════════════
15
+ // 6a: SessionTracer
16
+ // ═══════════════════════════════════════════════════════════════════════════════
17
+
18
/**
 * SessionTracer tests (Tests #83–#86).
 *
 * Each test feeds a small scripted sequence of events into a fresh tracer and
 * checks the aggregate figures reported by `getSummary()` / `export()`.
 */
describe("SessionTracer", () => {
  // ─── Test #83: Records LLM calls and computes totals ───

  it("records LLM calls and computes token totals", () => {
    const tracer = new SessionTracer("test-session-1");
    const llmCalls = [
      { promptTokens: 500, completionTokens: 100, cached: false, duration: 800 },
      { promptTokens: 400, completionTokens: 150, cached: true, duration: 600 },
    ];

    for (const call of llmCalls) {
      tracer.recordLLMCall(call);
    }

    const { promptTokens, completionTokens, totalTokens, turnCount } = tracer.getSummary();

    expect(promptTokens).toBe(900);
    expect(completionTokens).toBe(250);
    expect(totalTokens).toBe(1150);
    expect(turnCount).toBe(2);
  });

  // ─── Test #84: Records tool calls and counts them ───

  it("records tool calls and counts them", () => {
    const tracer = new SessionTracer("test-session-2");
    const toolCalls = [
      { name: "bash", args: { command: "ls" }, duration: 50, success: true },
      { name: "write_file", args: { path: "a.ts" }, duration: 30, success: true },
      { name: "bash", args: { command: "npm test" }, duration: 200, success: false },
    ];

    for (const call of toolCalls) {
      tracer.recordToolCall(call);
    }

    // The failed call is expected to be counted along with the successful ones.
    expect(tracer.getSummary().toolCallCount).toBe(3);
  });

  // ─── Test #85: Computes cache hit rate correctly ───

  it("computes cache hit rate correctly", () => {
    const tracer = new SessionTracer("test-session-3");

    // Three equally sized calls: the first two cached, the last one not.
    for (const cached of [true, true, false]) {
      tracer.recordLLMCall({ promptTokens: 100, completionTokens: 50, cached, duration: 100 });
    }

    const summary = tracer.getSummary();

    // 200 cached out of 300 total prompt tokens = 66.7%
    expect(summary.cacheHitRate).toBeCloseTo(0.667, 2);
  });

  // ─── Test #86: export() returns valid SessionTrace ───

  it("export() returns a valid SessionTrace", () => {
    const tracer = new SessionTracer("export-test");

    tracer.recordLLMCall({ promptTokens: 100, completionTokens: 50, cached: true, duration: 200 });
    tracer.recordError({ message: "Timeout", tool: "bash" });

    const exported = tracer.export();

    expect(exported.sessionId).toBe("export-test");
    expect(exported.startedAt).toBeGreaterThan(0);
    expect(exported.endedAt).toBeGreaterThanOrEqual(exported.startedAt);
    // One LLM call plus one error.
    expect(exported.events).toHaveLength(2);
    expect(exported.summary.turnCount).toBe(1);
    expect(exported.summary.errorCount).toBe(1);
  });
});
83
+
84
+ // ═══════════════════════════════════════════════════════════════════════════════
85
+ // 6b: LangSmith Integration
86
+ // ═══════════════════════════════════════════════════════════════════════════════
87
+
88
/**
 * LangSmith toggle tests (Tests #87–#88).
 *
 * The helpers under test are observed through `process.env`, so every test is
 * followed by `disableLangSmith()` to avoid leaking state into later tests.
 */
describe("LangSmith Integration", () => {
  afterEach(() => {
    disableLangSmith();
  });

  // ─── Test #87: enableLangSmith sets correct env vars ───

  it("sets the correct environment variables", () => {
    const options = { apiKey: "test-key-123", project: "my-project" };

    enableLangSmith(options);

    const { LANGCHAIN_TRACING_V2, LANGCHAIN_API_KEY, LANGCHAIN_PROJECT } = process.env;
    expect(LANGCHAIN_TRACING_V2).toBe("true");
    expect(LANGCHAIN_API_KEY).toBe("test-key-123");
    expect(LANGCHAIN_PROJECT).toBe("my-project");
    expect(isLangSmithEnabled()).toBe(true);
  });

  // ─── Test #88: disableLangSmith clears env vars ───

  it("disableLangSmith clears the environment variables", () => {
    // Enable first so the subsequent disable has something to undo.
    enableLangSmith({ apiKey: "test-key" });
    disableLangSmith();

    expect(process.env.LANGCHAIN_TRACING_V2).toBeUndefined();
    expect(isLangSmithEnabled()).toBe(false);
  });
});
114
+
115
+ // ═══════════════════════════════════════════════════════════════════════════════
116
+ // 6c: TraceAnalyzer
117
+ // ═══════════════════════════════════════════════════════════════════════════════
118
+
119
/**
 * TraceAnalyzer tests (Tests #89–#91).
 *
 * Every test starts from the baseline trace produced by `createTrace` and
 * overrides only the fields relevant to the issue category being checked.
 */
describe("TraceAnalyzer", () => {
  /**
   * Builds a baseline ten-second trace with no events.
   * Top-level fields in `overrides` replace the baseline values wholesale.
   */
  function createTrace(overrides: Partial<SessionTrace> = {}): SessionTrace {
    return {
      sessionId: "test",
      startedAt: Date.now() - 10000,
      endedAt: Date.now(),
      events: [],
      summary: {
        totalTokens: 1000,
        promptTokens: 700,
        completionTokens: 300,
        totalCost: 0.006,
        cacheHitRate: 0.8,
        toolCallCount: 5,
        errorCount: 0,
        totalDuration: 10000,
        turnCount: 5,
      },
      ...overrides,
    };
  }

  /** Analyzes `trace` and returns only the reported issues in `category`. */
  const issuesIn = (trace: SessionTrace, category: string) =>
    new TraceAnalyzer(trace).analyze().issues.filter((issue) => issue.category === category);

  // ─── Test #89: Detects loop patterns ───

  it("detects doom-loop patterns in tool calls", () => {
    // The same bash command issued three times in a row.
    const trace = createTrace({
      events: [
        { type: "tool_call", timestamp: 1, data: { name: "bash", args: { command: "ls" } } },
        { type: "tool_call", timestamp: 2, data: { name: "bash", args: { command: "ls" } } },
        { type: "tool_call", timestamp: 3, data: { name: "bash", args: { command: "ls" } } },
      ],
    });

    const loopIssues = issuesIn(trace, "loop");

    expect(loopIssues.length).toBeGreaterThan(0);
    expect(loopIssues[0].severity).toBe("critical");
  });

  // ─── Test #90: Detects cost hotspots ───

  it("flags turns consuming >20% of total tokens", () => {
    const baseline = createTrace().summary;
    // The first call alone uses 400 of the 1000 total tokens.
    const trace = createTrace({
      summary: { ...baseline, totalTokens: 1000 },
      events: [
        { type: "llm_call", timestamp: 1, data: { promptTokens: 300, completionTokens: 100, cached: false } },
        { type: "llm_call", timestamp: 2, data: { promptTokens: 100, completionTokens: 50, cached: true } },
      ],
    });

    const costIssues = issuesIn(trace, "cost");

    expect(costIssues.length).toBeGreaterThan(0);
  });

  // ─── Test #91: Warns on low cache hit rate ───

  it("warns when cache hit rate is below 70%", () => {
    const baseline = createTrace().summary;
    const trace = createTrace({
      summary: { ...baseline, cacheHitRate: 0.5, turnCount: 5 },
    });

    const cacheIssues = issuesIn(trace, "cache");

    expect(cacheIssues.length).toBe(1);
    // The message is expected to quote the rate as a percentage with one decimal.
    expect(cacheIssues[0].message).toContain("50.0%");
  });
});
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Agent Registry
3
+ *
4
+ * Central registry for named sub-agents. The registry enables:
5
+ * - Decoupled agent development (add agents without touching the main loop)
6
+ * - Prompt injection (registry summary included in the main agent's system prompt)
7
+ * - Lookup by name for the spawn_agent tool
8
+ */
9
+
10
+ import { AgentSpec } from "./agentSpec.js";
11
+
12
/**
 * In-memory registry of named sub-agent specs.
 *
 * Specs are keyed by `spec.name`. Iteration order (`getAll`, `getNames`,
 * `getSummary`) follows `Map` insertion order.
 */
export class AgentRegistry {
  /** Turn limit shown in the summary when a spec does not set `maxTurns`. */
  private static readonly DEFAULT_MAX_TURNS = 10;

  /** Registered specs keyed by name; the map itself is never reassigned. */
  private readonly agents = new Map<string, AgentSpec>();

  /**
   * Register a new agent spec. Overwrites if name already exists.
   *
   * @param spec - The spec to store under `spec.name`.
   */
  register(spec: AgentSpec): void {
    this.agents.set(spec.name, spec);
  }

  /**
   * Look up an agent by name.
   *
   * @param name - The registered agent name.
   * @returns The matching spec, or `undefined` when no agent has that name.
   */
  get(name: string): AgentSpec | undefined {
    return this.agents.get(name);
  }

  /**
   * Returns all registered agent specs as a new array.
   */
  getAll(): AgentSpec[] {
    return Array.from(this.agents.values());
  }

  /**
   * Returns all registered agent names as a new array.
   */
  getNames(): string[] {
    return Array.from(this.agents.keys());
  }

  /**
   * Returns true if an agent with the given name exists.
   *
   * @param name - The agent name to test.
   */
  has(name: string): boolean {
    return this.agents.has(name);
  }

  /**
   * Generates a summary of all available agents, formatted for injection
   * into the main agent's system prompt.
   *
   * @returns A fixed sentence when the registry is empty; otherwise a header
   *   line, a blank line, and one bullet per agent giving its name,
   *   description, tool restriction, and turn limit.
   */
  getSummary(): string {
    if (this.agents.size === 0) {
      return "No sub-agents are currently registered.";
    }

    // The header carries its own trailing "\n", so joining with "\n" below
    // leaves a blank line between the header and the first bullet.
    const lines = ["Available sub-agents (use spawn_agent tool to invoke):\n"];

    for (const spec of this.agents.values()) {
      const tools = AgentRegistry.describeTools(spec.tools);
      const turns = spec.maxTurns ?? AgentRegistry.DEFAULT_MAX_TURNS;
      lines.push(` • ${spec.name}: ${spec.description}${tools} (max ${turns} turns)`);
    }

    return lines.join("\n");
  }

  /**
   * Renders the tool-restriction fragment of a summary bullet.
   *
   * An omitted list means "all tools". An explicitly empty list is a
   * restriction to zero tools and is labelled as such rather than being
   * rendered as an empty `[tools: ]` fragment.
   */
  private static describeTools(tools: string[] | undefined): string {
    if (!tools) {
      return " [all tools]";
    }
    if (tools.length === 0) {
      return " [tools: none]";
    }
    return ` [tools: ${tools.join(", ")}]`;
  }
}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Agent Specification
3
+ *
4
+ * Defines the shape of a sub-agent: its identity, capabilities, constraints,
5
+ * and tools. This enables decoupled agent development — new agents can be
6
+ * added to the registry without modifying the main agent or harness.
7
+ */
8
+
9
/**
 * Configuration record for one named sub-agent.
 *
 * Only `name`, `description`, and `systemPrompt` are required; every other
 * field is an optional override.
 */
export interface AgentSpec {
  /** Unique identifier for the agent, e.g. "script_runner" or "code_reviewer". */
  name: string;

  /** Human-readable summary that is shown in the main agent's prompt. */
  description: string;

  /** System prompt used exclusively by this sub-agent. */
  systemPrompt: string;

  /**
   * Allow-list of tool names. When the field is omitted, all of the main
   * agent's tools are available.
   */
  tools?: string[];

  /**
   * Upper bound on turns before the sub-agent is forcibly stopped
   * (doom-loop protection). Default: 10.
   */
  maxTurns?: number;

  /**
   * Model override for this agent. When omitted, the default comes from
   * FAST_MODEL_DEFAULTS for the same provider.
   */
  model?: string;

  /** Permission behavior for this agent. */
  permissionMode?: "auto" | "ask_all";
}
34
+
35
/**
 * Structured report produced when a sub-agent finishes or fails a task.
 *
 * This report is the only thing injected into the main agent's history; the
 * sub-agent's full conversation is discarded to save context.
 */
export interface SubAgentResult {
  /** Name of the agent, taken from its AgentSpec. */
  agentName: string;

  /** The task description the sub-agent was originally given. */
  taskDescription: string;

  /** How the task ended. */
  outcome: "success" | "failure" | "partial";

  /** Final text output of the sub-agent. */
  result: string;

  /** Paths of files created, modified, or deleted during the sub-task. */
  filesModified: string[];

  /** Total number of tool calls executed. */
  toolCallCount: number;

  /** Approximate token usage, split into prompt and completion tokens. */
  tokenUsage: { prompt: number; completion: number };

  /** Wall-clock duration in milliseconds. */
  duration: number;

  /** Number of turns the sub-agent ran. */
  turnsUsed: number;
}
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Built-in Agent Specs
3
+ *
4
+ * Pre-configured sub-agents for common coding tasks. Each agent has a
5
+ * purpose-tuned system prompt and restricted tool access. This enables
6
+ * decoupled agent development — new agents are added here without
7
+ * touching the main agent or harness.
8
+ */
9
+
10
+ import { AgentSpec } from "./agentSpec.js";
11
+ import { AgentRegistry } from "./agentRegistry.js";
12
+
13
+ // ─── Script Runner ──────────────────────────────────────────────────────────────
14
+
15
/**
 * Built-in sub-agent for executing scripts and commands and reporting
 * their output. Restricted to the `bash` and `read_file` tools with a
 * turn limit of 8.
 */
export const ScriptRunnerAgent: AgentSpec = {
  name: "script_runner",
  description: "Execute and test scripts, return stdout/stderr and exit codes",
  // The prompt is assembled line by line; joining with "\n" yields the same
  // multi-line text a template literal would.
  systemPrompt: [
    "You are a script execution agent. Your task is to run scripts and commands, capturing their output.",
    "",
    "Rules:",
    "- Run the commands/scripts as specified in the task",
    "- Capture ALL stdout and stderr output",
    "- Report the exit code",
    "- If the script fails, analyze the error and report the likely cause",
    "- Do NOT modify any files unless explicitly asked",
    "- Summarize the results clearly at the end",
  ].join("\n"),
  tools: ["bash", "read_file"],
  maxTurns: 8,
  permissionMode: "auto",
};
31
+
32
+ // ─── Code Reviewer ──────────────────────────────────────────────────────────────
33
+
34
/**
 * Built-in sub-agent for reviewing code and suggesting improvements.
 * Restricted to the `read_file` and `bash` tools with a turn limit of 6.
 */
export const CodeReviewerAgent: AgentSpec = {
  name: "code_reviewer",
  description: "Review code changes and suggest improvements",
  // Prompt lines are joined with "\n" to form the multi-line prompt text.
  systemPrompt: [
    "You are a code review agent. Your task is to analyze code files and provide quality feedback.",
    "",
    "Rules:",
    "- Read the specified files and analyze them",
    "- Look for: bugs, security issues, code smells, missing error handling, performance issues",
    "- Check style consistency and naming conventions",
    "- Provide specific, actionable suggestions with line numbers",
    "- Rate overall quality: 1-5 stars",
    "- Be constructive and specific — avoid vague feedback",
  ].join("\n"),
  tools: ["read_file", "bash"],
  maxTurns: 6,
  permissionMode: "auto",
};
50
+
51
+ // ─── Test Runner ────────────────────────────────────────────────────────────────
52
+
53
/**
 * Built-in sub-agent for running test suites, diagnosing failures and
 * suggesting fixes. Restricted to the `bash`, `read_file` and `write_file`
 * tools with a turn limit of 10.
 */
export const TestRunnerAgent: AgentSpec = {
  name: "test_runner",
  description: "Run test suites, diagnose failures, and suggest fixes",
  // Prompt lines are joined with "\n" to form the multi-line prompt text.
  systemPrompt: [
    "You are a test execution agent. Your task is to run tests and analyze the results.",
    "",
    "Rules:",
    "- Execute the specified test command(s)",
    "- Parse test output to identify passing, failing, and skipped tests",
    "- For failures: read the relevant source files to diagnose the cause",
    "- Suggest specific fixes for failing tests",
    "- Report: total passes, failures, skips, and coverage if available",
    "- If asked to fix tests, you may write corrected test files",
  ].join("\n"),
  tools: ["bash", "read_file", "write_file"],
  maxTurns: 10,
  permissionMode: "auto",
};
69
+
70
+ // ─── File Analyst ───────────────────────────────────────────────────────────────
71
+
72
/**
 * Built-in sub-agent for analyzing project structure, patterns and
 * metrics. Restricted to the `bash` and `read_file` tools with a turn
 * limit of 6.
 */
export const FileAnalystAgent: AgentSpec = {
  name: "file_analyst",
  description: "Analyze project structure, find patterns, count metrics",
  // Prompt lines are joined with "\n" to form the multi-line prompt text.
  systemPrompt: [
    "You are a file analysis agent. Your task is to analyze the project structure and report findings.",
    "",
    "Rules:",
    "- Use bash commands (find, grep, wc, etc.) to analyze the project",
    "- Report: file counts by type, line counts, directory structure",
    "- Identify patterns: naming conventions, common imports, dependency usage",
    "- Highlight anything unusual or noteworthy",
    "- Present results in a clear, structured format",
  ].join("\n"),
  tools: ["bash", "read_file"],
  maxTurns: 6,
  permissionMode: "auto",
};
87
+
88
+ // ─── Security Auditor ───────────────────────────────────────────────────────────
89
+
90
/**
 * Built-in sub-agent for running security scans and reporting
 * vulnerabilities. Restricted to the `bash` and `read_file` tools with a
 * turn limit of 8.
 */
export const SecurityAuditorAgent: AgentSpec = {
  name: "security_auditor",
  description: "Run security scans and report vulnerabilities",
  // Prompt lines are joined with "\n" to form the multi-line prompt text.
  systemPrompt: [
    "You are a security audit agent. Your task is to check for security issues in the codebase.",
    "",
    "Rules:",
    "- Check for: hardcoded secrets, SQL injection, XSS, insecure dependencies",
    "- Run available security scanning tools",
    "- Read configuration files for security misconfigurations",
    "- Rate severity: Critical, High, Medium, Low, Info",
    "- Provide remediation steps for each finding",
    "- Do NOT expose actual secret values in your report",
  ].join("\n"),
  tools: ["bash", "read_file"],
  maxTurns: 8,
  permissionMode: "auto",
};
106
+
107
+ // ─── Browser Agent ──────────────────────────────────────────────────────────────
108
+
109
/**
 * Built-in sub-agent for browsing URLs and extracting page content.
 * Restricted to the `bash` tool with a turn limit of 6.
 */
export const BrowserAgent: AgentSpec = {
  name: "browser_agent",
  description: "Browse URLs, extract content, analyze web pages",
  // Prompt lines are joined with "\n" to form the multi-line prompt text.
  systemPrompt: [
    "You are a web browsing agent. Your task is to access URLs and extract information.",
    "",
    "Rules:",
    "- Navigate to the specified URL(s)",
    "- Extract text content, titles, metadata as requested",
    "- Summarize the page content clearly",
    "- Report any errors (404, timeouts, etc.)",
    "- Do NOT submit forms or make purchases unless explicitly instructed",
    "- If the page requires authentication, report that you cannot access it",
  ].join("\n"),
  tools: ["bash"],
  maxTurns: 6,
  permissionMode: "auto",
};
125
+
126
+ // ─── Registry Factory ───────────────────────────────────────────────────────────
127
+
128
/**
 * Builds a fresh AgentRegistry and registers every built-in agent spec
 * defined in this file.
 *
 * @returns A new registry with the built-in agents registered.
 */
export function createDefaultAgentRegistry(): AgentRegistry {
  // Listed in the order they are registered.
  const builtInSpecs: AgentSpec[] = [
    ScriptRunnerAgent,
    CodeReviewerAgent,
    TestRunnerAgent,
    FileAnalystAgent,
    SecurityAuditorAgent,
    BrowserAgent,
  ];

  const registry = new AgentRegistry();
  for (const spec of builtInSpecs) {
    registry.register(spec);
  }
  return registry;
}