skimpyclaw 0.3.14 → 0.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/README.md +47 -37
  2. package/dist/__tests__/adapter-types.test.d.ts +4 -0
  3. package/dist/__tests__/adapter-types.test.js +63 -0
  4. package/dist/__tests__/anthropic-adapter.test.d.ts +4 -0
  5. package/dist/__tests__/anthropic-adapter.test.js +264 -0
  6. package/dist/__tests__/api.test.js +0 -1
  7. package/dist/__tests__/cli.integration.test.js +2 -4
  8. package/dist/__tests__/cli.test.js +0 -1
  9. package/dist/__tests__/code-agents-notifications.test.js +137 -0
  10. package/dist/__tests__/code-agents-parser.test.js +19 -1
  11. package/dist/__tests__/code-agents-preflight.test.js +3 -28
  12. package/dist/__tests__/code-agents-utils.test.js +34 -9
  13. package/dist/__tests__/code-agents-worktrees.test.js +116 -0
  14. package/dist/__tests__/codex-adapter.test.js +184 -0
  15. package/dist/__tests__/codex-auth.test.js +66 -0
  16. package/dist/__tests__/codex-provider-gating.test.js +35 -0
  17. package/dist/__tests__/codex-unified-loop.test.js +111 -0
  18. package/dist/__tests__/config-security.test.js +127 -0
  19. package/dist/__tests__/config.test.js +23 -0
  20. package/dist/__tests__/context-manager.test.js +243 -164
  21. package/dist/__tests__/cron-run.test.js +250 -0
  22. package/dist/__tests__/cron.test.js +12 -38
  23. package/dist/__tests__/digests.test.js +67 -0
  24. package/dist/__tests__/discord-attachments.test.js +211 -0
  25. package/dist/__tests__/discord-docs.test.d.ts +1 -0
  26. package/dist/__tests__/discord-docs.test.js +27 -0
  27. package/dist/__tests__/discord-thread-agents.test.d.ts +1 -0
  28. package/dist/__tests__/discord-thread-agents.test.js +115 -0
  29. package/dist/__tests__/discord-thread-context.test.d.ts +1 -0
  30. package/dist/__tests__/discord-thread-context.test.js +42 -0
  31. package/dist/__tests__/doctor.formatters.test.js +4 -4
  32. package/dist/__tests__/doctor.index.test.js +1 -1
  33. package/dist/__tests__/doctor.runner.test.js +3 -15
  34. package/dist/__tests__/env-sanitizer.test.d.ts +1 -0
  35. package/dist/__tests__/env-sanitizer.test.js +45 -0
  36. package/dist/__tests__/exec-approval.test.js +61 -0
  37. package/dist/__tests__/fetch-tool.test.d.ts +1 -0
  38. package/dist/__tests__/fetch-tool.test.js +85 -0
  39. package/dist/__tests__/gateway-status-auth.test.d.ts +1 -0
  40. package/dist/__tests__/gateway-status-auth.test.js +72 -0
  41. package/dist/__tests__/heartbeat.test.js +3 -3
  42. package/dist/__tests__/interactive-sessions.test.d.ts +1 -0
  43. package/dist/__tests__/interactive-sessions.test.js +96 -0
  44. package/dist/__tests__/langfuse.test.js +6 -18
  45. package/dist/__tests__/model-selection.test.js +3 -4
  46. package/dist/__tests__/providers-init.test.js +2 -8
  47. package/dist/__tests__/providers-routing.test.js +1 -1
  48. package/dist/__tests__/providers-utils.test.js +13 -3
  49. package/dist/__tests__/sessions.test.js +14 -10
  50. package/dist/__tests__/setup.test.js +12 -29
  51. package/dist/__tests__/skills.test.js +10 -7
  52. package/dist/__tests__/stream-formatter.test.d.ts +1 -0
  53. package/dist/__tests__/stream-formatter.test.js +114 -0
  54. package/dist/__tests__/token-efficiency.test.js +131 -15
  55. package/dist/__tests__/tool-loop.test.d.ts +4 -0
  56. package/dist/__tests__/tool-loop.test.js +505 -0
  57. package/dist/__tests__/tools.test.js +101 -276
  58. package/dist/__tests__/utils.test.d.ts +1 -0
  59. package/dist/__tests__/utils.test.js +14 -0
  60. package/dist/__tests__/voice.test.js +21 -0
  61. package/dist/agent.js +35 -4
  62. package/dist/api.js +113 -37
  63. package/dist/channels/discord/attachments.d.ts +50 -0
  64. package/dist/channels/discord/attachments.js +137 -0
  65. package/dist/channels/discord/delegation.d.ts +5 -0
  66. package/dist/channels/discord/delegation.js +136 -0
  67. package/dist/channels/discord/handlers.js +694 -7
  68. package/dist/channels/discord/index.d.ts +16 -1
  69. package/dist/channels/discord/index.js +64 -1
  70. package/dist/channels/discord/thread-agents.d.ts +54 -0
  71. package/dist/channels/discord/thread-agents.js +323 -0
  72. package/dist/channels/discord/threads.d.ts +58 -0
  73. package/dist/channels/discord/threads.js +192 -0
  74. package/dist/channels/discord/types.js +4 -2
  75. package/dist/channels/discord/utils.d.ts +16 -0
  76. package/dist/channels/discord/utils.js +86 -6
  77. package/dist/channels/telegram/index.d.ts +1 -1
  78. package/dist/channels/telegram/types.js +1 -1
  79. package/dist/channels/telegram/utils.js +9 -3
  80. package/dist/channels.d.ts +1 -1
  81. package/dist/cli.js +20 -400
  82. package/dist/code-agents/executor.d.ts +1 -1
  83. package/dist/code-agents/executor.js +101 -45
  84. package/dist/code-agents/index.d.ts +2 -7
  85. package/dist/code-agents/index.js +111 -80
  86. package/dist/code-agents/interactive-resume.d.ts +6 -0
  87. package/dist/code-agents/interactive-resume.js +98 -0
  88. package/dist/code-agents/interactive-sessions.d.ts +20 -0
  89. package/dist/code-agents/interactive-sessions.js +132 -0
  90. package/dist/code-agents/parser.js +5 -1
  91. package/dist/code-agents/registry.d.ts +7 -1
  92. package/dist/code-agents/registry.js +11 -23
  93. package/dist/code-agents/stream-formatter.d.ts +8 -0
  94. package/dist/code-agents/stream-formatter.js +92 -0
  95. package/dist/code-agents/types.d.ts +16 -24
  96. package/dist/code-agents/utils.d.ts +35 -11
  97. package/dist/code-agents/utils.js +349 -95
  98. package/dist/code-agents/worktrees.d.ts +37 -0
  99. package/dist/code-agents/worktrees.js +116 -0
  100. package/dist/config.d.ts +2 -4
  101. package/dist/config.js +123 -23
  102. package/dist/cron.d.ts +1 -6
  103. package/dist/cron.js +175 -82
  104. package/dist/dashboard/assets/index-B345aOO-.js +65 -0
  105. package/dist/dashboard/assets/index-ZWK4dalJ.css +1 -0
  106. package/dist/dashboard/index.html +2 -2
  107. package/dist/digests.d.ts +1 -0
  108. package/dist/digests.js +132 -42
  109. package/dist/doctor/checks.d.ts +0 -3
  110. package/dist/doctor/checks.js +1 -108
  111. package/dist/doctor/runner.js +1 -4
  112. package/dist/env-sanitizer.d.ts +2 -0
  113. package/dist/env-sanitizer.js +61 -0
  114. package/dist/exec-approval.d.ts +11 -1
  115. package/dist/exec-approval.js +17 -4
  116. package/dist/gateway.d.ts +3 -1
  117. package/dist/gateway.js +17 -7
  118. package/dist/heartbeat.js +1 -6
  119. package/dist/langfuse.js +3 -29
  120. package/dist/model-selection.js +3 -1
  121. package/dist/providers/adapter.d.ts +118 -0
  122. package/dist/providers/adapter.js +6 -0
  123. package/dist/providers/adapters/anthropic-adapter.d.ts +22 -0
  124. package/dist/providers/adapters/anthropic-adapter.js +204 -0
  125. package/dist/providers/adapters/codex-adapter.d.ts +26 -0
  126. package/dist/providers/adapters/codex-adapter.js +203 -0
  127. package/dist/providers/anthropic.d.ts +1 -0
  128. package/dist/providers/anthropic.js +10 -272
  129. package/dist/providers/codex.d.ts +21 -0
  130. package/dist/providers/codex.js +149 -330
  131. package/dist/providers/content.d.ts +1 -1
  132. package/dist/providers/content.js +2 -2
  133. package/dist/providers/context-manager.d.ts +18 -6
  134. package/dist/providers/context-manager.js +199 -223
  135. package/dist/providers/index.d.ts +9 -1
  136. package/dist/providers/index.js +73 -64
  137. package/dist/providers/loop-utils.d.ts +20 -0
  138. package/dist/providers/loop-utils.js +30 -0
  139. package/dist/providers/tool-loop.d.ts +12 -0
  140. package/dist/providers/tool-loop.js +251 -0
  141. package/dist/providers/utils.d.ts +19 -3
  142. package/dist/providers/utils.js +100 -29
  143. package/dist/secure-store.d.ts +8 -0
  144. package/dist/secure-store.js +80 -0
  145. package/dist/service.js +3 -28
  146. package/dist/sessions.d.ts +3 -0
  147. package/dist/sessions.js +147 -18
  148. package/dist/setup-templates.js +13 -25
  149. package/dist/setup.d.ts +10 -6
  150. package/dist/setup.js +84 -292
  151. package/dist/skills.js +3 -11
  152. package/dist/tools/agent-delegation.d.ts +19 -0
  153. package/dist/tools/agent-delegation.js +49 -0
  154. package/dist/tools/bash-tool.js +89 -34
  155. package/dist/tools/definitions.d.ts +199 -302
  156. package/dist/tools/definitions.js +70 -123
  157. package/dist/tools/execute-context.d.ts +13 -4
  158. package/dist/tools/fetch-tool.js +109 -13
  159. package/dist/tools/file-tools.js +7 -1
  160. package/dist/tools.d.ts +7 -7
  161. package/dist/tools.js +133 -151
  162. package/dist/types.d.ts +37 -30
  163. package/dist/utils.js +4 -6
  164. package/dist/voice.d.ts +1 -1
  165. package/dist/voice.js +17 -4
  166. package/package.json +33 -23
  167. package/templates/TOOLS.md +0 -27
  168. package/dist/__tests__/audit.test.js +0 -122
  169. package/dist/__tests__/code-agents-orchestrator.test.js +0 -216
  170. package/dist/__tests__/code-agents-sandbox.test.js +0 -163
  171. package/dist/__tests__/orchestrator.test.js +0 -425
  172. package/dist/__tests__/sandbox-bridge.test.js +0 -116
  173. package/dist/__tests__/sandbox-manager.test.js +0 -144
  174. package/dist/__tests__/sandbox-mount-security.test.js +0 -139
  175. package/dist/__tests__/sandbox-runtime.test.js +0 -176
  176. package/dist/__tests__/subagent.test.js +0 -240
  177. package/dist/__tests__/telegram.test.js +0 -42
  178. package/dist/code-agents/orchestrator.d.ts +0 -29
  179. package/dist/code-agents/orchestrator.js +0 -694
  180. package/dist/code-agents/worktree.d.ts +0 -40
  181. package/dist/code-agents/worktree.js +0 -215
  182. package/dist/dashboard/assets/index-BoTHPby4.js +0 -65
  183. package/dist/dashboard/assets/index-D4mufvBg.css +0 -1
  184. package/dist/dashboard.d.ts +0 -8
  185. package/dist/dashboard.js +0 -4071
  186. package/dist/discord.d.ts +0 -8
  187. package/dist/discord.js +0 -792
  188. package/dist/mcp-context-a8c.d.ts +0 -13
  189. package/dist/mcp-context-a8c.js +0 -34
  190. package/dist/orchestrator.d.ts +0 -15
  191. package/dist/orchestrator.js +0 -676
  192. package/dist/providers/openai.d.ts +0 -10
  193. package/dist/providers/openai.js +0 -355
  194. package/dist/sandbox/bridge.d.ts +0 -5
  195. package/dist/sandbox/bridge.js +0 -63
  196. package/dist/sandbox/index.d.ts +0 -5
  197. package/dist/sandbox/index.js +0 -4
  198. package/dist/sandbox/manager.d.ts +0 -7
  199. package/dist/sandbox/manager.js +0 -100
  200. package/dist/sandbox/mount-security.d.ts +0 -12
  201. package/dist/sandbox/mount-security.js +0 -122
  202. package/dist/sandbox/runtime.d.ts +0 -39
  203. package/dist/sandbox/runtime.js +0 -192
  204. package/dist/sandbox-utils.d.ts +0 -6
  205. package/dist/sandbox-utils.js +0 -36
  206. package/dist/subagent.d.ts +0 -19
  207. package/dist/subagent.js +0 -407
  208. package/dist/telegram.d.ts +0 -2
  209. package/dist/telegram.js +0 -11
  210. package/dist/tools/browser-tool.d.ts +0 -3
  211. package/dist/tools/browser-tool.js +0 -266
  212. package/sandbox/Dockerfile +0 -40
  213. /package/dist/__tests__/{audit.test.d.ts → code-agents-notifications.test.d.ts} +0 -0
  214. /package/dist/__tests__/{code-agents-orchestrator.test.d.ts → code-agents-worktrees.test.d.ts} +0 -0
  215. /package/dist/__tests__/{code-agents-sandbox.test.d.ts → codex-adapter.test.d.ts} +0 -0
  216. /package/dist/__tests__/{orchestrator.test.d.ts → codex-auth.test.d.ts} +0 -0
  217. /package/dist/__tests__/{sandbox-bridge.test.d.ts → codex-provider-gating.test.d.ts} +0 -0
  218. /package/dist/__tests__/{sandbox-manager.test.d.ts → codex-unified-loop.test.d.ts} +0 -0
  219. /package/dist/__tests__/{sandbox-mount-security.test.d.ts → config-security.test.d.ts} +0 -0
  220. /package/dist/__tests__/{sandbox-runtime.test.d.ts → cron-run.test.d.ts} +0 -0
  221. /package/dist/__tests__/{subagent.test.d.ts → digests.test.d.ts} +0 -0
  222. /package/dist/__tests__/{telegram.test.d.ts → discord-attachments.test.d.ts} +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "skimpyclaw",
3
- "version": "0.3.14",
3
+ "version": "0.4.0",
4
4
  "description": "A lobster in a bikini",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -10,12 +10,31 @@
10
10
  },
11
11
  "files": [
12
12
  "dist",
13
- "sandbox",
14
13
  "templates",
15
14
  "com.skimpyclaw.gateway.plist.example",
16
15
  "README.md",
17
16
  "LICENSE"
18
17
  ],
18
+ "scripts": {
19
+ "cli": "tsx src/cli.ts",
20
+ "start": "tsx src/index.ts",
21
+ "dev": "tsx watch src/index.ts",
22
+ "dashboard:dev": "pnpm --dir web/dashboard dev",
23
+ "dashboard:build": "pnpm --dir web/dashboard build",
24
+ "docs:dev": "pnpm --dir docs dev",
25
+ "docs:build": "pnpm --dir docs install --frozen-lockfile && pnpm --dir docs build",
26
+ "docs:preview": "pnpm --dir docs preview",
27
+ "setup": "tsx src/setup.ts",
28
+ "onboard": "tsx src/cli.ts onboard",
29
+ "clean": "rm -rf dist",
30
+ "build": "pnpm clean && tsc && pnpm dashboard:build",
31
+ "release:check": "pnpm build && pnpm test",
32
+ "release:local": "bash ./scripts/release.sh",
33
+ "lint": "eslint \"src/**/*.ts\"",
34
+ "typecheck": "tsc --noEmit",
35
+ "test": "vitest run",
36
+ "ci": "pnpm run lint && pnpm run typecheck && pnpm run test"
37
+ },
19
38
  "dependencies": {
20
39
  "@anthropic-ai/sdk": "^0.52.0",
21
40
  "@grammyjs/runner": "^2.0.3",
@@ -34,7 +53,17 @@
34
53
  "gray-matter": "^4.0.3",
35
54
  "mcporter": "^0.7.3",
36
55
  "openai": "^4.47.0",
37
- "playwright": "^1.49.0"
56
+ "pdf-parse": "^2.4.5"
57
+ },
58
+ "pnpm": {
59
+ "overrides": {
60
+ "@eslint/config-array>minimatch": "3.1.2",
61
+ "@eslint/eslintrc>minimatch": "3.1.2",
62
+ "vite>rollup": "4.57.1"
63
+ },
64
+ "onlyBuiltDependencies": [
65
+ "esbuild"
66
+ ]
38
67
  },
39
68
  "devDependencies": {
40
69
  "@eslint/js": "^9.39.2",
@@ -45,24 +74,5 @@
45
74
  "typescript": "^5.4.0",
46
75
  "typescript-eslint": "^8.54.0",
47
76
  "vitest": "^4.0.18"
48
- },
49
- "scripts": {
50
- "cli": "tsx src/cli.ts",
51
- "start": "tsx src/index.ts",
52
- "dev": "tsx watch src/index.ts",
53
- "dashboard:dev": "pnpm --dir web/dashboard dev",
54
- "dashboard:build": "pnpm --dir web/dashboard install --frozen-lockfile && pnpm --dir web/dashboard build",
55
- "docs:dev": "pnpm --dir docs dev",
56
- "docs:build": "pnpm --dir docs install --frozen-lockfile && pnpm --dir docs build",
57
- "docs:preview": "pnpm --dir docs preview",
58
- "setup": "tsx src/setup.ts",
59
- "onboard": "tsx src/cli.ts onboard",
60
- "build": "tsc && pnpm dashboard:build",
61
- "release:check": "pnpm build && pnpm test",
62
- "release:local": "bash ./scripts/release.sh",
63
- "lint": "eslint \"src/**/*.ts\"",
64
- "typecheck": "tsc --noEmit",
65
- "test": "vitest run",
66
- "ci": "pnpm run lint && pnpm run typecheck && pnpm run test"
67
77
  }
68
- }
78
+ }
@@ -175,33 +175,6 @@ Do not use for:
175
175
 
176
176
  ---
177
177
 
178
- ## code_with_team
179
-
180
- Decompose a complex task into subtasks and run multiple `code_with_agent` instances in parallel.
181
-
182
- Parameters:
183
-
184
- - `task` (required, detailed and specific)
185
- - `team_size` (2-5, default 3)
186
- - `workdir` (optional)
187
- - `model` (optional)
188
- - `timeout_minutes` (optional, default 20, max 60)
189
- - `validate` (boolean, default true — runs once after all agents finish)
190
-
191
- Use when:
192
-
193
- - Multi-file refactors with independent parts
194
- - Cross-layer changes (frontend + backend + tests) that don't conflict
195
- - Tasks with clearly separable subtasks
196
-
197
- Do not use for:
198
-
199
- - Simple single-file changes (use code_with_agent)
200
- - Tightly coupled changes where agents would conflict on the same files
201
- - Non-coding tasks
202
- - Quick fixes or config edits
203
-
204
- ---
205
178
 
206
179
  ## Web Search
207
180
 
@@ -1,122 +0,0 @@
1
- import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
2
- import { mkdirSync, rmSync, existsSync, readFileSync } from 'fs';
3
- import { join } from 'path';
4
- import { tmpdir } from 'os';
5
- const TEST_ROOT = join(tmpdir(), `skimpyclaw-audit-test-${Date.now()}`);
6
- const LOGS_DIR = join(TEST_ROOT, 'logs');
7
- // Mock config.ts to use test directories
8
- vi.mock('../config.js', () => ({
9
- getLogsDir: () => LOGS_DIR,
10
- }));
11
- import { startTrace, addEvent, endTrace, getTraces, getTrace, getAuditDates } from '../audit.js';
12
- describe('audit', () => {
13
- beforeAll(() => {
14
- mkdirSync(LOGS_DIR, { recursive: true });
15
- });
16
- afterAll(() => {
17
- rmSync(TEST_ROOT, { recursive: true, force: true });
18
- });
19
- describe('startTrace + endTrace', () => {
20
- it('creates a trace and writes JSONL on endTrace', () => {
21
- const traceId = startTrace('telegram', 'telegram:hello', 'main', 'claude-opus-4-6', '123');
22
- expect(traceId).toBeTruthy();
23
- expect(typeof traceId).toBe('string');
24
- addEvent(traceId, 'message_in', 'hello world');
25
- addEvent(traceId, 'model_call', 'anthropic/claude-opus-4-6', { durationMs: 1500 });
26
- addEvent(traceId, 'message_out', 'hi there');
27
- endTrace(traceId, 'success');
28
- // Verify JSONL file was written
29
- const date = new Date().toISOString().split('T')[0];
30
- const filePath = join(LOGS_DIR, 'audit', `${date}.jsonl`);
31
- expect(existsSync(filePath)).toBe(true);
32
- const content = readFileSync(filePath, 'utf-8').trim();
33
- const trace = JSON.parse(content.split('\n').pop());
34
- expect(trace.traceId).toBe(traceId);
35
- expect(trace.trigger).toBe('telegram');
36
- expect(trace.triggerDetail).toBe('telegram:hello');
37
- expect(trace.agentId).toBe('main');
38
- expect(trace.model).toBe('claude-opus-4-6');
39
- expect(trace.userId).toBe('123');
40
- expect(trace.status).toBe('success');
41
- expect(trace.finishedAt).toBeTruthy();
42
- expect(trace.events).toHaveLength(3);
43
- expect(trace.events[0].type).toBe('message_in');
44
- expect(trace.events[1].type).toBe('model_call');
45
- expect(trace.events[1].durationMs).toBe(1500);
46
- expect(trace.events[2].type).toBe('message_out');
47
- });
48
- it('handles error traces', () => {
49
- const traceId = startTrace('cron', 'cron:morning', 'main', 'claude-opus-4-6');
50
- addEvent(traceId, 'model_call', 'failed', { status: 'error', error: 'API error' });
51
- endTrace(traceId, 'error');
52
- const trace = getTrace(traceId);
53
- expect(trace).not.toBeNull();
54
- expect(trace.status).toBe('error');
55
- expect(trace.events[0].status).toBe('error');
56
- expect(trace.events[0].error).toBe('API error');
57
- });
58
- });
59
- describe('addEvent', () => {
60
- it('silently ignores unknown traceIds', () => {
61
- // Should not throw
62
- addEvent('nonexistent', 'message_in', 'test');
63
- });
64
- it('adds events with detail and durationMs', () => {
65
- const traceId = startTrace('api', 'model_switch', 'main', 'claude-opus-4-6');
66
- addEvent(traceId, 'tool_call', 'Glob ~/vault/**/*.md', {
67
- durationMs: 45,
68
- detail: { tool: 'Glob', input: { pattern: '**/*.md' } },
69
- });
70
- endTrace(traceId, 'success');
71
- const trace = getTrace(traceId);
72
- expect(trace.events[0].durationMs).toBe(45);
73
- expect(trace.events[0].detail).toEqual({ tool: 'Glob', input: { pattern: '**/*.md' } });
74
- });
75
- });
76
- describe('getTraces', () => {
77
- it('returns traces for today newest-first', () => {
78
- const result = getTraces();
79
- expect(result.traces.length).toBeGreaterThan(0);
80
- expect(result.total).toBeGreaterThan(0);
81
- // Verify newest first
82
- if (result.traces.length >= 2) {
83
- const first = new Date(result.traces[0].startedAt).getTime();
84
- const second = new Date(result.traces[1].startedAt).getTime();
85
- expect(first).toBeGreaterThanOrEqual(second);
86
- }
87
- });
88
- it('returns empty for nonexistent date', () => {
89
- const result = getTraces('1999-01-01');
90
- expect(result.traces).toEqual([]);
91
- expect(result.total).toBe(0);
92
- });
93
- it('supports pagination', () => {
94
- const all = getTraces(undefined, 100, 0);
95
- const page = getTraces(undefined, 1, 0);
96
- expect(page.traces).toHaveLength(1);
97
- expect(page.total).toBe(all.total);
98
- });
99
- });
100
- describe('getTrace', () => {
101
- it('finds a trace by ID from file', () => {
102
- const traceId = startTrace('heartbeat', 'heartbeat', 'main', 'claude-opus-4-6');
103
- addEvent(traceId, 'message_out', 'HEARTBEAT_OK');
104
- endTrace(traceId, 'success');
105
- const found = getTrace(traceId);
106
- expect(found).not.toBeNull();
107
- expect(found.traceId).toBe(traceId);
108
- expect(found.trigger).toBe('heartbeat');
109
- });
110
- it('returns null for nonexistent traceId', () => {
111
- expect(getTrace('does-not-exist')).toBeNull();
112
- });
113
- });
114
- describe('getAuditDates', () => {
115
- it('returns dates with today included', () => {
116
- const dates = getAuditDates();
117
- expect(dates.length).toBeGreaterThan(0);
118
- const today = new Date().toISOString().split('T')[0];
119
- expect(dates).toContain(today);
120
- });
121
- });
122
- });
@@ -1,216 +0,0 @@
1
- import { describe, expect, it, vi } from 'vitest';
2
- // Mock heavy deps
3
- vi.mock('../agent.js', () => ({
4
- runAgentTurn: vi.fn(async () => '{"subtasks": [{"description": "task1", "dependsOn": []}, {"description": "task2", "dependsOn": [0]}]}'),
5
- }));
6
- vi.mock('../code-agents/registry.js', () => {
7
- const tasks = new Map();
8
- let counter = 0;
9
- return {
10
- getCodeAgentsDir: () => '/tmp/test-code-agents',
11
- ensureCodeAgentsDir: vi.fn(),
12
- writeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
13
- storeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
14
- getCodeAgent: vi.fn((id) => tasks.get(id) || null),
15
- getNextCodeAgentId: vi.fn(() => `ca-${++counter}`),
16
- setCodeAgentCanceller: vi.fn(),
17
- deleteCodeAgentCanceller: vi.fn(),
18
- _tasks: tasks,
19
- _resetCounter: () => { counter = 0; },
20
- };
21
- });
22
- vi.mock('../code-agents/executor.js', () => ({
23
- runCodeAgentBackground: vi.fn(async () => { }),
24
- runValidation: vi.fn(async () => ({ passed: true, output: 'PASS' })),
25
- }));
26
- vi.mock('../code-agents/utils.js', () => ({
27
- buildCodeAgentArgs: vi.fn(() => ({ cmd: 'echo', args: ['hello'] })),
28
- notifyCodeAgentResult: vi.fn(async () => { }),
29
- resolveModelAlias: vi.fn((m) => m),
30
- }));
31
- vi.mock('../audit.js', () => ({
32
- startTrace: vi.fn(() => 'trace-1'),
33
- addEvent: vi.fn(),
34
- endTrace: vi.fn(async () => { }),
35
- }));
36
- import { computeWaves, decomposeTask, synthesizeResults, gatherCodebaseContext } from '../code-agents/orchestrator.js';
37
- import { runAgentTurn } from '../agent.js';
38
- import { getCodeAgent, writeCodeAgentTask } from '../code-agents/registry.js';
39
- const mockRunAgentTurn = vi.mocked(runAgentTurn);
40
- const mockGetCodeAgent = vi.mocked(getCodeAgent);
41
- const mockWriteCodeAgentTask = vi.mocked(writeCodeAgentTask);
42
- describe('computeWaves', () => {
43
- it('puts independent tasks in one wave', () => {
44
- const waves = computeWaves([
45
- { description: 'a', dependsOn: [] },
46
- { description: 'b', dependsOn: [] },
47
- { description: 'c', dependsOn: [] },
48
- ]);
49
- expect(waves).toEqual([[0, 1, 2]]);
50
- });
51
- it('creates sequential waves for dependencies', () => {
52
- const waves = computeWaves([
53
- { description: 'a', dependsOn: [] },
54
- { description: 'b', dependsOn: [0] },
55
- { description: 'c', dependsOn: [1] },
56
- ]);
57
- expect(waves).toEqual([[0], [1], [2]]);
58
- });
59
- it('handles cycle detection without infinite loop', () => {
60
- // Tasks that depend on each other (cycle)
61
- const waves = computeWaves([
62
- { description: 'a', dependsOn: [1] },
63
- { description: 'b', dependsOn: [0] },
64
- ]);
65
- // Should force them into one wave rather than looping forever
66
- expect(waves.length).toBeGreaterThan(0);
67
- const allIndices = waves.flat();
68
- expect(allIndices).toContain(0);
69
- expect(allIndices).toContain(1);
70
- });
71
- it('handles mixed dependencies', () => {
72
- const waves = computeWaves([
73
- { description: 'a', dependsOn: [] },
74
- { description: 'b', dependsOn: [] },
75
- { description: 'c', dependsOn: [0, 1] },
76
- ]);
77
- expect(waves).toEqual([[0, 1], [2]]);
78
- });
79
- });
80
- describe('decomposeTask', () => {
81
- it('pads with "Additional part of:" when model returns fewer subtasks', async () => {
82
- mockRunAgentTurn.mockResolvedValueOnce('{"subtasks": [{"description": "only one task", "dependsOn": []}]}');
83
- const config = { providers: {} };
84
- const result = await decomposeTask('Build a full app with tests', 3, config);
85
- expect(result).toHaveLength(3);
86
- expect(result[0].description).toBe('only one task');
87
- // Padded entries should use "Additional part of:" not duplicate the last description
88
- expect(result[1].description).toMatch(/^Additional part of:/);
89
- expect(result[2].description).toMatch(/^Additional part of:/);
90
- });
91
- it('falls back to numbered splitting on parse error', async () => {
92
- mockRunAgentTurn.mockResolvedValueOnce('not valid json at all');
93
- const config = { providers: {} };
94
- const result = await decomposeTask('my task', 2, config);
95
- expect(result).toHaveLength(2);
96
- expect(result[0].description).toContain('Part 1 of 2');
97
- expect(result[1].description).toContain('Part 2 of 2');
98
- });
99
- });
100
- describe('synthesizeResults', () => {
101
- it('uses structured context (summary capped at 500 chars, not raw 1000)', async () => {
102
- const longOutput = 'x'.repeat(1000);
103
- mockRunAgentTurn.mockResolvedValueOnce('Synthesis complete');
104
- const config = { providers: {} };
105
- await synthesizeResults('original task', [
106
- { subtask: 'sub1', status: 'completed', output: longOutput },
107
- ], config);
108
- const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
109
- const prompt = call[1];
110
- // Summary should be capped at 500 chars, not the full 1000
111
- expect(prompt).toContain('x'.repeat(500));
112
- expect(prompt).not.toContain('x'.repeat(501));
113
- // Should use the structured format (Summary: prefix)
114
- expect(prompt).toContain('Summary:');
115
- });
116
- });
117
- describe('orchestrator - cancellation after wave spawn', () => {
118
- it('cancels just-spawned children when parent is cancelled', async () => {
119
- // This is tested structurally by reading the source
120
- const { readFileSync } = await vi.importActual('fs');
121
- const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
122
- // Verify the cancellation check exists after wave spawn
123
- expect(src).toContain("// Check cancellation after spawning");
124
- expect(src).toContain("if (getCodeAgent(parentId)?.status === 'cancelled')");
125
- // Verify it sets children to cancelled
126
- expect(src).toContain("status: 'cancelled'");
127
- expect(src).toContain("error: CANCELLED_MESSAGE");
128
- });
129
- });
130
- describe('orchestrator - spawn failure marks child as failed', () => {
131
- it('catch handler updates child task status on spawn error', async () => {
132
- const { readFileSync } = await vi.importActual('fs');
133
- const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
134
- // Verify the catch handler updates child status
135
- expect(src).toContain("const child = getCodeAgent(childId);");
136
- expect(src).toContain("if (child && child.status === 'running')");
137
- expect(src).toContain("status: 'failed'");
138
- expect(src).toContain("error: toErrorMessage(err)");
139
- });
140
- });
141
- describe('orchestrator - skip redundant parent writes', () => {
142
- it('only writes parent status when liveOutput changes', async () => {
143
- const { readFileSync } = await vi.importActual('fs');
144
- const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
145
- // Verify the dedup pattern
146
- expect(src).toContain("let lastLiveOutput = ''");
147
- expect(src).toContain('if (newLiveOutput !== lastLiveOutput)');
148
- expect(src).toContain('lastLiveOutput = newLiveOutput');
149
- });
150
- });
151
- describe('gatherCodebaseContext', () => {
152
- it('returns a non-empty string for the project root', () => {
153
- // Use this project's own root as the workdir
154
- const { resolve } = require('path');
155
- const projectRoot = resolve(__dirname, '..', '..');
156
- const context = gatherCodebaseContext(projectRoot);
157
- // Should contain at least scripts or source files
158
- expect(context.length).toBeGreaterThan(0);
159
- expect(context.length).toBeLessThanOrEqual(2000);
160
- });
161
- it('returns empty string for nonexistent directory', () => {
162
- const context = gatherCodebaseContext('/tmp/nonexistent-dir-12345');
163
- // Should not throw, just return empty or minimal context
164
- expect(typeof context).toBe('string');
165
- });
166
- });
167
- describe('decomposeTask with workdir', () => {
168
- it('passes workdir context to the decomposition prompt', async () => {
169
- mockRunAgentTurn.mockResolvedValueOnce('{"subtasks": [{"description": "sub1", "dependsOn": []}, {"description": "sub2", "dependsOn": []}]}');
170
- const config = { providers: {} };
171
- const result = await decomposeTask('test task', 2, config, '/tmp');
172
- expect(result).toHaveLength(2);
173
- // Check the prompt sent to the model includes the richer decomposition instructions
174
- const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
175
- const prompt = call[1];
176
- expect(prompt).toContain('task decomposition expert');
177
- expect(prompt).toContain('git worktree');
178
- });
179
- });
180
- describe('synthesizeResults with workdir', () => {
181
- it('includes git diff info when workdir is a git repo', async () => {
182
- mockRunAgentTurn.mockResolvedValueOnce('Synthesis complete');
183
- const { resolve } = require('path');
184
- const projectRoot = resolve(__dirname, '..', '..');
185
- const config = { providers: {} };
186
- await synthesizeResults('original task', [
187
- { subtask: 'sub1', status: 'completed', output: 'done' },
188
- ], config, projectRoot);
189
- const call = mockRunAgentTurn.mock.calls[mockRunAgentTurn.mock.calls.length - 1];
190
- const prompt = call[1];
191
- // Should include the success/failure counts
192
- expect(prompt).toContain('1 succeeded, 0 failed');
193
- });
194
- });
195
- describe('orchestrator - per-wave validation and retry', () => {
196
- it('source includes per-wave validation logic', async () => {
197
- const { readFileSync } = await vi.importActual('fs');
198
- const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
199
- // Verify per-wave validation exists
200
- expect(src).toContain('Per-wave validation: run build after each wave');
201
- expect(src).toContain('wave_validation');
202
- // Verify retry logic
203
- expect(src).toContain('wave_retry_complete');
204
- expect(src).toContain('retryPrompt');
205
- });
206
- });
207
- describe('orchestrator - timeout budgeting', () => {
208
- it('computes perChildTimeout based on wave count not team size', async () => {
209
- const { readFileSync } = await vi.importActual('fs');
210
- const src = readFileSync(new URL('../../src/code-agents/orchestrator.ts', import.meta.url).pathname.replace('/.worktrees/hardening-code-agents/src/__tests__/../../', '/.worktrees/hardening-code-agents/'), 'utf-8');
211
- // Verify budget-aware timeout
212
- expect(src).toContain('overheadMinutes');
213
- expect(src).toContain('availableForChildren');
214
- expect(src).toContain('Math.floor(availableForChildren / waves.length)');
215
- });
216
- });
@@ -1,163 +0,0 @@
1
- import { describe, expect, it, vi, beforeEach } from 'vitest';
2
- // Mock heavy dependencies before importing
3
- vi.mock('../code-agents/registry.js', () => {
4
- const tasks = new Map();
5
- return {
6
- getCodeAgentsDir: () => '/tmp/test-code-agents',
7
- ensureCodeAgentsDir: vi.fn(),
8
- writeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
9
- storeCodeAgentTask: vi.fn((task) => tasks.set(task.id, { ...task })),
10
- getCodeAgent: vi.fn((id) => tasks.get(id) || null),
11
- setCodeAgentCanceller: vi.fn(),
12
- deleteCodeAgentCanceller: vi.fn(),
13
- getNextCodeAgentId: vi.fn(() => 'ca-test-1'),
14
- getActiveCodeAgents: vi.fn(() => []),
15
- getRecentCodeAgents: vi.fn(() => []),
16
- _tasks: tasks,
17
- };
18
- });
19
- vi.mock('../code-agents/parser.js', () => ({
20
- parseStreamJsonForLive: vi.fn((s) => s.slice(0, 200)),
21
- parseClaudeOutput: vi.fn((s) => ({ text: s, totalCost: 0, inputTokens: 0, outputTokens: 0 })),
22
- parseCodexOutput: vi.fn((s) => s),
23
- }));
24
- vi.mock('../code-agents/utils.js', () => ({
25
- buildCodeAgentArgs: vi.fn(() => ({ cmd: 'echo', args: ['hello'] })),
26
- notifyCodeAgentResult: vi.fn(async () => { }),
27
- resolveModelAlias: vi.fn((m) => m || 'claude'),
28
- resolveSelectedCodeAgent: vi.fn((_req, def) => def || 'claude'),
29
- resolveWorkdir: vi.fn((_raw, _proj, root) => root),
30
- }));
31
- vi.mock('../audit.js', () => ({
32
- startTrace: vi.fn(() => 'trace-1'),
33
- addEvent: vi.fn(),
34
- endTrace: vi.fn(async () => { }),
35
- }));
36
- vi.mock('../usage.js', () => ({
37
- buildUsageRecord: vi.fn(() => ({})),
38
- recordUsage: vi.fn(),
39
- }));
40
- vi.mock('fs', async () => {
41
- const actual = await vi.importActual('fs');
42
- return {
43
- ...actual,
44
- createWriteStream: vi.fn(() => ({
45
- write: vi.fn(),
46
- end: vi.fn(),
47
- })),
48
- existsSync: vi.fn(() => false),
49
- };
50
- });
51
- vi.mock('../sandbox/index.js', () => ({
52
- ensureContainer: vi.fn(async () => 'sandbox-code-test'),
53
- SANDBOX_DEFAULTS: { image: 'skimpyclaw-sandbox:latest', runtime: 'docker' },
54
- getRuntime: vi.fn(() => 'docker'),
55
- execInContainer: vi.fn(async () => ({ exitCode: 0, stdout: 'PASS', stderr: '' })),
56
- }));
57
- describe('buildContainerEnvArgs', () => {
58
- it('returns -e flags for allowlisted env vars that exist', async () => {
59
- const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
60
- const env = {
61
- ANTHROPIC_API_KEY: 'sk-test-123',
62
- GH_TOKEN: 'ghp_abc',
63
- HOME: '/home/sandbox',
64
- SOME_OTHER_VAR: 'should-not-appear',
65
- PATH: '/usr/bin:/usr/local/bin',
66
- };
67
- const result = buildContainerEnvArgs(env);
68
- expect(result).toContain('-e');
69
- expect(result).toContain('ANTHROPIC_API_KEY=sk-test-123');
70
- expect(result).toContain('GH_TOKEN=ghp_abc');
71
- expect(result).toContain('HOME=/home/sandbox');
72
- expect(result).toContain('PATH=/usr/bin:/usr/local/bin');
73
- // Non-allowlisted vars should not appear
74
- expect(result.join(' ')).not.toContain('SOME_OTHER_VAR');
75
- });
76
- it('excludes empty and undefined values', async () => {
77
- const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
78
- const env = {
79
- ANTHROPIC_API_KEY: '',
80
- GH_TOKEN: undefined,
81
- GITHUB_TOKEN: 'ghp_123',
82
- };
83
- const result = buildContainerEnvArgs(env);
84
- expect(result).toContain('GITHUB_TOKEN=ghp_123');
85
- // No git signing key passed, so no GIT_CONFIG overrides
86
- expect(result.join(' ')).not.toContain('GIT_CONFIG');
87
- });
88
- it('includes git signing config overrides when gitSigningKey is provided', async () => {
89
- const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
90
- const result = buildContainerEnvArgs({ ANTHROPIC_API_KEY: 'sk-test' }, { gitSigningKey: '/home/sandbox/.ssh/git_signing_ed25519' });
91
- expect(result).toContain('GIT_CONFIG_COUNT=1');
92
- expect(result).toContain('GIT_CONFIG_KEY_0=user.signingkey');
93
- expect(result).toContain('GIT_CONFIG_VALUE_0=/home/sandbox/.ssh/git_signing_ed25519');
94
- });
95
- it('omits git signing config when gitSigningKey is not provided', async () => {
96
- const { buildContainerEnvArgs } = await import('../code-agents/executor.js');
97
- const result = buildContainerEnvArgs({ FOO: 'bar', BAZ: 'qux' });
98
- expect(result).not.toContain('FOO=bar');
99
- expect(result.join(' ')).not.toContain('GIT_CONFIG');
100
- expect(result).toEqual([]);
101
- });
102
- });
103
- describe('runValidation with sandbox', () => {
104
- beforeEach(() => {
105
- vi.clearAllMocks();
106
- });
107
- it('uses execInContainer when sandboxContainer is provided', async () => {
108
- const { execInContainer } = await import('../sandbox/index.js');
109
- const { runValidation } = await import('../code-agents/executor.js');
110
- const result = await runValidation('/workspace', 'sandbox-test');
111
- expect(execInContainer).toHaveBeenCalledWith('sandbox-test', expect.any(Array), expect.objectContaining({ timeout: expect.any(Number) }));
112
- expect(result.passed).toBe(true);
113
- expect(result.output).toBe('PASS');
114
- });
115
- it('returns failure when execInContainer returns non-zero exit', async () => {
116
- const { execInContainer } = await import('../sandbox/index.js');
117
- vi.mocked(execInContainer).mockResolvedValueOnce({
118
- exitCode: 1,
119
- stdout: 'Error: test failed',
120
- stderr: 'FAIL',
121
- });
122
- const { runValidation } = await import('../code-agents/executor.js');
123
- const result = await runValidation('/workspace', 'sandbox-fail');
124
- expect(result.passed).toBe(false);
125
- expect(result.output).toContain('VALIDATION FAILED');
126
- });
127
- it('returns failure when execInContainer throws', async () => {
128
- const { execInContainer } = await import('../sandbox/index.js');
129
- vi.mocked(execInContainer).mockRejectedValueOnce(new Error('container not found'));
130
- const { runValidation } = await import('../code-agents/executor.js');
131
- const result = await runValidation('/workspace', 'sandbox-crash');
132
- expect(result.passed).toBe(false);
133
- expect(result.output).toContain('sandbox exec error');
134
- expect(result.output).toContain('container not found');
135
- });
136
- });
137
- describe('sandbox config resolution in executeCodeWithAgent', () => {
138
- it('resolves sandboxConfig from context.sandboxConfig', async () => {
139
- // Test the resolution logic directly — the config should be picked up
140
- // from context.sandboxConfig or context.fullConfig.sandbox
141
- const contextWithSandbox = {
142
- sandboxConfig: { enabled: true, image: 'test-image' },
143
- fullConfig: { sandbox: { enabled: false } },
144
- };
145
- // sandboxConfig takes precedence over fullConfig.sandbox
146
- const resolved = contextWithSandbox.sandboxConfig ?? contextWithSandbox.fullConfig?.sandbox;
147
- expect(resolved).toEqual({ enabled: true, image: 'test-image' });
148
- });
149
- it('falls back to fullConfig.sandbox when context.sandboxConfig is absent', () => {
150
- const contextFallback = {
151
- fullConfig: { sandbox: { enabled: true, image: 'fallback-image' } },
152
- };
153
- const resolved = contextFallback.sandboxConfig ?? contextFallback.fullConfig?.sandbox;
154
- expect(resolved).toEqual({ enabled: true, image: 'fallback-image' });
155
- });
156
- it('resolves to undefined when no sandbox config present', () => {
157
- const contextNone = {
158
- fullConfig: {},
159
- };
160
- const resolved = contextNone.sandboxConfig ?? contextNone.fullConfig?.sandbox;
161
- expect(resolved).toBeUndefined();
162
- });
163
- });