skimpyclaw 0.3.5 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +14 -6
  2. package/dist/__tests__/api.test.js +1 -19
  3. package/dist/__tests__/channels.test.js +1 -1
  4. package/dist/__tests__/code-agents-orchestrator.test.js +74 -7
  5. package/dist/__tests__/code-agents-sandbox.test.d.ts +1 -0
  6. package/dist/__tests__/code-agents-sandbox.test.js +163 -0
  7. package/dist/__tests__/context-manager.test.d.ts +1 -0
  8. package/dist/__tests__/context-manager.test.js +236 -0
  9. package/dist/__tests__/package-manager-detection.test.js +5 -5
  10. package/dist/__tests__/setup.test.js +10 -7
  11. package/dist/__tests__/skills.test.js +2 -2
  12. package/dist/__tests__/structured-context.test.d.ts +1 -0
  13. package/dist/__tests__/structured-context.test.js +100 -0
  14. package/dist/__tests__/tools.test.js +65 -3
  15. package/dist/agent.js +4 -5
  16. package/dist/api.js +10 -85
  17. package/dist/audit.js +5 -51
  18. package/dist/channels/telegram/handlers.js +2 -60
  19. package/dist/channels/telegram/index.js +0 -7
  20. package/dist/channels.js +1 -1
  21. package/dist/cli.js +186 -17
  22. package/dist/code-agents/executor.d.ts +9 -4
  23. package/dist/code-agents/executor.js +187 -13
  24. package/dist/code-agents/index.d.ts +1 -1
  25. package/dist/code-agents/index.js +23 -21
  26. package/dist/code-agents/orchestrator.d.ts +8 -2
  27. package/dist/code-agents/orchestrator.js +297 -27
  28. package/dist/code-agents/structured-context.d.ts +7 -0
  29. package/dist/code-agents/structured-context.js +54 -0
  30. package/dist/code-agents/types.d.ts +2 -0
  31. package/dist/code-agents/utils.js +12 -2
  32. package/dist/code-agents/worktree.d.ts +40 -0
  33. package/dist/code-agents/worktree.js +215 -0
  34. package/dist/config.d.ts +1 -0
  35. package/dist/config.js +5 -3
  36. package/dist/cron.js +18 -4
  37. package/dist/dashboard/assets/index-BoTHPby4.js +65 -0
  38. package/dist/dashboard/assets/{index-EAg6lqF5.css → index-D4mufvBg.css} +1 -1
  39. package/dist/dashboard/index.html +2 -2
  40. package/dist/discord.js +4 -40
  41. package/dist/exec-approval.js +1 -1
  42. package/dist/file-lock.js +1 -1
  43. package/dist/gateway.js +3 -10
  44. package/dist/providers/anthropic.js +9 -5
  45. package/dist/providers/codex.js +10 -6
  46. package/dist/providers/context-manager.d.ts +22 -0
  47. package/dist/providers/context-manager.js +100 -0
  48. package/dist/providers/openai.js +9 -5
  49. package/dist/providers/types.d.ts +1 -0
  50. package/dist/security.js +9 -0
  51. package/dist/setup.d.ts +2 -1
  52. package/dist/setup.js +156 -34
  53. package/dist/skills.js +9 -2
  54. package/dist/subagent.js +33 -2
  55. package/dist/tools/bash-tool.js +8 -0
  56. package/dist/tools/browser-tool.js +3 -2
  57. package/dist/tools/definitions.d.ts +0 -27
  58. package/dist/tools/definitions.js +0 -18
  59. package/dist/tools/execute-context.d.ts +4 -4
  60. package/dist/tools/file-tools.d.ts +1 -1
  61. package/dist/tools/file-tools.js +1 -1
  62. package/dist/tools.d.ts +5 -5
  63. package/dist/tools.js +87 -98
  64. package/dist/types.d.ts +14 -22
  65. package/dist/usage.d.ts +1 -0
  66. package/dist/usage.js +30 -46
  67. package/dist/utils.d.ts +18 -0
  68. package/dist/utils.js +71 -0
  69. package/dist/voice.js +9 -7
  70. package/package.json +1 -1
  71. package/dist/dashboard/assets/index-UVAjSXCG.js +0 -107
package/dist/cli.js CHANGED
@@ -43,6 +43,9 @@ Commands:
43
43
  tools list List available tools (built-in + MCP)
44
44
  tools install <name> Add MCP server (--command <cmd> [--args ...] or --url <url>)
45
45
  tools remove <name> Remove MCP server
46
+ agents List coding agents (active + recent)
47
+ agents <id> Show details for a coding agent (with live output)
48
+ agents <id> --follow Follow live output for an agent
46
49
  sandbox status Show active sandbox containers
47
50
  sandbox prune Force-prune all sandbox containers
48
51
  sandbox init Auto-setup sandbox runtime/image/config (supports --profile)
@@ -154,21 +157,44 @@ function startDaemon() {
154
157
  console.log(`Daemon started: ${LAUNCHD_LABEL}`);
155
158
  return 0;
156
159
  }
160
+ // All launchd labels that may be running (current + legacy)
161
+ const ALL_LAUNCHD_LABELS = [LAUNCHD_LABEL, 'com.katre.skimpyclaw'];
157
162
  function stopDaemon() {
158
- if (!launchctlAvailable()) {
159
- console.error('Daemon control is only supported on macOS with launchctl.');
160
- return 1;
161
- }
162
- if (!existsSync(LAUNCHD_PLIST)) {
163
- console.error(`Launchd plist not found: ${LAUNCHD_PLIST}`);
164
- return 1;
163
+ const launchAgentsDir = join(homedir(), 'Library', 'LaunchAgents');
164
+ const uid = process.getuid?.();
165
+ // 1. Unload and remove plists for all known labels
166
+ if (launchctlAvailable()) {
167
+ for (const label of ALL_LAUNCHD_LABELS) {
168
+ const plist = join(launchAgentsDir, `${label}.plist`);
169
+ if (existsSync(plist)) {
170
+ runLaunchctl(['unload', plist]);
171
+ rmSync(plist, { force: true });
172
+ console.log(`Unloaded and removed: ${label}`);
173
+ }
174
+ // Also try bootout in case the service is loaded without a plist
175
+ if (uid !== undefined) {
176
+ runLaunchctl(['bootout', `gui/${uid}/${label}`]);
177
+ }
178
+ }
165
179
  }
166
- const result = runLaunchctl(['unload', LAUNCHD_PLIST]);
167
- if (!result.ok && !result.output.includes('Could not find specified service')) {
168
- console.error(result.output || 'Failed to unload daemon');
169
- return 1;
180
+ // 2. Kill anything still listening on the gateway port
181
+ const lsofResult = spawnSync('lsof', ['-ti', `:${DEFAULT_PORT}`], { encoding: 'utf-8' });
182
+ const pids = (lsofResult.stdout || '')
183
+ .split('\n')
184
+ .map((s) => s.trim())
185
+ .filter(Boolean);
186
+ if (pids.length > 0) {
187
+ for (const pid of pids) {
188
+ try {
189
+ process.kill(Number(pid), 'SIGTERM');
190
+ console.log(`Killed process ${pid} on port ${DEFAULT_PORT}`);
191
+ }
192
+ catch {
193
+ // already dead
194
+ }
195
+ }
170
196
  }
171
- console.log(`Daemon stopped: ${LAUNCHD_LABEL}`);
197
+ console.log('Daemon stopped.');
172
198
  return 0;
173
199
  }
174
200
  function commandUninstall(args) {
@@ -795,10 +821,7 @@ function resolveSandboxDir() {
795
821
  return null;
796
822
  }
797
823
  function parseSandboxOption(args, flag) {
798
- const idx = args.indexOf(flag);
799
- if (idx === -1 || idx + 1 >= args.length)
800
- return undefined;
801
- return args[idx + 1];
824
+ return parseOption(args, flag, '') || undefined;
802
825
  }
803
826
  function runSandboxImageCheck(runtime, image, network, cmd) {
804
827
  const result = spawnSync(runtime, ['run', '--rm', '--network', network, image, 'sh', '-lc', cmd], { encoding: 'utf-8' });
@@ -815,6 +838,115 @@ function printSandboxCheck(ok, name, detail, hint) {
815
838
  console.log(` → ${hint}`);
816
839
  }
817
840
  }
841
+ async function commandAgents(args) {
842
+ const { getAllCodeAgents, getCodeAgent, restoreCodeAgentTasks } = await import('./code-agents/index.js');
843
+ // Restore tasks from disk so we can see them
844
+ restoreCodeAgentTasks();
845
+ const id = args.find(a => !a.startsWith('-'));
846
+ const follow = args.includes('--follow') || args.includes('-f');
847
+ if (id) {
848
+ // Show details for a specific agent
849
+ const showAgent = () => {
850
+ const agent = getCodeAgent(id);
851
+ if (!agent) {
852
+ console.error(`No coding agent found with ID "${id}".`);
853
+ return false;
854
+ }
855
+ // Clear screen in follow mode
856
+ if (follow)
857
+ process.stdout.write('\x1b[2J\x1b[H');
858
+ const elapsed = agent.durationSeconds != null
859
+ ? agent.durationSeconds
860
+ : Math.round((Date.now() - new Date(agent.startedAt).getTime()) / 1000);
861
+ const elapsedStr = elapsed < 60 ? `${elapsed}s` : `${Math.floor(elapsed / 60)}m${elapsed % 60}s`;
862
+ console.log(`\x1b[1m${agent.id}\x1b[0m ${agent.agent} \x1b[33m${agent.status}\x1b[0m (${elapsedStr})`);
863
+ if (agent.model)
864
+ console.log(`Model: ${agent.model}`);
865
+ console.log(`Workdir: ${agent.workdir}`);
866
+ console.log(`Task: ${agent.task.slice(0, 200)}${agent.task.length > 200 ? '...' : ''}`);
867
+ // Show children for team coordinator
868
+ if (agent.childTaskIds && agent.childTaskIds.length > 0) {
869
+ console.log(`\n\x1b[1mChildren:\x1b[0m`);
870
+ for (const childId of agent.childTaskIds) {
871
+ const child = getCodeAgent(childId);
872
+ if (!child)
873
+ continue;
874
+ const cElapsed = child.durationSeconds != null
875
+ ? child.durationSeconds
876
+ : Math.round((Date.now() - new Date(child.startedAt).getTime()) / 1000);
877
+ const cStr = cElapsed < 60 ? `${cElapsed}s` : `${Math.floor(cElapsed / 60)}m${cElapsed % 60}s`;
878
+ const waveLabel = child.wave != null ? ` [wave ${child.wave + 1}]` : '';
879
+ const icon = child.status === 'completed' ? '✅' : child.status === 'failed' ? '❌' : child.status === 'running' ? '🔄' : child.status === 'pending' ? '⏳' : '❓';
880
+ console.log(` ${icon} ${child.id} ${child.status} (${cStr})${waveLabel}`);
881
+ const subtask = (child.subtask || child.task).slice(0, 120);
882
+ console.log(` ${subtask}${(child.subtask || child.task).length > 120 ? '...' : ''}`);
883
+ }
884
+ }
885
+ // Show live output
886
+ if (agent.liveOutput) {
887
+ console.log(`\n\x1b[1mLive Output:\x1b[0m`);
888
+ console.log(agent.liveOutput.slice(-3000));
889
+ }
890
+ // Show result
891
+ if (agent.outputPreview) {
892
+ console.log(`\n\x1b[1mResult:\x1b[0m`);
893
+ console.log(agent.outputPreview.slice(0, 2000));
894
+ }
895
+ if (agent.error) {
896
+ console.log(`\n\x1b[31mError: ${agent.error}\x1b[0m`);
897
+ }
898
+ if (agent.validationOutput) {
899
+ console.log(`\n\x1b[1mValidation:\x1b[0m`);
900
+ console.log(agent.validationOutput.slice(0, 1000));
901
+ }
902
+ return agent.status === 'running' || agent.status === 'validating' || agent.status === 'pending';
903
+ };
904
+ if (follow) {
905
+ let stillRunning = showAgent();
906
+ while (stillRunning) {
907
+ await new Promise(r => setTimeout(r, 3000));
908
+ restoreCodeAgentTasks();
909
+ stillRunning = showAgent();
910
+ }
911
+ // Show final state
912
+ showAgent();
913
+ return 0;
914
+ }
915
+ showAgent();
916
+ return 0;
917
+ }
918
+ // List all agents
919
+ const all = getAllCodeAgents();
920
+ if (all.length === 0) {
921
+ console.log('No coding agents have run yet.');
922
+ return 0;
923
+ }
924
+ // Group: active first, then recent
925
+ const active = all.filter(a => a.status === 'running' || a.status === 'validating' || a.status === 'pending');
926
+ const finished = all.filter(a => a.status !== 'running' && a.status !== 'validating' && a.status !== 'pending');
927
+ if (active.length > 0) {
928
+ console.log('\x1b[1mActive:\x1b[0m');
929
+ for (const a of active) {
930
+ const elapsed = Math.round((Date.now() - new Date(a.startedAt).getTime()) / 1000);
931
+ const elapsedStr = elapsed < 60 ? `${elapsed}s` : `${Math.floor(elapsed / 60)}m${elapsed % 60}s`;
932
+ const taskPreview = a.task.slice(0, 80) + (a.task.length > 80 ? '...' : '');
933
+ const children = a.childTaskIds ? ` (${a.childTaskIds.length} children)` : '';
934
+ console.log(` ${a.id}: \x1b[33m${a.status}\x1b[0m ${a.agent} (${elapsedStr})${children} — ${taskPreview}`);
935
+ }
936
+ }
937
+ if (finished.length > 0) {
938
+ console.log(active.length > 0 ? '\n\x1b[1mRecent:\x1b[0m' : '\x1b[1mRecent:\x1b[0m');
939
+ for (const a of finished.slice(-15)) {
940
+ const dur = a.durationSeconds != null
941
+ ? (a.durationSeconds < 60 ? `${a.durationSeconds}s` : `${Math.floor(a.durationSeconds / 60)}m`)
942
+ : '?';
943
+ const icon = a.status === 'completed' ? '✅' : a.status === 'failed' ? '❌' : a.status === 'timeout' ? '⏰' : a.status === 'cancelled' ? '🚫' : '❓';
944
+ const taskPreview = a.task.slice(0, 80) + (a.task.length > 80 ? '...' : '');
945
+ console.log(` ${icon} ${a.id}: ${a.status} ${a.agent} (${dur}) — ${taskPreview}`);
946
+ }
947
+ }
948
+ return 0;
949
+ }
818
950
  async function commandSandbox(args) {
819
951
  const sub = args[0];
820
952
  if (sub === 'status') {
@@ -950,7 +1082,30 @@ async function commandSandbox(args) {
950
1082
  }
951
1083
  return failed ? 1 : 0;
952
1084
  }
953
- console.log('Usage: skimpyclaw sandbox <status|prune|init|doctor>');
1085
+ console.log(`Usage: skimpyclaw sandbox <command>
1086
+
1087
+ Commands:
1088
+ init Build sandbox image and enable in config
1089
+ status List active sandbox containers
1090
+ prune Remove orphaned sandbox containers
1091
+ doctor Run targeted sandbox diagnostics
1092
+
1093
+ Init options:
1094
+ --runtime <container|docker> Container runtime (default: auto-detect)
1095
+ --profile <minimal|dev|full> Package set (default: minimal)
1096
+ --image <name> Image name (default: skimpyclaw-sandbox:latest)
1097
+ --network <name> Network name (default: auto per runtime)
1098
+
1099
+ Profiles:
1100
+ minimal bash, curl, git, gh, jq, python3, ripgrep, pnpm
1101
+ dev minimal + gcc, g++, make
1102
+ full dev + pip3, sqlite3, unzip, less
1103
+
1104
+ Which runtime?
1105
+ Apple Containers (macOS 26+) — lighter, faster startup, no daemon.
1106
+ Docker — cross-platform, use if you already run Docker.
1107
+ Auto-detect prefers Apple Containers, falls back to Docker.
1108
+ `);
954
1109
  return 1;
955
1110
  }
956
1111
  export async function runCli(argv = process.argv.slice(2)) {
@@ -959,6 +1114,17 @@ export async function runCli(argv = process.argv.slice(2)) {
959
1114
  printHelp();
960
1115
  return 0;
961
1116
  }
1117
+ if (command === '--version' || command === '-v' || command === 'version') {
1118
+ const pkgPath = join(fileURLToPath(import.meta.url), '..', '..', 'package.json');
1119
+ try {
1120
+ const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
1121
+ console.log(`skimpyclaw v${pkg.version}`);
1122
+ }
1123
+ catch {
1124
+ console.log('skimpyclaw (version unknown)');
1125
+ }
1126
+ return 0;
1127
+ }
962
1128
  try {
963
1129
  if (command === 'start') {
964
1130
  if (args.includes('--daemon')) {
@@ -1016,6 +1182,9 @@ export async function runCli(argv = process.argv.slice(2)) {
1016
1182
  if (command === 'tools') {
1017
1183
  return await commandTools(args);
1018
1184
  }
1185
+ if (command === 'agents') {
1186
+ return await commandAgents(args);
1187
+ }
1019
1188
  if (command === 'sandbox') {
1020
1189
  return await commandSandbox(args);
1021
1190
  }
package/dist/code-agents/executor.d.ts CHANGED
@@ -11,11 +11,16 @@ export type PackageManager = 'pnpm' | 'yarn' | 'npm' | 'bun';
11
11
  export declare function detectPackageManager(workdir: string): PackageManager;
12
12
  /**
13
13
  * Build the validation command for a project directory.
14
- * Checks for `build` and `test` scripts in package.json, then runs them
15
- * with the detected package manager. Falls back to `<pm> build && <pm> test`.
14
+ *
15
+ * Resolution order:
16
+ * 1. Per-project override from config `codeAgents.validationCommands`
17
+ * 2. Monorepo auto-detection: scope to changed packages only
18
+ * - Works both when workdir is the repo root AND when it's a package subdir
19
+ * 3. Auto-detect from package.json scripts (build + test)
20
+ * 4. Empty string (skip validation) if no scripts found
16
21
  */
17
- export declare function buildValidationCommand(workdir: string): string;
22
+ export declare function buildValidationCommand(workdir: string, validationCommands?: Record<string, string>): string;
18
23
  /** Run build/test validation. Shared by solo agents and team orchestrator. */
19
- export declare function runValidation(workdir: string): Promise<ValidationResult>;
24
+ export declare function runValidation(workdir: string, validationCommands?: Record<string, string>): Promise<ValidationResult>;
20
25
  /** Background execution of a coding agent. Updates task status throughout. */
21
26
  export declare function runCodeAgentBackground(id: string, agent: string, task: string, workdir: string, validate: boolean, input: Record<string, any>, startedAt: Date, options?: CodeAgentBackgroundOptions): Promise<void>;
package/dist/code-agents/executor.js CHANGED
@@ -1,9 +1,10 @@
1
1
  // Code Agent Executor - Background execution logic
2
- import { spawn, exec } from 'child_process';
2
+ import { spawn, exec, execSync } from 'child_process';
3
3
  import { createWriteStream, existsSync, readFileSync } from 'fs';
4
4
  import { join } from 'path';
5
5
  // SKIMPYCLAW_ROOT for log paths
6
6
  const SKIMPYCLAW_ROOT = join(import.meta.dirname || process.cwd(), '..', '..');
7
+ import { toErrorMessage } from '../utils.js';
7
8
  import { VALIDATE_TIMEOUT_MS } from './types.js';
8
9
  import { getCodeAgentsDir, ensureCodeAgentsDir, writeCodeAgentTask, setCodeAgentCanceller, deleteCodeAgentCanceller, getCodeAgent, } from './registry.js';
9
10
  import { buildCodeAgentArgs, notifyCodeAgentResult } from './utils.js';
@@ -51,15 +52,184 @@ export function detectPackageManager(workdir) {
51
52
  // 3. Fallback
52
53
  return 'pnpm';
53
54
  }
55
+ /**
56
+ * Detect monorepo workspaces from package.json.
57
+ * Returns workspace glob patterns or null if not a monorepo.
58
+ */
59
+ function getWorkspacePatterns(workdir) {
60
+ try {
61
+ const pkgPath = join(workdir, 'package.json');
62
+ if (!existsSync(pkgPath))
63
+ return null;
64
+ const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
65
+ // yarn/npm: "workspaces": ["packages/*"] or "workspaces": { "packages": [...] }
66
+ const ws = pkg.workspaces;
67
+ if (Array.isArray(ws))
68
+ return ws;
69
+ if (ws && Array.isArray(ws.packages))
70
+ return ws.packages;
71
+ // pnpm: check pnpm-workspace.yaml
72
+ const pnpmWsPath = join(workdir, 'pnpm-workspace.yaml');
73
+ if (existsSync(pnpmWsPath)) {
74
+ const content = readFileSync(pnpmWsPath, 'utf-8');
75
+ const matches = content.match(/- ['"]?([^'"\n]+)['"]?/g);
76
+ if (matches)
77
+ return matches.map(m => m.replace(/^- ['"]?|['"]?$/g, ''));
78
+ }
79
+ return null;
80
+ }
81
+ catch {
82
+ return null;
83
+ }
84
+ }
85
+ /**
86
+ * Find which monorepo packages have changed files (git diff).
87
+ * Returns package directories relative to workdir.
88
+ */
89
+ function getChangedPackageDirs(workdir) {
90
+ try {
91
+ // Get changed files vs HEAD (staged + unstaged + untracked)
92
+ const diff = execSync('git diff --name-only HEAD 2>/dev/null; git diff --name-only --cached 2>/dev/null; git ls-files --others --exclude-standard 2>/dev/null', { cwd: workdir, timeout: 5000, encoding: 'utf-8' }).trim();
93
+ if (!diff)
94
+ return [];
95
+ const files = [...new Set(diff.split('\n').filter(Boolean))];
96
+ // Extract unique top-level package directories (e.g. "packages/image-studio/src/foo.ts" → "packages/image-studio")
97
+ const pkgDirs = new Set();
98
+ for (const f of files) {
99
+ const parts = f.split('/');
100
+ // Look for package.json at each depth to find package boundary
101
+ for (let depth = 1; depth <= Math.min(parts.length - 1, 4); depth++) {
102
+ const candidate = parts.slice(0, depth).join('/');
103
+ if (existsSync(join(workdir, candidate, 'package.json'))) {
104
+ pkgDirs.add(candidate);
105
+ break;
106
+ }
107
+ }
108
+ }
109
+ return [...pkgDirs];
110
+ }
111
+ catch {
112
+ return [];
113
+ }
114
+ }
115
+ /**
116
+ * Build scoped validation commands for a monorepo by detecting changed packages.
117
+ * Returns a combined command that builds/tests only affected packages, or null
118
+ * if this doesn't look like a monorepo or no packages were changed.
119
+ */
120
+ function buildMonorepoValidationCommand(workdir) {
121
+ const wsPatterns = getWorkspacePatterns(workdir);
122
+ if (!wsPatterns)
123
+ return null;
124
+ const changedDirs = getChangedPackageDirs(workdir);
125
+ if (changedDirs.length === 0)
126
+ return null;
127
+ const pm = detectPackageManager(workdir);
128
+ const parts = [];
129
+ for (const dir of changedDirs) {
130
+ const pkgJsonPath = join(workdir, dir, 'package.json');
131
+ if (!existsSync(pkgJsonPath))
132
+ continue;
133
+ try {
134
+ const pkg = JSON.parse(readFileSync(pkgJsonPath, 'utf-8'));
135
+ const pkgName = pkg.name;
136
+ const scripts = pkg.scripts || {};
137
+ if (!pkgName)
138
+ continue;
139
+ // Build with workspace command
140
+ if (scripts.build) {
141
+ if (pm === 'pnpm')
142
+ parts.push(`pnpm --filter ${pkgName} run build`);
143
+ else if (pm === 'yarn')
144
+ parts.push(`yarn workspace ${pkgName} build`);
145
+ else if (pm === 'bun')
146
+ parts.push(`bun --filter ${pkgName} run build`);
147
+ else
148
+ parts.push(`npm -w ${pkgName} run build`);
149
+ }
150
+ // Test: prefer package-scoped test, fall back to root test runner scoped to path
151
+ if (scripts.test) {
152
+ if (pm === 'pnpm')
153
+ parts.push(`pnpm --filter ${pkgName} run test`);
154
+ else if (pm === 'yarn')
155
+ parts.push(`yarn workspace ${pkgName} test`);
156
+ else if (pm === 'bun')
157
+ parts.push(`bun --filter ${pkgName} run test`);
158
+ else
159
+ parts.push(`npm -w ${pkgName} run test`);
160
+ }
161
+ else {
162
+ // No package-level test script — try running root test scoped to the package path
163
+ // This handles monorepos like wp-calypso with `jest --testPathPattern`
164
+ const rootPkg = JSON.parse(readFileSync(join(workdir, 'package.json'), 'utf-8'));
165
+ const rootScripts = rootPkg.scripts || {};
166
+ // Check for common monorepo test patterns
167
+ if (rootScripts['test-packages']) {
168
+ if (pm === 'yarn')
169
+ parts.push(`yarn test-packages ${dir}`);
170
+ else
171
+ parts.push(`${pm} run test-packages ${dir}`);
172
+ }
173
+ }
174
+ }
175
+ catch { /* skip this package */ }
176
+ }
177
+ if (parts.length === 0)
178
+ return null;
179
+ console.log(`[validation] Monorepo: scoped to ${changedDirs.length} package(s): ${changedDirs.join(', ')}`);
180
+ return parts.join(' && ');
181
+ }
182
+ /**
183
+ * Walk up from a directory to find a monorepo root (directory with workspaces).
184
+ * Returns the root path or null if not inside a monorepo.
185
+ */
186
+ function findMonorepoRoot(startDir) {
187
+ let dir = startDir;
188
+ const root = '/';
189
+ while (dir !== root) {
190
+ if (getWorkspacePatterns(dir))
191
+ return dir;
192
+ const parent = join(dir, '..');
193
+ if (parent === dir)
194
+ break;
195
+ dir = parent;
196
+ }
197
+ return null;
198
+ }
54
199
  /**
55
200
  * Build the validation command for a project directory.
56
- * Checks for `build` and `test` scripts in package.json, then runs them
57
- * with the detected package manager. Falls back to `<pm> build && <pm> test`.
201
+ *
202
+ * Resolution order:
203
+ * 1. Per-project override from config `codeAgents.validationCommands`
204
+ * 2. Monorepo auto-detection: scope to changed packages only
205
+ * - Works both when workdir is the repo root AND when it's a package subdir
206
+ * 3. Auto-detect from package.json scripts (build + test)
207
+ * 4. Empty string (skip validation) if no scripts found
58
208
  */
59
- export function buildValidationCommand(workdir) {
209
+ export function buildValidationCommand(workdir, validationCommands) {
210
+ // 1. Check per-project overrides
211
+ if (validationCommands) {
212
+ const dirName = workdir.split('/').pop() || '';
213
+ for (const [key, cmd] of Object.entries(validationCommands)) {
214
+ if (key === dirName || workdir === key || workdir.endsWith(`/${key}`)) {
215
+ return cmd;
216
+ }
217
+ }
218
+ }
219
+ // 2. Monorepo auto-detection — check workdir and parent dirs
220
+ const monorepoCmd = buildMonorepoValidationCommand(workdir);
221
+ if (monorepoCmd)
222
+ return monorepoCmd;
223
+ // Also check if workdir is a subpackage inside a monorepo
224
+ const monorepoRoot = findMonorepoRoot(workdir);
225
+ if (monorepoRoot && monorepoRoot !== workdir) {
226
+ const rootCmd = buildMonorepoValidationCommand(monorepoRoot);
227
+ if (rootCmd)
228
+ return rootCmd;
229
+ }
230
+ // 3. Simple project — use root package.json scripts
60
231
  const pm = detectPackageManager(workdir);
61
232
  const run = pm === 'npm' ? 'npm run' : pm;
62
- // Check which scripts exist in package.json
63
233
  let hasBuild = false;
64
234
  let hasTest = false;
65
235
  try {
@@ -77,19 +247,23 @@ export function buildValidationCommand(workdir) {
77
247
  parts.push(`${run} build`);
78
248
  if (hasTest)
79
249
  parts.push(`${run} test`);
80
- // If neither build nor test scripts exist, still try — the scripts
81
- // might be defined in a workspace root or the commands may work anyway
82
250
  if (parts.length === 0) {
83
- parts.push(`${run} build`, `${run} test`);
251
+ return '';
84
252
  }
85
253
  return parts.join(' && ');
86
254
  }
87
255
  /** Run build/test validation. Shared by solo agents and team orchestrator. */
88
- export function runValidation(workdir) {
89
- const cmd = buildValidationCommand(workdir);
256
+ export function runValidation(workdir, validationCommands) {
257
+ const cmd = buildValidationCommand(workdir, validationCommands);
258
+ if (!cmd) {
259
+ // No build/test scripts found — nothing to validate, pass by default
260
+ return Promise.resolve({ passed: true, output: 'PASS (no build/test scripts found)' });
261
+ }
262
+ // If workdir is inside a monorepo, run from the repo root so workspace commands work
263
+ const execDir = findMonorepoRoot(workdir) || workdir;
90
264
  return new Promise((resolve) => {
91
265
  exec(cmd, {
92
- cwd: workdir,
266
+ cwd: execDir,
93
267
  timeout: VALIDATE_TIMEOUT_MS,
94
268
  maxBuffer: 5 * 1024 * 1024,
95
269
  }, (error, vStdout, vStderr) => {
@@ -315,7 +489,7 @@ export async function runCodeAgentBackground(id, agent, task, workdir, validate,
315
489
  caTask.outputPreview = agentOutput.slice(0, 500);
316
490
  caTask.liveOutput = undefined;
317
491
  writeCodeAgentTask(caTask);
318
- const validationCmd = buildValidationCommand(workdir);
492
+ const validationCmd = buildValidationCommand(workdir, options?.validationCommands);
319
493
  const runValidationPromise = () => new Promise((res) => {
320
494
  const validationProc = exec(validationCmd, {
321
495
  cwd: workdir,
@@ -521,7 +695,7 @@ export async function runCodeAgentBackground(id, agent, task, workdir, validate,
521
695
  await notifyCodeAgentResult(caTask, (id) => getCodeAgent(id) ?? null);
522
696
  }
523
697
  catch (err) {
524
- const errMsg = err instanceof Error ? err.message : String(err);
698
+ const errMsg = toErrorMessage(err);
525
699
  addEvent(traceId, { type: 'error', summary: errMsg.slice(0, 200), durationMs: Date.now() - startedAt.getTime() });
526
700
  await endTrace(traceId, 'error');
527
701
  Object.assign(caTask, {
package/dist/code-agents/index.d.ts CHANGED
@@ -4,7 +4,7 @@ export type { CodeAgentTask, DecomposedSubtask, CodeAgentBackgroundOptions, Buil
4
4
  export { CODE_AGENT_TIMEOUT_MS, VALIDATE_TIMEOUT_MS } from './types.js';
5
5
  export { getActiveCodeAgents, getRecentCodeAgents, getAllCodeAgents, getCodeAgent, cancelCodeAgent, restoreCodeAgentTasks, getCodeAgentsDir, } from './registry.js';
6
6
  export { runCodeAgentBackground, runValidation } from './executor.js';
7
- export { runTeamOrchestrator, computeWaves, decomposeTask, synthesizeResults, } from './orchestrator.js';
7
+ export { runTeamOrchestrator, computeWaves, decomposeTask, synthesizeResults, gatherCodebaseContext, } from './orchestrator.js';
8
8
  export { setCodeAgentConfig, getCodeAgentConfig, buildCodeAgentArgs, resolveSelectedCodeAgent, resolveWorkdir, resolveModelAlias, readTeamState, } from './utils.js';
9
9
  export { parseStreamJsonForLive, parseClaudeOutput, parseCodexOutput } from './parser.js';
10
10
  export type { ClaudeOutputResult } from './parser.js';
package/dist/code-agents/index.js CHANGED
@@ -13,7 +13,7 @@ export { getActiveCodeAgents, getRecentCodeAgents, getAllCodeAgents, getCodeAgen
13
13
  // Re-export executor functions
14
14
  export { runCodeAgentBackground, runValidation } from './executor.js';
15
15
  // Re-export orchestrator functions
16
- export { runTeamOrchestrator, computeWaves, decomposeTask, synthesizeResults, } from './orchestrator.js';
16
+ export { runTeamOrchestrator, computeWaves, decomposeTask, synthesizeResults, gatherCodebaseContext, } from './orchestrator.js';
17
17
  // Re-export utility functions
18
18
  export { setCodeAgentConfig, getCodeAgentConfig, buildCodeAgentArgs, resolveSelectedCodeAgent, resolveWorkdir, resolveModelAlias, readTeamState, } from './utils.js';
19
19
  // Re-export parser functions
@@ -80,7 +80,7 @@ export async function executeCodeWithAgent(input, config, context) {
80
80
  catch { /* gateway not running */ }
81
81
  }
82
82
  const resolvedModel = resolveModelAlias(rawModel, context?.fullConfig?.models?.aliases);
83
- const configDefault = context?.fullConfig?.subagents?.defaultCodeAgent || 'claude';
83
+ const configDefault = context?.fullConfig?.codeAgents?.defaultAgent || 'claude';
84
84
  const requestedAgent = input.agent;
85
85
  const agent = resolveSelectedCodeAgent(requestedAgent, configDefault, resolvedModel);
86
86
  if (!agent) {
@@ -99,11 +99,11 @@ export async function executeCodeWithAgent(input, config, context) {
99
99
  : '';
100
100
  return `Error: Working directory not allowed. Permitted: ${config.allowedPaths.join(', ')}${projectNames}`;
101
101
  }
102
- // Concurrency check — share limit with subagents
103
- const maxConcurrent = context?.fullConfig?.subagents?.maxConcurrent ?? 5;
102
+ // Concurrency check
103
+ const maxConcurrent = context?.fullConfig?.codeAgents?.maxConcurrent ?? 5;
104
104
  const activeCount = getActiveCodeAgents().length;
105
105
  if (activeCount >= maxConcurrent) {
106
- return `Error: Concurrency limit reached (${activeCount}/${maxConcurrent} coding agents running). Wait for one to finish or increase subagents.maxConcurrent.`;
106
+ return `Error: Concurrency limit reached (${activeCount}/${maxConcurrent} coding agents running). Wait for one to finish or increase codeAgents.maxConcurrent.`;
107
107
  }
108
108
  const validate = input.validate !== false; // default true
109
109
  // Create task with unique ID
@@ -122,8 +122,14 @@ export async function executeCodeWithAgent(input, config, context) {
122
122
  storeCodeAgentTask(caTask);
123
123
  writeCodeAgentTask(caTask);
124
124
  // Fire-and-forget: spawn background process
125
- const resolvedInput = { ...input, model: resolvedModel };
126
- runCodeAgentBackground(id, agent, task, workdir, validate, resolvedInput, startedAt).catch((err) => {
125
+ const configTimeout = context?.fullConfig?.codeAgents?.timeoutMinutes ?? 30;
126
+ const soloTimeout = Math.min(input.timeout_minutes || configTimeout, 60);
127
+ const resolvedInput = { ...input, model: resolvedModel, timeout_minutes: soloTimeout };
128
+ runCodeAgentBackground(id, agent, task, workdir, validate, resolvedInput, startedAt, {
129
+ defaultTimeoutMinutes: soloTimeout,
130
+ maxTimeoutMinutes: 60,
131
+ validationCommands: context?.fullConfig?.codeAgents?.validationCommands,
132
+ }).catch((err) => {
127
133
  console.error(`[code-agent] Background error for ${id}:`, err);
128
134
  });
129
135
  const taskPreview = task.length > 100 ? task.slice(0, 100) + '...' : task;
@@ -136,18 +142,14 @@ export async function executeCodeWithTeam(input, config, context) {
136
142
  const task = input.task;
137
143
  if (!task)
138
144
  return 'Error: task is required';
139
- // Resolve model alias first so agent auto-selection can inspect it.
140
- // Fall back to current session model so codex/kimi models auto-select the right CLI.
141
- let rawTeamModel = input.model;
142
- if (!rawTeamModel) {
143
- try {
144
- const { getCurrentModel } = await import('../gateway.js');
145
- rawTeamModel = getCurrentModel();
146
- }
147
- catch { /* gateway not running */ }
148
- }
149
- const resolvedModel = resolveModelAlias(rawTeamModel, context?.fullConfig?.models?.aliases);
150
- const configDefault = context?.fullConfig?.subagents?.defaultCodeAgent || 'claude';
145
+ // Resolve model alias. Only fall back to session model when an explicit model
146
+ // was requested otherwise the session model (e.g. gpt-5.3-codex) would
147
+ // override agent selection even when the user wants claude.
148
+ const rawTeamModel = input.model;
149
+ const resolvedModel = rawTeamModel
150
+ ? resolveModelAlias(rawTeamModel, context?.fullConfig?.models?.aliases)
151
+ : undefined;
152
+ const configDefault = context?.fullConfig?.codeAgents?.defaultAgent || 'claude';
151
153
  const requestedAgent = input.agent;
152
154
  const agent = resolveSelectedCodeAgent(requestedAgent, configDefault, resolvedModel);
153
155
  if (!agent) {
@@ -167,13 +169,13 @@ export async function executeCodeWithTeam(input, config, context) {
167
169
  : '';
168
170
  return `Error: Working directory not allowed. Permitted: ${config.allowedPaths.join(', ')}${projectNames}`;
169
171
  }
172
+ const validate = input.validate !== false;
170
173
  // Concurrency check — need room for teamSize children
171
- const maxConcurrent = context?.fullConfig?.subagents?.maxConcurrent ?? 5;
174
+ const maxConcurrent = context?.fullConfig?.codeAgents?.maxConcurrent ?? 5;
172
175
  const activeCount = getActiveCodeAgents().length;
173
176
  if (activeCount + teamSize > maxConcurrent) {
174
177
  return `Error: Concurrency limit — need ${teamSize} slots but only ${maxConcurrent - activeCount} available (${activeCount}/${maxConcurrent} running). Wait for agents to finish.`;
175
178
  }
176
- const validate = input.validate !== false;
177
179
  // Create parent task
178
180
  const id = getNextCodeAgentId();
179
181
  const startedAt = new Date();
package/dist/code-agents/orchestrator.d.ts CHANGED
@@ -7,16 +7,22 @@ import type { DecomposedSubtask, ChildResult } from './types.js';
7
7
  * Throws if there's a cycle in the dependency graph.
8
8
  */
9
9
  export declare function computeWaves(subtasks: DecomposedSubtask[]): number[][];
10
+ /**
11
+ * Gather lightweight codebase context to improve task decomposition.
12
+ * Returns a short summary of the project structure (file tree, package.json scripts).
13
+ * Capped at ~2000 chars to keep the decomposition prompt small.
14
+ */
15
+ export declare function gatherCodebaseContext(workdir: string): string;
10
16
  /**
11
17
  * Use a quick model call to decompose a complex task into N subtasks with optional dependency info.
12
18
  * Falls back to numbered subtask splitting on parse error.
13
19
  * Falls back to all-independent if dependency info is missing or invalid.
14
20
  */
15
- export declare function decomposeTask(task: string, teamSize: number, config: Config): Promise<DecomposedSubtask[]>;
21
+ export declare function decomposeTask(task: string, teamSize: number, config: Config, workdir?: string): Promise<DecomposedSubtask[]>;
16
22
  /**
17
23
  * Use a quick model call to synthesize results from multiple subtask completions.
18
24
  */
19
- export declare function synthesizeResults(originalTask: string, results: ChildResult[], config: Config): Promise<string>;
25
+ export declare function synthesizeResults(originalTask: string, results: ChildResult[], config: Config, workdir?: string): Promise<string>;
20
26
  /**
21
27
  * Team orchestrator — decomposes task, spawns parallel agents, monitors, synthesizes.
22
28
  */