whale-code 6.4.0 → 6.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/bin/swagmanager-mcp.js +51 -0
  2. package/dist/cli/app.js +30 -2
  3. package/dist/cli/chat/ChatApp.d.ts +4 -4
  4. package/dist/cli/chat/ChatApp.js +114 -44
  5. package/dist/cli/chat/ChatInput.d.ts +13 -6
  6. package/dist/cli/chat/ChatInput.js +433 -89
  7. package/dist/cli/chat/MemoryManager.d.ts +15 -0
  8. package/dist/cli/chat/MemoryManager.js +61 -0
  9. package/dist/cli/chat/MessageList.d.ts +8 -0
  10. package/dist/cli/chat/MessageList.js +1 -1
  11. package/dist/cli/chat/NodeManager.d.ts +30 -0
  12. package/dist/cli/chat/NodeManager.js +89 -0
  13. package/dist/cli/chat/NodeSelector.d.ts +19 -0
  14. package/dist/cli/chat/NodeSelector.js +37 -0
  15. package/dist/cli/chat/PlanApproval.d.ts +17 -0
  16. package/dist/cli/chat/PlanApproval.js +82 -0
  17. package/dist/cli/chat/SessionManager.d.ts +16 -0
  18. package/dist/cli/chat/SessionManager.js +43 -0
  19. package/dist/cli/chat/SlashMenu.d.ts +38 -0
  20. package/dist/cli/chat/SlashMenu.js +208 -0
  21. package/dist/cli/chat/StatusBar.d.ts +16 -0
  22. package/dist/cli/chat/StatusBar.js +22 -0
  23. package/dist/cli/chat/ThemeSelector.d.ts +14 -0
  24. package/dist/cli/chat/ThemeSelector.js +29 -0
  25. package/dist/cli/chat/ToolIndicator.d.ts +8 -0
  26. package/dist/cli/chat/ToolIndicator.js +33 -9
  27. package/dist/cli/chat/hooks/useAgentLoop.d.ts +2 -1
  28. package/dist/cli/chat/hooks/useAgentLoop.js +22 -17
  29. package/dist/cli/chat/hooks/useSlashCommands.d.ts +19 -0
  30. package/dist/cli/chat/hooks/useSlashCommands.js +254 -15
  31. package/dist/cli/commands/config-cmd.js +4 -25
  32. package/dist/cli/commands/db.d.ts +13 -0
  33. package/dist/cli/commands/db.js +243 -0
  34. package/dist/cli/commands/doctor.js +6 -9
  35. package/dist/cli/commands/mcp.js +1 -20
  36. package/dist/cli/services/agent-events.d.ts +22 -1
  37. package/dist/cli/services/agent-events.js +9 -0
  38. package/dist/cli/services/agent-loop.js +65 -8
  39. package/dist/cli/services/agent-worker-base.js +21 -6
  40. package/dist/cli/services/api-retry.d.ts +25 -0
  41. package/dist/cli/services/api-retry.js +91 -0
  42. package/dist/cli/services/auth-service.d.ts +1 -1
  43. package/dist/cli/services/auth-service.js +40 -19
  44. package/dist/cli/services/background-processes.js +26 -2
  45. package/dist/cli/services/config-store.d.ts +13 -1
  46. package/dist/cli/services/config-store.js +116 -13
  47. package/dist/cli/services/format-server-response.js +12 -6
  48. package/dist/cli/services/ink-resize-fix.d.ts +18 -0
  49. package/dist/cli/services/ink-resize-fix.js +66 -0
  50. package/dist/cli/services/interactive-tools.d.ts +14 -0
  51. package/dist/cli/services/interactive-tools.js +47 -2
  52. package/dist/cli/services/keybinding-manager.js +1 -1
  53. package/dist/cli/services/local-tools.js +35 -2
  54. package/dist/cli/services/server-tools.js +175 -3
  55. package/dist/cli/services/subagent.js +7 -6
  56. package/dist/cli/services/system-prompt.js +5 -3
  57. package/dist/cli/services/task-decomposer.d.ts +35 -0
  58. package/dist/cli/services/task-decomposer.js +199 -0
  59. package/dist/cli/services/team-lead.d.ts +18 -0
  60. package/dist/cli/services/team-lead.js +80 -0
  61. package/dist/cli/services/teammate.js +5 -5
  62. package/dist/cli/services/telemetry.d.ts +8 -2
  63. package/dist/cli/services/telemetry.js +116 -92
  64. package/dist/cli/services/tools/agent-tools.d.ts +1 -0
  65. package/dist/cli/services/tools/agent-tools.js +50 -4
  66. package/dist/cli/services/tools/file-ops.d.ts +2 -0
  67. package/dist/cli/services/tools/file-ops.js +85 -19
  68. package/dist/cli/services/tools/shell-exec.js +22 -12
  69. package/dist/cli/shared/Theme.d.ts +1 -2
  70. package/dist/cli/shared/Theme.js +1 -1
  71. package/dist/cli/shared/WhaleBanner.d.ts +4 -1
  72. package/dist/cli/shared/WhaleBanner.js +12 -8
  73. package/dist/cli/shared/markdown.d.ts +5 -4
  74. package/dist/cli/shared/markdown.js +376 -334
  75. package/dist/cli/shared/theme-manager.d.ts +27 -0
  76. package/dist/cli/shared/theme-manager.js +178 -0
  77. package/dist/cli/shared/theme-presets.d.ts +16 -0
  78. package/dist/cli/shared/theme-presets.js +265 -0
  79. package/dist/index.js +0 -51
  80. package/dist/node/adapters/imessage.d.ts +10 -0
  81. package/dist/node/adapters/imessage.js +45 -6
  82. package/dist/node/cli.js +459 -8
  83. package/dist/node/config.d.ts +17 -0
  84. package/dist/node/gateway-client.d.ts +55 -0
  85. package/dist/node/gateway-client.js +201 -0
  86. package/dist/node/portal/clipboard.d.ts +28 -0
  87. package/dist/node/portal/clipboard.js +183 -0
  88. package/dist/node/portal/discovery.d.ts +29 -0
  89. package/dist/node/portal/discovery.js +61 -0
  90. package/dist/node/portal/forward.d.ts +30 -0
  91. package/dist/node/portal/forward.js +90 -0
  92. package/dist/node/portal/index.d.ts +47 -0
  93. package/dist/node/portal/index.js +250 -0
  94. package/dist/node/portal/multiplexer.d.ts +48 -0
  95. package/dist/node/portal/multiplexer.js +207 -0
  96. package/dist/node/portal/permissions.d.ts +36 -0
  97. package/dist/node/portal/permissions.js +131 -0
  98. package/dist/node/portal/protocol.d.ts +140 -0
  99. package/dist/node/portal/protocol.js +193 -0
  100. package/dist/node/portal/screen.d.ts +18 -0
  101. package/dist/node/portal/screen.js +93 -0
  102. package/dist/node/portal/session.d.ts +68 -0
  103. package/dist/node/portal/session.js +127 -0
  104. package/dist/node/portal/shell.d.ts +26 -0
  105. package/dist/node/portal/shell.js +142 -0
  106. package/dist/node/portal/stream.d.ts +43 -0
  107. package/dist/node/portal/stream.js +90 -0
  108. package/dist/node/portal/transfer.d.ts +33 -0
  109. package/dist/node/portal/transfer.js +231 -0
  110. package/dist/node/portal/ui.d.ts +16 -0
  111. package/dist/node/portal/ui.js +148 -0
  112. package/dist/node/remote-desktop/compile-helper.d.ts +13 -0
  113. package/dist/node/remote-desktop/compile-helper.js +73 -0
  114. package/dist/node/remote-desktop/index.d.ts +67 -0
  115. package/dist/node/remote-desktop/index.js +220 -0
  116. package/dist/node/remote-desktop/protocol.d.ts +96 -0
  117. package/dist/node/remote-desktop/protocol.js +67 -0
  118. package/dist/node/runtime.d.ts +8 -1
  119. package/dist/node/runtime.js +117 -9
  120. package/dist/server/handlers/__test-utils__/test-db.d.ts +25 -0
  121. package/dist/server/handlers/__test-utils__/test-db.js +128 -0
  122. package/dist/server/handlers/api-keys.js +26 -2
  123. package/dist/server/handlers/browser.d.ts +0 -4
  124. package/dist/server/handlers/browser.js +0 -46
  125. package/dist/server/handlers/catalog.js +37 -14
  126. package/dist/server/handlers/clickhouse.d.ts +10 -0
  127. package/dist/server/handlers/clickhouse.js +215 -0
  128. package/dist/server/handlers/comms.d.ts +308 -4
  129. package/dist/server/handlers/comms.js +444 -11
  130. package/dist/server/handlers/creations.js +1 -1
  131. package/dist/server/handlers/crm.d.ts +54 -8
  132. package/dist/server/handlers/crm.js +353 -68
  133. package/dist/server/handlers/embeddings.js +3 -3
  134. package/dist/server/handlers/enrichment.js +39 -55
  135. package/dist/server/handlers/inventory.js +1 -1
  136. package/dist/server/handlers/kali.d.ts +9 -1
  137. package/dist/server/handlers/kali.js +50 -1
  138. package/dist/server/handlers/media.d.ts +8 -0
  139. package/dist/server/handlers/media.js +902 -0
  140. package/dist/server/handlers/meta-ads.js +6 -3
  141. package/dist/server/handlers/nodes.d.ts +2 -0
  142. package/dist/server/handlers/nodes.js +331 -40
  143. package/dist/server/handlers/operations.d.ts +4 -6
  144. package/dist/server/handlers/operations.js +99 -38
  145. package/dist/server/handlers/platform.js +224 -107
  146. package/dist/server/handlers/remove-bg.d.ts +6 -0
  147. package/dist/server/handlers/remove-bg.js +96 -0
  148. package/dist/server/handlers/storefront.d.ts +6 -0
  149. package/dist/server/handlers/storefront.js +477 -0
  150. package/dist/server/handlers/supply-chain.js +21 -3
  151. package/dist/server/handlers/workflow-steps.js +87 -31
  152. package/dist/server/handlers/workflows.js +4 -1
  153. package/dist/server/index.js +334 -88
  154. package/dist/server/lib/clickhouse-buffer.d.ts +48 -0
  155. package/dist/server/lib/clickhouse-buffer.js +175 -0
  156. package/dist/server/lib/clickhouse-client.d.ts +112 -0
  157. package/dist/server/lib/clickhouse-client.js +141 -0
  158. package/dist/server/lib/coa-renderer.d.ts +91 -0
  159. package/dist/server/lib/coa-renderer.js +411 -0
  160. package/dist/server/lib/compaction-service.js +46 -1
  161. package/dist/server/lib/pdf-renderer.d.ts +143 -0
  162. package/dist/server/lib/pdf-renderer.js +867 -0
  163. package/dist/server/lib/react-pdf-layout.d.ts +40 -0
  164. package/dist/server/lib/react-pdf-layout.js +437 -0
  165. package/dist/server/lib/server-agent-loop.d.ts +2 -0
  166. package/dist/server/lib/server-agent-loop.js +36 -17
  167. package/dist/server/lib/server-subagent.d.ts +3 -0
  168. package/dist/server/lib/server-subagent.js +9 -6
  169. package/dist/server/lib/supabase-client.js +51 -3
  170. package/dist/server/lib/template-resolver.js +14 -4
  171. package/dist/server/lib/utils.js +15 -0
  172. package/dist/server/local-agent-gateway.d.ts +44 -0
  173. package/dist/server/local-agent-gateway.js +389 -49
  174. package/dist/server/providers/anthropic.js +12 -2
  175. package/dist/server/providers/gemini.js +17 -2
  176. package/dist/server/proxy-handlers.js +151 -0
  177. package/dist/server/tool-router.d.ts +2 -2
  178. package/dist/server/tool-router.js +25 -35
  179. package/dist/shared/agent-core.d.ts +25 -2
  180. package/dist/shared/agent-core.js +66 -5
  181. package/dist/shared/api-client.js +54 -3
  182. package/dist/shared/sse-parser.d.ts +1 -1
  183. package/dist/shared/sse-parser.js +5 -2
  184. package/dist/shared/tool-dispatch.js +15 -1
  185. package/package.json +16 -10
  186. package/dist/server/handlers/__test-utils__/mock-supabase.d.ts +0 -11
  187. package/dist/server/handlers/__test-utils__/mock-supabase.js +0 -393
@@ -229,11 +229,9 @@ async function downloadSingleMedia(data, toolName, outDir, inputArgs) {
229
229
  }
230
230
  const buffer = Buffer.from(await resp.arrayBuffer());
231
231
  writeFileSync(localPath, buffer);
232
- // Replace remote URL with local path the LLM reports this, no URL needed
232
+ // Keep file_url for downstream tools (email, workflows) that need public URLs
233
233
  data.local_file = localPath;
234
234
  data.file_size = buffer.length;
235
- // Remove fields that tempt the LLM to fabricate URLs
236
- delete data.file_url;
237
235
  delete data.download;
238
236
  }
239
237
  catch (err) {
@@ -396,6 +394,177 @@ export async function executeServerTool(name, input, emitter) {
396
394
  serverArgs.audio_base64 = samples[0];
397
395
  }
398
396
  }
397
+ // ── Pre-process file_path(s) for media upload tool ──
398
+ // Same pattern as voice: read local files in-process so base64 never flows
399
+ // through shell output (which truncates at ~100K chars) or conversation context.
400
+ // Also intercept file_url that's actually a local path (agents confuse the two).
401
+ if (name === "media" && (input.action === "upload" || input.action === "bulk_upload")) {
402
+ const fileUrl = input.file_url;
403
+ if (fileUrl && (fileUrl.startsWith("/") || /^[A-Z]:\\/i.test(fileUrl)) && !input.file_path && !input.file_paths) {
404
+ input = { ...input, file_path: fileUrl };
405
+ delete input.file_url;
406
+ }
407
+ }
408
+ if (name === "media" && (input.action === "upload" || input.action === "bulk_upload") && (input.file_path || input.file_paths)) {
409
+ let paths = [];
410
+ if (Array.isArray(input.file_paths)) {
411
+ paths = input.file_paths;
412
+ }
413
+ else if (typeof input.file_path === "string") {
414
+ paths = [input.file_path];
415
+ }
416
+ if (paths.length > 0) {
417
+ const MAX_FILE_SIZE = 10_000_000; // 10MB per file
418
+ const MAX_BATCH_BYTES = 50_000_000; // 50MB total batch
419
+ const MAX_BATCH_COUNT = 50;
420
+ if (paths.length === 1) {
421
+ // ── Single file → standard upload action ──
422
+ const filePath = paths[0];
423
+ try {
424
+ const buf = readFileSync(filePath);
425
+ if (buf.length > MAX_FILE_SIZE) {
426
+ return { success: false, output: `File too large: ${(buf.length / 1_000_000).toFixed(1)}MB (max 10MB). Compress or resize first.` };
427
+ }
428
+ const { file_path: _fp, file_paths: _fps, ...rest } = input;
429
+ serverArgs = { ...rest, action: "upload", base64: buf.toString("base64") };
430
+ if (!serverArgs.file_name) {
431
+ const basename = filePath.split("/").pop() || filePath.split("\\").pop() || "";
432
+ if (basename)
433
+ serverArgs.file_name = basename;
434
+ }
435
+ }
436
+ catch (err) {
437
+ return { success: false, output: `Cannot read file "${filePath}": ${err.message || err}` };
438
+ }
439
+ }
440
+ else {
441
+ // ── Multiple files → bulk_upload action ──
442
+ const files = [];
443
+ const errors = [];
444
+ let totalBytes = 0;
445
+ for (const filePath of paths.slice(0, MAX_BATCH_COUNT)) {
446
+ try {
447
+ const buf = readFileSync(filePath);
448
+ if (buf.length > MAX_FILE_SIZE) {
449
+ errors.push(`${filePath.split("/").pop()}: ${(buf.length / 1_000_000).toFixed(1)}MB exceeds 10MB limit, skipped`);
450
+ continue;
451
+ }
452
+ totalBytes += buf.length;
453
+ if (totalBytes > MAX_BATCH_BYTES) {
454
+ errors.push(`Batch limit reached (50MB) — remaining files skipped`);
455
+ break;
456
+ }
457
+ const basename = filePath.split("/").pop() || filePath.split("\\").pop() || "file";
458
+ files.push({ base64: buf.toString("base64"), file_name: basename });
459
+ }
460
+ catch (err) {
461
+ errors.push(`${filePath.split("/").pop()}: ${err.message || "unreadable"}`);
462
+ }
463
+ }
464
+ if (paths.length > MAX_BATCH_COUNT) {
465
+ errors.push(`Only first ${MAX_BATCH_COUNT} files processed (${paths.length} provided)`);
466
+ }
467
+ if (files.length === 0) {
468
+ return { success: false, output: `No valid files to upload. Errors:\n${errors.join("\n")}` };
469
+ }
470
+ const { file_path: _fp, file_paths: _fps, ...rest } = input;
471
+ serverArgs = { ...rest, action: "bulk_upload", files, _read_errors: errors.length > 0 ? errors : undefined };
472
+ }
473
+ }
474
+ }
475
+ // ── Pre-process file_path for media replace tool ──
476
+ // Same pattern as upload: read local file in-process, base64-encode for server.
477
+ if (name === "media" && input.action === "replace" && (input.file_path || (typeof input.file_url === "string" && input.file_url.startsWith("/")))) {
478
+ const localPath = (input.file_path || input.file_url);
479
+ try {
480
+ const buf = readFileSync(localPath);
481
+ if (buf.length > 10_000_000) {
482
+ return { success: false, output: `File too large: ${(buf.length / 1_000_000).toFixed(1)}MB (max 10MB). Compress or resize first.` };
483
+ }
484
+ const { file_path: _fp, file_url: _fu, ...rest } = input;
485
+ serverArgs = { ...rest, base64: buf.toString("base64") };
486
+ if (!serverArgs.file_name) {
487
+ const basename = localPath.split("/").pop() || localPath.split("\\").pop() || "";
488
+ if (basename)
489
+ serverArgs.file_name = basename;
490
+ }
491
+ }
492
+ catch (err) {
493
+ return { success: false, output: `Cannot read file "${localPath}": ${err.message || err}` };
494
+ }
495
+ }
496
+ // ── Pre-process image_path for remove_bg tool ──
497
+ // Same pattern: read local image, send as image_base64.
498
+ if (name === "remove_bg" && (input.image_path || (typeof input.image_url === "string" && input.image_url.startsWith("/")))) {
499
+ const imgPath = (input.image_path || input.image_url);
500
+ try {
501
+ const buf = readFileSync(imgPath);
502
+ if (buf.length > 10_000_000) {
503
+ return { success: false, output: `Image too large: ${(buf.length / 1_000_000).toFixed(1)}MB (max 10MB).` };
504
+ }
505
+ const { image_path: _ip, image_url: _iu, ...rest } = input;
506
+ serverArgs = { ...rest, image_base64: buf.toString("base64") };
507
+ }
508
+ catch (err) {
509
+ return { success: false, output: `Cannot read image "${imgPath}": ${err.message || err}` };
510
+ }
511
+ }
512
+ // ── Pre-process file_path in email attachments ──
513
+ // Reads local files referenced in attachments[].file_path, converts to base64 content.
514
+ if (name === "email" && input.action === "send" && Array.isArray(input.attachments)) {
515
+ const rawAtts = input.attachments;
516
+ const hasLocalFiles = rawAtts.some(a => a.file_path || (typeof a.url === "string" && a.url.startsWith("/")));
517
+ if (hasLocalFiles) {
518
+ const processed = [];
519
+ for (const att of rawAtts) {
520
+ const localPath = att.file_path || (typeof att.url === "string" && att.url.startsWith("/") ? att.url : null);
521
+ if (localPath) {
522
+ try {
523
+ const buf = readFileSync(localPath);
524
+ if (buf.length > 10_000_000) {
525
+ return { success: false, output: `Attachment too large: ${localPath.split("/").pop()} is ${(buf.length / 1_000_000).toFixed(1)}MB (max 10MB).` };
526
+ }
527
+ const filename = att.filename || localPath.split("/").pop() || "attachment";
528
+ processed.push({ filename, content: buf.toString("base64") });
529
+ }
530
+ catch (err) {
531
+ return { success: false, output: `Cannot read attachment "${localPath}": ${err.message || err}` };
532
+ }
533
+ }
534
+ else if (att.content && att.filename) {
535
+ processed.push({ filename: att.filename, content: att.content });
536
+ }
537
+ else if (att.url && att.filename) {
538
+ processed.push(att); // URL-based, let server fetch it
539
+ }
540
+ }
541
+ const { attachments: _a, ...rest } = input;
542
+ serverArgs = { ...rest, attachments: processed };
543
+ }
544
+ }
545
+ // ── Pre-process file_path for kali upload ──
546
+ // Reads local file and sends content as base64 to the kali box.
547
+ if (name === "kali" && input.action === "upload" && (input.file_path || input.local_path)) {
548
+ const localPath = (input.file_path || input.local_path);
549
+ if (localPath && !input.content) {
550
+ try {
551
+ const buf = readFileSync(localPath);
552
+ if (buf.length > 50_000_000) {
553
+ return { success: false, output: `File too large for kali upload: ${(buf.length / 1_000_000).toFixed(1)}MB (max 50MB).` };
554
+ }
555
+ const { file_path: _fp, local_path: _lp, ...rest } = input;
556
+ serverArgs = { ...rest, content: buf.toString("base64"), encoding: "base64" };
557
+ // Auto-set remote path from filename if not provided
558
+ if (!serverArgs.path) {
559
+ const basename = localPath.split("/").pop() || "upload";
560
+ serverArgs.path = `/tmp/${basename}`;
561
+ }
562
+ }
563
+ catch (err) {
564
+ return { success: false, output: `Cannot read file "${localPath}": ${err.message || err}` };
565
+ }
566
+ }
567
+ }
399
568
  // ── Streaming path for kali exec actions ──
400
569
  // Uses NDJSON streaming to show live stdout/stderr in the CLI while the command runs.
401
570
  const isStreamable = name === "kali" && emitter &&
@@ -452,6 +621,9 @@ export async function executeServerTool(name, input, emitter) {
452
621
  emitter.emitToolOutput(name, p.data);
453
622
  }
454
623
  }
624
+ else if (parsed.type === "status" && parsed.progress) {
625
+ emitter.emitToolProgress(name, parsed.progress);
626
+ }
455
627
  else if (parsed.type === "result") {
456
628
  finalResult = parsed;
457
629
  }
@@ -11,11 +11,11 @@ import { readFileSync, existsSync, writeFileSync, mkdirSync, appendFileSync } fr
11
11
  import { join } from "path";
12
12
  import { homedir, tmpdir } from "os";
13
13
  import { LOCAL_TOOL_DEFINITIONS, } from "./local-tools.js";
14
- import { LoopDetector, estimateCostUsd } from "../../shared/agent-core.js";
14
+ import { LoopDetector, estimateCostUsd, demoteSubagentModel } from "../../shared/agent-core.js";
15
15
  import { MODEL_MAP } from "../../shared/constants.js";
16
16
  import { loadServerToolDefinitions, } from "./server-tools.js";
17
17
  import { logSpan, generateSpanId, generateTraceId } from "./telemetry.js";
18
- import { loadClaudeMd, getModel } from "./agent-loop.js";
18
+ import { loadClaudeMd } from "./agent-loop.js";
19
19
  import { getGlobalEmitter } from "./agent-events.js";
20
20
  import { getAgentDefinition } from "./agent-definitions.js";
21
21
  import { callAgentAPI, executeToolBlocks, extractTextBlocks, extractToolUseBlocks, getResponseText, yieldToEventLoop, yieldForRender, } from "./agent-worker-base.js";
@@ -283,8 +283,9 @@ function emitSubagentProgress(agentType, agentId, message, turn, toolName) {
283
283
  export async function runSubagent(options) {
284
284
  const { prompt, subagent_type, model, resume, max_turns, name, parentContext, parentTraceContext } = options;
285
285
  const agentId = resume || generateAgentId();
286
- // Inherit parent model when not specified (Anthropic pattern)
287
- const modelId = model ? MODEL_MAP[model] : getModel();
286
+ // Sub-agents default to Haiku for cost control — uses shared demotion logic
287
+ const demotedAlias = demoteSubagentModel(model, subagent_type);
288
+ const modelId = MODEL_MAP[demotedAlias] || MODEL_MAP["haiku"];
288
289
  const cwd = process.cwd();
289
290
  const systemPrompt = buildAgentPrompt(subagent_type, cwd);
290
291
  const startTime = Date.now();
@@ -634,8 +635,8 @@ Each agent completes autonomously and returns results.`,
634
635
  },
635
636
  model: {
636
637
  type: "string",
637
- enum: ["sonnet", "opus", "haiku"],
638
- description: "Optional model override. If not specified, inherits from parent. Prefer haiku for quick, straightforward tasks to minimize cost and latency.",
638
+ enum: ["sonnet", "haiku"],
639
+ description: "Optional model. haiku (default) for fast tasks, sonnet for complex reasoning.",
639
640
  },
640
641
  resume: {
641
642
  type: "string",
@@ -103,9 +103,11 @@ Consider the reversibility and blast radius of your actions:
103
103
  - model:"sonnet" — code analysis, multi-step research, plan design ($3/$15 per 1M tokens)
104
104
  - model:"opus" — complex reasoning, judgment calls ($5/$25 per 1M tokens, use sparingly)
105
105
 
106
- ## Teams (team_create tool)
107
- - Do NOT use team_create for bug fixes or single-file changes — work directly. Teams are for large features with 3+ independent workstreams.
108
- - When creating teams, assign per-task models based on complexity. Most tasks should use haiku or sonnet.`;
106
+ ## Teams (team_create / team_auto)
107
+ - Do NOT use teams for bug fixes or single-file changes — work directly. Teams are for large features with 3+ independent workstreams.
108
+ - **team_auto**: Give it a single task string it auto-decomposes into parallel sub-tasks with file ownership, runs them, and reviews results. Best for refactoring, audits, and multi-file features.
109
+ - **team_create**: Use when you want explicit control over task breakdown, file assignments, and models.
110
+ - When creating teams manually, assign per-task models based on complexity. Most tasks should use haiku or sonnet.`;
109
111
  if (hasServerTools) {
110
112
  const storeCtx = storeConfig.store_name ? ` All operations are scoped to the active store: **${storeConfig.store_name}**.` : "";
111
113
  toolSection += `
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Task Decomposer — breaks a user request into N sub-tasks via Claude API.
3
+ *
4
+ * Scans the working directory to give the decomposer real filesystem context,
5
+ * so it can assign specific files/directories to each agent instead of duplicating tasks.
6
+ *
7
+ * Used by the Team system to auto-decompose tasks before assigning to teammates.
8
+ */
9
/**
 * One unit of work produced by the decomposer and handed to a single agent.
 */
export interface SubTask {
    /** Unique identifier (a UUID string) generated when the sub-task is built. */
    id: string;
    /** Role label for the agent, e.g. "implementer", "tester", "reviewer", "researcher" or "executor". */
    role: string;
    /** Instruction text the agent receives. */
    prompt: string;
    /** Zero-based position of this sub-task within the decomposition. */
    cell_index: number;
    /** Ids of the sub-tasks this one depends on; empty when it is independent. */
    depends_on: Array<string>;
}
16
/**
 * Outcome of a decomposition: the sub-tasks plus a short description of the split.
 */
export interface DecompositionResult {
    /** Sub-tasks to run; contains a single entry when decomposition fell back to one task. */
    sub_tasks: Array<SubTask>;
    /** One-line description of how the work was split. */
    summary: string;
}
20
/**
 * Break a request into sub-tasks that independent agents can run in parallel.
 *
 * When no API key is available, or the model call / response handling fails,
 * the result is a single sub-task carrying the original request.
 *
 * @param task The user request to decompose.
 * @param opts Optional tuning:
 *   - `maxCells`: upper bound on the number of sub-tasks (defaults to 6).
 *   - `context`: extra text passed to the model as "Previous round context".
 *   - `apiKey`: Anthropic API key; falls back to the `ANTHROPIC_API_KEY` environment variable.
 *   - `model`: model id to call; a Haiku model is used when omitted.
 *   - `workingDirectory`: directory whose file tree is scanned and shown to the model.
 * @returns The sub-tasks and a one-line summary of the split.
 */
export declare function decomposeTask(
    task: string,
    opts?: {
        maxCells?: number;
        context?: string;
        apiKey?: string;
        model?: string;
        workingDirectory?: string;
    },
): Promise<DecompositionResult>;
27
/**
 * Check decomposed sub-task prompts against a list of suspicious patterns
 * (for example destructive shell commands or `eval()`).
 *
 * @param subTasks Sub-tasks to inspect; `cell_index` is used to label warnings when present.
 * @returns Human-readable warnings, one per match; an empty array means nothing was flagged.
 */
export declare function sanitizePrompts(
    subTasks: {
        role: string;
        prompt: string;
        cell_index?: number;
    }[],
): string[];
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Task Decomposer — breaks a user request into N sub-tasks via Claude API.
3
+ *
4
+ * Scans the working directory to give the decomposer real filesystem context,
5
+ * so it can assign specific files/directories to each agent instead of duplicating tasks.
6
+ *
7
+ * Used by the Team system to auto-decompose tasks before assigning to teammates.
8
+ */
9
+ import Anthropic from "@anthropic-ai/sdk";
10
+ import { randomUUID } from "node:crypto";
11
+ import { readdirSync, statSync } from "node:fs";
12
+ import { join } from "node:path";
13
+ import { callWithRetry } from "./api-retry.js";
14
+ // ── Constants ──
15
+ const DECOMPOSE_FALLBACK_MODEL = "claude-haiku-4-5-20251001";
16
+ const DECOMPOSE_MAX_TOKENS = 8192;
17
/**
 * Produce a compact, indented listing of the tree rooted at `dir`.
 *
 * Entries are visited depth-first in sorted order; directories are suffixed
 * with "/" and each nesting level adds one indent unit. Well-known build and
 * VCS folders are left out. Unreadable directories and entries that cannot be
 * stat'ed are silently skipped.
 *
 * @param dir Root directory to list.
 * @param maxDepth Deepest nesting level that is still listed (the root's children are level 0).
 * @param maxEntries Hard cap on the number of listed entries.
 * @returns The listing joined with newlines ("" when nothing could be read).
 */
function scanDirectory(dir, maxDepth = 3, maxEntries = 200) {
    const ignored = new Set(["node_modules", ".git", ".next", "dist", "build", "__pycache__", ".cache", ".turbo", "coverage", ".DS_Store"]);
    const listing = [];
    const visit = (folder, level) => {
        if (level > maxDepth || listing.length >= maxEntries) {
            return;
        }
        let names;
        try {
            names = readdirSync(folder);
        }
        catch {
            return;
        }
        const indent = " ".repeat(level);
        for (const name of names.filter(n => !ignored.has(n)).sort()) {
            if (listing.length >= maxEntries) {
                break;
            }
            const target = join(folder, name);
            let directory;
            try {
                directory = statSync(target).isDirectory();
            }
            catch {
                continue;
            }
            listing.push(directory ? `${indent}${name}/` : `${indent}${name}`);
            if (directory) {
                visit(target, level + 1);
            }
        }
    };
    visit(dir, 0);
    return listing.join("\n");
}
55
/**
 * Compose the system prompt that tells the model how to split a request.
 *
 * @param maxCells Upper bound on sub-tasks; interpolated into the prompt text in two places.
 * @returns The complete prompt: rules, sub-task fields, strategy, and the JSON reply shape.
 */
function buildSystemPrompt(maxCells) {
    // One element per prompt line; empty elements are the blank separator lines.
    const promptLines = [
        `You are a task decomposition engine for a multi-agent coding system.`,
        ``,
        `Given a user request and the project's file structure, break it into up to ${maxCells} sub-tasks that can be executed by independent coding agents in parallel. Each agent runs in its own thread — it CANNOT see other agents' work until completion.`,
        ``,
        `CRITICAL RULES:`,
        `- ALL sub-tasks MUST have depends_on: []`,
        `- Every sub-task MUST target DIFFERENT files/directories — NEVER assign the same file to two agents`,
        `- Each prompt MUST list the SPECIFIC files/directories that agent owns exclusively`,
        ``,
        `Each sub-task has:`,
        `- role: one of "implementer", "tester", "reviewer", or "researcher"`,
        `- prompt: the specific instruction for this agent (MUST include exact file paths from the project structure)`,
        `- depends_on: ALWAYS [] (empty array)`,
        ``,
        `Decomposition strategy:`,
        `1. Study the file structure provided. Identify the major areas/modules/directories.`,
        `2. Partition the work so each agent owns a DISTINCT set of files. Example: Agent 0 handles src/components/Header.tsx + src/components/Nav.tsx, Agent 1 handles src/pages/Home.tsx + src/pages/About.tsx, etc.`,
        `3. Each prompt MUST start with "Working directory: <path>" and list the exact files the agent should focus on.`,
        `4. If the task involves auditing/reviewing, assign different directories to each agent.`,
        `5. If the task involves refactoring, assign specific files to each agent — NEVER say "refactor all components."`,
        `6. Fewer focused tasks > many vague tasks. Use 2-4 cells for medium tasks, 5-6 only for large codebases.`,
        `7. DO NOT create a "planner" or "coordinator" task — each agent plans its own work within its scope.`,
        `8. If the task is simple enough for 1-2 agents, use fewer. Don't pad to ${maxCells}.`,
        ``,
        `Respond with ONLY valid JSON:`,
        `{`,
        `"summary": "One-line description of the split strategy",`,
        `"sub_tasks": [`,
        `{ "role": "implementer", "prompt": "Working directory: /path/to/project\\n\\nYour scope: src/components/Header.tsx, src/components/Nav.tsx\\n\\nTask: ...", "depends_on": [] }`,
        `]`,
        `}`,
    ];
    return promptLines.join("\n");
}
88
/**
 * Break `task` into sub-tasks by asking the Anthropic Messages API.
 *
 * The user message is assembled from the working directory, a scanned file
 * tree, optional context from a previous round, the task itself and the
 * sub-task cap. The model is expected to answer with a JSON object holding
 * `summary` and `sub_tasks`.
 *
 * The model output is untrusted: entries without a non-empty string `prompt`
 * are dropped, a missing `role` defaults to "implementer", and `depends_on`
 * is only honoured for integer indices that point at another kept entry.
 *
 * Every failure mode (no API key, request error, no/invalid JSON, no usable
 * sub-tasks) degrades to `singleTaskFallback(task)` instead of throwing.
 *
 * @param task The user request to decompose.
 * @param opts Optional `maxCells` (default 6), `context`, `apiKey`
 *   (default `process.env.ANTHROPIC_API_KEY`), `model`
 *   (default `DECOMPOSE_FALLBACK_MODEL`) and `workingDirectory`.
 * @returns `{ sub_tasks, summary }`.
 */
export async function decomposeTask(task, opts = {}) {
    const maxCells = opts.maxCells || 6;
    const apiKey = opts.apiKey || process.env.ANTHROPIC_API_KEY;
    const model = opts.model || DECOMPOSE_FALLBACK_MODEL;
    // Thrown values are not guaranteed to be Error instances.
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    if (!apiKey) {
        console.warn("[decomposer] No API key — falling back to single task");
        return singleTaskFallback(task);
    }
    try {
        const client = new Anthropic({ apiKey });
        let fileTree = "";
        if (opts.workingDirectory) {
            try {
                fileTree = scanDirectory(opts.workingDirectory);
            }
            catch (err) {
                console.warn(`[decomposer] Failed to scan ${opts.workingDirectory}: ${describe(err)}`);
            }
        }
        const parts = [];
        if (opts.workingDirectory) {
            parts.push(`Working directory: ${opts.workingDirectory}`);
        }
        if (fileTree) {
            parts.push(`\nProject file structure:\n${fileTree}`);
        }
        if (opts.context) {
            parts.push(`\nPrevious round context:\n${opts.context}`);
        }
        parts.push(`\nTask: ${task}`);
        parts.push(`\nMax sub-tasks: ${maxCells}`);
        const userMessage = parts.join("\n");
        const response = await callWithRetry(() => client.messages.create({
            model,
            max_tokens: DECOMPOSE_MAX_TOKENS,
            system: buildSystemPrompt(maxCells),
            messages: [{ role: "user", content: userMessage }],
        }), { label: "decomposer" });
        const text = response.content
            .filter((b) => b.type === "text")
            .map(b => b.text)
            .join("");
        // Take the outermost {...} span so prose around the JSON is tolerated.
        const jsonMatch = text.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            console.warn("[decomposer] No JSON found in response — falling back to single task");
            return singleTaskFallback(task);
        }
        let parsed;
        try {
            parsed = JSON.parse(jsonMatch[0]);
        }
        catch (err) {
            // Report parse problems as such rather than as an API failure.
            console.warn(`[decomposer] Invalid JSON in response — falling back to single task: ${describe(err)}`);
            return singleTaskFallback(task);
        }
        const rawTasks = Array.isArray(parsed?.sub_tasks) ? parsed.sub_tasks.slice(0, maxCells) : [];
        // Remember each entry's original index: depends_on refers to those indices.
        const usable = rawTasks
            .map((raw, rawIndex) => ({ raw, rawIndex }))
            .filter(({ raw }) => raw !== null && typeof raw === "object"
            && typeof raw.prompt === "string" && raw.prompt.trim() !== "");
        if (usable.length === 0) {
            return singleTaskFallback(task);
        }
        const idByRawIndex = new Map(usable.map(({ rawIndex }) => [rawIndex, randomUUID()]));
        const subTasks = usable.map(({ raw, rawIndex }, cellIndex) => ({
            id: idByRawIndex.get(rawIndex),
            role: typeof raw.role === "string" && raw.role !== "" ? raw.role : "implementer",
            prompt: raw.prompt,
            cell_index: cellIndex,
            // Resolve depends_on from indices to UUIDs, ignoring anything that is
            // not an integer index of a different, kept sub-task.
            depends_on: (Array.isArray(raw.depends_on) ? raw.depends_on : [])
                .filter(dep => Number.isInteger(dep) && dep !== rawIndex && idByRawIndex.has(dep))
                .map(dep => idByRawIndex.get(dep)),
        }));
        return {
            sub_tasks: subTasks,
            summary: typeof parsed.summary === "string" && parsed.summary !== ""
                ? parsed.summary
                : `Split into ${subTasks.length} sub-task(s)`,
        };
    }
    catch (err) {
        console.warn("[decomposer] API call failed — falling back to single task:", describe(err));
        return singleTaskFallback(task);
    }
}
163
+ /**
164
+ * Scan decomposed sub-task prompts for suspicious patterns.
165
+ * Returns an array of human-readable warnings (empty = clean).
166
+ */
167
+ export function sanitizePrompts(subTasks) {
168
+ const warnings = [];
169
+ const dangerous = [
170
+ { pattern: /rm\s+-rf\s+[\/~]/i, label: "rm -rf with absolute path" },
171
+ { pattern: /curl.*\|\s*(?:ba)?sh/i, label: "curl piped to shell" },
172
+ { pattern: /eval\s*\(/i, label: "eval()" },
173
+ { pattern: />\s*\/etc\//i, label: "write to /etc" },
174
+ { pattern: /chmod\s+777/i, label: "chmod 777" },
175
+ { pattern: /ssh\s+/i, label: "SSH command" },
176
+ { pattern: /scp\s+/i, label: "SCP command" },
177
+ ];
178
+ for (const st of subTasks) {
179
+ for (const { pattern, label } of dangerous) {
180
+ if (pattern.test(st.prompt)) {
181
+ const idx = st.cell_index ?? subTasks.indexOf(st);
182
+ warnings.push(`Task ${idx} (${st.role}): prompt contains suspicious pattern — ${label}`);
183
+ }
184
+ }
185
+ }
186
+ return warnings;
187
+ }
188
/**
 * Wrap the untouched request in a one-element decomposition.
 *
 * Used whenever a real decomposition is unavailable, so callers always get
 * the same result shape.
 *
 * @param task The original request; becomes the prompt of the only sub-task.
 * @returns A result with one "executor" sub-task and a fixed summary.
 */
function singleTaskFallback(task) {
    const onlyTask = {
        id: randomUUID(),
        role: "executor",
        prompt: task,
        cell_index: 0,
        depends_on: [],
    };
    return { sub_tasks: [onlyTask], summary: "Single agent execution" };
}
@@ -71,3 +71,21 @@ export declare class TeamLead extends EventEmitter {
71
71
  stop(): void;
72
72
  }
73
73
  export declare function runAgentTeam(config: TeamConfig): Promise<TeamResult>;
74
+ export interface AutoTeamOptions {
75
+ maxTeammates?: number; // caps the number of sub-tasks produced by decomposition; runAutoTeam uses 4 when unset
76
+ model?: "sonnet" | "opus" | "haiku"; // model for the team config; runAutoTeam uses "sonnet" when unset
77
+ workingDirectory?: string; // handed to task decomposition; runAutoTeam uses process.cwd() when unset
78
+ apiKey?: string; // forwarded to decomposition and the review pass; the review pass falls back to ANTHROPIC_API_KEY
79
+ review?: boolean; // only an explicit false skips the review pass; unset means review runs
80
+ }
81
+ /**
82
+ * Auto-decompose a task into sub-tasks and run them as a team.
83
+ *
84
+ * Uses Claude (haiku) to break the task into parallel sub-tasks with
85
+ * file ownership, then spawns a team to execute them. Optionally runs
86
+ * a lightweight review pass to summarize results and flag issues.
87
+ */
88
+ export declare function runAutoTeam(task: string, opts?: AutoTeamOptions): Promise<TeamResult & {
89
+ review?: string; // reviewer's assessment text; undefined when review is skipped, fails, or returns no text
90
+ warnings?: string[]; // prompt-sanitization warnings; undefined (not an empty array) when there are none
91
+ }>;
@@ -14,6 +14,8 @@ import { logSpan, generateTraceId, generateSpanId, getConversationId } from "./t
14
14
  import { resolveConfig } from "./config-store.js";
15
15
  import { getGlobalEmitter } from "./agent-events.js";
16
16
  import { getModelShortName } from "./agent-loop.js";
17
+ import { decomposeTask, sanitizePrompts } from "./task-decomposer.js";
18
+ import { callWithRetry } from "./api-retry.js";
17
19
  // ============================================================================
18
20
  // TEAM LEAD CLASS
19
21
  // ============================================================================
@@ -510,3 +512,81 @@ export async function runAgentTeam(config) {
510
512
  await lead.createTeam(config);
511
513
  return lead.runTeam();
512
514
  }
515
/**
 * Auto-decompose a task into sub-tasks and run them as a team.
 *
 * Pipeline: decompose the task, scan the resulting prompts for suspicious
 * patterns (warnings are logged and returned, they never block execution),
 * map each sub-task to a team task, run the team, then optionally run a
 * lightweight review pass over the results.
 *
 * @param task Natural-language task to decompose and execute.
 * @param opts Optional settings:
 *   - `maxTeammates` caps the number of sub-tasks (default 4)
 *   - `model` is the team model (default "sonnet")
 *   - `workingDirectory` is passed to decomposition (default `process.cwd()`)
 *   - `apiKey` is forwarded to decomposition and review
 *   - `review` runs the review pass unless explicitly `false`
 * @returns The team result, plus `review` (assessment text, when available)
 *          and `warnings` (sanitization warnings, `undefined` when none).
 */
export async function runAutoTeam(task, opts = {}) {
    const maxTeammates = opts.maxTeammates ?? 4;
    const model = opts.model || "sonnet";
    const workingDirectory = opts.workingDirectory || process.cwd();
    const shouldReview = opts.review !== false;
    // 1. Decompose
    const decomposition = await decomposeTask(task, {
        maxCells: maxTeammates,
        apiKey: opts.apiKey,
        workingDirectory,
    });
    // 2. Sanitize prompts (advisory only)
    const warnings = sanitizePrompts(decomposition.sub_tasks);
    if (warnings.length > 0) {
        console.warn(`[team] Sanitization warnings:\n${warnings.join("\n")}`);
    }
    // 3. Map sub-tasks to TeamConfig
    // NOTE(review): only the prompt and extracted files are forwarded; each
    // sub-task's role and depends_on are not passed to the team — confirm intended.
    const tasks = decomposition.sub_tasks.map(st => ({
        description: st.prompt,
        files: extractFiles(st.prompt),
    }));
    // The summary originates from model output, so it may be missing or not a
    // string; fall back to the default name instead of throwing on `.slice`.
    const summary = typeof decomposition.summary === "string" ? decomposition.summary.trim() : "";
    const config = {
        name: summary.slice(0, 60) || "Auto Team",
        teammateCount: tasks.length,
        model,
        tasks,
    };
    // 4. Run team
    const result = await runAgentTeam(config);
    // 5. Optional review
    let review;
    if (shouldReview && result.taskResults.length > 0) {
        review = await reviewResults(task, result, opts.apiKey);
    }
    return { ...result, review, warnings: warnings.length > 0 ? warnings : undefined };
}
558
/**
 * Extract file paths mentioned in a sub-task prompt.
 *
 * A path is recognised when it starts with one of the known top-level
 * directories (src, lib, app, pages, components, test, spec, public) and is
 * preceded by the start of a line, whitespace, or an opening quote, backtick,
 * parenthesis or bracket. Trailing dots are stripped so that a path ending a
 * sentence ("edit src/a.ts.") yields "src/a.ts".
 *
 * @param prompt Sub-task prompt text.
 * @returns Unique paths in order of first appearance; empty when none found.
 */
function extractFiles(prompt) {
    const pathPattern = /(?:^|[\s`'"(\[])((?:src|lib|app|pages|components|test|spec|public)\/[\w./-]+)/gm;
    const files = new Set();
    for (const match of prompt.matchAll(pathPattern)) {
        // Group 1 is the path itself, without the leading delimiter.
        const path = match[1].replace(/\.+$/, "");
        if (path) {
            files.add(path);
        }
    }
    return [...files];
}
563
/**
 * Lightweight review of team results using Claude haiku.
 *
 * Best-effort: any failure (missing key, SDK import error, API error) yields
 * `undefined` rather than an exception, so the caller's team result is never
 * lost because the review step failed.
 *
 * @param originalTask The task text originally given to the team.
 * @param result Team result; `result.taskResults` entries are read for
 *               `status`, `description` and `result`.
 * @param apiKey Optional API key; falls back to `ANTHROPIC_API_KEY`.
 * @returns The reviewer's assessment text, or `undefined` when no key is
 *          available, the call fails, or the response contains no text.
 */
async function reviewResults(originalTask, result, apiKey) {
    const key = apiKey || process.env.ANTHROPIC_API_KEY;
    if (!key)
        return undefined;
    try {
        const Anthropic = (await import("@anthropic-ai/sdk")).default;
        const client = new Anthropic({ apiKey: key });
        const taskSummaries = result.taskResults
            .map((t, i) => {
                // A task may carry no description/result (e.g. when it failed);
                // coerce to string so one such task does not abort the whole review.
                const description = String(t.description ?? "").slice(0, 100);
                const outcome = String(t.result ?? "").slice(0, 200);
                return `Task ${i + 1} [${t.status}]: ${description}\nResult: ${outcome}`;
            })
            .join("\n\n");
        const response = await callWithRetry(() => client.messages.create({
            model: "claude-haiku-4-5-20251001",
            max_tokens: 1024,
            system: "You are a code review assistant. Given the original task and the results from parallel agents, provide a brief (2-4 sentence) assessment: were the goals met? Any conflicts or gaps between agents' work? Any follow-up needed?",
            messages: [{
                    role: "user",
                    content: `Original task: ${originalTask}\n\nTeam results (${result.taskResults.length} agents):\n${taskSummaries}`,
                }],
        }), { label: "team-review", maxRetries: 1 });
        const text = response.content
            .filter((b) => b.type === "text")
            .map((b) => b.text)
            .join("");
        return text || undefined;
    }
    catch (err) {
        // Still best-effort, but leave a trace so a missing review is diagnosable.
        console.warn("[team] Review pass failed — continuing without review:", err instanceof Error ? err.message : String(err));
        return undefined;
    }
}
@@ -11,7 +11,7 @@ import { Worker, parentPort, workerData, isMainThread } from "worker_threads";
11
11
  import { fileURLToPath } from "url";
12
12
  import { loadTeam, claimTask, completeTask, failTask, getAvailableTasks, sendMessage, getUnreadMessages, markMessagesRead, updateTeammate, } from "./team-state.js";
13
13
  import { LoopDetector, estimateCostUsd } from "../../shared/agent-core.js";
14
- import { MODEL_MAP, getProvider } from "../../shared/constants.js";
14
+ import { MODEL_MAP } from "../../shared/constants.js";
15
15
  import { LOCAL_TOOL_DEFINITIONS, } from "./local-tools.js";
16
16
  import { loadServerToolDefinitions, } from "./server-tools.js";
17
17
  import { getValidToken } from "./auth-service.js";
@@ -389,10 +389,10 @@ async function runTeammateLoop(data) {
389
389
  // Resolve per-task model: task-level → team default
390
390
  const taskModel = claimed.model || model;
391
391
  currentTaskModelId = MODEL_MAP[taskModel] || MODEL_MAP[model] || MODEL_MAP.opus;
392
- // Enable thinking for capable models (Opus/Sonnet adaptive, others budget/disabled)
393
- const taskProvider = getProvider(currentTaskModelId);
394
- currentTaskThinking = (taskProvider === "anthropic" || taskProvider === "bedrock")
395
- && (currentTaskModelId.includes("opus") || currentTaskModelId.includes("sonnet-4-6") || currentTaskModelId.includes("sonnet-4-5"));
392
+ // Disable thinking for teammates: with MAX_OUTPUT_TOKENS=16K, adaptive thinking
393
+ // burns 8-12K tokens on reasoning per turn, leaving only 4-8K for actual code output.
394
+ // This caused catastrophic token bloat (359K input tokens per task) and truncated responses.
395
+ currentTaskThinking = false;
396
396
  await updateTeammate(teamId, teammateId, { status: "working", currentTask: currentTaskId });
397
397
  report({ type: "task_started", teammateId, taskId: currentTaskId, content: claimed.description });
398
398
  // Start fresh conversation for new task
@@ -1,8 +1,9 @@
1
1
  /**
2
- * CLI Telemetry — fire-and-forget span logging to audit_logs
2
+ * CLI Telemetry — spans are buffered and flushed to the Fly.io server,
3
+ * which queues them into ClickHouse ai_spans.
3
4
  *
4
5
  * Session-scoped conversationId + auto-incrementing turnNumber.
5
- * Uses same column schema as executor.ts telemetry (trace_id, span_id, etc).
6
+ * Uses same column schema as server-side telemetry (trace_id, span_id, etc).
6
7
  * Never blocks or crashes the chat.
7
8
  */
8
9
  export interface ExecutionContext {
@@ -49,6 +50,11 @@ export declare function generateSpanId(): string;
49
50
  export declare function nextTurn(): number;
50
51
  export declare function createTurnContext(overrides?: Partial<ExecutionContext>): ExecutionContext;
51
52
  export declare function getTurnNumber(): number;
53
+ /**
54
+ * Flush all buffered spans to the Fly.io server.
55
+ * Call this on session end or at shutdown.
56
+ */
57
+ export declare function flushCliSpans(): void;
52
58
  export interface SpanOptions {
53
59
  action: string;
54
60
  severity?: "info" | "warn" | "error";