stagent 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. package/README.md +8 -8
  2. package/dist/cli.js +146 -2
  3. package/docs/.coverage-gaps.json +21 -0
  4. package/docs/.last-generated +1 -1
  5. package/docs/features/agent-intelligence.md +36 -14
  6. package/docs/features/chat.md +33 -56
  7. package/docs/features/cost-usage.md +14 -10
  8. package/docs/features/dashboard-kanban.md +30 -13
  9. package/docs/features/delivery-channels.md +198 -0
  10. package/docs/features/design-system.md +10 -10
  11. package/docs/features/documents.md +8 -8
  12. package/docs/features/home-workspace.md +20 -15
  13. package/docs/features/inbox-notifications.md +22 -10
  14. package/docs/features/keyboard-navigation.md +11 -11
  15. package/docs/features/monitoring.md +1 -1
  16. package/docs/features/playbook.md +30 -32
  17. package/docs/features/profiles.md +33 -11
  18. package/docs/features/projects.md +2 -2
  19. package/docs/features/provider-runtimes.md +58 -14
  20. package/docs/features/schedules.md +70 -40
  21. package/docs/features/settings.md +74 -46
  22. package/docs/features/shared-components.md +7 -15
  23. package/docs/features/tool-permissions.md +9 -9
  24. package/docs/features/workflows.md +32 -21
  25. package/docs/getting-started.md +33 -9
  26. package/docs/index.md +25 -16
  27. package/docs/journeys/developer.md +124 -207
  28. package/docs/journeys/personal-use.md +70 -79
  29. package/docs/journeys/power-user.md +107 -151
  30. package/docs/journeys/work-use.md +81 -113
  31. package/docs/manifest.json +77 -45
  32. package/docs/superpowers/plans/2026-03-30-finish-in-progress-features.md +547 -0
  33. package/docs/use-cases/agency-operator.md +84 -0
  34. package/docs/use-cases/solo-founder.md +75 -0
  35. package/docs/why-stagent.md +59 -0
  36. package/package.json +10 -3
  37. package/src/app/api/channels/[id]/route.ts +104 -0
  38. package/src/app/api/channels/[id]/test/route.ts +52 -0
  39. package/src/app/api/channels/inbound/slack/route.ts +116 -0
  40. package/src/app/api/channels/inbound/telegram/poll/route.ts +140 -0
  41. package/src/app/api/channels/inbound/telegram/route.ts +87 -0
  42. package/src/app/api/channels/route.ts +72 -0
  43. package/src/app/api/chat/conversations/route.ts +15 -0
  44. package/src/app/api/chat/entities/search/route.ts +46 -31
  45. package/src/app/api/data/clear/route.ts +4 -0
  46. package/src/app/api/data/seed/route.ts +4 -0
  47. package/src/app/api/documents/route.ts +36 -6
  48. package/src/app/api/environment/profiles/suggest/route.ts +19 -3
  49. package/src/app/api/environment/scan/route.ts +8 -1
  50. package/src/app/api/handoffs/[id]/route.ts +76 -0
  51. package/src/app/api/handoffs/route.ts +89 -0
  52. package/src/app/api/memory/route.ts +181 -0
  53. package/src/app/api/profiles/[id]/route.ts +16 -1
  54. package/src/app/api/profiles/[id]/test/route.ts +4 -0
  55. package/src/app/api/profiles/[id]/test-results/route.ts +22 -0
  56. package/src/app/api/profiles/[id]/test-single/route.ts +64 -0
  57. package/src/app/api/profiles/assist/route.ts +35 -0
  58. package/src/app/api/profiles/import-repo/apply-updates/route.ts +123 -0
  59. package/src/app/api/profiles/import-repo/check-updates/route.ts +163 -0
  60. package/src/app/api/profiles/import-repo/confirm/route.ts +118 -0
  61. package/src/app/api/profiles/import-repo/preview/route.ts +107 -0
  62. package/src/app/api/profiles/import-repo/route.ts +29 -0
  63. package/src/app/api/profiles/import-repo/scan/route.ts +25 -0
  64. package/src/app/api/profiles/route.ts +73 -22
  65. package/src/app/api/runtimes/ollama/route.ts +86 -0
  66. package/src/app/api/runtimes/suggest/route.ts +29 -0
  67. package/src/app/api/schedules/[id]/heartbeat-history/route.ts +77 -0
  68. package/src/app/api/schedules/[id]/route.ts +41 -3
  69. package/src/app/api/schedules/parse/route.ts +66 -0
  70. package/src/app/api/schedules/route.ts +71 -12
  71. package/src/app/api/settings/author-default/route.ts +7 -0
  72. package/src/app/api/settings/learning/route.ts +41 -0
  73. package/src/app/api/settings/ollama/route.ts +34 -0
  74. package/src/app/api/settings/providers/route.ts +57 -0
  75. package/src/app/api/settings/routing/route.ts +24 -0
  76. package/src/app/api/settings/web-search/route.ts +28 -0
  77. package/src/app/api/tasks/[id]/execute/route.ts +13 -1
  78. package/src/app/api/tasks/[id]/respond/route.ts +23 -1
  79. package/src/app/documents/page.tsx +3 -0
  80. package/src/app/environment/page.tsx +8 -1
  81. package/src/app/settings/page.tsx +10 -4
  82. package/src/app/workflows/[id]/edit/page.tsx +2 -0
  83. package/src/app/workflows/new/page.tsx +2 -0
  84. package/src/components/chat/chat-command-popover.tsx +22 -19
  85. package/src/components/chat/chat-input.tsx +5 -0
  86. package/src/components/chat/chat-model-selector.tsx +42 -1
  87. package/src/components/chat/chat-shell.tsx +2 -0
  88. package/src/components/dashboard/welcome-landing.tsx +9 -9
  89. package/src/components/environment/artifact-card.tsx +27 -1
  90. package/src/components/environment/environment-dashboard.tsx +50 -2
  91. package/src/components/environment/environment-summary-card.tsx +5 -2
  92. package/src/components/environment/suggested-profiles.tsx +117 -52
  93. package/src/components/handoffs/handoff-approval-card.tsx +159 -0
  94. package/src/components/memory/memory-browser.tsx +315 -0
  95. package/src/components/profiles/learned-context-panel.tsx +4 -4
  96. package/src/components/profiles/profile-assist-panel.tsx +512 -0
  97. package/src/components/profiles/profile-browser.tsx +109 -8
  98. package/src/components/profiles/profile-card.tsx +29 -1
  99. package/src/components/profiles/profile-detail-view.tsx +200 -28
  100. package/src/components/profiles/profile-form-view.tsx +220 -82
  101. package/src/components/profiles/repo-import-wizard.tsx +648 -0
  102. package/src/components/profiles/smoke-test-editor.tsx +106 -0
  103. package/src/components/schedules/schedule-create-sheet.tsx +9 -1
  104. package/src/components/schedules/schedule-form.tsx +348 -9
  105. package/src/components/schedules/schedule-list.tsx +15 -2
  106. package/src/components/settings/auth-method-selector.tsx +7 -1
  107. package/src/components/settings/budget-guardrails-section.tsx +111 -48
  108. package/src/components/settings/channels-section.tsx +526 -0
  109. package/src/components/settings/chat-settings-section.tsx +27 -1
  110. package/src/components/settings/data-management-section.tsx +8 -6
  111. package/src/components/settings/learning-context-section.tsx +124 -0
  112. package/src/components/settings/ollama-section.tsx +270 -0
  113. package/src/components/settings/providers-runtimes-section.tsx +499 -0
  114. package/src/components/settings/web-search-section.tsx +101 -0
  115. package/src/components/shared/tag-input.tsx +156 -0
  116. package/src/components/tasks/kanban-board.tsx +32 -0
  117. package/src/components/tasks/kanban-column.tsx +4 -2
  118. package/src/components/tasks/task-card.tsx +1 -0
  119. package/src/components/tasks/task-chip-bar.tsx +6 -1
  120. package/src/components/tasks/task-create-panel.tsx +55 -5
  121. package/src/components/workflows/workflow-form-view.tsx +38 -3
  122. package/src/hooks/use-chat-autocomplete.ts +24 -26
  123. package/src/hooks/use-project-skills.ts +66 -0
  124. package/src/hooks/use-tag-suggestions.ts +31 -0
  125. package/src/instrumentation.ts +4 -1
  126. package/src/lib/agents/__tests__/claude-agent.test.ts +3 -0
  127. package/src/lib/agents/__tests__/learned-context.test.ts +10 -0
  128. package/src/lib/agents/agentic-loop.ts +235 -0
  129. package/src/lib/agents/browser-mcp.ts +59 -4
  130. package/src/lib/agents/claude-agent.ts +27 -200
  131. package/src/lib/agents/handoff/bus.ts +164 -0
  132. package/src/lib/agents/handoff/governance.ts +47 -0
  133. package/src/lib/agents/handoff/types.ts +16 -0
  134. package/src/lib/agents/learned-context.ts +27 -7
  135. package/src/lib/agents/memory/decay.ts +61 -0
  136. package/src/lib/agents/memory/extractor.ts +181 -0
  137. package/src/lib/agents/memory/retrieval.ts +96 -0
  138. package/src/lib/agents/memory/types.ts +6 -0
  139. package/src/lib/agents/profiles/__tests__/project-profiles.test.ts +119 -0
  140. package/src/lib/agents/profiles/__tests__/registry.test.ts +11 -3
  141. package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +2 -2
  142. package/src/lib/agents/profiles/builtins/content-creator/SKILL.md +19 -0
  143. package/src/lib/agents/profiles/builtins/content-creator/profile.yaml +27 -0
  144. package/src/lib/agents/profiles/builtins/customer-support-agent/SKILL.md +19 -0
  145. package/src/lib/agents/profiles/builtins/customer-support-agent/profile.yaml +26 -0
  146. package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +2 -2
  147. package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +2 -2
  148. package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +2 -2
  149. package/src/lib/agents/profiles/builtins/financial-analyst/SKILL.md +19 -0
  150. package/src/lib/agents/profiles/builtins/financial-analyst/profile.yaml +24 -0
  151. package/src/lib/agents/profiles/builtins/general/profile.yaml +2 -2
  152. package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +2 -2
  153. package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +2 -2
  154. package/src/lib/agents/profiles/builtins/marketing-strategist/SKILL.md +19 -0
  155. package/src/lib/agents/profiles/builtins/marketing-strategist/profile.yaml +27 -0
  156. package/src/lib/agents/profiles/builtins/operations-coordinator/SKILL.md +19 -0
  157. package/src/lib/agents/profiles/builtins/operations-coordinator/profile.yaml +26 -0
  158. package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +2 -2
  159. package/src/lib/agents/profiles/builtins/researcher/SKILL.md +1 -0
  160. package/src/lib/agents/profiles/builtins/researcher/profile.yaml +2 -2
  161. package/src/lib/agents/profiles/builtins/sales-researcher/SKILL.md +19 -0
  162. package/src/lib/agents/profiles/builtins/sales-researcher/profile.yaml +26 -0
  163. package/src/lib/agents/profiles/builtins/shopping-assistant/SKILL.md +1 -0
  164. package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +2 -2
  165. package/src/lib/agents/profiles/builtins/sweep/profile.yaml +1 -1
  166. package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +2 -2
  167. package/src/lib/agents/profiles/builtins/travel-planner/SKILL.md +2 -0
  168. package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +2 -2
  169. package/src/lib/agents/profiles/builtins/wealth-manager/SKILL.md +2 -0
  170. package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +2 -2
  171. package/src/lib/agents/profiles/project-profiles.ts +193 -0
  172. package/src/lib/agents/profiles/registry.ts +130 -6
  173. package/src/lib/agents/profiles/types.ts +28 -0
  174. package/src/lib/agents/router.ts +174 -2
  175. package/src/lib/agents/runtime/__tests__/catalog.test.ts +15 -4
  176. package/src/lib/agents/runtime/anthropic-direct.ts +644 -0
  177. package/src/lib/agents/runtime/catalog.ts +57 -2
  178. package/src/lib/agents/runtime/claude.ts +205 -1
  179. package/src/lib/agents/runtime/index.ts +22 -0
  180. package/src/lib/agents/runtime/ollama-adapter.ts +409 -0
  181. package/src/lib/agents/runtime/openai-direct.ts +514 -0
  182. package/src/lib/agents/runtime/profile-assist-types.ts +30 -0
  183. package/src/lib/agents/runtime/types.ts +2 -0
  184. package/src/lib/agents/tool-permissions.ts +203 -0
  185. package/src/lib/channels/gateway.ts +321 -0
  186. package/src/lib/channels/poller.ts +268 -0
  187. package/src/lib/channels/registry.ts +90 -0
  188. package/src/lib/channels/slack-adapter.ts +188 -0
  189. package/src/lib/channels/telegram-adapter.ts +218 -0
  190. package/src/lib/channels/types.ts +75 -0
  191. package/src/lib/channels/webhook-adapter.ts +74 -0
  192. package/src/lib/chat/context-builder.ts +22 -2
  193. package/src/lib/chat/engine.ts +95 -13
  194. package/src/lib/chat/ollama-engine.ts +198 -0
  195. package/src/lib/chat/stagent-tools.ts +106 -20
  196. package/src/lib/chat/tool-catalog.ts +24 -0
  197. package/src/lib/chat/tool-registry.ts +90 -0
  198. package/src/lib/chat/tools/chat-history-tools.ts +4 -4
  199. package/src/lib/chat/tools/document-tools.ts +7 -7
  200. package/src/lib/chat/tools/handoff-tools.ts +70 -0
  201. package/src/lib/chat/tools/notification-tools.ts +4 -4
  202. package/src/lib/chat/tools/profile-tools.ts +3 -3
  203. package/src/lib/chat/tools/project-tools.ts +3 -3
  204. package/src/lib/chat/tools/schedule-tools.ts +29 -13
  205. package/src/lib/chat/tools/settings-tools.ts +2 -2
  206. package/src/lib/chat/tools/task-tools.ts +66 -11
  207. package/src/lib/chat/tools/usage-tools.ts +2 -2
  208. package/src/lib/chat/tools/workflow-tools.ts +8 -8
  209. package/src/lib/chat/types.ts +11 -5
  210. package/src/lib/constants/known-tools.ts +19 -0
  211. package/src/lib/constants/prose-styles.ts +1 -1
  212. package/src/lib/constants/settings.ts +7 -0
  213. package/src/lib/data/channel-bindings.ts +85 -0
  214. package/src/lib/data/clear.ts +22 -0
  215. package/src/lib/data/profile-test-results.ts +48 -0
  216. package/src/lib/data/seed-data/conversations.ts +196 -0
  217. package/src/lib/data/seed-data/learned-context.ts +99 -0
  218. package/src/lib/data/seed-data/notifications.ts +54 -1
  219. package/src/lib/data/seed-data/profile-test-results.ts +96 -0
  220. package/src/lib/data/seed-data/repo-imports.ts +51 -0
  221. package/src/lib/data/seed-data/views.ts +60 -0
  222. package/src/lib/data/seed.ts +51 -0
  223. package/src/lib/db/bootstrap.ts +162 -0
  224. package/src/lib/db/migrations/0013_add_repo_imports.sql +15 -0
  225. package/src/lib/db/migrations/0014_add_linked_profile_id.sql +3 -0
  226. package/src/lib/db/migrations/0015_add_channel_bindings.sql +23 -0
  227. package/src/lib/db/schema.ts +190 -1
  228. package/src/lib/environment/__tests__/auto-scan.test.ts +86 -0
  229. package/src/lib/environment/__tests__/profile-linker.test.ts +187 -0
  230. package/src/lib/environment/auto-scan.ts +48 -0
  231. package/src/lib/environment/data.ts +25 -0
  232. package/src/lib/environment/profile-generator.ts +40 -10
  233. package/src/lib/environment/profile-linker.ts +143 -0
  234. package/src/lib/environment/profile-rules.ts +96 -0
  235. package/src/lib/import/dedup.ts +149 -0
  236. package/src/lib/import/format-adapter.ts +631 -0
  237. package/src/lib/import/github-api.ts +219 -0
  238. package/src/lib/import/repo-scanner.ts +251 -0
  239. package/src/lib/schedules/__tests__/nlp-parser.test.ts +330 -0
  240. package/src/lib/schedules/active-hours.ts +120 -0
  241. package/src/lib/schedules/heartbeat-parser.ts +224 -0
  242. package/src/lib/schedules/heartbeat-prompt.ts +153 -0
  243. package/src/lib/schedules/nlp-parser.ts +357 -0
  244. package/src/lib/schedules/scheduler.ts +218 -3
  245. package/src/lib/settings/__tests__/budget-guardrails.test.ts +39 -1
  246. package/src/lib/settings/helpers.ts +6 -0
  247. package/src/lib/settings/routing.ts +24 -0
  248. package/src/lib/settings/runtime-setup.ts +28 -1
  249. package/src/lib/usage/ledger.ts +2 -1
  250. package/src/lib/validators/__tests__/settings.test.ts +9 -0
  251. package/src/lib/validators/profile.ts +39 -0
  252. package/src/lib/workflows/blueprints/builtins/business-daily-briefing.yaml +102 -0
  253. package/src/lib/workflows/blueprints/builtins/content-marketing-pipeline.yaml +90 -0
  254. package/src/lib/workflows/blueprints/builtins/customer-support-triage.yaml +107 -0
  255. package/src/lib/workflows/blueprints/builtins/financial-reporting.yaml +104 -0
  256. package/src/lib/workflows/blueprints/builtins/lead-research-pipeline.yaml +82 -0
@@ -0,0 +1,235 @@
1
+ /**
2
+ * Provider-agnostic agentic loop for direct API runtimes.
3
+ *
4
+ * The loop handles turn counting, budget tracking, abort signaling,
5
+ * and HITL tool permission checks. Provider-specific logic (API calls,
6
+ * event mapping, tool result formatting) is injected via callbacks.
7
+ */
8
+
9
+ import type { ToolResult } from "@/lib/chat/tool-registry";
10
+ import type { ToolPermissionResponse } from "./tool-permissions";
11
+
12
+ // ── Types ────────────────────────────────────────────────────────────
13
+
14
+ /** A single tool call extracted from the model response. */
15
+ export interface ToolCall {
16
+ id: string;
17
+ name: string;
18
+ arguments: Record<string, unknown>;
19
+ }
20
+
21
+ /** Usage snapshot from a single model turn. */
22
+ export interface TurnUsage {
23
+ inputTokens?: number;
24
+ outputTokens?: number;
25
+ totalTokens?: number;
26
+ modelId?: string;
27
+ costUsd?: number;
28
+ }
29
+
30
+ /** Events emitted during the loop for SSE streaming. */
31
+ export type AgentStreamEvent =
32
+ | { type: "status"; phase: "running" | "tool_use" | "thinking"; message?: string }
33
+ | { type: "delta"; content: string }
34
+ | { type: "done"; finalText: string }
35
+ | { type: "error"; message: string };
36
+
37
+ /** Result of a single model API call (accumulated from stream). */
38
+ export interface ModelTurnResult {
39
+ /** Concatenated text output from the model. */
40
+ text: string;
41
+ /** Tool calls requested by the model. */
42
+ toolCalls: ToolCall[];
43
+ /** Whether the model indicated it is done (end_turn / stop). */
44
+ isComplete: boolean;
45
+ /** Whether output was truncated by max_tokens. */
46
+ needsContinuation: boolean;
47
+ /** Usage for this turn. */
48
+ usage: TurnUsage;
49
+ }
50
+
51
+ /** Message in the conversation history (provider-agnostic shape). */
52
+ export type LoopMessage = Record<string, unknown>;
53
+
54
+ /** Configuration for the agentic loop — provider injects callbacks. */
55
+ export interface AgenticLoopConfig {
56
+ /**
57
+ * Call the model API with the current messages. Must stream events
58
+ * via `emitEvent` and return the accumulated turn result.
59
+ */
60
+ callModel: (
61
+ messages: LoopMessage[],
62
+ signal: AbortSignal,
63
+ ) => Promise<ModelTurnResult>;
64
+
65
+ /** Format a tool result for appending to the message history. */
66
+ formatToolResult: (
67
+ toolCallId: string,
68
+ toolName: string,
69
+ result: ToolResult,
70
+ ) => LoopMessage;
71
+
72
+ /** Format a continuation message (e.g. after max_tokens truncation). */
73
+ formatContinuation: () => LoopMessage;
74
+
75
+ /** Execute a Stagent tool by name. */
76
+ executeTool: (
77
+ name: string,
78
+ args: Record<string, unknown>,
79
+ ) => Promise<ToolResult>;
80
+
81
+ /** HITL permission check. Return allow/deny. */
82
+ checkPermission: (
83
+ toolName: string,
84
+ args: Record<string, unknown>,
85
+ ) => Promise<ToolPermissionResponse>;
86
+
87
+ /** Emit SSE event for real-time UI streaming. */
88
+ emitEvent: (event: AgentStreamEvent) => void;
89
+
90
+ /** Maximum model turns before stopping. */
91
+ maxTurns: number;
92
+
93
+ /** Maximum budget in USD before stopping. */
94
+ maxBudgetUsd?: number;
95
+
96
+ /** Abort signal for cancellation. */
97
+ signal: AbortSignal;
98
+ }
99
+
100
+ /** Result of the agentic loop. */
101
+ export interface AgenticLoopResult {
102
+ finalText: string;
103
+ turnCount: number;
104
+ totalUsage: TurnUsage;
105
+ stopReason: "complete" | "max_turns" | "budget_exceeded" | "cancelled" | "error";
106
+ }
107
+
108
+ // ── Loop implementation ──────────────────────────────────────────────
109
+
110
/**
 * Add one turn's usage onto the running total.
 *
 * Missing numeric fields are treated as 0, so the returned total always has
 * numeric token and cost fields. `modelId` takes the newest turn's value and
 * falls back to the one already recorded.
 */
function mergeTurnUsage(total: TurnUsage, turn: TurnUsage): TurnUsage {
  return {
    inputTokens: (total.inputTokens ?? 0) + (turn.inputTokens ?? 0),
    outputTokens: (total.outputTokens ?? 0) + (turn.outputTokens ?? 0),
    totalTokens: (total.totalTokens ?? 0) + (turn.totalTokens ?? 0),
    modelId: turn.modelId ?? total.modelId,
    costUsd: (total.costUsd ?? 0) + (turn.costUsd ?? 0),
  };
}

/** Build an error-flagged tool result whose text payload is `{"error": message}` as JSON. */
function toolErrorResult(message: string): ToolResult {
  return {
    content: [{ type: "text", text: JSON.stringify({ error: message }) }],
    isError: true,
  };
}

/**
 * Turn a single tool call into a tool result: run the HITL permission check,
 * then execute the tool when it is allowed.
 *
 * Never throws. A denied call, a failing permission check, and a failing tool
 * execution all come back as error results so the model can react to them.
 */
async function resolveToolCall(
  toolCall: ToolCall,
  config: AgenticLoopConfig,
): Promise<ToolResult> {
  let permission: ToolPermissionResponse;
  try {
    permission = await config.checkPermission(toolCall.name, toolCall.arguments);
  } catch (err) {
    // Fail closed: if the permission check itself breaks, the tool must not run.
    return toolErrorResult(
      err instanceof Error ? err.message : "Tool permission check failed",
    );
  }

  if (permission.behavior === "deny") {
    return toolErrorResult(permission.message ?? "Tool denied by user");
  }

  try {
    return await config.executeTool(
      toolCall.name,
      // The permission layer may hand back rewritten arguments; prefer those.
      (permission.updatedInput as Record<string, unknown>) ?? toolCall.arguments,
    );
  } catch (err) {
    return toolErrorResult(
      err instanceof Error ? err.message : "Tool execution failed",
    );
  }
}

/**
 * Run a provider-agnostic agentic loop.
 *
 * Repeatedly calls the model, resolves requested tool calls (with HITL
 * permission checks), and enforces the turn and budget limits until the model
 * completes or a limit is reached.
 *
 * @param initialMessages Starting conversation; copied, so the caller's array
 *   is not mutated by the pushes made here.
 * @param config Provider callbacks plus limits and the abort signal.
 * @returns The final text, number of model turns started, summed usage, and
 *   the reason the loop stopped. Model-call failures are reported through an
 *   `error` event and `stopReason: "error"` rather than thrown.
 *
 * When a turn is truncated (`needsContinuation`), its text is carried forward
 * and prepended to the following turn's text, so `finalText` holds the whole
 * response rather than only the last segment.
 *
 * NOTE(review): this loop only appends tool results and continuation messages.
 * It presumably relies on `callModel` to append the assistant turn to the
 * `messages` array it receives — confirm against the provider adapters.
 */
export async function runAgenticLoop(
  initialMessages: LoopMessage[],
  config: AgenticLoopConfig,
): Promise<AgenticLoopResult> {
  const messages = [...initialMessages];
  let turnCount = 0;
  let totalUsage: TurnUsage = {};
  let lastText = "";
  // Text of a truncated turn that the next turn's output must be appended to.
  let carriedText = "";

  const stop = (stopReason: AgenticLoopResult["stopReason"]): AgenticLoopResult => ({
    finalText: lastText,
    turnCount,
    totalUsage,
    stopReason,
  });

  while (turnCount < config.maxTurns) {
    // Check cancellation
    if (config.signal.aborted) {
      return stop("cancelled");
    }

    // Check budget. The truthiness test means an undefined or 0 budget
    // disables the limit.
    if (config.maxBudgetUsd && (totalUsage.costUsd ?? 0) >= config.maxBudgetUsd) {
      config.emitEvent({ type: "error", message: "Budget limit exceeded" });
      return stop("budget_exceeded");
    }

    // Call model. The turn is counted even if the call fails.
    turnCount++;
    let turnResult: ModelTurnResult;

    try {
      turnResult = await config.callModel(messages, config.signal);
    } catch (err) {
      // A failure caused by our own abort is a cancellation, not an error.
      if (config.signal.aborted) {
        return stop("cancelled");
      }
      const message = err instanceof Error ? err.message : "Model API call failed";
      config.emitEvent({ type: "error", message });
      return stop("error");
    }

    totalUsage = mergeTurnUsage(totalUsage, turnResult.usage);

    // Stitch this turn onto any truncated predecessor before recording it.
    const turnText = turnResult.text ? carriedText + turnResult.text : carriedText;
    carriedText = "";
    if (turnText) lastText = turnText;

    // Handle completion
    if (turnResult.isComplete && turnResult.toolCalls.length === 0) {
      config.emitEvent({ type: "done", finalText: lastText });
      return stop("complete");
    }

    // Handle tool calls
    if (turnResult.toolCalls.length > 0) {
      for (const toolCall of turnResult.toolCalls) {
        if (config.signal.aborted) {
          return stop("cancelled");
        }

        config.emitEvent({
          type: "status",
          phase: "tool_use",
          message: toolCall.name,
        });

        const result = await resolveToolCall(toolCall, config);

        // Append tool result to messages
        messages.push(
          config.formatToolResult(toolCall.id, toolCall.name, result),
        );
      }

      // Continue loop — model needs to process tool results
      continue;
    }

    // Handle max_tokens continuation
    if (turnResult.needsContinuation) {
      carriedText = turnText;
      messages.push(config.formatContinuation());
      continue;
    }

    // Shouldn't reach here — safeguard: not complete, no tools, no continuation.
    config.emitEvent({ type: "done", finalText: lastText });
    return stop("complete");
  }

  // Max turns exceeded
  config.emitEvent({ type: "error", message: `Max turns (${config.maxTurns}) reached` });
  return stop("max_turns");
}
@@ -1,13 +1,22 @@
1
1
  import { getSetting } from "@/lib/settings/helpers";
2
2
  import { SETTINGS_KEYS } from "@/lib/constants/settings";
3
3
 
4
- // ── MCP server config type (matches Claude Agent SDK shape) ──────────
4
+ // ── MCP server config types (match Claude Agent SDK shape) ──────────
5
5
 
6
- interface McpServerConfig {
6
+ interface McpStdioConfig {
7
+ type?: "stdio";
7
8
  command: string;
8
9
  args: string[];
9
10
  }
10
11
 
12
+ interface McpHttpConfig {
13
+ type: "http";
14
+ url: string;
15
+ headers?: Record<string, string>;
16
+ }
17
+
18
+ type AnyMcpServerConfig = McpStdioConfig | McpHttpConfig;
19
+
11
20
  // ── Read-only browser tools — auto-approved in chat & task permission callbacks
12
21
 
13
22
  export const BROWSER_READ_ONLY_TOOLS = new Set([
@@ -72,7 +81,7 @@ function parseExtraArgs(config: string | null): string[] {
72
81
  *
73
82
  * Returns `{}` when neither server is enabled — zero overhead.
74
83
  */
75
- export async function getBrowserMcpServers(): Promise<Record<string, McpServerConfig>> {
84
+ export async function getBrowserMcpServers(): Promise<Record<string, McpStdioConfig>> {
76
85
  const [chromeEnabled, playwrightEnabled, chromeConfig, playwrightConfig] =
77
86
  await Promise.all([
78
87
  getSetting(SETTINGS_KEYS.BROWSER_MCP_CHROME_DEVTOOLS_ENABLED),
@@ -81,7 +90,7 @@ export async function getBrowserMcpServers(): Promise<Record<string, McpServerCo
81
90
  getSetting(SETTINGS_KEYS.BROWSER_MCP_PLAYWRIGHT_CONFIG),
82
91
  ]);
83
92
 
84
- const servers: Record<string, McpServerConfig> = {};
93
+ const servers: Record<string, McpStdioConfig> = {};
85
94
 
86
95
  if (chromeEnabled === "true") {
87
96
  const extraArgs = parseExtraArgs(chromeConfig);
@@ -117,3 +126,49 @@ export async function getBrowserAllowedToolPatterns(): Promise<string[]> {
117
126
  if (playwrightEnabled === "true") patterns.push("mcp__playwright__*");
118
127
  return patterns;
119
128
  }
129
+
130
+ // ── Exa Search MCP — semantic web search ────────────────────────────
131
+
132
+ /** All Exa tools are read-only (search, similarity, content fetch) */
133
+ export const EXA_READ_ONLY_TOOLS = new Set([
134
+ "mcp__exa__web_search_exa",
135
+ "mcp__exa__find_similar",
136
+ "mcp__exa__get_contents",
137
+ ]);
138
+
139
/** Report whether a tool name carries the `mcp__exa__` prefix. */
export function isExaTool(toolName: string): boolean {
  const exaPrefix = "mcp__exa__";
  return toolName.startsWith(exaPrefix);
}
142
+
143
/** Report whether the tool name is a member of `EXA_READ_ONLY_TOOLS`. */
export function isExaReadOnly(toolName: string): boolean {
  const listedAsReadOnly = EXA_READ_ONLY_TOOLS.has(toolName);
  return listedAsReadOnly;
}
146
+
147
/**
 * Build the MCP server config map for external servers that are enabled in
 * settings. Exa Search is the only server handled here.
 *
 * @returns `{ exa: … }` with an HTTP config when the
 *   `EXA_SEARCH_MCP_ENABLED` setting reads exactly `"true"`; otherwise `{}`.
 */
export async function getExternalMcpServers(): Promise<Record<string, AnyMcpServerConfig>> {
  const exaFlag = await getSetting(SETTINGS_KEYS.EXA_SEARCH_MCP_ENABLED);

  // Anything other than the literal string "true" counts as disabled.
  if (exaFlag !== "true") {
    return {};
  }

  const exaServer: AnyMcpServerConfig = { type: "http", url: "https://mcp.exa.ai/mcp" };
  return { exa: exaServer };
}
164
+
165
/**
 * List the allowedTools glob patterns for external MCP servers that are
 * enabled in settings.
 *
 * @returns `["mcp__exa__*"]` when the `EXA_SEARCH_MCP_ENABLED` setting reads
 *   exactly `"true"`; otherwise an empty array.
 */
export async function getExternalAllowedToolPatterns(): Promise<string[]> {
  const exaFlag = await getSetting(SETTINGS_KEYS.EXA_SEARCH_MCP_ENABLED);
  return exaFlag === "true" ? ["mcp__exa__*"] : [];
}
@@ -1,5 +1,4 @@
1
1
  import { query } from "@anthropic-ai/claude-agent-sdk";
2
- import { z } from "zod";
3
2
  import { db } from "@/lib/db";
4
3
  import { tasks, projects, agentLogs, notifications } from "@/lib/db/schema";
5
4
  import { eq } from "drizzle-orm";
@@ -20,7 +19,7 @@ import { getActiveLearnedContext } from "./learned-context";
20
19
  import { getLaunchCwd, getWorkspaceContext } from "@/lib/environment/workspace-context";
21
20
  import { analyzeForLearnedPatterns } from "./pattern-extractor";
22
21
  import { processSweepResult } from "./sweep";
23
- import { getBrowserMcpServers } from "./browser-mcp";
22
+ import { getBrowserMcpServers, getExternalMcpServers } from "./browser-mcp";
24
23
  import { persistScreenshot, SCREENSHOT_TOOL_NAMES } from "@/lib/screenshots/persist";
25
24
  import {
26
25
  extractUsageSnapshot,
@@ -30,6 +29,10 @@ import {
30
29
  type UsageActivityType,
31
30
  type UsageSnapshot,
32
31
  } from "@/lib/usage/ledger";
32
+ import {
33
+ handleToolPermission,
34
+ clearPermissionCache,
35
+ } from "./tool-permissions";
33
36
 
34
37
  /** Typed representation of messages from the Agent SDK stream */
35
38
  interface AgentStreamMessage {
@@ -44,7 +47,7 @@ interface AgentStreamMessage {
44
47
  result?: unknown;
45
48
  }
46
49
 
47
- interface TaskUsageState extends UsageSnapshot {
50
+ export interface TaskUsageState extends UsageSnapshot {
48
51
  activityType: UsageActivityType;
49
52
  startedAt: Date;
50
53
  taskId: string;
@@ -53,44 +56,7 @@ interface TaskUsageState extends UsageSnapshot {
53
56
  scheduleId?: string | null;
54
57
  }
55
58
 
56
- const toolPermissionResponseSchema = z.object({
57
- behavior: z.enum(["allow", "deny"]),
58
- updatedInput: z.unknown().optional(),
59
- message: z.string().optional(),
60
- });
61
-
62
- type ToolPermissionResponse = z.infer<typeof toolPermissionResponseSchema>;
63
-
64
- const inFlightPermissionRequests = new Map<
65
- string,
66
- Promise<ToolPermissionResponse>
67
- >();
68
- const settledPermissionRequests = new Map<string, ToolPermissionResponse>();
69
-
70
- function buildAllowedToolPermissionResponse(
71
- input: Record<string, unknown>
72
- ): ToolPermissionResponse {
73
- return {
74
- behavior: "allow",
75
- updatedInput: input,
76
- };
77
- }
78
-
79
- function normalizeToolPermissionResponse(
80
- response: ToolPermissionResponse,
81
- input: Record<string, unknown>
82
- ): ToolPermissionResponse {
83
- if (response.behavior !== "allow" || response.updatedInput !== undefined) {
84
- return response;
85
- }
86
-
87
- return {
88
- ...response,
89
- updatedInput: input,
90
- };
91
- }
92
-
93
- function createTaskUsageState(
59
+ export function createTaskUsageState(
94
60
  task: {
95
61
  id: string;
96
62
  projectId?: string | null;
@@ -117,64 +83,7 @@ function applyUsageSnapshot(state: TaskUsageState, source: unknown) {
117
83
  Object.assign(state, mergeUsageSnapshot(state, extractUsageSnapshot(source)));
118
84
  }
119
85
 
120
- function buildPermissionCacheKey(
121
- taskId: string,
122
- toolName: string,
123
- input: Record<string, unknown>
124
- ): string {
125
- return `${taskId}::${toolName}::${JSON.stringify(input)}`;
126
- }
127
-
128
- function clearPermissionCache(taskId: string) {
129
- const prefix = `${taskId}::`;
130
-
131
- for (const key of inFlightPermissionRequests.keys()) {
132
- if (key.startsWith(prefix)) {
133
- inFlightPermissionRequests.delete(key);
134
- }
135
- }
136
-
137
- for (const key of settledPermissionRequests.keys()) {
138
- if (key.startsWith(prefix)) {
139
- settledPermissionRequests.delete(key);
140
- }
141
- }
142
- }
143
-
144
- async function waitForToolPermissionResponse(
145
- notificationId: string
146
- ): Promise<ToolPermissionResponse> {
147
- const deadline = Date.now() + 55_000;
148
- const pollInterval = 1500;
149
-
150
- while (Date.now() < deadline) {
151
- const [notification] = await db
152
- .select()
153
- .from(notifications)
154
- .where(eq(notifications.id, notificationId));
155
-
156
- if (notification?.response) {
157
- try {
158
- const parsed = JSON.parse(notification.response);
159
- const validated = toolPermissionResponseSchema.safeParse(parsed);
160
- if (validated.success) {
161
- return validated.data;
162
- }
163
- console.error("[claude-agent] Invalid permission response shape:", validated.error.message);
164
- return { behavior: "deny", message: "Invalid response format" };
165
- } catch (err) {
166
- console.error("[claude-agent] Failed to parse permission response:", err);
167
- return { behavior: "deny", message: "Invalid response format" };
168
- }
169
- }
170
-
171
- await new Promise((resolve) => setTimeout(resolve, pollInterval));
172
- }
173
-
174
- return { behavior: "deny", message: "Permission request timed out" };
175
- }
176
-
177
- async function finalizeTaskUsage(
86
+ export async function finalizeTaskUsage(
178
87
  state: TaskUsageState,
179
88
  status: "completed" | "failed" | "cancelled"
180
89
  ) {
@@ -272,8 +181,9 @@ async function processAgentStream(
272
181
  for (const block of message.message.content) {
273
182
  if (block.type === "tool_use") {
274
183
  // Track screenshot tool_use IDs for result interception
275
- if (typeof block.name === "string" && SCREENSHOT_TOOL_NAMES.has(block.name) && typeof block.id === "string") {
276
- pendingScreenshotTools.add(block.id);
184
+ const toolBlock = block as { type: string; id?: string; name?: string; input?: unknown };
185
+ if (typeof toolBlock.name === "string" && SCREENSHOT_TOOL_NAMES.has(toolBlock.name) && typeof toolBlock.id === "string") {
186
+ pendingScreenshotTools.add(toolBlock.id);
277
187
  }
278
188
  await db.insert(agentLogs).values({
279
189
  id: crypto.randomUUID(),
@@ -429,7 +339,7 @@ async function processAgentStream(
429
339
  // Shared prompt & query context builder (F12: eliminate duplication)
430
340
  // ---------------------------------------------------------------------------
431
341
 
432
- interface TaskQueryContext {
342
+ export interface TaskQueryContext {
433
343
  /** User task content — goes into `prompt` */
434
344
  userPrompt: string;
435
345
  /** System instructions — goes into `options.systemPrompt` */
@@ -444,7 +354,7 @@ interface TaskQueryContext {
444
354
  canUseToolPolicy?: CanUseToolPolicy;
445
355
  }
446
356
 
447
- async function buildTaskQueryContext(
357
+ export async function buildTaskQueryContext(
448
358
  task: { id: string; title: string; description?: string | null; projectId?: string | null },
449
359
  profileId: string
450
360
  ): Promise<TaskQueryContext> {
@@ -462,7 +372,7 @@ async function buildTaskQueryContext(
462
372
  const outputInstructions = buildTaskOutputInstructions(task.id);
463
373
  const learnedCtx = getActiveLearnedContext(profileId);
464
374
  const learnedCtxBlock = learnedCtx
465
- ? `## Learned Context\nPatterns and insights learned from previous tasks:\n\n${learnedCtx}`
375
+ ? `## Learned Context\n<learned-context>\n${learnedCtx}\n</learned-context>`
466
376
  : "";
467
377
 
468
378
  // Resolve working directory: project's workingDirectory > launch cwd
@@ -520,10 +430,13 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
520
430
  await prepareTaskOutputDirectory(taskId, { clearExisting: true });
521
431
  const ctx = await buildTaskQueryContext(task, agentProfileId);
522
432
 
523
- // Merge browser MCP servers when enabled globally
524
- const browserServers = await getBrowserMcpServers();
433
+ // Merge browser + external MCP servers when enabled globally
434
+ const [browserServers, externalServers] = await Promise.all([
435
+ getBrowserMcpServers(),
436
+ getExternalMcpServers(),
437
+ ]);
525
438
  const profileMcpServers = ctx.payload?.mcpServers ?? {};
526
- const mergedMcpServers = { ...profileMcpServers, ...browserServers };
439
+ const mergedMcpServers = { ...profileMcpServers, ...browserServers, ...externalServers };
527
440
 
528
441
  const authEnv = await getAuthEnv();
529
442
  const response = query({
@@ -630,10 +543,13 @@ export async function resumeClaudeTask(taskId: string): Promise<void> {
630
543
  await prepareTaskOutputDirectory(taskId);
631
544
  const ctx = await buildTaskQueryContext(task, profileId);
632
545
 
633
- // Merge browser MCP servers when enabled globally
634
- const browserServers = await getBrowserMcpServers();
546
+ // Merge browser + external MCP servers when enabled globally
547
+ const [browserServers, externalServers] = await Promise.all([
548
+ getBrowserMcpServers(),
549
+ getExternalMcpServers(),
550
+ ]);
635
551
  const profileMcpServers = ctx.payload?.mcpServers ?? {};
636
- const mergedMcpServers = { ...profileMcpServers, ...browserServers };
552
+ const mergedMcpServers = { ...profileMcpServers, ...browserServers, ...externalServers };
637
553
 
638
554
  const authEnv = await getAuthEnv();
639
555
  const response = query({
@@ -781,93 +697,4 @@ async function handleExecutionError(
781
697
  }
782
698
  }
783
699
 
784
- /**
785
- * Handle tool permission by inserting a notification and polling for response.
786
- * Uses database polling pattern — the Inbox UI writes the response.
787
- */
788
- async function handleToolPermission(
789
- taskId: string,
790
- toolName: string,
791
- input: Record<string, unknown>,
792
- canUseToolPolicy?: CanUseToolPolicy
793
- ): Promise<ToolPermissionResponse> {
794
- const isQuestion = toolName === "AskUserQuestion";
795
-
796
- // Layer 1: Profile-level canUseToolPolicy — fastest check, no I/O
797
- if (!isQuestion && canUseToolPolicy) {
798
- if (canUseToolPolicy.autoApprove?.includes(toolName)) {
799
- return buildAllowedToolPermissionResponse(input);
800
- }
801
- if (canUseToolPolicy.autoDeny?.includes(toolName)) {
802
- return { behavior: "deny", message: `Profile policy denies ${toolName}` };
803
- }
804
- }
805
-
806
- // Layer 2: Saved user permissions — skip notification for pre-approved tools
807
- if (!isQuestion) {
808
- const { isToolAllowed } = await import("@/lib/settings/permissions");
809
- if (await isToolAllowed(toolName, input)) {
810
- return buildAllowedToolPermissionResponse(input);
811
- }
812
- }
813
-
814
- if (!isQuestion) {
815
- const cacheKey = buildPermissionCacheKey(taskId, toolName, input);
816
- const settledResponse = settledPermissionRequests.get(cacheKey);
817
- if (settledResponse) {
818
- return normalizeToolPermissionResponse(settledResponse, input);
819
- }
820
-
821
- const pendingRequest = inFlightPermissionRequests.get(cacheKey);
822
- if (pendingRequest) {
823
- return pendingRequest;
824
- }
825
-
826
- const requestPromise = (async () => {
827
- const notificationId = crypto.randomUUID();
828
-
829
- await db.insert(notifications).values({
830
- id: notificationId,
831
- taskId,
832
- type: "permission_required",
833
- title: `Permission required: ${toolName}`,
834
- body: JSON.stringify(input).slice(0, 1000),
835
- toolName,
836
- toolInput: JSON.stringify(input),
837
- createdAt: new Date(),
838
- });
839
-
840
- const response = normalizeToolPermissionResponse(
841
- await waitForToolPermissionResponse(notificationId),
842
- input
843
- );
844
- settledPermissionRequests.set(cacheKey, response);
845
- return response;
846
- })();
847
-
848
- inFlightPermissionRequests.set(cacheKey, requestPromise);
849
-
850
- try {
851
- return await requestPromise;
852
- } finally {
853
- inFlightPermissionRequests.delete(cacheKey);
854
- }
855
- }
856
-
857
- const notificationId = crypto.randomUUID();
858
-
859
- await db.insert(notifications).values({
860
- id: notificationId,
861
- taskId,
862
- type: isQuestion ? "agent_message" : "permission_required",
863
- title: isQuestion
864
- ? "Agent has a question"
865
- : `Permission required: ${toolName}`,
866
- body: JSON.stringify(input).slice(0, 1000),
867
- toolName,
868
- toolInput: JSON.stringify(input),
869
- createdAt: new Date(),
870
- });
871
-
872
- return waitForToolPermissionResponse(notificationId);
873
- }
700
+ // handleToolPermission and clearPermissionCache imported from ./tool-permissions