stagent 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/README.md +44 -31
  2. package/dist/cli.js +24 -0
  3. package/docs/.coverage-gaps.json +154 -24
  4. package/docs/.last-generated +1 -1
  5. package/docs/features/agent-intelligence.md +12 -2
  6. package/docs/features/chat.md +40 -5
  7. package/docs/features/cost-usage.md +1 -1
  8. package/docs/features/documents.md +5 -2
  9. package/docs/features/inbox-notifications.md +10 -2
  10. package/docs/features/keyboard-navigation.md +12 -3
  11. package/docs/features/provider-runtimes.md +16 -2
  12. package/docs/features/settings.md +2 -2
  13. package/docs/features/shared-components.md +7 -3
  14. package/docs/features/tables.md +3 -1
  15. package/docs/features/tool-permissions.md +6 -2
  16. package/docs/features/workflows.md +6 -2
  17. package/docs/getting-started.md +1 -1
  18. package/docs/index.md +1 -1
  19. package/docs/journeys/developer.md +25 -2
  20. package/docs/journeys/personal-use.md +12 -5
  21. package/docs/journeys/power-user.md +45 -14
  22. package/docs/journeys/work-use.md +17 -8
  23. package/docs/manifest.json +15 -15
  24. package/docs/superpowers/plans/2026-04-07-instance-bootstrap.md +2 -2
  25. package/docs/superpowers/plans/2026-04-14-chat-command-namespace-refactor.md +1390 -0
  26. package/docs/superpowers/plans/2026-04-14-chat-environment-integration.md +1561 -0
  27. package/docs/superpowers/plans/2026-04-14-chat-polish-bundle-v1.md +1219 -0
  28. package/docs/superpowers/plans/2026-04-14-chat-session-persistence-provider-closeout.md +399 -0
  29. package/next.config.mjs +1 -0
  30. package/package.json +3 -3
  31. package/src/app/api/chat/conversations/[id]/skills/__tests__/activate.test.ts +141 -0
  32. package/src/app/api/chat/conversations/[id]/skills/activate/route.ts +74 -0
  33. package/src/app/api/chat/conversations/[id]/skills/deactivate/route.ts +33 -0
  34. package/src/app/api/chat/export/route.ts +52 -0
  35. package/src/app/api/chat/files/search/route.ts +50 -0
  36. package/src/app/api/environment/rescan-if-stale/__tests__/route.test.ts +45 -0
  37. package/src/app/api/environment/rescan-if-stale/route.ts +23 -0
  38. package/src/app/api/environment/skills/route.ts +13 -0
  39. package/src/app/api/schedules/[id]/execute/route.ts +2 -2
  40. package/src/app/api/settings/chat/pins/route.ts +94 -0
  41. package/src/app/api/settings/chat/saved-searches/__tests__/route.test.ts +119 -0
  42. package/src/app/api/settings/chat/saved-searches/route.ts +79 -0
  43. package/src/app/api/settings/environment/route.ts +26 -0
  44. package/src/app/api/tasks/[id]/execute/route.ts +52 -12
  45. package/src/app/api/tasks/[id]/respond/route.ts +31 -15
  46. package/src/app/api/tasks/[id]/resume/route.ts +24 -3
  47. package/src/app/documents/page.tsx +4 -1
  48. package/src/app/settings/page.tsx +2 -0
  49. package/src/components/book/content-blocks.tsx +1 -1
  50. package/src/components/chat/__tests__/capability-banner.test.tsx +38 -0
  51. package/src/components/chat/__tests__/chat-session-provider.test.tsx +166 -1
  52. package/src/components/chat/__tests__/skill-row.test.tsx +91 -0
  53. package/src/components/chat/capability-banner.tsx +68 -0
  54. package/src/components/chat/chat-command-popover.tsx +668 -47
  55. package/src/components/chat/chat-input.tsx +103 -8
  56. package/src/components/chat/chat-message.tsx +12 -3
  57. package/src/components/chat/chat-session-provider.tsx +73 -3
  58. package/src/components/chat/chat-shell.tsx +62 -3
  59. package/src/components/chat/command-tab-bar.tsx +68 -0
  60. package/src/components/chat/conversation-template-picker.tsx +421 -0
  61. package/src/components/chat/help-dialog.tsx +39 -0
  62. package/src/components/chat/skill-composition-conflict-dialog.tsx +96 -0
  63. package/src/components/chat/skill-row.tsx +147 -0
  64. package/src/components/documents/document-browser.tsx +37 -19
  65. package/src/components/notifications/__tests__/permission-response-actions.test.tsx +70 -0
  66. package/src/components/notifications/permission-response-actions.tsx +155 -1
  67. package/src/components/playbook/playbook-detail-view.tsx +1 -1
  68. package/src/components/settings/environment-section.tsx +102 -0
  69. package/src/components/shared/__tests__/filter-hint.test.tsx +40 -0
  70. package/src/components/shared/__tests__/saved-searches-manager.test.tsx +147 -0
  71. package/src/components/shared/command-palette.tsx +262 -2
  72. package/src/components/shared/filter-hint.tsx +70 -0
  73. package/src/components/shared/filter-input.tsx +59 -0
  74. package/src/components/shared/saved-searches-manager.tsx +199 -0
  75. package/src/components/tasks/task-bento-grid.tsx +12 -2
  76. package/src/components/tasks/task-card.tsx +3 -0
  77. package/src/components/tasks/task-chip-bar.tsx +30 -1
  78. package/src/hooks/__tests__/use-chat-autocomplete-tabs.test.ts +47 -0
  79. package/src/hooks/__tests__/use-saved-searches.test.ts +70 -0
  80. package/src/hooks/use-active-skills.ts +110 -0
  81. package/src/hooks/use-chat-autocomplete.ts +120 -7
  82. package/src/hooks/use-enriched-skills.ts +19 -0
  83. package/src/hooks/use-pinned-entries.ts +104 -0
  84. package/src/hooks/use-recent-user-messages.ts +19 -0
  85. package/src/hooks/use-saved-searches.ts +142 -0
  86. package/src/lib/agents/__tests__/claude-agent-sdk-options.test.ts +56 -0
  87. package/src/lib/agents/__tests__/claude-agent.test.ts +17 -4
  88. package/src/lib/agents/__tests__/task-dispatch.test.ts +166 -0
  89. package/src/lib/agents/__tests__/tool-permissions.test.ts +60 -0
  90. package/src/lib/agents/claude-agent.ts +105 -46
  91. package/src/lib/agents/handoff/bus.ts +2 -2
  92. package/src/lib/agents/profiles/__tests__/list-fused-profiles.test.ts +110 -0
  93. package/src/lib/agents/profiles/__tests__/registry.test.ts +47 -0
  94. package/src/lib/agents/profiles/builtins/upgrade-assistant/SKILL.md +30 -3
  95. package/src/lib/agents/profiles/builtins/upgrade-assistant/profile.yaml +6 -2
  96. package/src/lib/agents/profiles/list-fused-profiles.ts +104 -0
  97. package/src/lib/agents/profiles/registry.ts +97 -22
  98. package/src/lib/agents/profiles/types.ts +7 -1
  99. package/src/lib/agents/router.ts +3 -6
  100. package/src/lib/agents/runtime/__tests__/catalog.test.ts +130 -0
  101. package/src/lib/agents/runtime/__tests__/execution-target.test.ts +183 -0
  102. package/src/lib/agents/runtime/anthropic-direct.ts +8 -0
  103. package/src/lib/agents/runtime/catalog.ts +121 -0
  104. package/src/lib/agents/runtime/claude-sdk.ts +32 -0
  105. package/src/lib/agents/runtime/execution-target.ts +456 -0
  106. package/src/lib/agents/runtime/index.ts +4 -0
  107. package/src/lib/agents/runtime/launch-failure.ts +101 -0
  108. package/src/lib/agents/runtime/openai-codex.ts +35 -0
  109. package/src/lib/agents/runtime/openai-direct.ts +8 -0
  110. package/src/lib/agents/task-dispatch.ts +220 -0
  111. package/src/lib/agents/tool-permissions.ts +16 -1
  112. package/src/lib/chat/__tests__/active-skill-injection.test.ts +261 -0
  113. package/src/lib/chat/__tests__/clean-filter-input.test.ts +68 -0
  114. package/src/lib/chat/__tests__/command-tabs.test.ts +68 -0
  115. package/src/lib/chat/__tests__/context-builder-files.test.ts +112 -0
  116. package/src/lib/chat/__tests__/dismissals.test.ts +65 -0
  117. package/src/lib/chat/__tests__/engine-sdk-options.test.ts +117 -0
  118. package/src/lib/chat/__tests__/skill-conflict.test.ts +35 -0
  119. package/src/lib/chat/__tests__/types.test.ts +28 -0
  120. package/src/lib/chat/active-skills.ts +31 -0
  121. package/src/lib/chat/clean-filter-input.ts +30 -0
  122. package/src/lib/chat/codex-engine.ts +30 -7
  123. package/src/lib/chat/command-tabs.ts +61 -0
  124. package/src/lib/chat/context-builder.ts +141 -1
  125. package/src/lib/chat/dismissals.ts +73 -0
  126. package/src/lib/chat/engine.ts +109 -15
  127. package/src/lib/chat/files/__tests__/search.test.ts +135 -0
  128. package/src/lib/chat/files/expand-mention.ts +76 -0
  129. package/src/lib/chat/files/search.ts +99 -0
  130. package/src/lib/chat/skill-composition.ts +210 -0
  131. package/src/lib/chat/skill-conflict.ts +105 -0
  132. package/src/lib/chat/stagent-tools.ts +6 -19
  133. package/src/lib/chat/stream-telemetry.ts +9 -4
  134. package/src/lib/chat/system-prompt.ts +22 -0
  135. package/src/lib/chat/tool-catalog.ts +33 -3
  136. package/src/lib/chat/tools/__tests__/profile-tools.test.ts +51 -0
  137. package/src/lib/chat/tools/__tests__/settings-tools.test.ts +294 -0
  138. package/src/lib/chat/tools/__tests__/skill-tools.test.ts +474 -0
  139. package/src/lib/chat/tools/__tests__/task-tools.test.ts +47 -0
  140. package/src/lib/chat/tools/__tests__/workflow-tools-dedup.test.ts +134 -0
  141. package/src/lib/chat/tools/blueprint-tools.ts +190 -0
  142. package/src/lib/chat/tools/helpers.ts +2 -0
  143. package/src/lib/chat/tools/profile-tools.ts +120 -23
  144. package/src/lib/chat/tools/skill-tools.ts +183 -0
  145. package/src/lib/chat/tools/task-tools.ts +6 -2
  146. package/src/lib/chat/tools/workflow-tools.ts +61 -20
  147. package/src/lib/chat/types.ts +15 -0
  148. package/src/lib/constants/settings.ts +2 -0
  149. package/src/lib/data/clear.ts +2 -6
  150. package/src/lib/db/bootstrap.ts +17 -0
  151. package/src/lib/db/schema.ts +26 -0
  152. package/src/lib/environment/__tests__/auto-promote.test.ts +132 -0
  153. package/src/lib/environment/__tests__/list-skills-enriched.test.ts +55 -0
  154. package/src/lib/environment/__tests__/skill-enrichment.test.ts +129 -0
  155. package/src/lib/environment/__tests__/skill-recommendations.test.ts +87 -0
  156. package/src/lib/environment/data.ts +9 -0
  157. package/src/lib/environment/list-skills.ts +176 -0
  158. package/src/lib/environment/parsers/__tests__/skill.test.ts +54 -0
  159. package/src/lib/environment/parsers/skill.ts +26 -5
  160. package/src/lib/environment/profile-generator.ts +56 -2
  161. package/src/lib/environment/skill-enrichment.ts +106 -0
  162. package/src/lib/environment/skill-recommendations.ts +66 -0
  163. package/src/lib/filters/__tests__/parse.quoted.test.ts +40 -0
  164. package/src/lib/filters/__tests__/parse.test.ts +135 -0
  165. package/src/lib/filters/parse.ts +86 -0
  166. package/src/lib/instance/__tests__/detect.test.ts +1 -1
  167. package/src/lib/instance/__tests__/upgrade-poller.test.ts +50 -0
  168. package/src/lib/instance/fingerprint.ts +8 -10
  169. package/src/lib/instance/upgrade-poller.ts +53 -1
  170. package/src/lib/schedules/scheduler.ts +4 -4
  171. package/src/lib/utils/stagent-paths.ts +4 -0
  172. package/src/lib/workflows/blueprints/__tests__/render-prompt.test.ts +124 -0
  173. package/src/lib/workflows/blueprints/render-prompt.ts +71 -0
  174. package/src/lib/workflows/blueprints/types.ts +6 -0
  175. package/src/lib/workflows/engine.ts +5 -3
  176. package/src/test/setup.ts +10 -0
@@ -15,7 +15,12 @@ import {
15
15
  import { getProfile } from "./profiles/registry";
16
16
  import { resolveProfileRuntimePayload, type ResolvedProfileRuntimePayload } from "./profiles/compatibility";
17
17
  import type { CanUseToolPolicy } from "./profiles/types";
18
- import { buildClaudeSdkEnv } from "./runtime/claude-sdk";
18
+ import {
19
+ buildClaudeSdkEnv,
20
+ CLAUDE_SDK_ALLOWED_TOOLS,
21
+ CLAUDE_SDK_SETTING_SOURCES,
22
+ } from "./runtime/claude-sdk";
23
+ import { getFeaturesForModel } from "@/lib/chat/types";
19
24
  import { getActiveLearnedContext } from "./learned-context";
20
25
  import { getLaunchCwd, getWorkspaceContext } from "@/lib/environment/workspace-context";
21
26
  import { analyzeForLearnedPatterns } from "./pattern-extractor";
@@ -34,6 +39,11 @@ import {
34
39
  handleToolPermission,
35
40
  clearPermissionCache,
36
41
  } from "./tool-permissions";
42
+ import {
43
+ classifyTaskFailureReason,
44
+ toRetryableRuntimeLaunchError,
45
+ type RuntimeLaunchProgress,
46
+ } from "@/lib/agents/runtime/launch-failure";
37
47
 
38
48
  // ─── Stagent MCP injection helpers ──────────────────────────────────────
39
49
  //
@@ -75,43 +85,33 @@ async function withStagentMcpServer(
75
85
 
76
86
  /**
77
87
  * Prepend `mcp__stagent__*` to a profile's explicit allowedTools so the
78
- * stagent tool registration survives the SDK preset filter. Returns
79
- * `undefined` when the profile has no allowedTools — callers should spread
80
- * the result conditionally so the SDK falls through to preset defaults in
81
- * that case.
88
+ * stagent tool registration survives the SDK preset filter. When the
89
+ * profile has no explicit allowlist and `includeSdkTools` is true, fall
90
+ * back to Phase 1a's CLAUDE_SDK_ALLOWED_TOOLS (Skill, Read/Grep/Glob,
91
+ * Edit/Write/Bash, TodoWrite) so task execution gets the same toolset as
92
+ * chat. Returns `undefined` only when the profile has no allowlist AND
93
+ * the caller does not want SDK tools added — letting the SDK fall
94
+ * through to claude_code preset defaults.
82
95
  */
83
96
  function withStagentAllowedTools(
84
97
  profileAllowedTools: string[] | undefined,
98
+ includeSdkTools: boolean,
85
99
  ): string[] | undefined {
86
- if (!profileAllowedTools) return undefined;
87
- return Array.from(new Set(["mcp__stagent__*", ...profileAllowedTools]));
88
- }
89
-
90
- /**
91
- * Classify an error into a machine-readable failure reason string.
92
- * Used by writeTerminalFailureReason and handleExecutionError.
93
- */
94
- function classifyError(error: unknown): string {
95
- if (!(error instanceof Error)) return "sdk_error";
96
- if (error.name === "AbortError" || error.message.includes("aborted")) {
97
- return "aborted";
98
- }
99
- const lower = error.message.toLowerCase();
100
- if (
101
- lower.includes("turn") &&
102
- (lower.includes("limit") || lower.includes("exhausted") || lower.includes("max"))
103
- ) {
104
- return "turn_limit_exceeded";
100
+ // An empty `allowedTools: []` is treated the same as `undefined` — an
101
+ // empty array is almost never the profile author's intent (they'd get
102
+ // only `mcp__stagent__*` and nothing else). Require at least one tool
103
+ // name for the "profile has explicit list" branch.
104
+ if (profileAllowedTools && profileAllowedTools.length > 0) {
105
+ // Profile has explicit list — respect it. Only prepend stagent.
106
+ return Array.from(new Set(["mcp__stagent__*", ...profileAllowedTools]));
105
107
  }
106
- if (lower.includes("timeout") || lower.includes("timed out")) return "timeout";
107
- if (lower.includes("budget")) return "budget_exceeded";
108
- if (lower.includes("authentication") || lower.includes("oauth")) {
109
- return "auth_failed";
108
+ if (includeSdkTools) {
109
+ // No profile allowlist but runtime has native skills — pass the
110
+ // Phase 1a tool set alongside mcp__stagent__* + browser/external
111
+ // (callers merge their own browser/external patterns into this list).
112
+ return ["mcp__stagent__*", ...CLAUDE_SDK_ALLOWED_TOOLS];
110
113
  }
111
- if (lower.includes("rate limit") || lower.includes("429")) {
112
- return "rate_limited";
113
- }
114
- return "sdk_error";
114
+ return undefined;
115
115
  }
116
116
 
117
117
  /**
@@ -124,7 +124,7 @@ export async function writeTerminalFailureReason(
124
124
  taskId: string,
125
125
  error: unknown,
126
126
  ): Promise<void> {
127
- const reason = classifyError(error);
127
+ const reason = classifyTaskFailureReason(error);
128
128
  await db
129
129
  .update(tasks)
130
130
  .set({ failureReason: reason, updatedAt: new Date() })
@@ -200,6 +200,14 @@ export async function finalizeTaskUsage(
200
200
  startedAt: state.startedAt,
201
201
  finishedAt: new Date(),
202
202
  });
203
+
204
+ await db
205
+ .update(tasks)
206
+ .set({
207
+ effectiveModelId: state.modelId ?? null,
208
+ updatedAt: new Date(),
209
+ })
210
+ .where(eq(tasks.id, state.taskId));
203
211
  }
204
212
 
205
213
  /**
@@ -212,7 +220,8 @@ async function processAgentStream(
212
220
  response: AsyncIterable<Record<string, unknown>>,
213
221
  abortController: AbortController,
214
222
  agentProfileId = "general",
215
- usageState: TaskUsageState
223
+ usageState: TaskUsageState,
224
+ launchProgress?: RuntimeLaunchProgress
216
225
  ): Promise<void> {
217
226
  let sessionId: string | null = null;
218
227
  let receivedResult = false;
@@ -275,8 +284,14 @@ async function processAgentStream(
275
284
  // Handle assistant messages (tool use starts)
276
285
  if (message.type === "assistant" && message.message?.content) {
277
286
  turnCount++;
287
+ if (launchProgress) {
288
+ launchProgress.hasTurnStarted = true;
289
+ }
278
290
  for (const block of message.message.content) {
279
291
  if (block.type === "tool_use") {
292
+ if (launchProgress) {
293
+ launchProgress.hasToolUse = true;
294
+ }
280
295
  // Track screenshot tool_use IDs for result interception
281
296
  const toolBlock = block as { type: string; id?: string; name?: string; input?: unknown };
282
297
  if (typeof toolBlock.name === "string" && SCREENSHOT_TOOL_NAMES.has(toolBlock.name) && typeof toolBlock.id === "string") {
@@ -345,6 +360,9 @@ async function processAgentStream(
345
360
  return;
346
361
  }
347
362
  receivedResult = true;
363
+ if (launchProgress) {
364
+ launchProgress.hasResult = true;
365
+ }
348
366
  const resultText =
349
367
  typeof message.result === "string"
350
368
  ? message.result
@@ -516,6 +534,7 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
516
534
  const [task] = await db.select().from(tasks).where(eq(tasks.id, taskId));
517
535
  if (!task) throw new Error(`Task ${taskId} not found`);
518
536
  const usageState = createTaskUsageState(task);
537
+ const launchProgress: RuntimeLaunchProgress = {};
519
538
 
520
539
  const abortController = new AbortController();
521
540
  const agentProfileId = task.agentProfile ?? "general";
@@ -550,11 +569,25 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
550
569
  externalServers,
551
570
  task.projectId,
552
571
  );
553
- // allowedTools prepended via shared helper (see withStagentAllowedTools).
554
- // Computed once so the conditional spread below does not invoke the
555
- // helper twice. Returns undefined when the profile has no allowlist so
556
- // the SDK falls through to claude_code preset defaults.
557
- const mergedAllowedTools = withStagentAllowedTools(ctx.payload?.allowedTools);
572
+ // Capability gate: only pass settingSources + CLAUDE_SDK tools when the
573
+ // runtime is claude-code (or a future runtime with hasNativeSkills).
574
+ // Anthropic-direct and OpenAI-direct task runtimes don't understand
575
+ // these SDK-specific options. Tasks do not carry a model field yet —
576
+ // an empty string falls through to the claude-code default in
577
+ // getFeaturesForModel, so the gate opens by default for the primary
578
+ // claude-code use case. Task 4's resume path follows the same pattern.
579
+ const runtimeFeatures = getFeaturesForModel("");
580
+ const includeSdkNativeTools = runtimeFeatures.hasNativeSkills;
581
+
582
+ // allowedTools merged via shared helper. When the profile has no explicit
583
+ // allowlist AND the runtime has native skills, we fall back to Phase 1a's
584
+ // CLAUDE_SDK_ALLOWED_TOOLS (Skill, Read/Grep/Glob, Edit/Write/Bash,
585
+ // TodoWrite) so task execution matches chat. Computed once so the
586
+ // conditional spread below does not invoke the helper twice.
587
+ const mergedAllowedTools = withStagentAllowedTools(
588
+ ctx.payload?.allowedTools,
589
+ includeSdkNativeTools,
590
+ );
558
591
 
559
592
  const authEnv = await getAuthEnv();
560
593
  const response = query({
@@ -573,6 +606,11 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
573
606
  // F4: Per-execution budget cap — use task-specific override if set
574
607
  maxBudgetUsd: task.maxBudgetUsd ?? DEFAULT_MAX_BUDGET_USD,
575
608
  ...(mergedAllowedTools && { allowedTools: mergedAllowedTools }),
609
+ // Phase 1a parity: load user + project settings (.claude/skills,
610
+ // CLAUDE.md, .claude/rules/*.md) when the runtime supports it.
611
+ ...(includeSdkNativeTools && {
612
+ settingSources: [...CLAUDE_SDK_SETTING_SOURCES],
613
+ }),
576
614
  ...(Object.keys(mergedMcpServers).length > 0 && {
577
615
  mcpServers: mergedMcpServers,
578
616
  }),
@@ -592,7 +630,8 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
592
630
  response as AsyncIterable<Record<string, unknown>>,
593
631
  abortController,
594
632
  agentProfileId,
595
- usageState
633
+ usageState,
634
+ launchProgress
596
635
  );
597
636
 
598
637
  try {
@@ -601,6 +640,14 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
601
640
  console.error("[self-improvement] pattern extraction failed:", err);
602
641
  }
603
642
  } catch (error: unknown) {
643
+ const retryableLaunchError = toRetryableRuntimeLaunchError({
644
+ runtimeId: "claude-code",
645
+ error,
646
+ progress: launchProgress,
647
+ });
648
+ if (retryableLaunchError) {
649
+ throw retryableLaunchError;
650
+ }
604
651
  await handleExecutionError(
605
652
  taskId,
606
653
  task.title,
@@ -680,10 +727,19 @@ export async function resumeClaudeTask(taskId: string): Promise<void> {
680
727
  externalServers,
681
728
  task.projectId,
682
729
  );
683
- // allowedTools prepended via shared helper (see withStagentAllowedTools).
684
- // Computed once so the conditional spread below does not invoke the
685
- // helper twice.
686
- const mergedAllowedTools = withStagentAllowedTools(ctx.payload?.allowedTools);
730
+ // Capability gate: same logic as executeClaudeTask. Resumed tasks must
731
+ // get the same SDK options as their original run so skills that were
732
+ // visible on first execution remain visible after a resume. `task.model`
733
+ // does not exist on the tasks schema — pass "" which resolves to the
734
+ // claude-code default (hasNativeSkills: true) for every current task
735
+ // flow. See features/task-runtime-skill-parity.md Task 4.
736
+ const runtimeFeatures = getFeaturesForModel("");
737
+ const includeSdkNativeTools = runtimeFeatures.hasNativeSkills;
738
+
739
+ const mergedAllowedTools = withStagentAllowedTools(
740
+ ctx.payload?.allowedTools,
741
+ includeSdkNativeTools,
742
+ );
687
743
 
688
744
  const authEnv = await getAuthEnv();
689
745
  const response = query({
@@ -703,6 +759,10 @@ export async function resumeClaudeTask(taskId: string): Promise<void> {
703
759
  // F4: Per-execution budget cap — use task-specific override if set
704
760
  maxBudgetUsd: task.maxBudgetUsd ?? DEFAULT_MAX_BUDGET_USD,
705
761
  ...(mergedAllowedTools && { allowedTools: mergedAllowedTools }),
762
+ // Phase 1a parity: match executeClaudeTask — see Task 3 rationale.
763
+ ...(includeSdkNativeTools && {
764
+ settingSources: [...CLAUDE_SDK_SETTING_SOURCES],
765
+ }),
706
766
  ...(Object.keys(mergedMcpServers).length > 0 && {
707
767
  mcpServers: mergedMcpServers,
708
768
  }),
@@ -801,8 +861,7 @@ async function handleExecutionError(
801
861
  return;
802
862
  }
803
863
 
804
- const failureReason = classifyError(error);
805
-
864
+ const failureReason = classifyTaskFailureReason(error);
806
865
  await db
807
866
  .update(tasks)
808
867
  .set({
@@ -125,8 +125,8 @@ export async function processHandoffs(): Promise<void> {
125
125
 
126
126
  // Fire-and-forget task execution
127
127
  try {
128
- const { executeTaskWithRuntime } = await import("@/lib/agents/runtime");
129
- executeTaskWithRuntime(taskId).catch((err) => {
128
+ const { startTaskExecution } = await import("@/lib/agents/task-dispatch");
129
+ startTaskExecution(taskId).catch((err) => {
130
130
  console.error(`[handoff] task execution failed for message ${msg.id}:`, err);
131
131
  });
132
132
  } catch (err) {
@@ -0,0 +1,110 @@
1
+ import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
2
+ import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "fs";
3
+ import { tmpdir } from "os";
4
+ import { join } from "path";
5
+ import { listFusedProfiles } from "@/lib/agents/profiles/list-fused-profiles";
6
+
7
+ describe("listFusedProfiles", () => {
8
+ let projectDir: string;
9
+ let userSkillsDir: string;
10
+
11
+ beforeEach(() => {
12
+ projectDir = mkdtempSync(join(tmpdir(), "stagent-skills-"));
13
+ userSkillsDir = mkdtempSync(join(tmpdir(), "stagent-user-skills-"));
14
+ mkdirSync(join(projectDir, ".claude", "skills"), { recursive: true });
15
+ });
16
+
17
+ afterEach(() => {
18
+ rmSync(projectDir, { recursive: true, force: true });
19
+ rmSync(userSkillsDir, { recursive: true, force: true });
20
+ });
21
+
22
+ function writeSkill(baseDir: string, name: string, frontmatter: string) {
23
+ mkdirSync(join(baseDir, name), { recursive: true });
24
+ writeFileSync(
25
+ join(baseDir, name, "SKILL.md"),
26
+ `---\n${frontmatter}\n---\n\nbody for ${name}\n`
27
+ );
28
+ }
29
+
30
+ it("returns registry profiles when no filesystem skills exist", async () => {
31
+ const result = await listFusedProfiles(projectDir, userSkillsDir);
32
+ // Should contain at least one registry profile (builtin)
33
+ expect(result.length).toBeGreaterThan(0);
34
+ expect(result.every((p) => typeof p.id === "string")).toBe(true);
35
+ });
36
+
37
+ it("surfaces a project .claude/skills/<name> entry", async () => {
38
+ writeSkill(
39
+ join(projectDir, ".claude", "skills"),
40
+ "my-project-skill",
41
+ `name: my-project-skill\ndescription: Test project skill`
42
+ );
43
+ const result = await listFusedProfiles(projectDir, userSkillsDir);
44
+ expect(result.some((p) => p.id === "my-project-skill")).toBe(true);
45
+ const skill = result.find((p) => p.id === "my-project-skill")!;
46
+ expect(skill.name).toBe("my-project-skill");
47
+ expect(skill.description).toBe("Test project skill");
48
+ expect(skill.origin).toBe("filesystem-project");
49
+ });
50
+
51
+ it("sets projectDir to the project root (not the skills subdirectory) on filesystem-project entries", async () => {
52
+ writeSkill(
53
+ join(projectDir, ".claude", "skills"),
54
+ "my-scoped-skill",
55
+ `name: my-scoped-skill\ndescription: Scoped`
56
+ );
57
+ const result = await listFusedProfiles(projectDir, userSkillsDir);
58
+ const skill = result.find((p) => p.id === "my-scoped-skill")!;
59
+ expect(skill.projectDir).toBe(projectDir);
60
+ // Negative: must not be the .claude/skills subdirectory
61
+ expect(skill.projectDir).not.toContain(".claude/skills");
62
+ });
63
+
64
+ it("surfaces a user ~/.claude/skills/<name> entry", async () => {
65
+ writeSkill(
66
+ userSkillsDir,
67
+ "my-user-skill",
68
+ `name: my-user-skill\ndescription: Test user skill`
69
+ );
70
+ const result = await listFusedProfiles(projectDir, userSkillsDir);
71
+ expect(result.some((p) => p.id === "my-user-skill")).toBe(true);
72
+ expect(
73
+ result.find((p) => p.id === "my-user-skill")!.origin
74
+ ).toBe("filesystem-user");
75
+ });
76
+
77
+ it("dedupes by id — registry profile wins over filesystem skill with same id", async () => {
78
+ // "general" is a known builtin registry profile id; write a filesystem
79
+ // skill with the same id to force a collision.
80
+ writeSkill(
81
+ join(projectDir, ".claude", "skills"),
82
+ "general",
83
+ `name: general\ndescription: This should be overridden by registry`
84
+ );
85
+ const result = await listFusedProfiles(projectDir, userSkillsDir);
86
+ const entries = result.filter((p) => p.id === "general");
87
+ expect(entries).toHaveLength(1);
88
+ // Registry description should win (not the filesystem-overridden one)
89
+ expect(entries[0].description).not.toBe("This should be overridden by registry");
90
+ });
91
+
92
+ it("logs and skips a malformed SKILL.md (no name field in frontmatter)", async () => {
93
+ const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
94
+ writeSkill(
95
+ join(projectDir, ".claude", "skills"),
96
+ "broken-skill",
97
+ `description: Missing name field — broken`
98
+ );
99
+ const result = await listFusedProfiles(projectDir, userSkillsDir);
100
+ expect(result.some((p) => p.id === "broken-skill")).toBe(false);
101
+ expect(warnSpy).toHaveBeenCalled();
102
+ warnSpy.mockRestore();
103
+ });
104
+
105
+ it("returns an empty-safe result when projectDir does not exist", async () => {
106
+ const result = await listFusedProfiles("/nonexistent/path", userSkillsDir);
107
+ // Should still return registry + user skills, no throw
108
+ expect(Array.isArray(result)).toBe(true);
109
+ });
110
+ });
@@ -98,6 +98,53 @@ describe("profile registry", () => {
98
98
  expect(codeReviewer!.canUseToolPolicy!.autoApprove).toContain("Grep");
99
99
  });
100
100
 
101
+ it("preserves preferredRuntime from profile.yaml", async () => {
102
+ const originalHome = process.env.HOME;
103
+ const tempHome = fs.mkdtempSync(
104
+ path.join(os.tmpdir(), "registry-preferred-runtime-")
105
+ );
106
+
107
+ try {
108
+ process.env.HOME = tempHome;
109
+ vi.resetModules();
110
+
111
+ const profileId = `preferred-runtime-${Date.now()}`;
112
+ const profileDir = path.join(tempHome, ".claude", "skills", profileId);
113
+ fs.mkdirSync(profileDir, { recursive: true });
114
+ fs.writeFileSync(
115
+ path.join(profileDir, "profile.yaml"),
116
+ yaml.dump({
117
+ id: profileId,
118
+ name: "Preferred Runtime Test",
119
+ version: "1.0.0",
120
+ domain: "work",
121
+ tags: ["runtime"],
122
+ preferredRuntime: "openai-direct",
123
+ })
124
+ );
125
+ fs.writeFileSync(
126
+ path.join(profileDir, "SKILL.md"),
127
+ `---
128
+ name: ${profileId}
129
+ description: Preferred runtime test profile.
130
+ ---
131
+
132
+ Testing preferred runtime loading.
133
+ `
134
+ );
135
+
136
+ const registry = await import("../registry");
137
+ const loaded = registry.getProfile(profileId);
138
+
139
+ expect(loaded).toBeDefined();
140
+ expect(loaded?.preferredRuntime).toBe("openai-direct");
141
+ } finally {
142
+ process.env.HOME = originalHome;
143
+ fs.rmSync(tempHome, { recursive: true, force: true });
144
+ vi.resetModules();
145
+ }
146
+ });
147
+
101
148
  it("getProfileTags returns tag map", () => {
102
149
  const tagMap = getProfileTags();
103
150
  expect(tagMap.get("researcher")).toContain("research");
@@ -12,9 +12,36 @@ You are the Upgrade Assistant for a stagent clone. Your job is to pull upstream
12
12
  - **Data directory:** `{{DATA_DIR}}`
13
13
  - **Working directory:** the current repo root
14
14
 
15
+ ## How to ask the user a question
16
+
17
+ **Never emit a question as plain text in the log.** The session UI cannot turn text into a reply input. Always invoke the `AskUserQuestion` tool — the user sees a structured prompt in the task view and the tool call returns with their answer so you can continue.
18
+
19
+ Two canonical shapes:
20
+
21
+ - **Free-form answer** (for "should I…" questions that don't have a fixed choice set):
22
+ ```
23
+ AskUserQuestion({ question: "Your main branch has 3 commits not in origin/main. Move them to `local` and reset main, or abort so you can review?" })
24
+ ```
25
+ The user types a reply and the tool returns `{ answer: "..." }`. Act on it.
26
+
27
+ - **Choice-based answer** (for merge-conflict resolution — always three canonical choices):
28
+ ```
29
+ AskUserQuestion({
30
+ question: "Conflict in src/app/page.tsx — which version do you want?",
31
+ options: [
32
+ { label: "Keep my version", description: "Use your changes; discard main's version" },
33
+ { label: "Take main's version", description: "Use main's changes; discard yours" },
34
+ { label: "Show me the diff", description: "Output the full conflict diff for manual review" }
35
+ ]
36
+ })
37
+ ```
38
+ The tool returns `{ answer: "Keep my version" }` (or one of the other labels). Run the matching `git checkout --ours` / `--theirs` / `git diff` command and continue.
39
+
40
+ If the answer is free-form prose, read it literally. Do not second-guess the user.
41
+
15
42
  ## Crucial rules — read these before doing anything
16
43
 
17
- 1. **Never modify `main` except by fast-forward.** After fetching, merge `origin/main` into local `main` with `--ff-only`. If that fast-forward fails, it means the user has local commits on `main` that aren't in `origin/main` — **stop and ask the user** whether to move them to `{{INSTANCE_BRANCH}}` or abort so they can review. Do not auto-resolve.
44
+ 1. **Never modify `main` except by fast-forward.** After fetching, merge `origin/main` into local `main` with `--ff-only`. If that fast-forward fails, the user has local commits on `main` that aren't in `origin/main` — **invoke `AskUserQuestion`** asking whether to move them to `{{INSTANCE_BRANCH}}` or abort so they can review. Do not auto-resolve.
18
45
 
19
46
  2. **Never push any branch.** The pre-push hook blocks `{{INSTANCE_BRANCH}}` pushes, but you should not even attempt one. Your job ends at a local commit.
20
47
 
@@ -22,10 +49,10 @@ You are the Upgrade Assistant for a stagent clone. Your job is to pull upstream
22
49
 
23
50
  4. **Treat `local` identically to any named instance branch.** Users with a default single-clone setup have `{{INSTANCE_BRANCH}}=local`. Users running private domain clones have names like `wealth-mgr` or `investor-mgr`. The merge flow is identical in both cases.
24
51
 
25
- 5. **Stop and ask the user on merge conflicts.** Do not guess. For each conflict, use the three canonical choices:
52
+ 5. **Stop and ask on merge conflicts — always via `AskUserQuestion`.** Do not guess and do not emit the question as plain text. For each conflicted file, invoke `AskUserQuestion` with the three-choice payload shown above in "How to ask the user a question". Map the returned `answer` to the git command:
26
53
  - **"Keep my version"** → `git checkout --ours <file>`
27
54
  - **"Take main's version"** → `git checkout --theirs <file>`
28
- - **"Show me the diff"** → `git diff <file>` and output the full conflict for manual review
55
+ - **"Show me the diff"** → `git diff <file>` and output the full conflict; then re-invoke `AskUserQuestion` so the user can pick one of the first two options after reviewing.
29
56
  After all conflicts are resolved, `git add` the files and continue the merge.
30
57
 
31
58
  ## Standard merge flow
@@ -3,8 +3,8 @@ name: Upgrade Assistant
3
3
  version: "1.0.0"
4
4
  domain: work
5
5
  tags: [upgrade, git, merge, maintenance, instance]
6
- supportedRuntimes: [claude-code, anthropic-direct]
7
- preferredRuntime: anthropic-direct
6
+ supportedRuntimes: [claude-code, openai-codex-app-server]
7
+ preferredRuntime: claude-code
8
8
 
9
9
  maxTurns: 40
10
10
 
@@ -30,3 +30,7 @@ allowedTools:
30
30
  - Bash(npm install)
31
31
  - Read
32
32
  - Write
33
+ # Used for every decision point (merge conflicts, drifted main, etc.) —
34
+ # the agent must NEVER emit a question as plain text; always invoke this
35
+ # tool so the user can answer through the session UI.
36
+ - AskUserQuestion
@@ -0,0 +1,104 @@
1
+ import { readdirSync, readFileSync, statSync, existsSync } from "fs";
2
+ import { join } from "path";
3
+ import { homedir } from "os";
4
+ import { listProfiles } from "./registry";
5
+ import type { AgentProfile } from "./types";
6
+
7
/**
 * Minimal YAML frontmatter parser for the `---\nkey: value\n---\n...` pattern
 * used by SKILL.md files.
 *
 * Only flat `key: value` lines are understood; nested structures, lists and
 * multi-line scalars are not interpreted. Both LF and CRLF line endings are
 * accepted, and a leading UTF-8 BOM is ignored.
 *
 * @param content Full text of the file, starting with the frontmatter fence.
 * @returns A map of trimmed keys to trimmed values (surrounding matching
 *   quotes removed), or `null` when the content does not start with a
 *   frontmatter block. Required keys such as `name` are NOT validated here;
 *   callers must check for them.
 */
function parseFrontmatter(content: string): Record<string, string> | null {
  // Editors on Windows commonly save with a BOM and/or CRLF; tolerate both so
  // such files are not silently treated as having no frontmatter.
  const match = content
    .replace(/^\uFEFF/, "")
    .match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) return null;

  const result: Record<string, string> = {};
  for (const line of (match[1] ?? "").split(/\r?\n/)) {
    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;

    const key = line.slice(0, colonIdx).trim();
    // Skip empty keys and YAML comment lines that happen to contain a colon.
    if (!key || key.startsWith("#")) continue;

    // Split on the FIRST colon only so values like `a: b` survive intact.
    let value = line.slice(colonIdx + 1).trim();

    // `name: "foo"` and `name: 'foo'` both mean foo; drop one matching pair
    // of surrounding quotes so ids do not carry literal quote characters.
    const first = value.charAt(0);
    if (
      value.length >= 2 &&
      (first === '"' || first === "'") &&
      value.charAt(value.length - 1) === first
    ) {
      value = value.slice(1, -1);
    }

    result[key] = value;
  }
  return result;
}
24
+
25
/**
 * Loads every `<skillsDir>/<entry>/SKILL.md` as a read-only agent profile.
 *
 * Loading is best-effort: a missing or unreadable skills directory yields an
 * empty list, and any entry that is not a directory, has no SKILL.md, lacks a
 * `name` in its frontmatter, or throws while being read is skipped (with a
 * console warning for the latter two) instead of failing the whole scan.
 *
 * @param skillsDir Directory whose immediate subdirectories are skills.
 * @param origin Where the skills come from; also decides `scope`
 *   ("project" for `filesystem-project`, otherwise "user").
 * @param projectRootDir Stored as `projectDir` on project-origin profiles;
 *   ignored for user-origin profiles.
 * @returns One profile per successfully parsed skill, in directory-listing order.
 */
function loadFilesystemSkills(
  skillsDir: string,
  origin: "filesystem-project" | "filesystem-user",
  projectRootDir: string | undefined
): AgentProfile[] {
  if (!existsSync(skillsDir)) return [];

  // The path can exist yet not be listable (a plain file, or permissions).
  // Treat that like "no skills" rather than throwing out of the fused listing.
  let entries: string[];
  try {
    entries = readdirSync(skillsDir);
  } catch (err) {
    console.warn(
      `[listFusedProfiles] failed to read skills dir ${skillsDir}:`,
      err instanceof Error ? err.message : String(err)
    );
    return [];
  }

  const isProjectOrigin = origin === "filesystem-project";
  const profiles: AgentProfile[] = [];

  for (const entry of entries) {
    const skillPath = join(skillsDir, entry);
    try {
      if (!statSync(skillPath).isDirectory()) continue;

      const skillMdPath = join(skillPath, "SKILL.md");
      if (!existsSync(skillMdPath)) continue;

      const content = readFileSync(skillMdPath, "utf8");
      const fm = parseFrontmatter(content);
      if (!fm || !fm.name) {
        console.warn(
          `[listFusedProfiles] skipping ${skillMdPath}: missing name in frontmatter`
        );
        continue;
      }

      // The full file text (frontmatter included) is used both as the system
      // prompt and as the raw skill markdown.
      // NOTE(review): the `as AgentProfile` assertion is kept from the original
      // because the AgentProfile declaration is not visible here — confirm the
      // literal satisfies it and drop the assertion if so.
      profiles.push({
        id: fm.name,
        name: fm.name,
        description: fm.description ?? "",
        domain: "skill",
        tags: [],
        systemPrompt: content,
        skillMd: content,
        allowedTools: [],
        mcpServers: {},
        supportedRuntimes: ["claude-code"],
        origin,
        scope: isProjectOrigin ? "project" : "user",
        readOnly: true,
        projectDir: isProjectOrigin ? projectRootDir : undefined,
      } as AgentProfile);
    } catch (err) {
      // One broken skill must not hide the others.
      console.warn(
        `[listFusedProfiles] failed to load skill at ${skillPath}:`,
        err instanceof Error ? err.message : String(err)
      );
    }
  }
  return profiles;
}
71
+
72
/**
 * Lists every agent profile reachable from this Stagent instance, merging
 * registry profiles with filesystem skills ("fused" view):
 *   1. Registry profiles (as returned by `listProfiles()`)
 *   2. User filesystem skills at `~/.claude/skills/<skill>/SKILL.md`
 *      (or the `userSkillsDir` override)
 *   3. Project filesystem skills at `<projectDir>/.claude/skills/<skill>/SKILL.md`
 *
 * Filesystem skills are deduped by id in that priority order: a registry
 * profile wins over any skill with the same id, a user skill wins over a
 * project skill, and within a single directory the first skill declaring an
 * id wins. Registry profiles themselves are returned unchanged.
 *
 * @param projectDir Absolute path to the active project's working directory
 *   (project root); when null/undefined/empty, project skills are not scanned.
 * @param userSkillsDir Override for user skills dir (tests); defaults to `~/.claude/skills`
 * @returns Registry profiles followed by surviving user skills, then project skills.
 */
export async function listFusedProfiles(
  projectDir: string | null | undefined,
  userSkillsDir: string = join(homedir(), ".claude", "skills")
): Promise<AgentProfile[]> {
  const registry = listProfiles();

  // Single running set of claimed ids: gives O(1) collision checks across all
  // three sources and also drops same-id duplicates inside one skills dir.
  const seenIds = new Set(registry.map((p) => p.id));
  const takeUnseen = (candidates: AgentProfile[]): AgentProfile[] =>
    candidates.filter((p) => {
      if (seenIds.has(p.id)) return false;
      seenIds.add(p.id);
      return true;
    });

  // Order matters: user skills must claim their ids before project skills.
  const userSkills = takeUnseen(
    loadFilesystemSkills(userSkillsDir, "filesystem-user", undefined)
  );

  const projectSkills = projectDir
    ? takeUnseen(
        loadFilesystemSkills(
          join(projectDir, ".claude", "skills"),
          "filesystem-project",
          projectDir
        )
      )
    : [];

  return [...registry, ...userSkills, ...projectSkills];
}