stagent 0.9.6 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (396) hide show
  1. package/README.md +20 -44
  2. package/dist/cli.js +66 -18
  3. package/docs/.coverage-gaps.json +144 -56
  4. package/docs/.last-generated +1 -1
  5. package/docs/features/agent-intelligence.md +12 -2
  6. package/docs/features/chat.md +40 -5
  7. package/docs/features/cost-usage.md +1 -1
  8. package/docs/features/documents.md +5 -2
  9. package/docs/features/inbox-notifications.md +10 -2
  10. package/docs/features/keyboard-navigation.md +12 -3
  11. package/docs/features/provider-runtimes.md +20 -2
  12. package/docs/features/schedules.md +32 -4
  13. package/docs/features/settings.md +28 -5
  14. package/docs/features/shared-components.md +7 -3
  15. package/docs/features/tables.md +11 -2
  16. package/docs/features/tool-permissions.md +6 -2
  17. package/docs/features/workflows.md +14 -4
  18. package/docs/index.md +1 -1
  19. package/docs/journeys/developer.md +39 -2
  20. package/docs/journeys/personal-use.md +32 -8
  21. package/docs/journeys/power-user.md +45 -14
  22. package/docs/journeys/work-use.md +17 -8
  23. package/docs/manifest.json +15 -15
  24. package/docs/superpowers/plans/2026-04-07-instance-bootstrap.md +1691 -0
  25. package/docs/superpowers/plans/2026-04-08-schedule-orchestration.md +2983 -0
  26. package/docs/superpowers/plans/2026-04-11-schedule-maxturns-api-control.md +551 -0
  27. package/docs/superpowers/plans/2026-04-11-task-create-profile-validation.md +864 -0
  28. package/docs/superpowers/plans/2026-04-11-task-runtime-stagent-mcp-injection.md +739 -0
  29. package/docs/superpowers/plans/2026-04-14-chat-command-namespace-refactor.md +1390 -0
  30. package/docs/superpowers/plans/2026-04-14-chat-environment-integration.md +1561 -0
  31. package/docs/superpowers/plans/2026-04-14-chat-polish-bundle-v1.md +1219 -0
  32. package/docs/superpowers/plans/2026-04-14-chat-session-persistence-provider-closeout.md +399 -0
  33. package/docs/superpowers/specs/2026-04-08-chat-sse-resilience-hotfix-design.md +201 -0
  34. package/docs/superpowers/specs/2026-04-08-schedule-orchestration-design.md +371 -0
  35. package/docs/superpowers/specs/2026-04-08-swarm-visibility-design.md +213 -0
  36. package/next.config.mjs +1 -0
  37. package/package.json +3 -2
  38. package/src/__tests__/instrumentation-smoke.test.ts +15 -0
  39. package/src/app/analytics/page.tsx +1 -21
  40. package/src/app/api/chat/conversations/[id]/messages/route.ts +22 -1
  41. package/src/app/api/chat/conversations/[id]/skills/__tests__/activate.test.ts +141 -0
  42. package/src/app/api/chat/conversations/[id]/skills/activate/route.ts +74 -0
  43. package/src/app/api/chat/conversations/[id]/skills/deactivate/route.ts +33 -0
  44. package/src/app/api/chat/export/route.ts +52 -0
  45. package/src/app/api/chat/files/search/route.ts +50 -0
  46. package/src/app/api/diagnostics/chat-streams/route.ts +65 -0
  47. package/src/app/api/environment/rescan-if-stale/__tests__/route.test.ts +45 -0
  48. package/src/app/api/environment/rescan-if-stale/route.ts +23 -0
  49. package/src/app/api/environment/skills/route.ts +13 -0
  50. package/src/app/api/instance/config/route.ts +41 -0
  51. package/src/app/api/instance/init/route.ts +34 -0
  52. package/src/app/api/instance/upgrade/check/route.ts +26 -0
  53. package/src/app/api/instance/upgrade/route.ts +96 -0
  54. package/src/app/api/instance/upgrade/status/route.ts +35 -0
  55. package/src/app/api/memory/route.ts +0 -11
  56. package/src/app/api/notifications/route.ts +4 -2
  57. package/src/app/api/projects/[id]/route.ts +5 -155
  58. package/src/app/api/projects/__tests__/delete-project.test.ts +10 -19
  59. package/src/app/api/schedules/[id]/execute/route.ts +111 -0
  60. package/src/app/api/schedules/[id]/route.ts +9 -1
  61. package/src/app/api/schedules/__tests__/execute-route.test.ts +118 -0
  62. package/src/app/api/schedules/route.ts +3 -12
  63. package/src/app/api/settings/chat/pins/route.ts +94 -0
  64. package/src/app/api/settings/chat/saved-searches/__tests__/route.test.ts +119 -0
  65. package/src/app/api/settings/chat/saved-searches/route.ts +79 -0
  66. package/src/app/api/settings/environment/route.ts +26 -0
  67. package/src/app/api/settings/openai/login/route.ts +22 -0
  68. package/src/app/api/settings/openai/logout/route.ts +7 -0
  69. package/src/app/api/settings/openai/route.ts +21 -1
  70. package/src/app/api/settings/providers/route.ts +35 -8
  71. package/src/app/api/tables/[id]/enrich/__tests__/route.test.ts +153 -0
  72. package/src/app/api/tables/[id]/enrich/plan/route.ts +98 -0
  73. package/src/app/api/tables/[id]/enrich/route.ts +147 -0
  74. package/src/app/api/tables/[id]/enrich/runs/route.ts +25 -0
  75. package/src/app/api/tasks/[id]/execute/route.ts +52 -33
  76. package/src/app/api/tasks/[id]/respond/route.ts +31 -15
  77. package/src/app/api/tasks/[id]/resume/route.ts +24 -3
  78. package/src/app/api/workflows/[id]/resume/route.ts +59 -0
  79. package/src/app/api/workflows/[id]/status/route.ts +22 -8
  80. package/src/app/api/workspace/context/route.ts +2 -0
  81. package/src/app/api/workspace/fix-data-dir/route.ts +81 -0
  82. package/src/app/chat/page.tsx +11 -0
  83. package/src/app/documents/page.tsx +4 -1
  84. package/src/app/inbox/page.tsx +12 -5
  85. package/src/app/layout.tsx +42 -21
  86. package/src/app/page.tsx +0 -2
  87. package/src/app/settings/page.tsx +8 -9
  88. package/src/components/chat/__tests__/capability-banner.test.tsx +38 -0
  89. package/src/components/chat/__tests__/chat-session-provider.test.tsx +573 -0
  90. package/src/components/chat/__tests__/skill-row.test.tsx +91 -0
  91. package/src/components/chat/capability-banner.tsx +68 -0
  92. package/src/components/chat/chat-command-popover.tsx +670 -49
  93. package/src/components/chat/chat-input.tsx +104 -10
  94. package/src/components/chat/chat-message.tsx +12 -3
  95. package/src/components/chat/chat-session-provider.tsx +790 -0
  96. package/src/components/chat/chat-shell.tsx +151 -401
  97. package/src/components/chat/command-tab-bar.tsx +68 -0
  98. package/src/components/chat/conversation-template-picker.tsx +421 -0
  99. package/src/components/chat/help-dialog.tsx +39 -0
  100. package/src/components/chat/skill-composition-conflict-dialog.tsx +96 -0
  101. package/src/components/chat/skill-row.tsx +147 -0
  102. package/src/components/documents/document-browser.tsx +37 -19
  103. package/src/components/instance/__tests__/instance-section.test.tsx +125 -0
  104. package/src/components/instance/instance-section.tsx +382 -0
  105. package/src/components/instance/upgrade-badge.tsx +219 -0
  106. package/src/components/notifications/__tests__/batch-proposal-review.test.tsx +95 -0
  107. package/src/components/notifications/__tests__/notification-item.test.tsx +106 -0
  108. package/src/components/notifications/__tests__/permission-response-actions.test.tsx +70 -0
  109. package/src/components/notifications/batch-proposal-review.tsx +20 -5
  110. package/src/components/notifications/inbox-list.tsx +11 -2
  111. package/src/components/notifications/notification-item.tsx +56 -2
  112. package/src/components/notifications/pending-approval-host.tsx +56 -37
  113. package/src/components/notifications/permission-response-actions.tsx +155 -1
  114. package/src/components/schedules/schedule-create-sheet.tsx +19 -1
  115. package/src/components/schedules/schedule-edit-sheet.tsx +20 -1
  116. package/src/components/schedules/schedule-form.tsx +31 -0
  117. package/src/components/settings/__tests__/providers-runtimes-section.test.tsx +149 -0
  118. package/src/components/settings/auth-method-selector.tsx +19 -4
  119. package/src/components/settings/auth-status-badge.tsx +28 -3
  120. package/src/components/settings/environment-section.tsx +102 -0
  121. package/src/components/settings/openai-chatgpt-auth-control.tsx +278 -0
  122. package/src/components/settings/openai-runtime-section.tsx +7 -1
  123. package/src/components/settings/providers-runtimes-section.tsx +138 -19
  124. package/src/components/shared/__tests__/filter-hint.test.tsx +40 -0
  125. package/src/components/shared/__tests__/saved-searches-manager.test.tsx +147 -0
  126. package/src/components/shared/app-sidebar.tsx +4 -3
  127. package/src/components/shared/command-palette.tsx +266 -7
  128. package/src/components/shared/filter-hint.tsx +70 -0
  129. package/src/components/shared/filter-input.tsx +59 -0
  130. package/src/components/shared/saved-searches-manager.tsx +199 -0
  131. package/src/components/shared/theme-toggle.tsx +5 -24
  132. package/src/components/shared/workspace-indicator.tsx +61 -2
  133. package/src/components/tables/__tests__/table-enrichment-sheet.test.tsx +130 -0
  134. package/src/components/tables/table-create-sheet.tsx +4 -0
  135. package/src/components/tables/table-enrichment-runs.tsx +103 -0
  136. package/src/components/tables/table-enrichment-sheet.tsx +538 -0
  137. package/src/components/tables/table-spreadsheet.tsx +29 -5
  138. package/src/components/tables/table-toolbar.tsx +10 -1
  139. package/src/components/tasks/kanban-board.tsx +1 -0
  140. package/src/components/tasks/kanban-column.tsx +53 -14
  141. package/src/components/tasks/task-bento-grid.tsx +31 -2
  142. package/src/components/tasks/task-card.tsx +29 -3
  143. package/src/components/tasks/task-chip-bar.tsx +54 -1
  144. package/src/components/tasks/task-result-renderer.tsx +1 -1
  145. package/src/components/workflows/delay-step-body.tsx +109 -0
  146. package/src/components/workflows/hooks/use-workflow-status.ts +50 -0
  147. package/src/components/workflows/loop-status-view.tsx +1 -1
  148. package/src/components/workflows/shared/step-result.tsx +78 -0
  149. package/src/components/workflows/shared/workflow-header.tsx +141 -0
  150. package/src/components/workflows/shared/workflow-loading-skeleton.tsx +36 -0
  151. package/src/components/workflows/swarm-dashboard.tsx +2 -15
  152. package/src/components/workflows/views/loop-pattern-view.tsx +137 -0
  153. package/src/components/workflows/views/sequence-pattern-view.tsx +511 -0
  154. package/src/components/workflows/workflow-form-view.tsx +133 -16
  155. package/src/components/workflows/workflow-status-view.tsx +30 -740
  156. package/src/hooks/__tests__/use-chat-autocomplete-tabs.test.ts +47 -0
  157. package/src/hooks/__tests__/use-saved-searches.test.ts +70 -0
  158. package/src/hooks/use-active-skills.ts +110 -0
  159. package/src/hooks/use-chat-autocomplete.ts +120 -7
  160. package/src/hooks/use-enriched-skills.ts +19 -0
  161. package/src/hooks/use-pinned-entries.ts +104 -0
  162. package/src/hooks/use-recent-user-messages.ts +19 -0
  163. package/src/hooks/use-saved-searches.ts +142 -0
  164. package/src/instrumentation-node.ts +94 -0
  165. package/src/instrumentation.ts +4 -48
  166. package/src/lib/agents/__tests__/claude-agent-sdk-options.test.ts +56 -0
  167. package/src/lib/agents/__tests__/claude-agent.test.ts +212 -0
  168. package/src/lib/agents/__tests__/execution-manager.test.ts +1 -27
  169. package/src/lib/agents/__tests__/failure-reason.test.ts +68 -0
  170. package/src/lib/agents/__tests__/learned-context.test.ts +0 -11
  171. package/src/lib/agents/__tests__/learning-session.test.ts +158 -0
  172. package/src/lib/agents/__tests__/pattern-extractor.test.ts +48 -0
  173. package/src/lib/agents/__tests__/task-dispatch.test.ts +166 -0
  174. package/src/lib/agents/__tests__/tool-permissions.test.ts +60 -0
  175. package/src/lib/agents/claude-agent.ts +217 -21
  176. package/src/lib/agents/execution-manager.ts +0 -35
  177. package/src/lib/agents/handoff/bus.ts +2 -2
  178. package/src/lib/agents/learned-context.ts +0 -12
  179. package/src/lib/agents/learning-session.ts +18 -5
  180. package/src/lib/agents/profiles/__tests__/list-fused-profiles.test.ts +110 -0
  181. package/src/lib/agents/profiles/__tests__/registry.test.ts +53 -4
  182. package/src/lib/agents/profiles/builtins/upgrade-assistant/SKILL.md +97 -0
  183. package/src/lib/agents/profiles/builtins/upgrade-assistant/profile.yaml +36 -0
  184. package/src/lib/agents/profiles/list-fused-profiles.ts +104 -0
  185. package/src/lib/agents/profiles/registry.ts +18 -0
  186. package/src/lib/agents/profiles/types.ts +7 -1
  187. package/src/lib/agents/router.ts +3 -6
  188. package/src/lib/agents/runtime/__tests__/catalog.test.ts +130 -0
  189. package/src/lib/agents/runtime/__tests__/execution-target.test.ts +183 -0
  190. package/src/lib/agents/runtime/__tests__/openai-codex-auth.test.ts +118 -0
  191. package/src/lib/agents/runtime/anthropic-direct.ts +8 -0
  192. package/src/lib/agents/runtime/catalog.ts +121 -0
  193. package/src/lib/agents/runtime/claude-sdk.ts +32 -0
  194. package/src/lib/agents/runtime/codex-app-server-client.ts +11 -5
  195. package/src/lib/agents/runtime/execution-target.ts +456 -0
  196. package/src/lib/agents/runtime/index.ts +4 -0
  197. package/src/lib/agents/runtime/launch-failure.ts +101 -0
  198. package/src/lib/agents/runtime/openai-codex-auth.ts +389 -0
  199. package/src/lib/agents/runtime/openai-codex.ts +64 -60
  200. package/src/lib/agents/runtime/openai-direct.ts +8 -0
  201. package/src/lib/agents/runtime/types.ts +8 -0
  202. package/src/lib/agents/task-dispatch.ts +220 -0
  203. package/src/lib/agents/tool-permissions.ts +16 -1
  204. package/src/lib/book/chapter-mapping.ts +11 -0
  205. package/src/lib/book/content.ts +10 -0
  206. package/src/lib/chat/__tests__/active-skill-injection.test.ts +261 -0
  207. package/src/lib/chat/__tests__/active-streams.test.ts +49 -0
  208. package/src/lib/chat/__tests__/clean-filter-input.test.ts +68 -0
  209. package/src/lib/chat/__tests__/command-tabs.test.ts +68 -0
  210. package/src/lib/chat/__tests__/context-builder-files.test.ts +112 -0
  211. package/src/lib/chat/__tests__/dismissals.test.ts +65 -0
  212. package/src/lib/chat/__tests__/engine-sdk-options.test.ts +117 -0
  213. package/src/lib/chat/__tests__/finalize-safety-net.test.ts +139 -0
  214. package/src/lib/chat/__tests__/reconcile.test.ts +137 -0
  215. package/src/lib/chat/__tests__/skill-conflict.test.ts +35 -0
  216. package/src/lib/chat/__tests__/stream-telemetry.test.ts +151 -0
  217. package/src/lib/chat/__tests__/types.test.ts +28 -0
  218. package/src/lib/chat/active-skills.ts +31 -0
  219. package/src/lib/chat/active-streams.ts +27 -0
  220. package/src/lib/chat/clean-filter-input.ts +30 -0
  221. package/src/lib/chat/codex-engine.ts +46 -24
  222. package/src/lib/chat/command-tabs.ts +61 -0
  223. package/src/lib/chat/context-builder.ts +146 -4
  224. package/src/lib/chat/dismissals.ts +73 -0
  225. package/src/lib/chat/engine.ts +159 -18
  226. package/src/lib/chat/files/__tests__/search.test.ts +135 -0
  227. package/src/lib/chat/files/expand-mention.ts +76 -0
  228. package/src/lib/chat/files/search.ts +99 -0
  229. package/src/lib/chat/reconcile.ts +117 -0
  230. package/src/lib/chat/skill-composition.ts +210 -0
  231. package/src/lib/chat/skill-conflict.ts +105 -0
  232. package/src/lib/chat/stagent-tools.ts +7 -19
  233. package/src/lib/chat/stream-telemetry.ts +137 -0
  234. package/src/lib/chat/suggested-prompts.ts +28 -1
  235. package/src/lib/chat/system-prompt.ts +48 -1
  236. package/src/lib/chat/tool-catalog.ts +35 -4
  237. package/src/lib/chat/tools/__tests__/enrich-table-tool.test.ts +127 -0
  238. package/src/lib/chat/tools/__tests__/profile-tools.test.ts +51 -0
  239. package/src/lib/chat/tools/__tests__/schedule-tools.test.ts +261 -0
  240. package/src/lib/chat/tools/__tests__/settings-tools.test.ts +294 -0
  241. package/src/lib/chat/tools/__tests__/skill-tools.test.ts +474 -0
  242. package/src/lib/chat/tools/__tests__/task-tools.test.ts +399 -0
  243. package/src/lib/chat/tools/__tests__/workflow-tools-dedup.test.ts +351 -0
  244. package/src/lib/chat/tools/blueprint-tools.ts +190 -0
  245. package/src/lib/chat/tools/document-tools.ts +29 -13
  246. package/src/lib/chat/tools/helpers.ts +41 -0
  247. package/src/lib/chat/tools/notification-tools.ts +9 -5
  248. package/src/lib/chat/tools/profile-tools.ts +120 -23
  249. package/src/lib/chat/tools/project-tools.ts +33 -0
  250. package/src/lib/chat/tools/schedule-tools.ts +44 -11
  251. package/src/lib/chat/tools/skill-tools.ts +183 -0
  252. package/src/lib/chat/tools/table-tools.ts +71 -0
  253. package/src/lib/chat/tools/task-tools.ts +89 -21
  254. package/src/lib/chat/tools/workflow-tools.ts +275 -32
  255. package/src/lib/chat/types.ts +15 -0
  256. package/src/lib/constants/settings.ts +10 -18
  257. package/src/lib/data/__tests__/clear.test.ts +56 -2
  258. package/src/lib/data/clear.ts +17 -16
  259. package/src/lib/data/delete-project.ts +171 -0
  260. package/src/lib/db/__tests__/bootstrap.test.ts +1 -1
  261. package/src/lib/db/bootstrap.ts +62 -16
  262. package/src/lib/db/index.ts +5 -0
  263. package/src/lib/db/migrations/0009_add_app_instances.sql +25 -0
  264. package/src/lib/db/migrations/0024_add_workflow_resume_at.sql +10 -0
  265. package/src/lib/db/migrations/0025_drop_app_instances.sql +3 -0
  266. package/src/lib/db/migrations/0026_drop_license.sql +3 -0
  267. package/src/lib/db/migrations/meta/_journal.json +21 -0
  268. package/src/lib/db/schema.ts +94 -23
  269. package/src/lib/environment/__tests__/auto-promote.test.ts +132 -0
  270. package/src/lib/environment/__tests__/list-skills-enriched.test.ts +55 -0
  271. package/src/lib/environment/__tests__/skill-enrichment.test.ts +129 -0
  272. package/src/lib/environment/__tests__/skill-recommendations.test.ts +87 -0
  273. package/src/lib/environment/data.ts +9 -0
  274. package/src/lib/environment/list-skills.ts +176 -0
  275. package/src/lib/environment/parsers/__tests__/skill.test.ts +54 -0
  276. package/src/lib/environment/parsers/skill.ts +26 -5
  277. package/src/lib/environment/profile-generator.ts +54 -0
  278. package/src/lib/environment/skill-enrichment.ts +106 -0
  279. package/src/lib/environment/skill-recommendations.ts +66 -0
  280. package/src/lib/environment/workspace-context.ts +13 -1
  281. package/src/lib/filters/__tests__/parse.quoted.test.ts +40 -0
  282. package/src/lib/filters/__tests__/parse.test.ts +135 -0
  283. package/src/lib/filters/parse.ts +86 -0
  284. package/src/lib/import/dedup.ts +4 -54
  285. package/src/lib/instance/__tests__/bootstrap.test.ts +362 -0
  286. package/src/lib/instance/__tests__/detect.test.ts +115 -0
  287. package/src/lib/instance/__tests__/fingerprint.test.ts +48 -0
  288. package/src/lib/instance/__tests__/git-ops.test.ts +95 -0
  289. package/src/lib/instance/__tests__/settings.test.ts +83 -0
  290. package/src/lib/instance/__tests__/upgrade-poller.test.ts +181 -0
  291. package/src/lib/instance/bootstrap.ts +270 -0
  292. package/src/lib/instance/detect.ts +49 -0
  293. package/src/lib/instance/fingerprint.ts +76 -0
  294. package/src/lib/instance/git-ops.ts +95 -0
  295. package/src/lib/instance/settings.ts +61 -0
  296. package/src/lib/instance/types.ts +77 -0
  297. package/src/lib/instance/upgrade-poller.ts +205 -0
  298. package/src/lib/notifications/__tests__/visibility.test.ts +51 -0
  299. package/src/lib/notifications/visibility.ts +33 -0
  300. package/src/lib/schedules/__tests__/collision-check.test.ts +93 -0
  301. package/src/lib/schedules/__tests__/config.test.ts +62 -0
  302. package/src/lib/schedules/__tests__/firing-metrics.test.ts +99 -0
  303. package/src/lib/schedules/__tests__/integration.test.ts +82 -0
  304. package/src/lib/schedules/__tests__/slot-claim.test.ts +242 -0
  305. package/src/lib/schedules/__tests__/tick-scheduler.test.ts +102 -0
  306. package/src/lib/schedules/__tests__/turn-budget.test.ts +228 -0
  307. package/src/lib/schedules/collision-check.ts +105 -0
  308. package/src/lib/schedules/config.ts +53 -0
  309. package/src/lib/schedules/scheduler.ts +236 -17
  310. package/src/lib/schedules/slot-claim.ts +105 -0
  311. package/src/lib/settings/__tests__/openai-auth.test.ts +101 -0
  312. package/src/lib/settings/__tests__/openai-login-manager.test.ts +64 -0
  313. package/src/lib/settings/__tests__/runtime-setup.test.ts +33 -0
  314. package/src/lib/settings/openai-auth.ts +105 -10
  315. package/src/lib/settings/openai-login-manager.ts +260 -0
  316. package/src/lib/settings/runtime-setup.ts +14 -4
  317. package/src/lib/tables/__tests__/enrichment-planner.test.ts +124 -0
  318. package/src/lib/tables/__tests__/enrichment.test.ts +147 -0
  319. package/src/lib/tables/enrichment-planner.ts +454 -0
  320. package/src/lib/tables/enrichment.ts +328 -0
  321. package/src/lib/tables/query-builder.ts +5 -2
  322. package/src/lib/tables/trigger-evaluator.ts +3 -2
  323. package/src/lib/theme.ts +71 -0
  324. package/src/lib/usage/ledger.ts +2 -18
  325. package/src/lib/util/__tests__/similarity.test.ts +106 -0
  326. package/src/lib/util/similarity.ts +77 -0
  327. package/src/lib/utils/format-timestamp.ts +24 -0
  328. package/src/lib/utils/stagent-paths.ts +12 -0
  329. package/src/lib/validators/__tests__/blueprint.test.ts +172 -0
  330. package/src/lib/validators/__tests__/settings.test.ts +10 -0
  331. package/src/lib/validators/blueprint.ts +70 -9
  332. package/src/lib/validators/profile.ts +2 -2
  333. package/src/lib/validators/settings.ts +3 -1
  334. package/src/lib/workflows/__tests__/delay.test.ts +196 -0
  335. package/src/lib/workflows/__tests__/engine.test.ts +8 -0
  336. package/src/lib/workflows/__tests__/loop-executor.test.ts +54 -0
  337. package/src/lib/workflows/__tests__/post-action.test.ts +108 -0
  338. package/src/lib/workflows/blueprints/__tests__/render-prompt.test.ts +124 -0
  339. package/src/lib/workflows/blueprints/instantiator.ts +22 -1
  340. package/src/lib/workflows/blueprints/render-prompt.ts +71 -0
  341. package/src/lib/workflows/blueprints/types.ts +16 -2
  342. package/src/lib/workflows/delay.ts +106 -0
  343. package/src/lib/workflows/engine.ts +212 -7
  344. package/src/lib/workflows/loop-executor.ts +349 -24
  345. package/src/lib/workflows/post-action.ts +91 -0
  346. package/src/lib/workflows/types.ts +166 -1
  347. package/src/test/setup.ts +10 -0
  348. package/src/app/api/license/checkout/route.ts +0 -28
  349. package/src/app/api/license/portal/route.ts +0 -26
  350. package/src/app/api/license/route.ts +0 -89
  351. package/src/app/api/license/usage/route.ts +0 -63
  352. package/src/app/api/marketplace/browse/route.ts +0 -15
  353. package/src/app/api/marketplace/import/route.ts +0 -28
  354. package/src/app/api/marketplace/publish/route.ts +0 -40
  355. package/src/app/api/onboarding/email/route.ts +0 -53
  356. package/src/app/api/settings/telemetry/route.ts +0 -14
  357. package/src/app/api/sync/export/route.ts +0 -54
  358. package/src/app/api/sync/restore/route.ts +0 -37
  359. package/src/app/api/sync/sessions/route.ts +0 -24
  360. package/src/app/auth/callback/route.ts +0 -73
  361. package/src/app/marketplace/page.tsx +0 -19
  362. package/src/components/analytics/analytics-gate-card.tsx +0 -101
  363. package/src/components/marketplace/blueprint-card.tsx +0 -61
  364. package/src/components/marketplace/marketplace-browser.tsx +0 -131
  365. package/src/components/onboarding/email-capture-card.tsx +0 -104
  366. package/src/components/settings/activation-form.tsx +0 -95
  367. package/src/components/settings/cloud-account-section.tsx +0 -147
  368. package/src/components/settings/cloud-sync-section.tsx +0 -155
  369. package/src/components/settings/subscription-section.tsx +0 -410
  370. package/src/components/settings/telemetry-section.tsx +0 -80
  371. package/src/components/shared/premium-gate-overlay.tsx +0 -50
  372. package/src/components/shared/schedule-gate-dialog.tsx +0 -64
  373. package/src/components/shared/upgrade-banner.tsx +0 -112
  374. package/src/hooks/use-supabase-auth.ts +0 -79
  375. package/src/lib/billing/email.ts +0 -54
  376. package/src/lib/billing/products.ts +0 -80
  377. package/src/lib/billing/stripe.ts +0 -101
  378. package/src/lib/cloud/supabase-browser.ts +0 -32
  379. package/src/lib/cloud/supabase-client.ts +0 -56
  380. package/src/lib/license/__tests__/features.test.ts +0 -56
  381. package/src/lib/license/__tests__/key-format.test.ts +0 -88
  382. package/src/lib/license/__tests__/manager.test.ts +0 -64
  383. package/src/lib/license/__tests__/tier-limits.test.ts +0 -79
  384. package/src/lib/license/cloud-validation.ts +0 -60
  385. package/src/lib/license/features.ts +0 -44
  386. package/src/lib/license/key-format.ts +0 -101
  387. package/src/lib/license/limit-check.ts +0 -111
  388. package/src/lib/license/limit-queries.ts +0 -51
  389. package/src/lib/license/manager.ts +0 -345
  390. package/src/lib/license/notifications.ts +0 -59
  391. package/src/lib/license/tier-limits.ts +0 -71
  392. package/src/lib/marketplace/marketplace-client.ts +0 -107
  393. package/src/lib/sync/cloud-sync.ts +0 -235
  394. package/src/lib/telemetry/conversion-events.ts +0 -71
  395. package/src/lib/telemetry/queue.ts +0 -122
  396. package/src/lib/validators/license.ts +0 -33
@@ -0,0 +1,371 @@
1
+ # Spec A — Schedule Orchestration
2
+
3
+ **Status:** Approved
4
+ **Created:** 2026-04-08
5
+ **Scope mode:** HOLD (maximum rigor)
6
+ **Related:** [Chat SSE Resilience Hotfix (Spec B)](./2026-04-08-chat-sse-resilience-hotfix-design.md), [Swarm Visibility (Spec C)](./2026-04-08-swarm-visibility-design.md)
7
+
8
+ ## Context
9
+
10
+ On 2026-04-08 at 12:20:49 UTC, five scheduled agents fired simultaneously and consumed ~12,600 combined turns on Claude Opus 4.6 via the claude-code runtime. The concurrent load saturated the single Node.js event loop that hosts both scheduled tasks and interactive chat. A user's chat message sent at 12:21:55 dropped its SSE stream and persisted with `content: ""`.
11
+
12
+ The root cause is twofold: (1) schedules fire independently with no concurrency control beyond a title-pattern sibling guard, and (2) in-prompt instructions like "MAX 18 turns" are model hints, not runtime-enforced limits. This spec introduces a global concurrency cap, per-schedule turn budgets, lease-based timeouts, a minimal collision warning, and time-series metrics for evidence-based tuning.
13
+
14
+ **Key codebase discoveries that shape this design:**
15
+
16
+ 1. **"Turn" = one SDK assistant message.** `src/lib/agents/claude-agent.ts:181` increments `turnCount` on `message.type === "assistant"`. No runtime enforcement today.
17
+ 2. **Active execution tracking already exists.** `src/lib/agents/execution-manager.ts:14-62` maintains a `Map<taskId, RunningExecution>` with `getAllExecutions()`.
18
+ 3. **Scheduler already atomically claims schedules** at `src/lib/schedules/scheduler.ts:238-252` via conditional UPDATE, and serializes drain via `drainQueue()` at line 51. The existing `.then(drainQueue)` chain at line 420 runs concurrently with the tick loop — any new coordination primitive must be correct under that interleaving.
19
+ 4. **Turn budget header infrastructure exists.** `buildTurnBudgetHeader()` at claude-agent.ts:103 reads a global `MAX_TURNS` setting.
20
+ 5. **Failure detection + auto-pause already shipped.** `detectFailureReason()` at scheduler.ts:122 parses error text; auto-pause after a 3-failure streak already exists.
21
+
22
+ ## Goals
23
+
24
+ 1. **Prevent concurrent schedule overload from starving chat.** No more than `SCHEDULE_MAX_CONCURRENT` scheduled agents run simultaneously.
25
+ 2. **Enforce per-schedule turn budgets at runtime**, not via prompt hints.
26
+ 3. **Prevent permanent lock holder hangs.** Every slot and lock carries a lease; a reaper cleans expired leases.
27
+ 4. **Give users pre-flight awareness of cron overlaps** without forcing them to auto-stagger.
28
+ 5. **Collect enough telemetry to tune the concurrency cap from evidence**, not intuition.
29
+
30
+ ## Non-goals (NOT in scope)
31
+
32
+ These are deferred to follow-up specs to keep the initial ship focused and de-risked:
33
+
34
+ - **`concurrencyGroup` column and group locks** — future spec "Schedule Concurrency Groups". The incident was a global-cap problem, not a group problem.
35
+ - **Auto-stagger endpoint, 48h forecast report, collision-forecast notifications** — future spec "Schedule Predictability & Forecasting".
36
+ - **Turn drift detection, efficiency scoring (`useful_actions / total_turns`)** — future spec "Schedule Observability".
37
+ - **`turnBudgetAction: 'optimize'` meta-agent prompt rewriter** — future spec "Agent Self-Optimization".
38
+ - **Hard chat priority / `pauseSchedulesDuringChat` setting** — only if the AR1b soft pressure signal (below) proves insufficient.
39
+ - **Dynamic adaptive cap** based on measured P99 chat latency — the architect explicitly recommended against this until the static cap proves insufficient.
40
+ - **`usage_ledger.turn_count` column** — derivable from `schedule_firing_metrics` and `agent_logs`.
41
+
42
+ ## Design
43
+
44
+ ### A.1 Concurrency limiter
45
+
46
+ **Cap:** `SCHEDULE_MAX_CONCURRENT` env var, default **2** for initial ship. Raise to 3 after one week of telemetry validates chat SSE P99 under load.
47
+
48
+ **Primitive:** atomic single-SQL conditional UPDATE. Check-then-act is forbidden — the tick loop and `drainQueue()` run concurrently via the `.then()` chain at scheduler.ts:420, and a `SELECT count(*) ... then fire` sequence will allow two callers to both see `activeCount < cap` and both fire.
49
+
50
+ Correct claim:
51
+
52
+ ```sql
53
+ UPDATE tasks
54
+ SET status = 'running',
55
+ slot_claimed_at = :now,
56
+ lease_expires_at = :now + :leaseSec
57
+ WHERE id = :taskId
58
+ AND status = 'queued'
59
+ AND (SELECT COUNT(*)
60
+ FROM tasks
61
+ WHERE status = 'running'
62
+ AND source_type = 'scheduled') < :cap;
63
+ ```
64
+
65
+ `changes = 1` → proceed to `executeTaskWithRuntime()`. `changes = 0` → leave the task in `queued` state; `drainQueue()` will retry it after the next completion.
66
+
67
+ The primitive lives in a new helper `src/lib/schedules/slot-claim.ts` and is called by:
68
+ - `fireSchedule()` in scheduler.ts at line 412 (replace direct `executeTaskWithRuntime` call with `claimSlotThenExecute`)
69
+ - `drainQueue()` in scheduler.ts at line 74 (same)
70
+ - `POST /api/schedules/:id/execute` route handler (honors cap by default — see A.1.1)
71
+
72
+ ### A.1.1 Manual execute
73
+
74
+ `POST /api/schedules/:id/execute` honors the cap by default. Behavior:
75
+
76
+ - **Slot available:** claim and run normally.
77
+ - **Cap full:** return `429 Too Many Requests` with body `{ error: 'capacity_full', slotEtaSec: N }` where `N` is the minimum `lease_expires_at - now()` across running slots.
78
+ - **Explicit bypass:** `?force=true` query parameter bypasses the cap, writes an audit-log entry to `usage_ledger` with `activityType = 'manual_force_bypass'`, and triggers a confirmation modal in the UI (handled client-side).
79
+
80
+ This closes the footgun where a user clicking "Run now" five times in 2 seconds could spawn five concurrent Opus runs.
81
+
82
+ ### A.1.2 Chat soft pressure signal (AR1b)
83
+
84
+ An in-memory `activeChatStreams: Set<string>` lives in a new `src/lib/chat/active-streams.ts`. The chat engine at `src/lib/chat/engine.ts` adds to the set at stream start and removes at stream end (in the finally block — safe because Spec B already guarantees finally runs).
85
+
86
+ `tickScheduler()` calls `applyChatPressure()` before processing due schedules: if `activeChatStreams.size > 0`, any schedule whose `nextFireAt` is due gets its `nextFireAt` pushed forward by `SCHEDULE_CHAT_PRESSURE_DELAY_SEC` (default 30s) and skipped this tick. In-flight scheduled runs are not affected.
87
+
88
+ This is a soft signal, not a hard block — chat never starves schedules indefinitely because the delay is per-tick and one-shot.
89
+
90
+ ### A.2 Lease + timeout + reaper
91
+
92
+ Every claimed slot carries a lease. The reaper runs at each `tickScheduler()` pass (60s cadence) and reaps expired leases.
93
+
94
+ **Schema additions to `tasks`:**
95
+ - `slot_claimed_at TIMESTAMP` — set atomically with the slot claim
96
+ - `lease_expires_at TIMESTAMP` — `slot_claimed_at + max_run_duration_sec`
97
+ - `failure_reason TEXT` — written explicitly by runtime adapter at terminal transitions
98
+
99
+ **Schema additions to `schedules`:**
100
+ - `max_run_duration_sec INTEGER DEFAULT NULL` — NULL inherits global default (1200s = 20 min)
101
+
102
+ **Reaper query:**
103
+
104
+ ```sql
105
+ SELECT id FROM tasks
106
+ WHERE status = 'running'
107
+ AND source_type = 'scheduled'
108
+ AND lease_expires_at < :now;
109
+ ```
110
+
111
+ For each expired task: call `abortController.abort()` via the `RunningExecution` map at `execution-manager.ts:5`, then `UPDATE tasks SET status='failed', failure_reason='lease_expired', completed_at=:now WHERE id=:taskId`. The slot is freed automatically by the status change (the claim SQL counts `status='running'` rows).
112
+
113
+ **Runtime adapter change:** thread `AbortSignal` from `RunningExecution.abortController` into the SDK `query()` options in the scheduled runtime adapter. Chat already does this at `src/lib/chat/engine.ts:300`; mirror the pattern.
114
+
115
+ ### A.3 Turn budget
116
+
117
+ **Schema addition to `schedules`:**
118
+
119
+ ```sql
120
+ ALTER TABLE schedules ADD COLUMN max_turns INTEGER DEFAULT NULL
121
+ CHECK (max_turns IS NULL OR (max_turns BETWEEN 1 AND 10000));
122
+ ALTER TABLE schedules ADD COLUMN max_turns_set_at TIMESTAMP;
123
+ ALTER TABLE schedules ADD COLUMN turn_budget_breach_streak INTEGER DEFAULT 0;
124
+ ```
125
+
126
+ NULL `max_turns` inherits from the global `MAX_TURNS` setting already read by `buildTurnBudgetHeader()`.
127
+
128
+ **Enforcement:** pass `maxTurns` to SDK `query()` options in the scheduled runtime adapter. The SDK hard-stops at the limit (same mechanism chat uses at engine.ts:299).
129
+
130
+ **On breach — footgun-mitigated flow:**
131
+
132
+ 1. **First-breach grace:** if `tasks.completed_at < schedules.max_turns_set_at + 2 × cron_interval`, the breach is logged only — it does not increment `turn_budget_breach_streak`. Protects users from tripping auto-pause on the very first firing after a config edit.
133
+ 2. **Drift warning at streak ≥ 2:** send a notification: "Schedule X used {lastTurnCount}/{maxTurns} agent steps. Consider raising the budget or reducing the prompt scope."
134
+ 3. **Auto-pause at streak ≥ 5** (higher than generic failure's 3): "Schedule X paused — 5 consecutive runs exceeded the {N}-step budget. Budget may be too low; typical runs use {avgTurnsPerFiring} steps."
135
+
136
+ The separate `turn_budget_breach_streak` counter is critical: conflating budget breaches with generic failures would let a user trip auto-pause in 3 firings by setting `maxTurns=10` on a schedule that averages 40.
137
+
138
+ **Explicit `failure_reason` writes:** the runtime adapter writes `failure_reason` directly at terminal transitions (`turn_limit_exceeded`, `lease_expired`, `sdk_error`, `aborted`, etc.). `detectFailureReason()` at scheduler.ts:122 remains as a fallback for legacy or unknown cases but is no longer the primary classifier. String-matching is fragile.
139
+
140
+ ### A.4 UI: rename + tooltips + calibration hint (PM recommendation)
141
+
142
+ - **Schedule form field rename:** "Max turns per firing" → **"Max agent steps per run"**. Keep `maxTurns` in code/API.
143
+ - **Tooltip on field:** "One step = one agent action (message, tool call, or sub-response). Most schedules use 50–500 steps; heavy research runs 2,000+."
144
+ - **Tooltip on prompt field:** "Note: writing 'MAX N turns' in your prompt is a hint to the model, not a runtime limit. Use Max agent steps below to enforce a budget."
145
+ - **Inline calibration hint:** when a user types a prompt, show "Schedules like this average ~{N} steps" derived from `avgTurnsPerFiring` across schedules with similar characteristics. Cheap — data already exists.
146
+
147
+ ### A.5 Collision warning (PR1b — minimal, restored to scope)
148
+
149
+ **Trigger:** `POST /api/schedules` and `PUT /api/schedules/:id`.
150
+
151
+ **Check:** expand the incoming `cronExpression` over the next 24h using the existing cron parser at `src/lib/schedules/interval-parser.ts`. Bucket fire times by 5-minute windows. Compare against all other active schedules in the same project. If any 5-min bucket has ≥2 schedules whose combined `avgTurnsPerFiring` exceeds 3000, return a warning.
152
+
153
+ **Response shape:** `200 OK` with the saved schedule plus:
154
+
155
+ ```json
156
+ {
157
+ "warnings": [{
158
+ "type": "cron_collision",
159
+ "overlappingSchedules": ["Price Monitor", "News Sentinel"],
160
+ "nextCollisionAt": "2026-04-09T12:20:00Z",
161
+ "estimatedConcurrentSteps": 6878
162
+ }]
163
+ }
164
+ ```
165
+
166
+ **UI:** the create/edit sheet renders a dismissible amber banner inside `SheetContent` (with `px-6 pb-6` per the recurring shadcn Sheet padding issue logged in MEMORY.md). Copy: "This schedule overlaps with Price Monitor and News Sentinel at {time}. They'll take turns; the last to run may be delayed ~2–4 min." One action: "[Save anyway]".
167
+
168
+ **Non-blocking:** the warning does not prevent save. It informs.
169
+
170
+ **Deferred:** auto-stagger endpoint, 48h forecast, collision-forecast notifications.
171
+
172
+ ### A.6 Time-series metrics (AR3b)
173
+
174
+ New table `schedule_firing_metrics` for evidence-based cap tuning and post-hoc incident forensics. An exponential moving average (EMA) kept on a single row erases the per-firing signal we need.
175
+
176
+ ```sql
177
+ CREATE TABLE schedule_firing_metrics (
178
+ id TEXT PRIMARY KEY,
179
+ schedule_id TEXT NOT NULL REFERENCES schedules(id),
180
+ task_id TEXT REFERENCES tasks(id),
181
+ fired_at TIMESTAMP NOT NULL,
182
+ slot_claimed_at TIMESTAMP,
183
+ completed_at TIMESTAMP,
184
+ slot_wait_ms INTEGER, -- fired_at → slot_claimed_at
185
+ duration_ms INTEGER, -- slot_claimed_at → completed_at
186
+ turn_count INTEGER,
187
+ max_turns_at_firing INTEGER,
188
+ event_loop_lag_ms REAL, -- perf_hooks.monitorEventLoopDelay p99 during run
189
+ peak_rss_mb INTEGER,
190
+ chat_streams_active INTEGER, -- count at slot claim
191
+ concurrent_schedules INTEGER, -- count at slot claim
192
+ failure_reason TEXT
193
+ );
194
+ CREATE INDEX idx_sfm_schedule_time ON schedule_firing_metrics(schedule_id, fired_at DESC);
195
+ ```
196
+
197
+ Insert a row in `recordFiringMetrics()` at scheduler.ts:419, on every completion (success or failure).
198
+
199
+ **Critical:** add matching bootstrap `CREATE TABLE IF NOT EXISTS` in `src/lib/db/index.ts` (per CLAUDE.md's recurring-issue note about bootstrap vs migrations). Also add `db.delete()` call in `src/lib/data/clear.ts` in FK-safe order (delete from `schedule_firing_metrics` before `schedules`).
200
+
201
+ ### A.7 Data model — consolidated
202
+
203
+ ```sql
204
+ -- schedules table
205
+ ALTER TABLE schedules ADD COLUMN max_turns INTEGER DEFAULT NULL
206
+ CHECK (max_turns IS NULL OR (max_turns BETWEEN 1 AND 10000));
207
+ ALTER TABLE schedules ADD COLUMN max_turns_set_at TIMESTAMP;
208
+ ALTER TABLE schedules ADD COLUMN max_run_duration_sec INTEGER DEFAULT NULL;
209
+ ALTER TABLE schedules ADD COLUMN turn_budget_breach_streak INTEGER DEFAULT 0;
210
+
211
+ -- tasks table
212
+ ALTER TABLE tasks ADD COLUMN slot_claimed_at TIMESTAMP;
213
+ ALTER TABLE tasks ADD COLUMN lease_expires_at TIMESTAMP;
214
+ ALTER TABLE tasks ADD COLUMN failure_reason TEXT;
215
+ CREATE INDEX idx_tasks_slot_running
216
+ ON tasks(status, source_type, lease_expires_at)
217
+ WHERE status = 'running';
218
+
219
+ -- schedule_firing_metrics (new)
220
+ -- [see A.6]
221
+ ```
222
+
223
+ **Settings (existing key-value table, no schema change):**
224
+ - `schedule.maxConcurrent` default: `2`
225
+ - `schedule.maxRunDurationSec` default: `1200`
226
+ - `schedule.chatPressureDelaySec` default: `30`
227
+
228
+ ### A.8 API surface
229
+
230
+ | Method | Path | Change |
231
+ |---|---|---|
232
+ | POST | `/api/schedules` | Response includes `warnings: [...]` from collision check |
233
+ | PUT | `/api/schedules/:id` | Same |
234
+ | POST | `/api/schedules/:id/execute` | Honors global cap by default; `?force=true` bypasses with audit log; returns `429 {error, slotEtaSec}` when full |
235
+ | GET | `/api/schedules/:id/metrics` | Returns recent `schedule_firing_metrics` rows for tuning/debug |
236
+
237
+ No new endpoints for orchestration proper. `/api/swarm-status` is defined in Spec C.
238
+
239
+ ## Data flow — scheduler tick + slot claim
240
+
241
+ ```
242
+ tickScheduler() (every 60s)
243
+ |
244
+ v
245
+ ┌──────────────────────────────┐
246
+ │ reapExpiredLeases() │ ── abort via RunningExecution
247
+ │ UPDATE tasks SET status= │ + mark lease_expired
248
+ │ 'failed' WHERE status= │
249
+ │ 'running' AND │
250
+ │ lease_expires_at < now() │
251
+ └──────────────┬───────────────┘
252
+ |
253
+ v
254
+ ┌──────────────────────────────┐
255
+ │ findDueSchedules() │
256
+ │ SELECT ... WHERE │
257
+ │ next_fire_at <= now() │
258
+ └──────────────┬───────────────┘
259
+ |
260
+ v
261
+ ┌──────────────────────────────┐
262
+ │ applyChatPressure() [AR1b] │
263
+ │ if activeChatStreams > 0: │
264
+ │ push nextFireAt +30s, │
265
+ │ skip this tick │
266
+ └──────────────┬───────────────┘
267
+ |
268
+ v
269
+ ┌──────────────────────────────┐
270
+ │ for each due schedule: │
271
+ │ insertQueuedTask() │
272
+ │ atomicSlotClaim() ◄──────┐│
273
+ │ UPDATE tasks SET ││
274
+ │ status='running', ││ (single SQL, guarantees cap)
275
+ │ slot_claimed_at=now(),││
276
+ │ lease_expires_at=... ││
277
+ │ WHERE id=? AND ││
278
+ │ status='queued' AND ││
279
+ │ (SELECT COUNT(*) ││
280
+ │ FROM tasks WHERE ││
281
+ │ status='running' ││
282
+ │ AND source_type= ││
283
+ │ 'scheduled') < :cap ││
284
+ │ if changes=0: ││
285
+ │ leave in queued, ││
286
+ │ drain will retry ││
287
+ │ if changes=1: ││
288
+ │ executeTaskWithRuntime ││
289
+ │ .then(recordMetrics) ││
290
+ │ .then(drainQueue) ─────┘│
291
+ └──────────────────────────────┘
292
+ ```
293
+
294
+ ## Error & Rescue Registry
295
+
296
+ | Error | Trigger | Impact | Rescue |
297
+ |---|---|---|---|
298
+ | Two ticks race on slot claim | `drainQueue()` concurrent with `tickScheduler()` | Cap breached (3 running when cap=2) | Atomic single-SQL claim (A.1); `changes=0` means lost the race — leave in queued |
299
+ | SDK hangs mid-run | Upstream Opus stall, network partition | Slot held forever, cap permanently reduced | Lease expiry + reaper aborts via AbortController after `max_run_duration_sec` |
300
+ | Reaper fails to fire | `tickScheduler` crashes or paused | Expired leases accumulate | Reaper is idempotent; runs at next tick. If tickScheduler itself is down, `bootstrapNextFireTimes` at startup repairs state |
301
+ | User sets `maxTurns=10` on schedule averaging 40 | Config footgun | Auto-pause in 3 firings (under naive design) | First-breach grace + separate `turn_budget_breach_streak` counter with threshold 5 + drift warning at streak 2 |
302
+ | `detectFailureReason` misclassifies | SDK error message format changes | Wrong streak incremented | Runtime adapter writes explicit `failure_reason` at terminal transitions; string-match is fallback only |
303
+ | Manual execute spammed | User double-clicks Run now 5× | Could exceed cap under naive design | Manual honors cap by default; `429 + slotEtaSec`; explicit `?force=true` for deliberate bypass with audit log |
304
+ | Chat pressure delay causes schedule to miss a cron interval | User has `* * * * *` cron, chat is streaming for 45s | Minute skipped | 30s delay is one-shot per tick; next tick re-evaluates. Document in UI help text |
305
+ | `schedule_firing_metrics` table unbounded growth | High-frequency schedules over months | Disk bloat | Periodic cleanup: `DELETE WHERE fired_at < now() - 30 days`. Deferred to follow-up if general maintenance sweep doesn't exist yet |
306
+ | Clock skew between scheduler and DB | Container restart, NTP drift | `lease_expires_at` mismatches | Use SQLite `CURRENT_TIMESTAMP` consistently; avoid mixing JS `Date.now()` |
307
+ | Collision check false positive under chat pressure | A delayed schedule shifts into a bucket that was previously clear | Confusing warning | Collision check runs against *nominal* cron expansion, not chat-pressure-adjusted times. Warning remains deterministic |
308
+ | Cap env var typo | User sets `SCHEDULE_MAX_CONCURRENT=abc` | Silent fallback to default | Parse with `parseInt`, log warning on NaN, use default; add settings-page validation UI |
309
+ | Lease expiry fires during a legitimate long run | Schedule takes 25 min, default lease 20 min | Run aborted falsely | Per-schedule `max_run_duration_sec` override; drift warning at 80% of lease |
310
+
311
+ ## Telemetry / 48h post-ship watchlist
312
+
313
+ 1. `COUNT(*) FROM chat_messages WHERE content='' AND status IN ('streaming','pending')` — must be 0 (Spec B success signal)
314
+ 2. `schedule_firing_metrics.slot_wait_ms` — p50/p95 per schedule. If p95 > 300s, cap too tight
315
+ 3. `schedules.failure_streak >= 3` count — auto-pause rate vs baseline
316
+ 4. `schedules.turn_budget_breach_streak > 0` count — tracks `maxTurns` misconfig rate
317
+ 5. `schedule_firing_metrics.failure_reason = 'lease_expired'` count — indicates timeouts too tight or SDK hangs
318
+ 6. Chat SSE completion rate (`status='complete'` / total) — must stay at or above pre-incident baseline
319
+ 7. `schedule_firing_metrics.event_loop_lag_ms` p99 — validates/falsifies cap=2; if always <50ms, raise to 3
320
+ 8. Collision-warning acceptance rate (how often users save despite warning)
321
+ 9. Manual `?force=true` bypass frequency — should be near-zero; alert if >5/week
322
+
323
+ ## TDRs to capture
324
+
325
+ Create in `.claude/skills/architect/references/`:
326
+
327
+ 1. **TDR: Concurrency slot claim is a single SQL statement, not check-then-act.** References the 2026-04-08 incident.
328
+ 2. **TDR: Scheduler cap is static and evidence-based.** Changes require re-running the load test.
329
+ 3. **TDR: Auto-pause streak counts per failure class.** Forces future failure modes to reason about whether they feed the generic streak or a dedicated one.
330
+ 4. **TDR: Manual execute honors the global cap by default.** Operational controls prefer safety over convenience.
331
+ 5. **TDR: All lock holders carry lease expiries + reapers.** Generalize beyond concurrency slots.
332
+ 6. **TDR: Chat and scheduled agents compete for the same Node event loop.** Architectural constraint; future features must not starve chat.
333
+
334
+ ## Tests
335
+
336
+ 1. **Race-condition test:** spawn 10 concurrent `fireSchedule` calls against cap=3; assert exactly 3 slots claimed, no breach.
337
+ 2. **Lease reaper test:** set tiny lease, trigger run, wait, assert reaper marks `failed`/`lease_expired` and frees slot.
338
+ 3. **Turn budget enforcement test:** `maxTurns=5`, prompt that needs 50 turns, assert SDK hard-stops and `turn_budget_breach_streak` increments.
339
+ 4. **First-breach grace test:** set new `maxTurns`, first firing breaches, assert streak stays at 0.
340
+ 5. **Manual execute cap test:** fill cap, POST execute, assert 429 + `slotEtaSec`. POST with `?force=true`, assert 200 + audit log entry.
341
+ 6. **Chat pressure test:** start fake chat stream, trigger scheduler tick, assert due schedules get `next_fire_at` pushed forward 30s.
342
+ 7. **Collision warning test:** create overlapping cron, assert `warnings` array populated.
343
+ 8. **Load test (validation of cap):** 5 schedules × 500-turn dummy prompts, measure chat SSE P99 first-token with cap=2. Assert P99 < 2s.
344
+ 9. **Incident reproduction:** fire 5 real schedules → queue of 3, 2 wait → chat message sent → chat SSE stays responsive → no `content=''` row.
345
+
346
+ ## Files touched
347
+
348
+ ### Modify
349
+ - `src/lib/schedules/scheduler.ts` — tick loop, drain queue, reaper, firing metrics recording, chat pressure application
350
+ - `src/lib/agents/execution-manager.ts` — abortController surface (already exists, just wire)
351
+ - `src/lib/agents/claude-agent.ts` — runtime adapter turn budget + failure reason writes
352
+ - `src/lib/db/schema.ts` — new columns + table
353
+ - `src/lib/db/index.ts` — bootstrap CREATE TABLE IF NOT EXISTS for `schedule_firing_metrics`
354
+ - `src/lib/data/clear.ts` — add delete for new table (FK-ordered)
355
+ - `src/lib/schedules/interval-parser.ts` — reuse for collision check
356
+ - `src/app/api/schedules/route.ts` + `[id]/route.ts` — collision warning response shape
357
+ - `src/app/api/schedules/[id]/execute/route.ts` — cap check + force bypass
358
+ - `src/components/schedules/schedule-form.tsx` — new "Max agent steps" field + rename + tooltip + calibration hint
359
+
360
+ ### New
361
+ - `src/lib/schedules/slot-claim.ts` — atomic primitive
362
+ - `src/lib/chat/active-streams.ts` — in-memory set for chat pressure signal
363
+ - `src/lib/schedules/collision-check.ts` — 24h cron expansion + bucket compare
364
+ - `.claude/skills/architect/references/tdr-*.md` — 6 new TDRs
365
+
366
+ ## Ship plan
367
+
368
+ - **Feature flag:** `SCHEDULE_MAX_CONCURRENT` env var, default 2. Raise it via this override only after telemetry has been reviewed.
369
+ - **Parallel with Spec B** — zero shared code; Spec B is a separate commit/PR.
370
+ - **After 1 week of telemetry:** raise cap from 2 → 3 if metrics healthy.
371
+ - **Update `features/roadmap.md`** post-ship with a "Schedule Orchestration Resilience" subsection including A/B/C completed entries plus future `schedule-collision-prevention` and `schedule-forecasting` entries.
@@ -0,0 +1,213 @@
1
+ # Spec C — Swarm Visibility
2
+
3
+ **Status:** Approved
4
+ **Created:** 2026-04-08
5
+ **Scope mode:** REDUCE
6
+ **Related:** [Schedule Orchestration (Spec A)](./2026-04-08-schedule-orchestration-design.md), [Chat SSE Resilience Hotfix (Spec B)](./2026-04-08-chat-sse-resilience-hotfix-design.md)
7
+
8
+ ## Context
9
+
10
+ Spec A introduces a global concurrency cap on scheduled agents. Power users running many schedules will observe queueing delays that they previously did not. Without a visible signal for "how busy is the swarm right now," they'll experience unexplained schedule lateness and file tickets.
11
+
12
+ This spec adds a minimal, always-visible swarm-load signal to the app chrome, a saturation-only pre-chat banner, and small enhancements to the schedule list. It is deliberately small — REDUCE mode — to avoid overbuilding visibility infrastructure before we know what users actually need.
13
+
14
+ ## Goals
15
+
16
+ 1. Give users a passive, always-visible signal of swarm load state (quiet / working / saturated).
17
+ 2. Warn users *before* they send a chat message if the swarm is at capacity and their chat will queue behind running agents.
18
+ 3. Make the new concurrency-driven queueing visible on the schedule list.
19
+ 4. Rename "turns" to "agent steps" everywhere user-facing, to close the semantic gap between prompt-level "MAX N turns" hints and runtime-counted turns.
20
+
21
+ ## Non-goals (NOT in scope)
22
+
23
+ - **Activity feed route** (`/swarm/activity`) with event log, filters, pagination — future spec "Swarm Activity Feed"
24
+ - **`swarm_snapshots` time-series table** — future spec "Swarm Activity Feed"
25
+ - **Proactive push notifications for overload** — the indicator is always visible, no push needed
26
+ - **Bulk "pause all schedules" action** — users can pause individual schedules from existing pages
27
+ - **Efficiency scoring rings** / turn drift detection alerts — future spec "Schedule Observability"
28
+ - **New `busyness` StatusChip family** — rejected by design review; use custom primitive
29
+ - **Pre-chat banner in `working` state** — only render in `saturated` state to avoid anxiety copy
30
+ - **Traffic-light turn-budget badge** (`lastTurnCount / maxTurns` with color gradient) — leaks policy as warning on normal operation
31
+ - **Popover / Sheet for running schedules list** — hover tooltip + deep link to existing route is sufficient
32
+
33
+ ## Design
34
+
35
+ ### C.1 `GET /api/swarm-status` endpoint
36
+
37
+ New route at `src/app/api/swarm-status/route.ts`. Reads:
38
+
39
+ - `getAllExecutions()` from `src/lib/agents/execution-manager.ts:60` — filters to `sourceType === 'scheduled'` for running count and schedule metadata
40
+ - A count query on `tasks` table for `status='queued' AND source_type='scheduled'` — queued count
41
+ - `activeChatStreams.size` from Spec A's `src/lib/chat/active-streams.ts`
42
+
43
+ **Response shape:**
44
+
45
+ ```json
46
+ {
47
+ "runningScheduled": [
48
+ {
49
+ "scheduleId": "abc",
50
+ "name": "Portfolio Coach",
51
+ "startedAt": "2026-04-08T21:00:03Z",
52
+ "elapsedSec": 42,
53
+ "maxTurns": 500,
54
+ "currentTurns": 127
55
+ }
56
+ ],
57
+ "queuedScheduled": [
58
+ { "scheduleId": "def", "name": "News Sentinel", "queuedAt": "2026-04-08T21:00:14Z", "position": 1 }
59
+ ],
60
+ "chatStreamsActive": 0,
61
+ "loadState": "working"
62
+ }
63
+ ```
64
+
65
+ `loadState` is computed server-side:
66
+ - `quiet` — `runningScheduled.length === 0`
67
+ - `working` — `runningScheduled.length >= 1 && queuedScheduled.length === 0`
68
+ - `saturated` — `queuedScheduled.length > 0` (at-or-above cap)
69
+
70
+ No new DB state — the endpoint reads from in-memory execution map + one SQL count.
71
+
72
+ ### C.2 `<SwarmLoadIndicator />` component
73
+
74
+ **Placement:** top of `<SidebarContent>` in `src/components/shared/app-sidebar.tsx`, above the first NavGroup. Not the footer — per design review, the footer is already dense (UpgradeBadge, WorkspaceIndicator, AuthStatusDot, TrustTierBadge, theme toggle) and the sidebar-as-chrome pattern means aggregate system state belongs at the top, where nav groups live.
75
+
76
+ **Visual:** custom primitive (NOT a StatusChip family — semantic mismatch; StatusChip is "one entity, one state", swarm load is "aggregate cardinality"). Reuses badge tokens and the pulse animation pattern but renders as a thin one-line row.
77
+
78
+ **Three states** (not four — red is reserved for actual failures, not backpressure):
79
+
80
+ | State | Condition | Token | Label |
81
+ |---|---|---|---|
82
+ | Quiet | `loadState === 'quiet'` | `text-muted-foreground` | `Swarm quiet` |
83
+ | Working | `loadState === 'working'` | `text-status-running` (indigo, pulse) | `● 2 running` |
84
+ | Saturated | `loadState === 'saturated'` | `text-status-warning` (amber, pulse) | `● 3 running · 1 queued` |
85
+
86
+ **Hover tooltip:** lists up to 3 running schedules inline with elapsed time. Delivers ~80% of the "activity feed" value with zero new overlay state:
87
+
88
+ ```
89
+ Swarm · 2 running
90
+ ─────────────────
91
+ • portfolio-coach 2m
92
+ • launch-copy-chief 41s
93
+ ```
94
+
95
+ If there are more than 3 running, append `• +N more`.
96
+
97
+ **Click behavior:** the indicator is a `<Link>` to `/schedules?status=running` — deep-link to the existing schedules page with a filter. No popover, no sheet, no new overlay pattern.
98
+
99
+ **Polling:** every 8s via new `usePolling(url, intervalMs)` hook (C.5). Shared state is used by both the indicator and the pre-chat banner so there is no double-fetch.
100
+
101
+ **Accessibility:** `aria-live="polite"`, tooltip keyboard-focusable, text contrast meets Calm Ops baseline.
102
+
103
+ ### C.3 `<ChatOverloadBanner />` component
104
+
105
+ **Placement:** above `<ChatInput />` in `src/components/chat/chat-shell.tsx`.
106
+
107
+ **Render condition:** ONLY when `loadState === 'saturated'` (queue depth > 0). Not `working`. Anxiety copy on normal operation violates Calm Ops tone — "responses may be slower" with zero agency tells users a bad thing might happen and gives them no action.
108
+
109
+ **Visual:** surface-2 bordered banner, `rounded-lg`, amber accent matching the indicator's saturated state.
110
+
111
+ **Copy:** `"Swarm at capacity — your chat will queue behind {N} running agents."` where N is `runningScheduled.length`. One action: `[View Activity]` links to `/schedules?status=running`.
112
+
113
+ **Dismissal:** per conversation, stored in `sessionStorage` keyed by conversation ID. Re-appears if load state flips back to `saturated` after being `working`.
114
+
115
+ ### C.4 Schedule list row enhancements
116
+
117
+ Modify `src/components/schedules/schedule-list.tsx` (or equivalent):
118
+
119
+ 1. **Queue-depth badge (PR2a):** if a schedule has queued firings waiting for a slot, render `+{N} queued` as an `outline` Badge next to the schedule name. Almost free — reuses existing badge component. Addresses the power-user scenario where 10 schedules fire in a 5-min window and #4-10 queue silently.
120
+
121
+ 2. **"Near step cap" outline badge:** rendered ONLY when `lastTurnCount / maxTurns >= 0.9`. No traffic-light gradient. Progressive disclosure — normal operation shows nothing. At ≥90%, shows a subtle outline badge: `Near step cap`.
122
+
123
+ ### C.5 `usePolling(url, intervalMs)` shared hook
124
+
125
+ New file `src/hooks/use-polling.ts`. Extracted from the pattern at `src/components/notifications/inbox-list.tsx:40-43`. Signature:
126
+
127
+ ```typescript
128
+ export function usePolling<T>(url: string, intervalMs: number): {
129
+ data: T | null;
130
+ error: Error | null;
131
+ loading: boolean;
132
+ };
133
+ ```
134
+
135
+ Fetches on mount, re-fetches every `intervalMs`. Handles unmount cleanup. Stable query key (URL) so multiple consumers of the same URL share state via module-level cache.
136
+
137
+ Used by: `<SwarmLoadIndicator />`, `<ChatOverloadBanner />`. Can be adopted by other components (inbox list, schedule detail sheet) in future cleanups.
138
+
139
+ ### C.6 UI rename: "turns" → "agent steps"
140
+
141
+ User-facing strings only. Keep `maxTurns` as the code/API identifier.
142
+
143
+ - `schedule-form.tsx` field label: "Max turns per firing" → "Max agent steps per run"
144
+ - Tooltip on field: "One step = one agent action (message, tool call, or sub-response). Most schedules use 50–500 steps; heavy research runs 2,000+."
145
+ - Tooltip on prompt field: "Note: writing 'MAX N turns' in your prompt is a hint to the model, not a runtime limit. Use Max agent steps below to enforce a budget."
146
+ - Inline calibration hint after prompt entry: "Schedules like this average ~{N} steps" (derived from `avgTurnsPerFiring` on similar schedules).
147
+ - Schedule list "Near step cap" badge (C.4)
148
+ - Notifications: "Schedule X used 812 / 800 agent steps" (formerly "turns")
149
+
150
+ ## Calm Ops compliance checklist
151
+
152
+ - [x] No backdrop-filter, rgba, glass, gradient
153
+ - [x] Running state uses `status-running` (indigo), NOT green (green is `status-completed`)
154
+ - [x] Saturated state uses `status-warning` (amber), NOT red (red is `status-failed`)
155
+ - [x] No new StatusChip family added (use custom `SwarmLoadIndicator`)
156
+ - [x] No popover/sheet overlay — tooltip + deep link only
157
+ - [x] Banner only renders when actionable (saturated state), not `working`
158
+ - [x] All radii ≤ `rounded-xl`
159
+ - [x] Polling pattern reuses existing template (`inbox-list.tsx`)
160
+ - [x] Any Sheet usage (N/A here) would need `px-6 pb-6` body padding
161
+
162
+ ## Tests
163
+
164
+ ### Unit / component
165
+ 1. `<SwarmLoadIndicator />` renders correct state (Quiet / Working / Saturated) for each input
166
+ 2. Tooltip shows running schedules; click navigates to `/schedules?status=running`
167
+ 3. `<ChatOverloadBanner />` renders ONLY in `saturated` state
168
+ 4. Dismissal persists in sessionStorage across re-mounts
169
+ 5. Queue-depth badge renders when schedule has queued firings
170
+ 6. "Near step cap" badge renders only at ≥90% ratio
171
+ 7. `usePolling` hook fetches on mount, re-fetches on interval, cleans up on unmount
172
+ 8. Multiple consumers of same URL share state (no duplicate fetches)
173
+
174
+ ### API
175
+ 9. `GET /api/swarm-status` returns correct shape with running/queued/chat counts
176
+ 10. `loadState` computed correctly at boundary conditions (0 running, cap-1 running, cap running, queue>0)
177
+
178
+ ### Accessibility
179
+ 11. Indicator has `aria-live="polite"`
180
+ 12. Tooltip is keyboard-focusable
181
+ 13. Contrast meets Calm Ops baseline (manual check)
182
+
183
+ ### Visual regression
184
+ 14. Screenshot sidebar in all 3 states; compare to Calm Ops tokens
185
+
186
+ ## Files touched
187
+
188
+ ### New
189
+ - `src/app/api/swarm-status/route.ts`
190
+ - `src/hooks/use-polling.ts`
191
+ - `src/components/shared/swarm-load-indicator.tsx`
192
+ - `src/components/chat/chat-overload-banner.tsx`
193
+
194
+ ### Modify
195
+ - `src/components/shared/app-sidebar.tsx` — mount `<SwarmLoadIndicator />` at top of SidebarContent
196
+ - `src/components/chat/chat-shell.tsx` — mount `<ChatOverloadBanner />` above ChatInput
197
+ - `src/components/schedules/schedule-list.tsx` (or equivalent) — queue-depth badge + near-cap badge
198
+ - `src/components/schedules/schedule-form.tsx` — rename + tooltips + calibration hint
199
+
200
+ ### Not modified (avoiding pollution)
201
+ - `src/lib/constants/status-families.ts` — NO new `busyness` family per design review
202
+
203
+ ## Dependencies on Spec A
204
+
205
+ - `GET /api/swarm-status` (polled by `<SwarmLoadIndicator />`) reads `chatStreamsActive` from the in-memory `activeChatStreams` set created by Spec A (`src/lib/chat/active-streams.ts`). C can scaffold mid-A after A's interface is pinned.
206
+ - Queue-depth badge reads `tasks.status='queued' AND source_type='scheduled'` which exists today but is populated meaningfully only after Spec A's concurrency limiter lands.
207
+ - "Near step cap" badge reads `schedules.max_turns` column added by Spec A.
208
+
209
+ ## Ship plan
210
+
211
+ - No feature flag — UI is additive and safe.
212
+ - Scaffolding (API endpoint, hook, indicator component) can begin mid-A.
213
+ - Full ship after Spec A stabilizes and A's data-model migrations have landed.
package/next.config.mjs CHANGED
@@ -2,6 +2,7 @@
2
2
  const nextConfig = {
3
3
  serverExternalPackages: ["better-sqlite3", "pdf-parse", "pdfjs-dist"],
4
4
  devIndicators: false,
5
+ allowedDevOrigins: ["127.0.0.1"],
5
6
  };
6
7
 
7
8
  export default nextConfig;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stagent",
3
- "version": "0.9.6",
3
+ "version": "0.11.0",
4
4
  "description": "AI Business Operating System — run your business with AI agents. Local-first, multi-provider, governed.",
5
5
  "keywords": [
6
6
  "ai",
@@ -67,7 +67,6 @@
67
67
  "@dnd-kit/sortable": "^10.0.0",
68
68
  "@dnd-kit/utilities": "^3.2.2",
69
69
  "@hookform/resolvers": "^5.2.2",
70
- "@supabase/supabase-js": "^2.101.1",
71
70
  "@tailwindcss/postcss": "^4",
72
71
  "@tailwindcss/typography": "^0.5",
73
72
  "@tanstack/react-table": "^8.21.3",
@@ -97,6 +96,7 @@
97
96
  "react-markdown": "^10.1.0",
98
97
  "recharts": "^3.8.1",
99
98
  "remark-gfm": "^4.0.1",
99
+ "semver": "^7.7.4",
100
100
  "sharp": "^0.34.5",
101
101
  "smol-toml": "^1.6.1",
102
102
  "sonner": "^2.0.7",
@@ -116,6 +116,7 @@
116
116
  "@types/js-yaml": "^4.0.9",
117
117
  "@types/react": "^19",
118
118
  "@types/react-dom": "^19",
119
+ "@types/semver": "^7.7.1",
119
120
  "@types/sharp": "^0.31.1",
120
121
  "@vitejs/plugin-react": "^5.1.4",
121
122
  "@vitest/coverage-v8": "^4.0.18",
@@ -0,0 +1,15 @@
1
+ import { afterEach, describe, expect, it, vi } from "vitest";
2
+
3
+ describe("instrumentation register()", () => {
4
+ afterEach(() => {
5
+ vi.unstubAllEnvs();
6
+ });
7
+
8
+ it("ensureInstance is importable from the bootstrap module and returns a skipped result in dev mode", async () => {
9
+ vi.stubEnv("STAGENT_DEV_MODE", "true");
10
+ const { ensureInstance } = await import("@/lib/instance/bootstrap");
11
+ const result = await ensureInstance();
12
+ expect(result.skipped).toBe("dev_mode_env");
13
+ expect(result.steps).toEqual([]);
14
+ });
15
+ });