@swarmclawai/swarmclaw 1.5.53 → 1.5.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +17 -3
  2. package/package.json +2 -2
  3. package/src/app/api/agents/[id]/route.ts +14 -2
  4. package/src/app/api/agents/agents-route.test.ts +65 -1
  5. package/src/app/api/chatrooms/[id]/chat/route.ts +5 -3
  6. package/src/app/api/chatrooms/route.ts +3 -0
  7. package/src/app/api/missions/[id]/control/route.ts +21 -0
  8. package/src/app/api/missions/templates/[id]/instantiate/route.ts +64 -0
  9. package/src/app/api/missions/templates/route.ts +8 -0
  10. package/src/app/api/tasks/[id]/route.ts +11 -1
  11. package/src/app/api/tasks/tasks-route.test.ts +81 -0
  12. package/src/app/api/webhooks/[id]/route.ts +18 -15
  13. package/src/app/missions/page.tsx +135 -22
  14. package/src/cli/index.js +2 -0
  15. package/src/cli/spec.js +2 -0
  16. package/src/components/missions/mission-edit-sheet.tsx +319 -0
  17. package/src/components/missions/mission-template-gallery.tsx +113 -0
  18. package/src/components/missions/mission-template-install-dialog.tsx +283 -0
  19. package/src/lib/server/agents/agent-service.ts +10 -2
  20. package/src/lib/server/agents/main-agent-loop-advanced.test.ts +36 -0
  21. package/src/lib/server/agents/main-agent-loop.ts +111 -4
  22. package/src/lib/server/chat-execution/chat-turn-preparation.test.ts +253 -0
  23. package/src/lib/server/chat-execution/chat-turn-preparation.ts +46 -26
  24. package/src/lib/server/chat-execution/message-classifier.ts +11 -7
  25. package/src/lib/server/chat-execution/post-stream-finalization.test.ts +85 -0
  26. package/src/lib/server/chat-execution/post-stream-finalization.ts +41 -16
  27. package/src/lib/server/chat-execution/response-completeness.test.ts +2 -1
  28. package/src/lib/server/chat-execution/response-completeness.ts +11 -3
  29. package/src/lib/server/chatrooms/chatroom-agent-signals.test.ts +54 -0
  30. package/src/lib/server/chatrooms/chatroom-agent-signals.ts +105 -9
  31. package/src/lib/server/chats/chat-session-service.ts +11 -0
  32. package/src/lib/server/connectors/email.test.ts +64 -0
  33. package/src/lib/server/connectors/email.ts +35 -6
  34. package/src/lib/server/connectors/response-media.ts +1 -0
  35. package/src/lib/server/daemon/daemon-runtime.ts +31 -19
  36. package/src/lib/server/memory/memory-db.test.ts +8 -0
  37. package/src/lib/server/memory/memory-db.ts +1 -1
  38. package/src/lib/server/missions/mission-service.ts +47 -1
  39. package/src/lib/server/missions/mission-templates.test.ts +208 -0
  40. package/src/lib/server/missions/mission-templates.ts +186 -0
  41. package/src/lib/server/runtime/session-run-manager/drain.ts +16 -0
  42. package/src/lib/server/storage-normalization.ts +6 -0
  43. package/src/lib/server/storage.ts +1 -1
  44. package/src/lib/server/tasks/task-validation.test.ts +30 -0
  45. package/src/lib/server/tasks/task-validation.ts +21 -2
  46. package/src/lib/server/working-state/normalization.ts +5 -1
  47. package/src/lib/validation/schemas.ts +40 -0
  48. package/src/types/mission.ts +27 -0
@@ -0,0 +1,253 @@
1
+ import assert from 'node:assert/strict'
2
+ import { describe, it } from 'node:test'
3
+ import type { Agent, Session } from '@/types'
4
+ import type { ResolvedAgentRoute } from '@/lib/server/agents/agent-runtime-config'
5
+ import { applyAgentSyncToSession } from './chat-turn-preparation'
6
+
7
+ const SESSION_ID = 'sess_test_1'
8
+ const AGENT_ID = 'agent_test_1'
9
+
10
+ function makeAgent(overrides: Partial<Agent> = {}): Agent {
11
+ return {
12
+ id: AGENT_ID,
13
+ name: 'Test Agent',
14
+ description: '',
15
+ systemPrompt: '',
16
+ provider: 'openai',
17
+ model: 'gpt-4o',
18
+ credentialId: 'cred_openai',
19
+ apiEndpoint: null,
20
+ ...overrides,
21
+ } as Agent
22
+ }
23
+
24
+ function makeSession(overrides: Partial<Session> = {}): Session {
25
+ return {
26
+ id: SESSION_ID,
27
+ name: 'Chat',
28
+ cwd: '/tmp',
29
+ user: 'test',
30
+ provider: 'openai',
31
+ model: 'gpt-4o',
32
+ claudeSessionId: null,
33
+ messages: [],
34
+ createdAt: 0,
35
+ lastActiveAt: 0,
36
+ agentId: AGENT_ID,
37
+ ...overrides,
38
+ } as Session
39
+ }
40
+
41
+ function makeRoute(overrides: Partial<ResolvedAgentRoute> = {}): ResolvedAgentRoute {
42
+ return {
43
+ id: 'route_1',
44
+ label: 'primary',
45
+ provider: 'openai',
46
+ model: 'gpt-4o',
47
+ credentialId: 'cred_openai',
48
+ fallbackCredentialIds: [],
49
+ apiEndpoint: null,
50
+ gatewayProfileId: null,
51
+ priority: 1,
52
+ source: 'agent',
53
+ ...overrides,
54
+ }
55
+ }
56
+
57
+ describe('applyAgentSyncToSession — user-selected provider/model preservation', () => {
58
+ it('session with matching provider/model inherits credentials from route (baseline)', () => {
59
+ const session = makeSession({ provider: 'openai', model: 'gpt-4o', credentialId: null })
60
+ const agent = makeAgent()
61
+ const route = makeRoute({ credentialId: 'cred_openai' })
62
+
63
+ const { session: updated, changed } = applyAgentSyncToSession(session, agent, route, SESSION_ID)
64
+
65
+ assert.equal(changed, true)
66
+ assert.equal(updated.provider, 'openai', 'provider unchanged')
67
+ assert.equal(updated.model, 'gpt-4o', 'model unchanged')
68
+ assert.equal(updated.credentialId, 'cred_openai', 'credential synced from route')
69
+ })
70
+
71
+ it('user-switched provider is preserved even when agent/route disagree', () => {
72
+ const session = makeSession({
73
+ provider: 'anthropic',
74
+ model: 'claude-opus-4-7',
75
+ credentialId: 'cred_anthropic',
76
+ apiEndpoint: 'https://api.anthropic.com',
77
+ })
78
+ const agent = makeAgent({ provider: 'openai', model: 'gpt-4o' })
79
+ const route = makeRoute({ provider: 'openai', model: 'gpt-4o', credentialId: 'cred_openai' })
80
+
81
+ const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)
82
+
83
+ assert.equal(updated.provider, 'anthropic', 'user-switched provider preserved')
84
+ assert.equal(updated.model, 'claude-opus-4-7', 'user-switched model preserved')
85
+ })
86
+
87
+ it('user-switched provider keeps its credentials (does not rewrite from route)', () => {
88
+ const session = makeSession({
89
+ provider: 'anthropic',
90
+ model: 'claude-opus-4-7',
91
+ credentialId: 'cred_anthropic',
92
+ fallbackCredentialIds: ['cred_anthropic_backup'],
93
+ apiEndpoint: 'https://api.anthropic.com/v1',
94
+ })
95
+ const agent = makeAgent({ provider: 'openai', credentialId: 'cred_openai' })
96
+ const route = makeRoute({
97
+ provider: 'openai',
98
+ credentialId: 'cred_openai',
99
+ fallbackCredentialIds: ['cred_openai_backup'],
100
+ apiEndpoint: 'https://api.openai.com/v1',
101
+ })
102
+
103
+ const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)
104
+
105
+ assert.equal(updated.credentialId, 'cred_anthropic', 'credentialId not rewritten')
106
+ assert.deepEqual(
107
+ updated.fallbackCredentialIds,
108
+ ['cred_anthropic_backup'],
109
+ 'fallbackCredentialIds not rewritten',
110
+ )
111
+ assert.equal(
112
+ updated.apiEndpoint,
113
+ 'https://api.anthropic.com/v1',
114
+ 'apiEndpoint not rewritten',
115
+ )
116
+ })
117
+
118
+ it('user-switched model (same provider) keeps its credentials and model', () => {
119
+ const session = makeSession({
120
+ provider: 'openai',
121
+ model: 'gpt-4o-mini',
122
+ credentialId: 'cred_openai_user',
123
+ })
124
+ const agent = makeAgent({ provider: 'openai', model: 'gpt-4o', credentialId: 'cred_openai' })
125
+ const route = makeRoute({
126
+ provider: 'openai',
127
+ model: 'gpt-4o',
128
+ credentialId: 'cred_openai',
129
+ })
130
+
131
+ const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)
132
+
133
+ assert.equal(updated.model, 'gpt-4o-mini', 'user-switched model preserved')
134
+ assert.equal(
135
+ updated.credentialId,
136
+ 'cred_openai',
137
+ 'same-provider credential does sync from route',
138
+ )
139
+ })
140
+
141
+ it('empty session.provider inherits from agent', () => {
142
+ const session = makeSession({
143
+ provider: '' as Session['provider'],
144
+ model: '',
145
+ credentialId: null,
146
+ })
147
+ const agent = makeAgent({ provider: 'openai', model: 'gpt-4o' })
148
+
149
+ const { session: updated, changed } = applyAgentSyncToSession(session, agent, null, SESSION_ID)
150
+
151
+ assert.equal(changed, true)
152
+ assert.equal(updated.provider, 'openai', 'provider initialized from agent')
153
+ assert.equal(updated.model, 'gpt-4o', 'model initialized from agent')
154
+ })
155
+
156
+ it('gatewayProfileId syncs from route regardless of provider switch', () => {
157
+ const session = makeSession({
158
+ provider: 'anthropic',
159
+ model: 'claude-opus-4-7',
160
+ gatewayProfileId: null,
161
+ })
162
+ const agent = makeAgent({ provider: 'openai' })
163
+ const route = makeRoute({ provider: 'openai', gatewayProfileId: 'gw_profile_1' })
164
+
165
+ const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)
166
+
167
+ assert.equal(
168
+ updated.gatewayProfileId,
169
+ 'gw_profile_1',
170
+ 'gatewayProfileId syncs from route even across provider switch',
171
+ )
172
+ })
173
+
174
+ it('no route: session inherits credentialId and apiEndpoint from agent when unset', () => {
175
+ const session = makeSession({
176
+ provider: 'openai',
177
+ model: 'gpt-4o',
178
+ credentialId: undefined,
179
+ apiEndpoint: undefined,
180
+ })
181
+ const agent = makeAgent({
182
+ provider: 'openai',
183
+ credentialId: 'cred_agent',
184
+ apiEndpoint: 'https://custom.openai.example/v1',
185
+ })
186
+
187
+ const { session: updated } = applyAgentSyncToSession(session, agent, null, SESSION_ID)
188
+
189
+ assert.equal(updated.credentialId, 'cred_agent', 'credentialId filled from agent')
190
+ assert.equal(
191
+ updated.apiEndpoint,
192
+ 'https://custom.openai.example/v1',
193
+ 'apiEndpoint filled from agent',
194
+ )
195
+ })
196
+
197
+ it('tool/extension selection syncs from agent when session has no parent', () => {
198
+ const session = makeSession({
199
+ parentSessionId: null,
200
+ tools: ['old_tool'],
201
+ extensions: ['old_ext'],
202
+ })
203
+ const agent = makeAgent({
204
+ tools: ['new_tool_a', 'new_tool_b'],
205
+ extensions: ['new_ext'],
206
+ })
207
+
208
+ const { session: updated, changed } = applyAgentSyncToSession(session, agent, null, SESSION_ID)
209
+
210
+ assert.equal(changed, true)
211
+ assert.deepEqual(updated.tools, ['new_tool_a', 'new_tool_b'])
212
+ assert.deepEqual(updated.extensions, ['new_ext'])
213
+ })
214
+
215
+ it('tool/extension selection does NOT sync on child (delegated) sessions', () => {
216
+ const session = makeSession({
217
+ parentSessionId: 'parent_session',
218
+ tools: ['child_tool'],
219
+ extensions: ['child_ext'],
220
+ })
221
+ const agent = makeAgent({
222
+ tools: ['agent_tool'],
223
+ extensions: ['agent_ext'],
224
+ })
225
+
226
+ const { session: updated } = applyAgentSyncToSession(session, agent, null, SESSION_ID)
227
+
228
+ assert.deepEqual(updated.tools, ['child_tool'], 'child session tools preserved')
229
+ assert.deepEqual(updated.extensions, ['child_ext'], 'child session extensions preserved')
230
+ })
231
+
232
+ it('idempotent: repeated sync with same inputs yields changed=false', () => {
233
+ const session = makeSession({
234
+ provider: 'openai',
235
+ model: 'gpt-4o',
236
+ credentialId: 'cred_openai',
237
+ fallbackCredentialIds: [],
238
+ apiEndpoint: null,
239
+ gatewayProfileId: null,
240
+ tools: [],
241
+ extensions: [],
242
+ parentSessionId: null,
243
+ memoryScopeMode: null,
244
+ })
245
+ const agent = makeAgent({ tools: [], extensions: [] })
246
+ const route = makeRoute()
247
+
248
+ applyAgentSyncToSession(session, agent, route, SESSION_ID)
249
+ const { changed } = applyAgentSyncToSession(session, agent, route, SESSION_ID)
250
+
251
+ assert.equal(changed, false, 'second sync makes no changes')
252
+ })
253
+ })
@@ -3,7 +3,7 @@ import os from 'os'
3
3
 
4
4
  import { log } from '@/lib/server/logger'
5
5
  import { getProvider } from '@/lib/providers'
6
- import type { ExecutionBrief, Message, Session } from '@/types'
6
+ import type { Agent, ExecutionBrief, Message, Session } from '@/types'
7
7
  import {
8
8
  decryptKey,
9
9
  loadCredentials,
@@ -29,6 +29,7 @@ import {
29
29
  import {
30
30
  applyResolvedRoute,
31
31
  resolvePrimaryAgentRoute,
32
+ type ResolvedAgentRoute,
32
33
  } from '@/lib/server/agents/agent-runtime-config'
33
34
  import {
34
35
  runCapabilityBeforeMessageWrite,
@@ -190,17 +191,13 @@ function joinSystemPromptBlocks(...blocks: Array<string | null | undefined>): st
190
191
  return joined || undefined
191
192
  }
192
193
 
193
- function syncSessionFromAgent(sessionId: string): void {
194
- const session = getSession(sessionId)
195
- if (!session?.agentId) return
196
- const agent = getAgent(session.agentId)
197
- if (!agent) return
198
-
194
+ export function applyAgentSyncToSession(
195
+ session: Session,
196
+ agent: Agent,
197
+ route: ResolvedAgentRoute | null,
198
+ sessionId: string,
199
+ ): { session: Session; changed: boolean } {
199
200
  let changed = false
200
- const route = resolvePrimaryAgentRoute(agent, undefined, {
201
- preferredGatewayTags: session.routePreferredGatewayTags || [],
202
- preferredGatewayUseCase: session.routePreferredGatewayUseCase || null,
203
- })
204
201
  if (!session.provider && agent.provider) { session.provider = agent.provider; changed = true }
205
202
  if ((session.model === undefined || session.model === null || session.model === '') && agent.model !== undefined) {
206
203
  session.model = agent.model
@@ -208,19 +205,24 @@ function syncSessionFromAgent(sessionId: string): void {
208
205
  }
209
206
  if (route) {
210
207
  const resolved = applyResolvedRoute({ ...session }, route)
211
- if (session.provider !== resolved.provider) { session.provider = resolved.provider; changed = true }
212
- if (session.model !== resolved.model) { session.model = resolved.model; changed = true }
213
- if ((session.credentialId || null) !== (resolved.credentialId || null)) {
214
- session.credentialId = resolved.credentialId ?? null
215
- changed = true
216
- }
217
- if (JSON.stringify(session.fallbackCredentialIds || []) !== JSON.stringify(resolved.fallbackCredentialIds || [])) {
218
- session.fallbackCredentialIds = [...(resolved.fallbackCredentialIds || [])]
219
- changed = true
220
- }
221
- if ((session.apiEndpoint || null) !== (resolved.apiEndpoint || null)) {
222
- session.apiEndpoint = resolved.apiEndpoint ?? null
223
- changed = true
208
+ // Do NOT sync provider/model from the route here — the user may have manually
209
+ // switched the session model, and we must preserve that choice.
210
+ // Provider/model are initialized from the route at session-creation time only.
211
+ // Only sync credentials/endpoint when the session's provider still matches the
212
+ // route's provider — if the user switched providers, leave their credential alone.
213
+ if (session.provider === resolved.provider) {
214
+ if ((session.credentialId || null) !== (resolved.credentialId || null)) {
215
+ session.credentialId = resolved.credentialId ?? null
216
+ changed = true
217
+ }
218
+ if (JSON.stringify(session.fallbackCredentialIds || []) !== JSON.stringify(resolved.fallbackCredentialIds || [])) {
219
+ session.fallbackCredentialIds = [...(resolved.fallbackCredentialIds || [])]
220
+ changed = true
221
+ }
222
+ if ((session.apiEndpoint || null) !== (resolved.apiEndpoint || null)) {
223
+ session.apiEndpoint = resolved.apiEndpoint ?? null
224
+ changed = true
225
+ }
224
226
  }
225
227
  if ((session.gatewayProfileId || null) !== (resolved.gatewayProfileId || null)) {
226
228
  session.gatewayProfileId = resolved.gatewayProfileId ?? null
@@ -297,9 +299,21 @@ function syncSessionFromAgent(sessionId: string): void {
297
299
  changed = true
298
300
  }
299
301
  }
302
+ return { session, changed }
303
+ }
300
304
 
305
+ export function syncSessionFromAgent(sessionId: string): void {
306
+ const session = getSession(sessionId)
307
+ if (!session?.agentId) return
308
+ const agent = getAgent(session.agentId)
309
+ if (!agent) return
310
+ const route = resolvePrimaryAgentRoute(agent, undefined, {
311
+ preferredGatewayTags: session.routePreferredGatewayTags || [],
312
+ preferredGatewayUseCase: session.routePreferredGatewayUseCase || null,
313
+ })
314
+ const { session: updated, changed } = applyAgentSyncToSession(session, agent, route, sessionId)
301
315
  if (changed) {
302
- saveSession(sessionId, session)
316
+ saveSession(sessionId, updated)
303
317
  }
304
318
  }
305
319
 
@@ -624,9 +638,15 @@ export async function prepareChatTurn(input: ExecuteChatTurnInput): Promise<Prep
624
638
  preferredGatewayTags: session.routePreferredGatewayTags || [],
625
639
  preferredGatewayUseCase: session.routePreferredGatewayUseCase || null,
626
640
  })
627
- if (preferredRoute) {
641
+ if (preferredRoute && sessionForRun.provider === preferredRoute.provider) {
642
+ // Apply route for credentials/endpoint/gateway, but preserve the user's
643
+ // manually-selected model — only sync infra, not the model choice.
644
+ const savedModel = sessionForRun.model
628
645
  sessionForRun = applyResolvedRoute({ ...sessionForRun }, preferredRoute)
646
+ sessionForRun = { ...sessionForRun, model: savedModel }
629
647
  }
648
+ // If the user has manually switched to a different provider, skip the route
649
+ // entirely — the session already has the correct provider/model/credential.
630
650
  }
631
651
  let effectiveMessage = message
632
652
 
@@ -218,13 +218,17 @@ export interface ClassifyMessageInput {
218
218
  history?: Message[]
219
219
  }
220
220
 
221
- // Timeout sized for Ollama Cloud with a fully-configured agent: observed
222
- // classifier calls in the 4-6 s range during live testing, plus the expanded
223
- // 4-flag semantic schema requires a slightly larger JSON output. 10 s
224
- // accommodates the tail without blocking chat turns for long on a total
225
- // failure. Result is cached per-message so the latency tax only applies to
226
- // first-seen messages.
227
- const CLASSIFIER_TIMEOUT_MS = 10_000
221
+ // Timeout sized for Ollama Cloud with a fully-configured agent. Observed
222
+ // classifier calls in the 4-6s range during live testing, with cloud
223
+ // providers (Ollama Cloud, OpenRouter) routinely tipping over the 10s
224
+ // boundary on a cold cache. SC_CLASSIFIER_TIMEOUT_MS overrides the default for users
225
+ // running consistently slow providers; default raised to 20s so the cloud
226
+ // path actually completes.
227
+ const DEFAULT_CLASSIFIER_TIMEOUT_MS = 20_000
228
+ const CLASSIFIER_TIMEOUT_MS = (() => {
229
+ const raw = Number(process.env.SC_CLASSIFIER_TIMEOUT_MS)
230
+ return Number.isFinite(raw) && raw > 0 ? Math.trunc(raw) : DEFAULT_CLASSIFIER_TIMEOUT_MS
231
+ })()
228
232
 
229
233
  /**
230
234
  * Classify a user message using a single LLM call.
@@ -0,0 +1,85 @@
1
+ import assert from 'node:assert/strict'
2
+ import { describe, it } from 'node:test'
3
+
4
+ import { stripLeakedClassificationJson } from './post-stream-finalization'
5
+
6
+ // A fully-valid MessageClassification serialized by the model. Mirrors the
7
+ // real output we observed during a live delegation turn.
8
+ const VALID_LEAK = JSON.stringify({
9
+ taskIntent: 'research',
10
+ isDeliverableTask: false,
11
+ isBroadGoal: false,
12
+ isLightweightDirectChat: true,
13
+ hasHumanSignals: false,
14
+ hasSignificantEvent: false,
15
+ isResearchSynthesis: false,
16
+ workType: 'general',
17
+ explicitToolRequests: [],
18
+ confidence: 0.95,
19
+ })
20
+
21
+ describe('stripLeakedClassificationJson', () => {
22
+ it('strips a leaked classification JSON that starts with taskIntent', () => {
23
+ const input = `${VALID_LEAK}Task created and delegated.`
24
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
25
+ assert.equal(stripped, true)
26
+ assert.equal(cleaned, 'Task created and delegated.')
27
+ })
28
+
29
+ it('strips when the leak appears mid-response', () => {
30
+ const input = `Here you go: ${VALID_LEAK} continuing.`
31
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
32
+ assert.equal(stripped, true)
33
+ assert.equal(cleaned.includes('taskIntent'), false)
34
+ })
35
+
36
+ it('leaves normal assistant text untouched', () => {
37
+ const input = 'Your favorite color is blue.'
38
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
39
+ assert.equal(stripped, false)
40
+ assert.equal(cleaned, input)
41
+ })
42
+
43
+ it('leaves a partial or unrelated JSON object alone', () => {
44
+ // A bare object with one classifier-adjacent key but not the full shape
45
+ // must NOT be stripped — the zod schema rejects it.
46
+ const input = 'Prefix text. {"workType": "coding"} suffix.'
47
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
48
+ assert.equal(stripped, false)
49
+ assert.equal(cleaned, input)
50
+ })
51
+
52
+ it('ignores malformed JSON that looks like a classifier leak', () => {
53
+ const input = 'Malformed {"taskIntent": "research", "isDeliverableTask": [oops suffix.'
54
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
55
+ assert.equal(stripped, false)
56
+ assert.equal(cleaned, input)
57
+ })
58
+
59
+ it('does not confuse braces inside strings', () => {
60
+ const input = `Before {"label": "{not json}", ${VALID_LEAK.slice(1)} after`
61
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
62
+ assert.equal(stripped, true)
63
+ assert.equal(cleaned.includes('taskIntent'), false)
64
+ })
65
+
66
+ it('rejects a classifier-like object with an invalid enum value', () => {
67
+ // taskIntent must be one of the TaskIntent enum values. Garbage value is
68
+ // rejected by safeParse so no stripping happens.
69
+ const invalid = JSON.stringify({
70
+ taskIntent: 'totally-made-up-intent',
71
+ isDeliverableTask: false,
72
+ isBroadGoal: false,
73
+ hasHumanSignals: false,
74
+ hasSignificantEvent: false,
75
+ isResearchSynthesis: false,
76
+ workType: 'general',
77
+ explicitToolRequests: [],
78
+ confidence: 0.5,
79
+ })
80
+ const input = `${invalid} not a real leak`
81
+ const { cleaned, stripped } = stripLeakedClassificationJson(input)
82
+ assert.equal(stripped, false)
83
+ assert.equal(cleaned, input)
84
+ })
85
+ })
@@ -18,32 +18,57 @@ import { runCapabilityHook } from '@/lib/server/native-capabilities'
18
18
  import {
19
19
  shouldForceExternalServiceSummary,
20
20
  } from '@/lib/server/chat-execution/chat-streaming-utils'
21
- import type { MessageClassification } from '@/lib/server/chat-execution/message-classifier'
21
+ import {
22
+ MessageClassificationSchema,
23
+ type MessageClassification,
24
+ } from '@/lib/server/chat-execution/message-classifier'
22
25
  import {
23
26
  resolveFinalStreamResponseText,
24
27
  } from '@/lib/server/chat-execution/stream-continuation'
25
28
  import { buildForcedExternalServiceSummary } from '@/lib/server/chat-execution/prompt-builder'
26
29
 
27
30
  // ---------------------------------------------------------------------------
28
- // Classification JSON leak detection — strips `{ "isDeliverableTask": ... }`
29
- // objects that some models echo verbatim into their response text.
31
+ // Classification JSON leak detection — strips MessageClassification objects
32
+ // that some models echo verbatim into their response text. Candidate JSON
33
+ // substrings are found by brace-matching, then validated against the actual
34
+ // MessageClassificationSchema — the single source of truth for what a
35
+ // classifier object looks like.
30
36
  // ---------------------------------------------------------------------------
31
37
 
32
- const CLASSIFICATION_LEAK_RE = /\{\s*"isDeliverableTask"\s*:/
33
-
34
- function stripLeakedClassificationJson(text: string): { cleaned: string; stripped: boolean } {
35
- const match = CLASSIFICATION_LEAK_RE.exec(text)
36
- if (!match || match.index === undefined) return { cleaned: text, stripped: false }
37
- const startIdx = match.index
38
+ /** Returns the index just past the balanced `}` for the `{` at `start`, or -1. */
39
+ function findBalancedObjectEnd(text: string, start: number): number {
38
40
  let depth = 0
39
- let end = -1
40
- for (let i = startIdx; i < text.length; i++) {
41
- if (text[i] === '{') depth++
42
- else if (text[i] === '}') { depth--; if (depth === 0) { end = i + 1; break } }
41
+ let inString = false
42
+ let escape = false
43
+ for (let i = start; i < text.length; i++) {
44
+ const ch = text[i]
45
+ if (escape) { escape = false; continue }
46
+ if (inString) {
47
+ if (ch === '\\') escape = true
48
+ else if (ch === '"') inString = false
49
+ continue
50
+ }
51
+ if (ch === '"') inString = true
52
+ else if (ch === '{') depth += 1
53
+ else if (ch === '}') {
54
+ depth -= 1
55
+ if (depth === 0) return i + 1
56
+ }
57
+ }
58
+ return -1
59
+ }
60
+
61
+ export function stripLeakedClassificationJson(text: string): { cleaned: string; stripped: boolean } {
62
+ for (let i = text.indexOf('{'); i !== -1; i = text.indexOf('{', i + 1)) {
63
+ const end = findBalancedObjectEnd(text, i)
64
+ if (end === -1) break
65
+ let parsed: unknown
66
+ try { parsed = JSON.parse(text.slice(i, end)) } catch { continue }
67
+ if (!MessageClassificationSchema.safeParse(parsed).success) continue
68
+ log.warn(TAG, 'Stripped leaked classification JSON from model output')
69
+ return { cleaned: (text.slice(0, i) + text.slice(end)).trimStart(), stripped: true }
43
70
  }
44
- if (end === -1) return { cleaned: text, stripped: false }
45
- log.warn(TAG, 'Stripped leaked classification JSON from model output')
46
- return { cleaned: (text.slice(0, startIdx) + text.slice(end)).trimStart(), stripped: true }
71
+ return { cleaned: text, stripped: false }
47
72
  }
48
73
 
49
74
  // StreamAgentChatResult is defined inline to avoid circular dependency with stream-agent-chat.ts
@@ -78,7 +78,8 @@ describe('response-completeness', () => {
78
78
  toolCallCount: 0,
79
79
  },
80
80
  {
81
- generateText: () => new Promise((resolve) => setTimeout(() => resolve('{"isIncomplete": true, "confidence": 0.9}'), 5000)),
81
+ timeoutMs: 50,
82
+ generateText: () => new Promise((resolve) => setTimeout(() => resolve('{"isIncomplete": true, "confidence": 0.9}'), 500)),
82
83
  },
83
84
  )
84
85
  assert.equal(result, null)
@@ -153,7 +153,11 @@ export interface EvaluateCompletenessInput {
153
153
  toolCallCount: number
154
154
  }
155
155
 
156
- const COMPLETENESS_TIMEOUT_MS = 2_000
156
+ const DEFAULT_COMPLETENESS_TIMEOUT_MS = 8_000
157
+ const COMPLETENESS_TIMEOUT_MS = (() => {
158
+ const raw = Number(process.env.SC_COMPLETENESS_TIMEOUT_MS)
159
+ return Number.isFinite(raw) && raw > 0 ? Math.trunc(raw) : DEFAULT_COMPLETENESS_TIMEOUT_MS
160
+ })()
157
161
 
158
162
  /**
159
163
  * Evaluate whether an agent response is incomplete — i.e. the agent described
@@ -164,7 +168,7 @@ const COMPLETENESS_TIMEOUT_MS = 2_000
164
168
  */
165
169
  export async function evaluateResponseCompleteness(
166
170
  input: EvaluateCompletenessInput,
167
- options?: { generateText?: (prompt: string) => Promise<string> },
171
+ options?: { generateText?: (prompt: string) => Promise<string>; timeoutMs?: number },
168
172
  ): Promise<ResponseCompleteness | null> {
169
173
  const response = input.response.trim()
170
174
  if (!response) return null
@@ -175,6 +179,10 @@ export async function evaluateResponseCompleteness(
175
179
 
176
180
  const prompt = buildCompletenessPrompt(input.message, response, input.toolCallCount)
177
181
 
182
+ const timeoutMs = typeof options?.timeoutMs === 'number' && options.timeoutMs > 0
183
+ ? options.timeoutMs
184
+ : COMPLETENESS_TIMEOUT_MS
185
+
178
186
  const startMs = Date.now()
179
187
  try {
180
188
  const responseText = await Promise.race([
@@ -189,7 +197,7 @@ export async function evaluateResponseCompleteness(
189
197
  return extractModelText(result.content)
190
198
  })(),
191
199
  new Promise<never>((_, reject) =>
192
- setTimeout(() => reject(new Error('completeness-timeout')), COMPLETENESS_TIMEOUT_MS),
200
+ setTimeout(() => reject(new Error('completeness-timeout')), timeoutMs),
193
201
  ),
194
202
  ])
195
203
 
@@ -0,0 +1,54 @@
1
+ import assert from 'node:assert/strict'
2
+ import { test } from 'node:test'
3
+ import { stripAgentReactionTokens } from '@/lib/server/chatrooms/chatroom-agent-signals'
4
+
5
+ test('stripAgentReactionTokens removes single REACTION marker glued to text', () => {
6
+ const input = '11 plus 7 equals 18.\n[REACTION]{"emoji": "✅", "to": "c93bb040"}'
7
+ const out = stripAgentReactionTokens(input)
8
+ assert.equal(out, '11 plus 7 equals 18.')
9
+ })
10
+
11
+ test('stripAgentReactionTokens removes multiple REACTION markers', () => {
12
+ const input = 'Sure thing.[REACTION]{"emoji":"👍","to":"a1"} Got it.[REACTION]{"emoji":"🎉","to":"b2"}'
13
+ const out = stripAgentReactionTokens(input)
14
+ assert.ok(!out.includes('[REACTION]'))
15
+ assert.ok(out.includes('Sure thing.'))
16
+ assert.ok(out.includes('Got it.'))
17
+ })
18
+
19
+ test('stripAgentReactionTokens preserves text without markers', () => {
20
+ const input = 'Just a normal message with no markers.'
21
+ const out = stripAgentReactionTokens(input)
22
+ assert.equal(out, input)
23
+ })
24
+
25
+ test('stripAgentReactionTokens handles empty string', () => {
26
+ assert.equal(stripAgentReactionTokens(''), '')
27
+ })
28
+
29
+ test('stripAgentReactionTokens collapses excess whitespace from removed lines', () => {
30
+ const input = 'Line 1\n\n\n[REACTION]{"emoji":"✅","to":"x"}\n\n\nLine 2'
31
+ const out = stripAgentReactionTokens(input)
32
+ assert.ok(!out.includes('[REACTION]'))
33
+ assert.ok(out.includes('Line 1'))
34
+ assert.ok(out.includes('Line 2'))
35
+ assert.ok(!out.match(/\n{3,}/), 'no triple newlines')
36
+ })
37
+
38
+ test('stripAgentReactionTokens leaves [REACTION] intact when JSON is invalid (incomplete payload)', () => {
39
+ const input = 'Sure.[REACTION]{"emoji":"👍"} Done.'
40
+ const out = stripAgentReactionTokens(input)
41
+ assert.ok(out.includes('[REACTION]'), 'invalid reaction (missing "to") preserved verbatim')
42
+ })
43
+
44
+ test('stripAgentReactionTokens leaves [REACTION] intact when followed by non-JSON', () => {
45
+ const input = 'Notes: [REACTION] is a label, not a marker.'
46
+ const out = stripAgentReactionTokens(input)
47
+ assert.equal(out, input)
48
+ })
49
+
50
+ test('stripAgentReactionTokens handles nested JSON without truncation', () => {
51
+ const input = 'Tags applied.[REACTION]{"emoji":"🏷️","to":"abc","meta":{"reason":"label"}}'
52
+ const out = stripAgentReactionTokens(input)
53
+ assert.equal(out, 'Tags applied.')
54
+ })