@swarmclawai/swarmclaw 1.5.53 → 1.5.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -3
- package/package.json +2 -2
- package/src/app/api/agents/[id]/route.ts +14 -2
- package/src/app/api/agents/agents-route.test.ts +65 -1
- package/src/app/api/chatrooms/[id]/chat/route.ts +5 -3
- package/src/app/api/chatrooms/route.ts +3 -0
- package/src/app/api/missions/[id]/control/route.ts +21 -0
- package/src/app/api/missions/templates/[id]/instantiate/route.ts +64 -0
- package/src/app/api/missions/templates/route.ts +8 -0
- package/src/app/api/tasks/[id]/route.ts +11 -1
- package/src/app/api/tasks/tasks-route.test.ts +81 -0
- package/src/app/api/webhooks/[id]/route.ts +18 -15
- package/src/app/missions/page.tsx +135 -22
- package/src/cli/index.js +2 -0
- package/src/cli/spec.js +2 -0
- package/src/components/missions/mission-edit-sheet.tsx +319 -0
- package/src/components/missions/mission-template-gallery.tsx +113 -0
- package/src/components/missions/mission-template-install-dialog.tsx +283 -0
- package/src/lib/server/agents/agent-service.ts +10 -2
- package/src/lib/server/agents/main-agent-loop-advanced.test.ts +36 -0
- package/src/lib/server/agents/main-agent-loop.ts +111 -4
- package/src/lib/server/chat-execution/chat-turn-preparation.test.ts +253 -0
- package/src/lib/server/chat-execution/chat-turn-preparation.ts +46 -26
- package/src/lib/server/chat-execution/message-classifier.ts +11 -7
- package/src/lib/server/chat-execution/post-stream-finalization.test.ts +85 -0
- package/src/lib/server/chat-execution/post-stream-finalization.ts +41 -16
- package/src/lib/server/chat-execution/response-completeness.test.ts +2 -1
- package/src/lib/server/chat-execution/response-completeness.ts +11 -3
- package/src/lib/server/chatrooms/chatroom-agent-signals.test.ts +54 -0
- package/src/lib/server/chatrooms/chatroom-agent-signals.ts +105 -9
- package/src/lib/server/chats/chat-session-service.ts +11 -0
- package/src/lib/server/connectors/email.test.ts +64 -0
- package/src/lib/server/connectors/email.ts +35 -6
- package/src/lib/server/connectors/response-media.ts +1 -0
- package/src/lib/server/daemon/daemon-runtime.ts +31 -19
- package/src/lib/server/memory/memory-db.test.ts +8 -0
- package/src/lib/server/memory/memory-db.ts +1 -1
- package/src/lib/server/missions/mission-service.ts +47 -1
- package/src/lib/server/missions/mission-templates.test.ts +208 -0
- package/src/lib/server/missions/mission-templates.ts +186 -0
- package/src/lib/server/runtime/session-run-manager/drain.ts +16 -0
- package/src/lib/server/storage-normalization.ts +6 -0
- package/src/lib/server/storage.ts +1 -1
- package/src/lib/server/tasks/task-validation.test.ts +30 -0
- package/src/lib/server/tasks/task-validation.ts +21 -2
- package/src/lib/server/working-state/normalization.ts +5 -1
- package/src/lib/validation/schemas.ts +40 -0
- package/src/types/mission.ts +27 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
import type { Agent, Session } from '@/types'
|
|
4
|
+
import type { ResolvedAgentRoute } from '@/lib/server/agents/agent-runtime-config'
|
|
5
|
+
import { applyAgentSyncToSession } from './chat-turn-preparation'
|
|
6
|
+
|
|
7
|
+
const SESSION_ID = 'sess_test_1'
|
|
8
|
+
const AGENT_ID = 'agent_test_1'
|
|
9
|
+
|
|
10
|
+
/**
 * Builds an `Agent` fixture for the sync tests.
 *
 * The defaults describe an OpenAI `gpt-4o` agent bound to `cred_openai` with no
 * custom endpoint; any field in `overrides` replaces the matching default.
 */
function makeAgent(overrides: Partial<Agent> = {}): Agent {
  const defaults = {
    id: AGENT_ID,
    name: 'Test Agent',
    description: '',
    systemPrompt: '',
    provider: 'openai',
    model: 'gpt-4o',
    credentialId: 'cred_openai',
    apiEndpoint: null,
  }
  // Only the fields these tests touch are populated, hence the cast.
  return { ...defaults, ...overrides } as Agent
}
|
|
23
|
+
|
|
24
|
+
/**
 * Builds a `Session` fixture for the sync tests.
 *
 * The defaults describe an OpenAI `gpt-4o` session owned by `AGENT_ID` with an
 * empty message history; any field in `overrides` replaces the matching default.
 */
function makeSession(overrides: Partial<Session> = {}): Session {
  const defaults = {
    id: SESSION_ID,
    name: 'Chat',
    cwd: '/tmp',
    user: 'test',
    provider: 'openai',
    model: 'gpt-4o',
    claudeSessionId: null,
    messages: [],
    createdAt: 0,
    lastActiveAt: 0,
    agentId: AGENT_ID,
  }
  // Only the fields these tests touch are populated, hence the cast.
  return { ...defaults, ...overrides } as Session
}
|
|
40
|
+
|
|
41
|
+
/**
 * Builds a `ResolvedAgentRoute` fixture for the sync tests.
 *
 * The defaults describe the agent's primary OpenAI `gpt-4o` route with
 * `cred_openai`, no fallbacks, no custom endpoint and no gateway profile; any
 * field in `overrides` replaces the matching default.
 */
function makeRoute(overrides: Partial<ResolvedAgentRoute> = {}): ResolvedAgentRoute {
  const base: ResolvedAgentRoute = {
    id: 'route_1',
    label: 'primary',
    provider: 'openai',
    model: 'gpt-4o',
    credentialId: 'cred_openai',
    fallbackCredentialIds: [],
    apiEndpoint: null,
    gatewayProfileId: null,
    priority: 1,
    source: 'agent',
  }
  return { ...base, ...overrides }
}
|
|
56
|
+
|
|
57
|
+
// Contract under test: applyAgentSyncToSession never overwrites a provider or
// model the session already carries. Credentials, fallback credentials and the
// API endpoint are only taken from the route while the session's provider still
// equals the route's provider; gatewayProfileId is taken from the route either way.
describe('applyAgentSyncToSession — user-selected provider/model preservation', () => {
  it('session with matching provider/model inherits credentials from route (baseline)', () => {
    const session = makeSession({ provider: 'openai', model: 'gpt-4o', credentialId: null })
    const agent = makeAgent()
    const route = makeRoute({ credentialId: 'cred_openai' })

    const { session: updated, changed } = applyAgentSyncToSession(session, agent, route, SESSION_ID)

    assert.equal(changed, true)
    assert.equal(updated.provider, 'openai', 'provider unchanged')
    assert.equal(updated.model, 'gpt-4o', 'model unchanged')
    assert.equal(updated.credentialId, 'cred_openai', 'credential synced from route')
  })

  it('user-switched provider is preserved even when agent/route disagree', () => {
    const session = makeSession({
      provider: 'anthropic',
      model: 'claude-opus-4-7',
      credentialId: 'cred_anthropic',
      apiEndpoint: 'https://api.anthropic.com',
    })
    const agent = makeAgent({ provider: 'openai', model: 'gpt-4o' })
    const route = makeRoute({ provider: 'openai', model: 'gpt-4o', credentialId: 'cred_openai' })

    const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)

    assert.equal(updated.provider, 'anthropic', 'user-switched provider preserved')
    assert.equal(updated.model, 'claude-opus-4-7', 'user-switched model preserved')
  })

  it('user-switched provider keeps its credentials (does not rewrite from route)', () => {
    const session = makeSession({
      provider: 'anthropic',
      model: 'claude-opus-4-7',
      credentialId: 'cred_anthropic',
      fallbackCredentialIds: ['cred_anthropic_backup'],
      apiEndpoint: 'https://api.anthropic.com/v1',
    })
    const agent = makeAgent({ provider: 'openai', credentialId: 'cred_openai' })
    const route = makeRoute({
      provider: 'openai',
      credentialId: 'cred_openai',
      fallbackCredentialIds: ['cred_openai_backup'],
      apiEndpoint: 'https://api.openai.com/v1',
    })

    const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)

    assert.equal(updated.credentialId, 'cred_anthropic', 'credentialId not rewritten')
    assert.deepEqual(
      updated.fallbackCredentialIds,
      ['cred_anthropic_backup'],
      'fallbackCredentialIds not rewritten',
    )
    assert.equal(
      updated.apiEndpoint,
      'https://api.anthropic.com/v1',
      'apiEndpoint not rewritten',
    )
  })

  // The title previously claimed the session "keeps its credentials", but the
  // assertion below requires the opposite: with an unchanged provider the
  // credential is replaced by the route's. Only the model choice is preserved.
  it('user-switched model (same provider) keeps its model while credentials sync from route', () => {
    const session = makeSession({
      provider: 'openai',
      model: 'gpt-4o-mini',
      credentialId: 'cred_openai_user',
    })
    const agent = makeAgent({ provider: 'openai', model: 'gpt-4o', credentialId: 'cred_openai' })
    const route = makeRoute({
      provider: 'openai',
      model: 'gpt-4o',
      credentialId: 'cred_openai',
    })

    const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)

    assert.equal(updated.model, 'gpt-4o-mini', 'user-switched model preserved')
    assert.equal(
      updated.credentialId,
      'cred_openai',
      'same-provider credential does sync from route',
    )
  })

  it('empty session.provider inherits from agent', () => {
    const session = makeSession({
      provider: '' as Session['provider'],
      model: '',
      credentialId: null,
    })
    const agent = makeAgent({ provider: 'openai', model: 'gpt-4o' })

    const { session: updated, changed } = applyAgentSyncToSession(session, agent, null, SESSION_ID)

    assert.equal(changed, true)
    assert.equal(updated.provider, 'openai', 'provider initialized from agent')
    assert.equal(updated.model, 'gpt-4o', 'model initialized from agent')
  })

  it('gatewayProfileId syncs from route regardless of provider switch', () => {
    const session = makeSession({
      provider: 'anthropic',
      model: 'claude-opus-4-7',
      gatewayProfileId: null,
    })
    const agent = makeAgent({ provider: 'openai' })
    const route = makeRoute({ provider: 'openai', gatewayProfileId: 'gw_profile_1' })

    const { session: updated } = applyAgentSyncToSession(session, agent, route, SESSION_ID)

    assert.equal(
      updated.gatewayProfileId,
      'gw_profile_1',
      'gatewayProfileId syncs from route even across provider switch',
    )
  })

  it('no route: session inherits credentialId and apiEndpoint from agent when unset', () => {
    const session = makeSession({
      provider: 'openai',
      model: 'gpt-4o',
      credentialId: undefined,
      apiEndpoint: undefined,
    })
    const agent = makeAgent({
      provider: 'openai',
      credentialId: 'cred_agent',
      apiEndpoint: 'https://custom.openai.example/v1',
    })

    const { session: updated } = applyAgentSyncToSession(session, agent, null, SESSION_ID)

    assert.equal(updated.credentialId, 'cred_agent', 'credentialId filled from agent')
    assert.equal(
      updated.apiEndpoint,
      'https://custom.openai.example/v1',
      'apiEndpoint filled from agent',
    )
  })

  it('tool/extension selection syncs from agent when session has no parent', () => {
    const session = makeSession({
      parentSessionId: null,
      tools: ['old_tool'],
      extensions: ['old_ext'],
    })
    const agent = makeAgent({
      tools: ['new_tool_a', 'new_tool_b'],
      extensions: ['new_ext'],
    })

    const { session: updated, changed } = applyAgentSyncToSession(session, agent, null, SESSION_ID)

    assert.equal(changed, true)
    assert.deepEqual(updated.tools, ['new_tool_a', 'new_tool_b'])
    assert.deepEqual(updated.extensions, ['new_ext'])
  })

  it('tool/extension selection does NOT sync on child (delegated) sessions', () => {
    const session = makeSession({
      parentSessionId: 'parent_session',
      tools: ['child_tool'],
      extensions: ['child_ext'],
    })
    const agent = makeAgent({
      tools: ['agent_tool'],
      extensions: ['agent_ext'],
    })

    const { session: updated } = applyAgentSyncToSession(session, agent, null, SESSION_ID)

    assert.deepEqual(updated.tools, ['child_tool'], 'child session tools preserved')
    assert.deepEqual(updated.extensions, ['child_ext'], 'child session extensions preserved')
  })

  it('idempotent: repeated sync with same inputs yields changed=false', () => {
    const session = makeSession({
      provider: 'openai',
      model: 'gpt-4o',
      credentialId: 'cred_openai',
      fallbackCredentialIds: [],
      apiEndpoint: null,
      gatewayProfileId: null,
      tools: [],
      extensions: [],
      parentSessionId: null,
      memoryScopeMode: null,
    })
    const agent = makeAgent({ tools: [], extensions: [] })
    const route = makeRoute()

    // The first call is allowed to normalise the session; only the second
    // call's `changed` flag is asserted.
    applyAgentSyncToSession(session, agent, route, SESSION_ID)
    const { changed } = applyAgentSyncToSession(session, agent, route, SESSION_ID)

    assert.equal(changed, false, 'second sync makes no changes')
  })
})
|
|
@@ -3,7 +3,7 @@ import os from 'os'
|
|
|
3
3
|
|
|
4
4
|
import { log } from '@/lib/server/logger'
|
|
5
5
|
import { getProvider } from '@/lib/providers'
|
|
6
|
-
import type { ExecutionBrief, Message, Session } from '@/types'
|
|
6
|
+
import type { Agent, ExecutionBrief, Message, Session } from '@/types'
|
|
7
7
|
import {
|
|
8
8
|
decryptKey,
|
|
9
9
|
loadCredentials,
|
|
@@ -29,6 +29,7 @@ import {
|
|
|
29
29
|
import {
|
|
30
30
|
applyResolvedRoute,
|
|
31
31
|
resolvePrimaryAgentRoute,
|
|
32
|
+
type ResolvedAgentRoute,
|
|
32
33
|
} from '@/lib/server/agents/agent-runtime-config'
|
|
33
34
|
import {
|
|
34
35
|
runCapabilityBeforeMessageWrite,
|
|
@@ -190,17 +191,13 @@ function joinSystemPromptBlocks(...blocks: Array<string | null | undefined>): st
|
|
|
190
191
|
return joined || undefined
|
|
191
192
|
}
|
|
192
193
|
|
|
193
|
-
function
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
194
|
+
export function applyAgentSyncToSession(
|
|
195
|
+
session: Session,
|
|
196
|
+
agent: Agent,
|
|
197
|
+
route: ResolvedAgentRoute | null,
|
|
198
|
+
sessionId: string,
|
|
199
|
+
): { session: Session; changed: boolean } {
|
|
199
200
|
let changed = false
|
|
200
|
-
const route = resolvePrimaryAgentRoute(agent, undefined, {
|
|
201
|
-
preferredGatewayTags: session.routePreferredGatewayTags || [],
|
|
202
|
-
preferredGatewayUseCase: session.routePreferredGatewayUseCase || null,
|
|
203
|
-
})
|
|
204
201
|
if (!session.provider && agent.provider) { session.provider = agent.provider; changed = true }
|
|
205
202
|
if ((session.model === undefined || session.model === null || session.model === '') && agent.model !== undefined) {
|
|
206
203
|
session.model = agent.model
|
|
@@ -208,19 +205,24 @@ function syncSessionFromAgent(sessionId: string): void {
|
|
|
208
205
|
}
|
|
209
206
|
if (route) {
|
|
210
207
|
const resolved = applyResolvedRoute({ ...session }, route)
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
208
|
+
// Do NOT sync provider/model from the route here — the user may have manually
|
|
209
|
+
// switched the session model, and we must preserve that choice.
|
|
210
|
+
// Provider/model are initialized from the route at session-creation time only.
|
|
211
|
+
// Only sync credentials/endpoint when the session's provider still matches the
|
|
212
|
+
// route's provider — if the user switched providers, leave their credential alone.
|
|
213
|
+
if (session.provider === resolved.provider) {
|
|
214
|
+
if ((session.credentialId || null) !== (resolved.credentialId || null)) {
|
|
215
|
+
session.credentialId = resolved.credentialId ?? null
|
|
216
|
+
changed = true
|
|
217
|
+
}
|
|
218
|
+
if (JSON.stringify(session.fallbackCredentialIds || []) !== JSON.stringify(resolved.fallbackCredentialIds || [])) {
|
|
219
|
+
session.fallbackCredentialIds = [...(resolved.fallbackCredentialIds || [])]
|
|
220
|
+
changed = true
|
|
221
|
+
}
|
|
222
|
+
if ((session.apiEndpoint || null) !== (resolved.apiEndpoint || null)) {
|
|
223
|
+
session.apiEndpoint = resolved.apiEndpoint ?? null
|
|
224
|
+
changed = true
|
|
225
|
+
}
|
|
224
226
|
}
|
|
225
227
|
if ((session.gatewayProfileId || null) !== (resolved.gatewayProfileId || null)) {
|
|
226
228
|
session.gatewayProfileId = resolved.gatewayProfileId ?? null
|
|
@@ -297,9 +299,21 @@ function syncSessionFromAgent(sessionId: string): void {
|
|
|
297
299
|
changed = true
|
|
298
300
|
}
|
|
299
301
|
}
|
|
302
|
+
return { session, changed }
|
|
303
|
+
}
|
|
300
304
|
|
|
305
|
+
/**
 * Loads the session and its agent, resolves the agent's primary route using
 * the session's gateway preferences, applies `applyAgentSyncToSession`, and
 * saves the session only when that call reports a change.
 *
 * Does nothing when the session is missing, has no `agentId`, or the agent
 * cannot be loaded.
 */
export function syncSessionFromAgent(sessionId: string): void {
  const current = getSession(sessionId)
  if (!current?.agentId) return

  const owner = getAgent(current.agentId)
  if (!owner) return

  const primaryRoute = resolvePrimaryAgentRoute(owner, undefined, {
    preferredGatewayTags: current.routePreferredGatewayTags || [],
    preferredGatewayUseCase: current.routePreferredGatewayUseCase || null,
  })

  const outcome = applyAgentSyncToSession(current, owner, primaryRoute, sessionId)
  if (!outcome.changed) return
  saveSession(sessionId, outcome.session)
}
|
|
305
319
|
|
|
@@ -624,9 +638,15 @@ export async function prepareChatTurn(input: ExecuteChatTurnInput): Promise<Prep
|
|
|
624
638
|
preferredGatewayTags: session.routePreferredGatewayTags || [],
|
|
625
639
|
preferredGatewayUseCase: session.routePreferredGatewayUseCase || null,
|
|
626
640
|
})
|
|
627
|
-
if (preferredRoute) {
|
|
641
|
+
if (preferredRoute && sessionForRun.provider === preferredRoute.provider) {
|
|
642
|
+
// Apply route for credentials/endpoint/gateway, but preserve the user's
|
|
643
|
+
// manually-selected model — only sync infra, not the model choice.
|
|
644
|
+
const savedModel = sessionForRun.model
|
|
628
645
|
sessionForRun = applyResolvedRoute({ ...sessionForRun }, preferredRoute)
|
|
646
|
+
sessionForRun = { ...sessionForRun, model: savedModel }
|
|
629
647
|
}
|
|
648
|
+
// If the user has manually switched to a different provider, skip the route
|
|
649
|
+
// entirely — the session already has the correct provider/model/credential.
|
|
630
650
|
}
|
|
631
651
|
let effectiveMessage = message
|
|
632
652
|
|
|
@@ -218,13 +218,17 @@ export interface ClassifyMessageInput {
|
|
|
218
218
|
history?: Message[]
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
-
// Timeout sized for Ollama Cloud with a fully-configured agent
|
|
222
|
-
// classifier calls in the 4-
|
|
223
|
-
//
|
|
224
|
-
//
|
|
225
|
-
//
|
|
226
|
-
//
|
|
227
|
-
const
|
|
221
|
+
// Timeout sized for Ollama Cloud with a fully-configured agent. Observed
|
|
222
|
+
// classifier calls in the 4-6s range during live testing, with cloud
|
|
223
|
+
// providers (Ollama Cloud, OpenRouter) routinely tipping over the 10s
|
|
224
|
+
// boundary on a cold cache. SC_CLASSIFIER_TIMEOUT_MS overrides for users
|
|
225
|
+
// running consistently slow providers; default raised to 20s so the cloud
|
|
226
|
+
// path actually completes.
|
|
227
|
+
const DEFAULT_CLASSIFIER_TIMEOUT_MS = 20_000

/**
 * Converts a raw `SC_CLASSIFIER_TIMEOUT_MS` value into whole milliseconds.
 *
 * @param raw - The environment value, or `undefined` when the variable is unset.
 * @param fallbackMs - Returned when `raw` is not a usable timeout.
 * @returns The truncated value when it is a finite number of at least 1 ms,
 *   otherwise `fallbackMs`.
 */
function parseClassifierTimeoutMs(
  raw: string | undefined,
  fallbackMs: number = DEFAULT_CLASSIFIER_TIMEOUT_MS,
): number {
  const parsed = Number(raw)
  if (!Number.isFinite(parsed)) return fallbackMs
  // Validate AFTER truncation: a fractional value such as "0.5" is positive
  // but truncates to 0, which would make every classifier call time out
  // immediately instead of falling back to the default.
  const wholeMs = Math.trunc(parsed)
  return wholeMs >= 1 ? wholeMs : fallbackMs
}

const CLASSIFIER_TIMEOUT_MS = parseClassifierTimeoutMs(process.env.SC_CLASSIFIER_TIMEOUT_MS)
|
|
228
232
|
|
|
229
233
|
/**
|
|
230
234
|
* Classify a user message using a single LLM call.
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
|
|
4
|
+
import { stripLeakedClassificationJson } from './post-stream-finalization'
|
|
5
|
+
|
|
6
|
+
// A fully-valid MessageClassification serialized by the model. Mirrors the
|
|
7
|
+
// real output we observed during a live delegation turn.
|
|
8
|
+
const VALID_LEAK = JSON.stringify({
|
|
9
|
+
taskIntent: 'research',
|
|
10
|
+
isDeliverableTask: false,
|
|
11
|
+
isBroadGoal: false,
|
|
12
|
+
isLightweightDirectChat: true,
|
|
13
|
+
hasHumanSignals: false,
|
|
14
|
+
hasSignificantEvent: false,
|
|
15
|
+
isResearchSynthesis: false,
|
|
16
|
+
workType: 'general',
|
|
17
|
+
explicitToolRequests: [],
|
|
18
|
+
confidence: 0.95,
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
// Exercises stripLeakedClassificationJson against text that embeds a
// serialized classifier object (VALID_LEAK) and against look-alikes that must
// be left untouched.
describe('stripLeakedClassificationJson', () => {
  it('strips a leaked classification JSON that starts with taskIntent', () => {
    const input = `${VALID_LEAK}Task created and delegated.`
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, true)
    assert.equal(cleaned, 'Task created and delegated.')
  })

  it('strips when the leak appears mid-response', () => {
    const input = `Here you go: ${VALID_LEAK} continuing.`
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, true)
    assert.equal(cleaned.includes('taskIntent'), false)
  })

  it('leaves normal assistant text untouched', () => {
    const input = 'Your favorite color is blue.'
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, false)
    assert.equal(cleaned, input)
  })

  it('leaves a partial or unrelated JSON object alone', () => {
    // A bare object with one classifier-adjacent key but not the full shape
    // must NOT be stripped — the zod schema rejects it.
    const input = 'Prefix text. {"workType": "coding"} suffix.'
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, false)
    assert.equal(cleaned, input)
  })

  it('ignores malformed JSON that looks like a classifier leak', () => {
    const input = 'Malformed {"taskIntent": "research", "isDeliverableTask": [oops suffix.'
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, false)
    assert.equal(cleaned, input)
  })

  it('does not confuse braces inside strings', () => {
    // Splices a "label" key whose value contains braces in front of the
    // classifier fields, producing one object the brace matcher must span.
    const input = `Before {"label": "{not json}", ${VALID_LEAK.slice(1)} after`
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, true)
    assert.equal(cleaned.includes('taskIntent'), false)
  })

  it('rejects a classifier-like object with an invalid enum value', () => {
    // taskIntent must be one of the TaskIntent enum values. Garbage value is
    // rejected by safeParse so no stripping happens.
    // The fixture is derived from VALID_LEAK so the enum value is the ONLY
    // difference from an object the other cases prove is accepted. A
    // hand-written object that also omits a field could be rejected for that
    // omission and would not exercise the enum check.
    const invalid = JSON.stringify({
      ...(JSON.parse(VALID_LEAK) as Record<string, unknown>),
      taskIntent: 'totally-made-up-intent',
    })
    const input = `${invalid} not a real leak`
    const { cleaned, stripped } = stripLeakedClassificationJson(input)
    assert.equal(stripped, false)
    assert.equal(cleaned, input)
  })
})
|
|
@@ -18,32 +18,57 @@ import { runCapabilityHook } from '@/lib/server/native-capabilities'
|
|
|
18
18
|
import {
|
|
19
19
|
shouldForceExternalServiceSummary,
|
|
20
20
|
} from '@/lib/server/chat-execution/chat-streaming-utils'
|
|
21
|
-
import
|
|
21
|
+
import {
|
|
22
|
+
MessageClassificationSchema,
|
|
23
|
+
type MessageClassification,
|
|
24
|
+
} from '@/lib/server/chat-execution/message-classifier'
|
|
22
25
|
import {
|
|
23
26
|
resolveFinalStreamResponseText,
|
|
24
27
|
} from '@/lib/server/chat-execution/stream-continuation'
|
|
25
28
|
import { buildForcedExternalServiceSummary } from '@/lib/server/chat-execution/prompt-builder'
|
|
26
29
|
|
|
27
30
|
// ---------------------------------------------------------------------------
|
|
28
|
-
// Classification JSON leak detection — strips
|
|
29
|
-
//
|
|
31
|
+
// Classification JSON leak detection — strips MessageClassification objects
|
|
32
|
+
// that some models echo verbatim into their response text. Candidate JSON
|
|
33
|
+
// substrings are found by brace-matching, then validated against the actual
|
|
34
|
+
// MessageClassificationSchema — the single source of truth for what a
|
|
35
|
+
// classifier object looks like.
|
|
30
36
|
// ---------------------------------------------------------------------------
|
|
31
37
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
function stripLeakedClassificationJson(text: string): { cleaned: string; stripped: boolean } {
|
|
35
|
-
const match = CLASSIFICATION_LEAK_RE.exec(text)
|
|
36
|
-
if (!match || match.index === undefined) return { cleaned: text, stripped: false }
|
|
37
|
-
const startIdx = match.index
|
|
38
|
+
/**
 * Scans forward from the `{` at `start` and returns the index immediately
 * after its matching `}`.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string causes the following character to be skipped. Returns -1 when the
 * text ends before the brace depth returns to zero.
 */
function findBalancedObjectEnd(text: string, start: number): number {
  let openBraces = 0
  let insideString = false
  let skipNext = false
  let cursor = start
  while (cursor < text.length) {
    const current = text[cursor]
    cursor += 1 // from here on `cursor` is "one past the current character"
    if (skipNext) {
      // Character following a backslash inside a string: never significant.
      skipNext = false
    } else if (insideString) {
      if (current === '\\') skipNext = true
      else if (current === '"') insideString = false
    } else {
      switch (current) {
        case '"':
          insideString = true
          break
        case '{':
          openBraces += 1
          break
        case '}':
          openBraces -= 1
          if (openBraces === 0) return cursor
          break
      }
    }
  }
  return -1
}
|
|
60
|
+
|
|
61
|
+
/**
 * Removes the first `MessageClassification` object that a model echoed into
 * its response text.
 *
 * Every `{` in `text` is treated as a candidate start. The balanced span is
 * parsed as JSON and accepted only when it validates against
 * `MessageClassificationSchema`. The first accepted span is cut out and
 * leading whitespace is trimmed from the result.
 *
 * @param text - Raw model response text.
 * @returns `cleaned` (the text without the leak, or `text` unchanged) and
 *   `stripped` (whether anything was removed).
 */
export function stripLeakedClassificationJson(text: string): { cleaned: string; stripped: boolean } {
  for (let open = text.indexOf('{'); open !== -1; open = text.indexOf('{', open + 1)) {
    const end = findBalancedObjectEnd(text, open)
    // An unbalanced `{` here says nothing about later candidates: in
    // `{ oops {"taskIntent": …}` the second brace still closes. Keep scanning
    // instead of giving up, so a stray brace in prose or code cannot shield a
    // genuine leak further along. (Worst case this rescans the tail once per
    // unmatched brace.)
    if (end === -1) continue
    let parsed: unknown
    try { parsed = JSON.parse(text.slice(open, end)) } catch { continue }
    if (!MessageClassificationSchema.safeParse(parsed).success) continue
    log.warn(TAG, 'Stripped leaked classification JSON from model output')
    return { cleaned: (text.slice(0, open) + text.slice(end)).trimStart(), stripped: true }
  }
  return { cleaned: text, stripped: false }
}
|
|
48
73
|
|
|
49
74
|
// StreamAgentChatResult is defined inline to avoid circular dependency with stream-agent-chat.ts
|
|
@@ -78,7 +78,8 @@ describe('response-completeness', () => {
|
|
|
78
78
|
toolCallCount: 0,
|
|
79
79
|
},
|
|
80
80
|
{
|
|
81
|
-
|
|
81
|
+
timeoutMs: 50,
|
|
82
|
+
generateText: () => new Promise((resolve) => setTimeout(() => resolve('{"isIncomplete": true, "confidence": 0.9}'), 500)),
|
|
82
83
|
},
|
|
83
84
|
)
|
|
84
85
|
assert.equal(result, null)
|
|
@@ -153,7 +153,11 @@ export interface EvaluateCompletenessInput {
|
|
|
153
153
|
toolCallCount: number
|
|
154
154
|
}
|
|
155
155
|
|
|
156
|
-
const
|
|
156
|
+
const DEFAULT_COMPLETENESS_TIMEOUT_MS = 8_000

/**
 * Converts a raw `SC_COMPLETENESS_TIMEOUT_MS` value into whole milliseconds.
 *
 * @param raw - The environment value, or `undefined` when the variable is unset.
 * @param fallbackMs - Returned when `raw` is not a usable timeout.
 * @returns The truncated value when it is a finite number of at least 1 ms,
 *   otherwise `fallbackMs`.
 */
function parseCompletenessTimeoutMs(
  raw: string | undefined,
  fallbackMs: number = DEFAULT_COMPLETENESS_TIMEOUT_MS,
): number {
  const parsed = Number(raw)
  if (!Number.isFinite(parsed)) return fallbackMs
  // Validate AFTER truncation: a fractional value such as "0.5" is positive
  // but truncates to 0, which would make the completeness check time out
  // immediately instead of falling back to the default.
  const wholeMs = Math.trunc(parsed)
  return wholeMs >= 1 ? wholeMs : fallbackMs
}

const COMPLETENESS_TIMEOUT_MS = parseCompletenessTimeoutMs(process.env.SC_COMPLETENESS_TIMEOUT_MS)
|
|
157
161
|
|
|
158
162
|
/**
|
|
159
163
|
* Evaluate whether an agent response is incomplete — i.e. the agent described
|
|
@@ -164,7 +168,7 @@ const COMPLETENESS_TIMEOUT_MS = 2_000
|
|
|
164
168
|
*/
|
|
165
169
|
export async function evaluateResponseCompleteness(
|
|
166
170
|
input: EvaluateCompletenessInput,
|
|
167
|
-
options?: { generateText?: (prompt: string) => Promise<string
|
|
171
|
+
options?: { generateText?: (prompt: string) => Promise<string>; timeoutMs?: number },
|
|
168
172
|
): Promise<ResponseCompleteness | null> {
|
|
169
173
|
const response = input.response.trim()
|
|
170
174
|
if (!response) return null
|
|
@@ -175,6 +179,10 @@ export async function evaluateResponseCompleteness(
|
|
|
175
179
|
|
|
176
180
|
const prompt = buildCompletenessPrompt(input.message, response, input.toolCallCount)
|
|
177
181
|
|
|
182
|
+
const timeoutMs = typeof options?.timeoutMs === 'number' && options.timeoutMs > 0
|
|
183
|
+
? options.timeoutMs
|
|
184
|
+
: COMPLETENESS_TIMEOUT_MS
|
|
185
|
+
|
|
178
186
|
const startMs = Date.now()
|
|
179
187
|
try {
|
|
180
188
|
const responseText = await Promise.race([
|
|
@@ -189,7 +197,7 @@ export async function evaluateResponseCompleteness(
|
|
|
189
197
|
return extractModelText(result.content)
|
|
190
198
|
})(),
|
|
191
199
|
new Promise<never>((_, reject) =>
|
|
192
|
-
setTimeout(() => reject(new Error('completeness-timeout')),
|
|
200
|
+
setTimeout(() => reject(new Error('completeness-timeout')), timeoutMs),
|
|
193
201
|
),
|
|
194
202
|
])
|
|
195
203
|
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { test } from 'node:test'
|
|
3
|
+
import { stripAgentReactionTokens } from '@/lib/server/chatrooms/chatroom-agent-signals'
|
|
4
|
+
|
|
5
|
+
// A marker with a complete payload on its own trailing line is removed.
test('stripAgentReactionTokens removes single REACTION marker glued to text', () => {
  const raw = '11 plus 7 equals 18.\n[REACTION]{"emoji": "✅", "to": "c93bb040"}'
  assert.equal(stripAgentReactionTokens(raw), '11 plus 7 equals 18.')
})

// Every marker in the message is removed; the surrounding prose survives.
test('stripAgentReactionTokens removes multiple REACTION markers', () => {
  const raw = 'Sure thing.[REACTION]{"emoji":"👍","to":"a1"} Got it.[REACTION]{"emoji":"🎉","to":"b2"}'
  const result = stripAgentReactionTokens(raw)
  assert.equal(result.includes('[REACTION]'), false)
  assert.equal(result.includes('Sure thing.'), true)
  assert.equal(result.includes('Got it.'), true)
})

test('stripAgentReactionTokens preserves text without markers', () => {
  const raw = 'Just a normal message with no markers.'
  assert.equal(stripAgentReactionTokens(raw), raw)
})

test('stripAgentReactionTokens handles empty string', () => {
  const result = stripAgentReactionTokens('')
  assert.equal(result, '')
})

// Removing a marker that sat between blank lines must not leave 3+ newlines.
test('stripAgentReactionTokens collapses excess whitespace from removed lines', () => {
  const raw = 'Line 1\n\n\n[REACTION]{"emoji":"✅","to":"x"}\n\n\nLine 2'
  const result = stripAgentReactionTokens(raw)
  assert.equal(result.includes('[REACTION]'), false)
  assert.equal(result.includes('Line 1'), true)
  assert.equal(result.includes('Line 2'), true)
  assert.equal(/\n{3,}/.test(result), false, 'no triple newlines')
})

// A payload without "to" is not a valid reaction, so the text is left as written.
test('stripAgentReactionTokens leaves [REACTION] intact when JSON is invalid (incomplete payload)', () => {
  const raw = 'Sure.[REACTION]{"emoji":"👍"} Done.'
  const result = stripAgentReactionTokens(raw)
  assert.ok(result.includes('[REACTION]'), 'invalid reaction (missing "to") preserved verbatim')
})

test('stripAgentReactionTokens leaves [REACTION] intact when followed by non-JSON', () => {
  const raw = 'Notes: [REACTION] is a label, not a marker.'
  assert.equal(stripAgentReactionTokens(raw), raw)
})

// A nested object inside the payload must be consumed up to its outer brace.
test('stripAgentReactionTokens handles nested JSON without truncation', () => {
  const raw = 'Tags applied.[REACTION]{"emoji":"🏷️","to":"abc","meta":{"reason":"label"}}'
  assert.equal(stripAgentReactionTokens(raw), 'Tags applied.')
})
|