onbuzz 4.8.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "onbuzz",
3
- "version": "4.8.0",
3
+ "version": "4.8.1",
4
4
  "description": "Loxia OnBuzz - Your AI Fleet",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -0,0 +1,319 @@
1
+ /**
2
+ * Unit tests for `_resolveModelApiType` + `_pickSystemPromptForModel`
3
+ * on AgentScheduler — the runtime-side half of the "trim duplicated
4
+ * tool docs for Responses-API models" feature.
5
+ *
6
+ * Three concerns covered:
7
+ * 1. Classification (_resolveModelApiType) — every catalog shape the
8
+ * backend's _inferRouting recognizes must produce the same answer
9
+ * on the CLI side, or the optimization fires on the wrong models.
10
+ * 2. Selection (_pickSystemPromptForModel) — must fall back to
11
+ * `agent.systemPrompt` for every safety-net path:
12
+ * • modelsService missing → fallback
13
+ * • model not in catalog → fallback
14
+ * • apiType resolves to chat_completion → fallback
15
+ * • originalSystemPrompt missing → fallback
16
+ * • toolsRegistry missing → fallback
17
+ * • rebuild throws → fallback
18
+ * Only the happy path (Responses model + everything available)
19
+ * returns the trimmed rebuild.
20
+ * 3. Caching — same (agent, model) twice should rebuild ONCE; the
21
+ * invalidator should clear entries for ONE agent only.
22
+ */
23
+ import { jest, describe, test, expect, beforeEach } from '@jest/globals';
24
+
25
+ // Stub the activity service so importing the scheduler doesn't drag in
26
+ // the full agent dependency graph for these unit tests.
27
+ jest.unstable_mockModule('../../services/agentActivityService.js', () => ({
28
+ shouldAgentBeActive: () => ({ active: false, reason: 'stub' }),
29
+ getActiveAgents: () => [],
30
+ shouldSkipIteration: () => false,
31
+ }));
32
+
33
+ const { default: AgentScheduler } = await import('../agentScheduler.js');
34
+
35
+ // ─── Test-only helpers ────────────────────────────────────────────────
36
+ function makeScheduler({ models = [], registry = null, logger = null } = {}) {
37
+ const modelsService = {
38
+ getModels: () => models,
39
+ };
40
+ const agentPool = {
41
+ toolsRegistry: registry,
42
+ getAllAgents: async () => new Map(),
43
+ getAgent: async () => null,
44
+ };
45
+ const aiService = {};
46
+ const messageProcessor = {};
47
+ const log = logger || {
48
+ info() {}, warn() {}, error() {}, debug() {},
49
+ };
50
+ return new AgentScheduler(
51
+ agentPool,
52
+ messageProcessor,
53
+ aiService,
54
+ log,
55
+ null, // webSocketManager
56
+ null, // modelRouterService
57
+ modelsService, // ← what we care about
58
+ );
59
+ }
60
+
61
+ // A minimal fake registry that records the apiType it was called with
62
+ // so we can assert the scheduler propagates it correctly.
63
+ function makeRegistry({ shouldThrow = false } = {}) {
64
+ const calls = [];
65
+ return {
66
+ calls,
67
+ enhanceSystemPrompt(prompt, capabilities, options) {
68
+ calls.push({ prompt, capabilities, options });
69
+ if (shouldThrow) throw new Error('boom');
70
+ const apiTag = options?.apiType === 'responses' ? '[trimmed]' : '[full]';
71
+ return `${prompt}\n## TOOLS ${apiTag} for caps=${(capabilities || []).join(',')}`;
72
+ },
73
+ };
74
+ }
75
+
76
+ // ──────────────────────────────────────────────────────────────────────
77
+ // 1. _resolveModelApiType — parity with backend's _inferRouting
78
+ // ──────────────────────────────────────────────────────────────────────
79
+
80
+ describe('_resolveModelApiType — catalog → routing decision', () => {
81
+ test('api_type=["responses"] alone → "responses"', () => {
82
+ const s = makeScheduler({ models: [{ name: 'codex-mini', api_type: ['responses'] }] });
83
+ expect(s._resolveModelApiType('codex-mini')).toBe('responses');
84
+ });
85
+
86
+ test('api_type=["chat_completion","responses"] (BOTH present) → "chat_completion"', () => {
87
+ // Backend rule: only flips to responses when chat_completion is ABSENT.
88
+ const s = makeScheduler({
89
+ models: [{ name: 'gpt-5', api_type: ['chat_completion', 'responses'] }],
90
+ });
91
+ expect(s._resolveModelApiType('gpt-5')).toBe('chat_completion');
92
+ });
93
+
94
+ test('api_type=["chat_completion"] → "chat_completion"', () => {
95
+ const s = makeScheduler({ models: [{ name: 'claude', api_type: ['chat_completion'] }] });
96
+ expect(s._resolveModelApiType('claude')).toBe('chat_completion');
97
+ });
98
+
99
+ test('capabilities.responses=="true" + chatCompletion=="false" → "responses"', () => {
100
+ const s = makeScheduler({
101
+ models: [{
102
+ name: 'o3',
103
+ api_type: ['responses'],
104
+ capabilities: { responses: 'true', chatCompletion: 'false' },
105
+ }],
106
+ });
107
+ expect(s._resolveModelApiType('o3')).toBe('responses');
108
+ });
109
+
110
+ test('explicit useResponsesApi flag → "responses"', () => {
111
+ const s = makeScheduler({
112
+ models: [{ name: 'foo', api_type: [], useResponsesApi: true }],
113
+ });
114
+ expect(s._resolveModelApiType('foo')).toBe('responses');
115
+ });
116
+
117
+ test('name-pattern fallback: "codex" → "responses" even with no catalog data', () => {
118
+ const s = makeScheduler({ models: [{ name: 'gpt-5-1-codex-mini' }] });
119
+ expect(s._resolveModelApiType('gpt-5-1-codex-mini')).toBe('responses');
120
+ });
121
+
122
+ test('name-pattern fallback: "gpt-X-pro" → "responses"', () => {
123
+ const s = makeScheduler({ models: [{ name: 'gpt-5-pro' }] });
124
+ expect(s._resolveModelApiType('gpt-5-pro')).toBe('responses');
125
+ });
126
+
127
+ test('unknown model returns undefined (caller falls back to old behaviour)', () => {
128
+ const s = makeScheduler({ models: [{ name: 'claude', api_type: ['chat_completion'] }] });
129
+ expect(s._resolveModelApiType('mystery-model')).toBeUndefined();
130
+ });
131
+
132
+ test('modelsService missing → undefined (safe — caller falls back)', () => {
133
+ const s = new AgentScheduler({ toolsRegistry: null }, {}, {}, {
134
+ info() {}, warn() {}, error() {}, debug() {},
135
+ });
136
+ expect(s._resolveModelApiType('codex-mini')).toBeUndefined();
137
+ });
138
+
139
+ test('arbitrary exception in catalog → undefined (defensive)', () => {
140
+ // getModels() throws — must not propagate.
141
+ const s = new AgentScheduler(
142
+ { toolsRegistry: null },
143
+ {},
144
+ {},
145
+ { info() {}, warn() {}, error() {}, debug() {} },
146
+ null,
147
+ null,
148
+ { getModels() { throw new Error('catalog offline'); } },
149
+ );
150
+ expect(s._resolveModelApiType('codex-mini')).toBeUndefined();
151
+ });
152
+ });
153
+
154
+ // ──────────────────────────────────────────────────────────────────────
155
+ // 2. _pickSystemPromptForModel — happy path + every safety-net path
156
+ // ──────────────────────────────────────────────────────────────────────
157
+
158
+ describe('_pickSystemPromptForModel — back-compat fallbacks', () => {
159
+ const BAKED_PROMPT = 'BAKED: agent persona\n## AVAILABLE TOOLS\n…lots of text…';
160
+ const ORIGINAL_PROMPT = 'Agent persona';
161
+
162
+ const agentFor = (overrides = {}) => ({
163
+ id: 'agent-1',
164
+ systemPrompt: BAKED_PROMPT,
165
+ originalSystemPrompt: ORIGINAL_PROMPT,
166
+ capabilities: ['memory', 'terminal'],
167
+ skills: [],
168
+ ...overrides,
169
+ });
170
+
171
+ test('chat-completion model → returns agent.systemPrompt verbatim', async () => {
172
+ const s = makeScheduler({
173
+ models: [{ name: 'claude', api_type: ['chat_completion'] }],
174
+ registry: makeRegistry(),
175
+ });
176
+ const out = await s._pickSystemPromptForModel(agentFor(), 'claude');
177
+ expect(out).toBe(BAKED_PROMPT);
178
+ });
179
+
180
+ test('unknown model → returns agent.systemPrompt verbatim', async () => {
181
+ const s = makeScheduler({ models: [], registry: makeRegistry() });
182
+ const out = await s._pickSystemPromptForModel(agentFor(), 'never-heard-of');
183
+ expect(out).toBe(BAKED_PROMPT);
184
+ });
185
+
186
+ test('Responses model BUT originalSystemPrompt missing → fallback', async () => {
187
+ // Very old persisted agent (pre-originalSystemPrompt storage).
188
+ const s = makeScheduler({
189
+ models: [{ name: 'codex', api_type: ['responses'] }],
190
+ registry: makeRegistry(),
191
+ });
192
+ const out = await s._pickSystemPromptForModel(
193
+ agentFor({ originalSystemPrompt: undefined }),
194
+ 'codex',
195
+ );
196
+ expect(out).toBe(BAKED_PROMPT);
197
+ });
198
+
199
+ test('Responses model BUT toolsRegistry missing → fallback', async () => {
200
+ const s = makeScheduler({
201
+ models: [{ name: 'codex', api_type: ['responses'] }],
202
+ registry: null,
203
+ });
204
+ const out = await s._pickSystemPromptForModel(agentFor(), 'codex');
205
+ expect(out).toBe(BAKED_PROMPT);
206
+ });
207
+
208
+ test('Responses model AND rebuild throws → fallback (no crash)', async () => {
209
+ const s = makeScheduler({
210
+ models: [{ name: 'codex', api_type: ['responses'] }],
211
+ registry: makeRegistry({ shouldThrow: true }),
212
+ });
213
+ const out = await s._pickSystemPromptForModel(agentFor(), 'codex');
214
+ expect(out).toBe(BAKED_PROMPT);
215
+ });
216
+ });
217
+
218
+ // ──────────────────────────────────────────────────────────────────────
219
+ // 3. _pickSystemPromptForModel — happy path
220
+ // ──────────────────────────────────────────────────────────────────────
221
+
222
+ describe('_pickSystemPromptForModel — Responses model rebuild', () => {
223
+ const agent = {
224
+ id: 'agent-1',
225
+ systemPrompt: 'STALE (chat-completion shape)',
226
+ originalSystemPrompt: 'Agent persona',
227
+ capabilities: ['memory', 'terminal'],
228
+ skills: [],
229
+ };
230
+
231
+ test('rebuilds with apiType:"responses" and returns trimmed prompt', async () => {
232
+ const reg = makeRegistry();
233
+ const s = makeScheduler({
234
+ models: [{ name: 'codex', api_type: ['responses'] }],
235
+ registry: reg,
236
+ });
237
+ const out = await s._pickSystemPromptForModel(agent, 'codex');
238
+ expect(out).toContain('[trimmed]');
239
+ expect(out).toContain('Agent persona');
240
+ // The registry was called with apiType: 'responses' AND the agent's capabilities.
241
+ expect(reg.calls).toHaveLength(1);
242
+ expect(reg.calls[0].options.apiType).toBe('responses');
243
+ expect(reg.calls[0].capabilities).toEqual(['memory', 'terminal']);
244
+ });
245
+
246
+ test('caches per (agentId, modelName) — second call does NOT rebuild', async () => {
247
+ const reg = makeRegistry();
248
+ const s = makeScheduler({
249
+ models: [{ name: 'codex', api_type: ['responses'] }],
250
+ registry: reg,
251
+ });
252
+ await s._pickSystemPromptForModel(agent, 'codex');
253
+ await s._pickSystemPromptForModel(agent, 'codex');
254
+ await s._pickSystemPromptForModel(agent, 'codex');
255
+ expect(reg.calls).toHaveLength(1);
256
+ });
257
+
258
+ test('different models for same agent → SEPARATE cache entries', async () => {
259
+ const reg = makeRegistry();
260
+ const s = makeScheduler({
261
+ models: [
262
+ { name: 'codex', api_type: ['responses'] },
263
+ { name: 'gpt-5-pro', api_type: ['responses'] },
264
+ ],
265
+ registry: reg,
266
+ });
267
+ await s._pickSystemPromptForModel(agent, 'codex');
268
+ await s._pickSystemPromptForModel(agent, 'gpt-5-pro');
269
+ expect(reg.calls).toHaveLength(2);
270
+ });
271
+
272
+ test('switching back to chat-completion mid-session uses the persisted prompt unchanged', async () => {
273
+ const reg = makeRegistry();
274
+ const s = makeScheduler({
275
+ models: [
276
+ { name: 'codex', api_type: ['responses'] },
277
+ { name: 'claude', api_type: ['chat_completion'] },
278
+ ],
279
+ registry: reg,
280
+ });
281
+ const native = await s._pickSystemPromptForModel(agent, 'codex');
282
+ const inline = await s._pickSystemPromptForModel(agent, 'claude');
283
+ expect(native).toContain('[trimmed]');
284
+ expect(inline).toBe('STALE (chat-completion shape)'); // persisted, untouched
285
+ expect(reg.calls).toHaveLength(1); // only the codex rebuild ran
286
+ });
287
+ });
288
+
289
+ // ──────────────────────────────────────────────────────────────────────
290
+ // 4. Cache invalidation
291
+ // ──────────────────────────────────────────────────────────────────────
292
+
293
+ describe('_invalidateNativePromptCache — selective per-agent clear', () => {
294
+ test('clears entries for ONE agent only, leaves others alone', async () => {
295
+ const reg = makeRegistry();
296
+ const s = makeScheduler({
297
+ models: [{ name: 'codex', api_type: ['responses'] }],
298
+ registry: reg,
299
+ });
300
+ const a1 = { id: 'a1', systemPrompt: 'p1', originalSystemPrompt: 'persona-1', capabilities: ['memory'], skills: [] };
301
+ const a2 = { id: 'a2', systemPrompt: 'p2', originalSystemPrompt: 'persona-2', capabilities: ['terminal'], skills: [] };
302
+
303
+ await s._pickSystemPromptForModel(a1, 'codex');
304
+ await s._pickSystemPromptForModel(a2, 'codex');
305
+ expect(reg.calls).toHaveLength(2);
306
+
307
+ s._invalidateNativePromptCache('a1');
308
+
309
+ // Re-fetching a1 → rebuild. a2 → still cached.
310
+ await s._pickSystemPromptForModel(a1, 'codex');
311
+ await s._pickSystemPromptForModel(a2, 'codex');
312
+ expect(reg.calls).toHaveLength(3); // only a1 rebuilt
313
+ });
314
+
315
+ test('invalidating for an agent that never rendered is a no-op (does not throw)', async () => {
316
+ const s = makeScheduler({ models: [], registry: makeRegistry() });
317
+ expect(() => s._invalidateNativePromptCache('never-seen')).not.toThrow();
318
+ });
319
+ });
@@ -380,6 +380,18 @@ class AgentPool {
380
380
  originalLength: baseSystemPrompt?.length || 0,
381
381
  enhancedLength: enhancedSystemPrompt?.length || 0
382
382
  });
383
+
384
+ // The scheduler caches per-(agent, model) Responses-API prompts
385
+ // built from this agent's `originalSystemPrompt` + capabilities.
386
+ // Both inputs just changed, so any cached rebuilds are stale.
387
+ // No-op when the scheduler isn't attached (tests / very-early
388
+ // boot) or when it predates this method (old binaries during
389
+ // a rolling upgrade).
390
+ try {
391
+ this.scheduler?._invalidateNativePromptCache?.(agentId);
392
+ } catch (e) {
393
+ this.logger.debug?.('Failed to invalidate native prompt cache', { agentId, error: e.message });
394
+ }
383
395
  } catch (error) {
384
396
  this.logger.error(`Failed to regenerate system prompt with updated capabilities`, {
385
397
  agentId,
@@ -60,6 +60,19 @@ class AgentScheduler {
60
60
  // Initialize ContextInjectionService for file attachments
61
61
  this.contextInjectionService = new ContextInjectionService({}, logger);
62
62
 
63
+ // Per-turn system-prompt rebuild cache for native-API models.
64
+ // Agents persist a `systemPrompt` baked at create-time for the
65
+ // chat-completion shape (text descriptions of every tool). When a
66
+ // turn targets a Responses-API model (Codex / o-series / gpt-5-pro),
67
+ // we want a TRIMMED prompt that omits text docs for tools whose
68
+ // structured schemas are sent in `tools:`. Rebuilding fresh each
69
+ // turn would be wasteful — agents typically stay on the same model
70
+ // for many turns — so we memoize per (agentId, modelName).
71
+ //
72
+ // Cleared on process restart and on agent updates that change the
73
+ // base prompt or capabilities (see `_invalidateNativePromptCache`).
74
+ this._nativePromptCache = new Map(); // `${agentId}|${modelName}` → string
75
+
63
76
  // Initialize FlowContextService for flow execution context
64
77
  this.flowContextService = new FlowContextService({}, logger);
65
78
 
@@ -1919,8 +1932,17 @@ class AgentScheduler {
1919
1932
  // After compaction, retrieve messages from AgentPool (will use compacted if available)
1920
1933
  const messagesToSend = await this.agentPool.getMessagesForAI(agentId, targetModel);
1921
1934
 
1922
- // Inject TaskManager instructions for AGENT mode
1923
- let enhancedSystemPrompt = agent.systemPrompt;
1935
+ // ── Pick the right system-prompt shape for the target model ──
1936
+ // Default: use the agent's persisted `systemPrompt` (baked at
1937
+ // create-time with full text descriptions for every tool — the
1938
+ // chat-completion shape). For models that use the Responses API
1939
+ // (native function-calling), rebuild a trimmed version that
1940
+ // omits text docs for tools whose structured schemas we send in
1941
+ // `tools:`. Falls back to the persisted prompt whenever the
1942
+ // model's apiType is unknown OR the agent has no stored original
1943
+ // prompt — preserves existing behaviour for old agents and
1944
+ // unknown models. See `_pickSystemPromptForModel`.
1945
+ let enhancedSystemPrompt = await this._pickSystemPromptForModel(agent, targetModel);
1924
1946
  if (agent.mode === AGENT_MODES.AGENT) {
1925
1947
  const taskManagerInstruction = "\n\nIMPORTANT: You are in AGENT mode. The use of TaskManager tool is mandatory.\n\n" +
1926
1948
  "TASK LIFECYCLE (follow this, don't improvise):\n" +
@@ -2169,6 +2191,156 @@ class AgentScheduler {
2169
2191
  }
2170
2192
  }
2171
2193
 
2194
+ /**
2195
+ * Choose the right base system prompt for the target model.
2196
+ *
2197
+ * • If the model's catalog entry says it uses the Responses API
2198
+ * ('responses' in its api_type / capabilities) AND the agent has
2199
+ * an `originalSystemPrompt` we can rebuild from, return a
2200
+ * freshly-built prompt that omits text descriptions for tools
2201
+ * with native function schemas (see baseTool.js — those tools'
2202
+ * structured schemas in `tools:` are the canonical source for
2203
+ * these models, so the text docs are pure duplication).
2204
+ *
2205
+ * • Otherwise return the agent's persisted `systemPrompt` exactly
2206
+ * as it is today. This covers:
2207
+ * – chat-completion models (no native function calling)
2208
+ * – models we can't classify (modelsService offline / catalog
2209
+ * field missing) — fail safe to old behaviour
2210
+ * – very old agents persisted before `originalSystemPrompt`
2211
+ * was stored — fail safe to old behaviour
2212
+ *
2213
+ * Result is memoized per `(agentId, targetModel)` to avoid rebuilding
2214
+ * on every turn. The cache is invalidated whenever the agent's base
2215
+ * prompt or capabilities change (see `_invalidateNativePromptCache`).
2216
+ *
2217
+ * @private
2218
+ * @param {Object} agent - Agent record
2219
+ * @param {string} targetModel - Model name about to be called
2220
+ * @returns {Promise<string>} The prompt to use as the base
2221
+ */
2222
+ async _pickSystemPromptForModel(agent, targetModel) {
2223
+ // 1. Resolve the model's API type. Unknown → use persisted prompt.
2224
+ const apiType = this._resolveModelApiType(targetModel);
2225
+ if (apiType !== 'responses') return agent.systemPrompt;
2226
+
2227
+ // 2. Need the original (un-enhanced) prompt to rebuild from. Without
2228
+ // it we can't safely re-add the trimmed tool docs — fall back
2229
+ // to the persisted shape (which works for chat-completion and
2230
+ // is also accepted by Responses API, just with the duplication
2231
+ // cost). This is the back-compat path for legacy agents.
2232
+ if (!agent.originalSystemPrompt) return agent.systemPrompt;
2233
+
2234
+ // 3. Cache lookup.
2235
+ const cacheKey = `${agent.id}|${targetModel}`;
2236
+ const cached = this._nativePromptCache.get(cacheKey);
2237
+ if (cached) return cached;
2238
+
2239
+ // 4. Rebuild. The agentPool stores the toolsRegistry — reuse it so
2240
+ // we go through the exact same code path that built the original
2241
+ // prompt, just with apiType set. Skills index + the rest of the
2242
+ // augmentation must be reapplied; mirror what createAgent does.
2243
+ try {
2244
+ const registry = this.agentPool?.toolsRegistry;
2245
+ if (!registry) return agent.systemPrompt;
2246
+
2247
+ let rebuilt = registry.enhanceSystemPrompt(
2248
+ agent.originalSystemPrompt,
2249
+ agent.capabilities || [],
2250
+ { apiType: 'responses' },
2251
+ );
2252
+
2253
+ // Re-inject ASSIGNED SKILLS block if present (createAgent appends
2254
+ // this after enhanceSystemPrompt — see agentPool.js:108).
2255
+ if (Array.isArray(agent.skills) && agent.skills.length > 0) {
2256
+ try {
2257
+ const { getSkillsService } = await import('../services/skillsService.js');
2258
+ const skillsService = getSkillsService(this.logger);
2259
+ await skillsService.initialize();
2260
+ const summaries = await skillsService.getSkillSummaries(agent.skills);
2261
+ if (summaries.length > 0) {
2262
+ rebuilt += '\n\n## ASSIGNED SKILLS\n\n';
2263
+ rebuilt += 'Use the skills tool to browse and load skill content. Use "describe" to see sections, "read-section" to load specific parts.\n\n';
2264
+ for (const s of summaries) {
2265
+ const sections = s.sections?.length ? `\n Sections: ${s.sections.map(h => h.replace(/^#+\s*/, '')).join(', ')}` : '';
2266
+ rebuilt += `- **${s.name}** (${s.lineCount} lines): ${s.description}${sections}\n`;
2267
+ }
2268
+ }
2269
+ } catch (e) {
2270
+ this.logger?.debug?.('Failed to re-inject skills index for native prompt', { error: e.message });
2271
+ }
2272
+ }
2273
+
2274
+ this._nativePromptCache.set(cacheKey, rebuilt);
2275
+ this.logger?.debug?.('Built native-API system prompt', {
2276
+ agentId: agent.id,
2277
+ targetModel,
2278
+ originalLength: agent.systemPrompt?.length || 0,
2279
+ rebuiltLength: rebuilt.length,
2280
+ savedTokensApprox: Math.round(((agent.systemPrompt?.length || 0) - rebuilt.length) / 4),
2281
+ });
2282
+ return rebuilt;
2283
+ } catch (err) {
2284
+ // Anything goes wrong → fall back to old behaviour. Failing
2285
+ // closed (no prompt) would break the agent's turn; failing open
2286
+ // (use chat-completion shape) just keeps the duplication.
2287
+ this.logger?.warn?.('Native system-prompt rebuild failed — using persisted prompt', {
2288
+ agentId: agent.id,
2289
+ targetModel,
2290
+ error: err.message,
2291
+ });
2292
+ return agent.systemPrompt;
2293
+ }
2294
+ }
2295
+
2296
+ /**
2297
+ * Look up a model's API type from the catalog. Returns 'responses',
2298
+ * 'chat_completion', or undefined when unknown. The catalog exposes
2299
+ * `api_type` as an array and/or `capabilities.responses`/`capabilities.chatCompletion`
2300
+ * — mirror the backend's _inferRouting precedence so the CLI's
2301
+ * classification matches the backend's routing decision exactly.
2302
+ * @private
2303
+ */
2304
+ _resolveModelApiType(modelName) {
2305
+ try {
2306
+ if (!this.modelsService || typeof this.modelsService.getModels !== 'function') return undefined;
2307
+ const models = this.modelsService.getModels();
2308
+ const m = models.find(x => x.name === modelName);
2309
+ if (!m) return undefined;
2310
+
2311
+ const apiType = Array.isArray(m.api_type) ? m.api_type : (m.api_type ? [m.api_type] : []);
2312
+ const caps = m.capabilities || {};
2313
+
2314
+ // Mirrors backend services/llmServiceFactory.js _inferRouting:
2315
+ // responses if api_type contains 'responses' AND not 'chat_completion'
2316
+ // OR capabilities.responses === 'true' / chatCompletion === 'false'
2317
+ // OR explicit useResponsesApi flag
2318
+ if (apiType.includes('responses') && !apiType.includes('chat_completion')) return 'responses';
2319
+ if (caps.chatCompletion === 'false' && (caps.responses === 'true' || apiType.includes('responses'))) return 'responses';
2320
+ if (m.useResponsesApi) return 'responses';
2321
+ // Name-based fallback (last resort — only when catalog has no routing data)
2322
+ if (/codex/i.test(modelName) || /gpt.*-pro$/i.test(modelName)) return 'responses';
2323
+ return 'chat_completion';
2324
+ } catch (err) {
2325
+ // Defensive — never block the turn on a classification failure.
2326
+ this.logger?.debug?.('Model apiType resolution failed', { modelName, error: err.message });
2327
+ return undefined;
2328
+ }
2329
+ }
2330
+
2331
+ /**
2332
+ * Drop cached native prompts for an agent. Called by agentPool when
2333
+ * the base prompt or capabilities change so the next turn rebuilds.
2334
+ * Exposed so agentPool can call it without poking internal state.
2335
+ */
2336
+ _invalidateNativePromptCache(agentId) {
2337
+ for (const key of this._nativePromptCache.keys()) {
2338
+ if (key.startsWith(`${agentId}|`)) {
2339
+ this._nativePromptCache.delete(key);
2340
+ }
2341
+ }
2342
+ }
2343
+
2172
2344
  /**
2173
2345
  * Get AI response using streaming with WebSocket broadcast
2174
2346
  * @param {string} agentId - Agent ID
@@ -1,47 +1,65 @@
1
1
  /**
2
2
  * Regression tests for the router model name convention.
3
3
  *
4
- * Background: Dynamic Routing silently no-op'd for an entire release
5
- * because the CLI's `ROUTER_MODEL` constant was `'autopilot-model-router'`
6
- * while the catalog (the source of truth that the backend's /llm/chat
7
- * looks up) keys the entry as `'model-router'`. The catalog's regex
8
- * fallback is wrapped with ^…$ at index-build time in the backend
9
- * (services/modelCatalogService.js:109), so the `autopilot-` prefix
10
- * caused every routing-decision call to return 400, the
11
- * ModelRouterService caught + fell back to the current model, and no
12
- * routing ever happened.
4
+ * Background: two rounds of the same shape of bug:
13
5
  *
14
- * These tests pin the post-fix invariant and catch any reintroduction
15
- * of a product-name prefix in the future.
6
+ * Round 1: Dynamic Routing silently no-op'd because ROUTER_MODEL
7
+ * was `'autopilot-model-router'` while the catalog keyed the entry
8
+ * as `'model-router'`. Fixed by changing the constant to the bare
9
+ * form.
10
+ *
11
+ * Round 2: Even the bare `'model-router'` key didn't exist in the
12
+ * live catalog, because the catalog discovers deployments by
13
+ * underlying-model-name (not by Azure deployment-name). The team's
14
+ * actual deployment was named `autopilot-model-router` in Azure but
15
+ * its underlying model is `gpt-4.1-nano` — so the live catalog
16
+ * keys the entry under `gpt-4.1-nano`. The CLI now defaults to
17
+ * that name.
18
+ *
19
+ * These tests pin the new invariant + the override mechanism + catch
20
+ * any reintroduction of a product-name prefix.
16
21
  *
17
22
  * Why a separate test file: the existing modelRouterService.test.js
18
23
  * mocks the constants module at the top of the file, so it cannot
19
24
  * assert anything about the REAL value of MODEL_ROUTER_CONFIG. This
20
25
  * file imports the real constants instead.
21
26
  */
22
- import { describe, test, expect } from '@jest/globals';
27
+ import { describe, test, expect, beforeEach, afterEach } from '@jest/globals';
23
28
  import fs from 'node:fs';
24
29
  import path from 'node:path';
25
30
  import { fileURLToPath } from 'node:url';
26
31
 
27
- import { MODEL_ROUTER_CONFIG } from '../../utilities/constants.js';
28
-
29
32
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
30
33
  const SRC_ROOT = path.resolve(__dirname, '../..');
31
34
 
32
35
  describe('Router model naming — matches catalog convention', () => {
33
- test('ROUTER_MODEL is exactly "model-router"', () => {
34
- // This is the modelKey in autopilot-model-catalog's models_registry.json.
35
- // Any other value would force the backend through the regex fallback,
36
- // which is anchored ^…$ and will reject product-prefixed forms.
37
- expect(MODEL_ROUTER_CONFIG.ROUTER_MODEL).toBe('model-router');
36
+ test('default ROUTER_MODEL is "gpt-4.1-nano" (current live catalog key)', async () => {
37
+ // The autopilot-model-router deployment's underlying model is
38
+ // gpt-4.1-nano. The catalog keys entries by underlying model, so
39
+ // this is the canonical name the CLI must request.
40
+ delete process.env.LOXIA_ROUTER_MODEL;
41
+ // Re-import to pick up the (re-)evaluated default.
42
+ const fresh = await import(`../../utilities/constants.js?nocache=${Date.now()}`);
43
+ expect(fresh.MODEL_ROUTER_CONFIG.ROUTER_MODEL).toBe('gpt-4.1-nano');
44
+ });
45
+
46
+ test('LOXIA_ROUTER_MODEL env var overrides the default (no rebuild needed)', async () => {
47
+ process.env.LOXIA_ROUTER_MODEL = 'gpt-4o-mini';
48
+ try {
49
+ const fresh = await import(`../../utilities/constants.js?nocache=${Date.now()}`);
50
+ expect(fresh.MODEL_ROUTER_CONFIG.ROUTER_MODEL).toBe('gpt-4o-mini');
51
+ } finally {
52
+ delete process.env.LOXIA_ROUTER_MODEL;
53
+ }
38
54
  });
39
55
 
40
- test('ROUTER_MODEL does NOT carry a product/brand prefix', () => {
41
- // Defense-in-depth: even if the catalog modelKey ever changes,
42
- // the name must not start with a product prefix like "autopilot-"
56
+ test('ROUTER_MODEL does NOT carry a product/brand prefix', async () => {
57
+ // Defense-in-depth: even if the canonical name ever changes,
58
+ // it must not start with a product prefix like "autopilot-"
43
59
  // or "onbuzz-". The catalog's canonical names are product-agnostic.
44
- const v = MODEL_ROUTER_CONFIG.ROUTER_MODEL;
60
+ delete process.env.LOXIA_ROUTER_MODEL;
61
+ const fresh = await import(`../../utilities/constants.js?nocache=${Date.now()}`);
62
+ const v = fresh.MODEL_ROUTER_CONFIG.ROUTER_MODEL;
45
63
  expect(v).not.toMatch(/^autopilot[-_]/i);
46
64
  expect(v).not.toMatch(/^onbuzz[-_]/i);
47
65
  expect(v).not.toMatch(/^loxia[-_]/i);