onbuzz 4.8.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "onbuzz",
3
- "version": "4.8.0",
3
+ "version": "4.8.1",
4
4
  "description": "Loxia OnBuzz - Your AI Fleet",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -0,0 +1,319 @@
1
+ /**
2
+ * Unit tests for `_resolveModelApiType` + `_pickSystemPromptForModel`
3
+ * on AgentScheduler — the runtime-side half of the "trim duplicated
4
+ * tool docs for Responses-API models" feature.
5
+ *
6
+ * Three concerns covered:
7
+ * 1. Classification (_resolveModelApiType) — every catalog shape the
8
+ * backend's _inferRouting recognizes must produce the same answer
9
+ * on the CLI side, or the optimization fires on the wrong models.
10
+ * 2. Selection (_pickSystemPromptForModel) — must fall back to
11
+ * `agent.systemPrompt` for every safety-net path:
12
+ * • modelsService missing → fallback
13
+ * • model not in catalog → fallback
14
+ * • apiType resolves to chat_completion → fallback
15
+ * • originalSystemPrompt missing → fallback
16
+ * • toolsRegistry missing → fallback
17
+ * • rebuild throws → fallback
18
+ * Only the happy path (Responses model + everything available)
19
+ * returns the trimmed rebuild.
20
+ * 3. Caching — same (agent, model) twice should rebuild ONCE; the
21
+ * invalidator should clear entries for ONE agent only.
22
+ */
23
+ import { jest, describe, test, expect, beforeEach } from '@jest/globals';
24
+
25
+ // Stub the activity service so importing the scheduler doesn't drag in
26
+ // the full agent dependency graph for these unit tests.
27
+ jest.unstable_mockModule('../../services/agentActivityService.js', () => ({
28
+ shouldAgentBeActive: () => ({ active: false, reason: 'stub' }),
29
+ getActiveAgents: () => [],
30
+ shouldSkipIteration: () => false,
31
+ }));
32
+
33
+ const { default: AgentScheduler } = await import('../agentScheduler.js');
34
+
35
+ // ─── Test-only helpers ────────────────────────────────────────────────
36
+ function makeScheduler({ models = [], registry = null, logger = null } = {}) {
37
+ const modelsService = {
38
+ getModels: () => models,
39
+ };
40
+ const agentPool = {
41
+ toolsRegistry: registry,
42
+ getAllAgents: async () => new Map(),
43
+ getAgent: async () => null,
44
+ };
45
+ const aiService = {};
46
+ const messageProcessor = {};
47
+ const log = logger || {
48
+ info() {}, warn() {}, error() {}, debug() {},
49
+ };
50
+ return new AgentScheduler(
51
+ agentPool,
52
+ messageProcessor,
53
+ aiService,
54
+ log,
55
+ null, // webSocketManager
56
+ null, // modelRouterService
57
+ modelsService, // ← what we care about
58
+ );
59
+ }
60
+
61
+ // A minimal fake registry that records the apiType it was called with
62
+ // so we can assert the scheduler propagates it correctly.
63
+ function makeRegistry({ shouldThrow = false } = {}) {
64
+ const calls = [];
65
+ return {
66
+ calls,
67
+ enhanceSystemPrompt(prompt, capabilities, options) {
68
+ calls.push({ prompt, capabilities, options });
69
+ if (shouldThrow) throw new Error('boom');
70
+ const apiTag = options?.apiType === 'responses' ? '[trimmed]' : '[full]';
71
+ return `${prompt}\n## TOOLS ${apiTag} for caps=${(capabilities || []).join(',')}`;
72
+ },
73
+ };
74
+ }
75
+
76
+ // ──────────────────────────────────────────────────────────────────────
77
+ // 1. _resolveModelApiType — parity with backend's _inferRouting
78
+ // ──────────────────────────────────────────────────────────────────────
79
+
80
+ describe('_resolveModelApiType — catalog → routing decision', () => {
81
+ test('api_type=["responses"] alone → "responses"', () => {
82
+ const s = makeScheduler({ models: [{ name: 'codex-mini', api_type: ['responses'] }] });
83
+ expect(s._resolveModelApiType('codex-mini')).toBe('responses');
84
+ });
85
+
86
+ test('api_type=["chat_completion","responses"] (BOTH present) → "chat_completion"', () => {
87
+ // Backend rule: only flips to responses when chat_completion is ABSENT.
88
+ const s = makeScheduler({
89
+ models: [{ name: 'gpt-5', api_type: ['chat_completion', 'responses'] }],
90
+ });
91
+ expect(s._resolveModelApiType('gpt-5')).toBe('chat_completion');
92
+ });
93
+
94
+ test('api_type=["chat_completion"] → "chat_completion"', () => {
95
+ const s = makeScheduler({ models: [{ name: 'claude', api_type: ['chat_completion'] }] });
96
+ expect(s._resolveModelApiType('claude')).toBe('chat_completion');
97
+ });
98
+
99
+ test('capabilities.responses=="true" + chatCompletion=="false" → "responses"', () => {
100
+ const s = makeScheduler({
101
+ models: [{
102
+ name: 'o3',
103
+ api_type: ['responses'],
104
+ capabilities: { responses: 'true', chatCompletion: 'false' },
105
+ }],
106
+ });
107
+ expect(s._resolveModelApiType('o3')).toBe('responses');
108
+ });
109
+
110
+ test('explicit useResponsesApi flag → "responses"', () => {
111
+ const s = makeScheduler({
112
+ models: [{ name: 'foo', api_type: [], useResponsesApi: true }],
113
+ });
114
+ expect(s._resolveModelApiType('foo')).toBe('responses');
115
+ });
116
+
117
+ test('name-pattern fallback: "codex" → "responses" even with no catalog data', () => {
118
+ const s = makeScheduler({ models: [{ name: 'gpt-5-1-codex-mini' }] });
119
+ expect(s._resolveModelApiType('gpt-5-1-codex-mini')).toBe('responses');
120
+ });
121
+
122
+ test('name-pattern fallback: "gpt-X-pro" → "responses"', () => {
123
+ const s = makeScheduler({ models: [{ name: 'gpt-5-pro' }] });
124
+ expect(s._resolveModelApiType('gpt-5-pro')).toBe('responses');
125
+ });
126
+
127
+ test('unknown model returns undefined (caller falls back to old behaviour)', () => {
128
+ const s = makeScheduler({ models: [{ name: 'claude', api_type: ['chat_completion'] }] });
129
+ expect(s._resolveModelApiType('mystery-model')).toBeUndefined();
130
+ });
131
+
132
+ test('modelsService missing → undefined (safe — caller falls back)', () => {
133
+ const s = new AgentScheduler({ toolsRegistry: null }, {}, {}, {
134
+ info() {}, warn() {}, error() {}, debug() {},
135
+ });
136
+ expect(s._resolveModelApiType('codex-mini')).toBeUndefined();
137
+ });
138
+
139
+ test('arbitrary exception in catalog → undefined (defensive)', () => {
140
+ // getModels() throws — must not propagate.
141
+ const s = new AgentScheduler(
142
+ { toolsRegistry: null },
143
+ {},
144
+ {},
145
+ { info() {}, warn() {}, error() {}, debug() {} },
146
+ null,
147
+ null,
148
+ { getModels() { throw new Error('catalog offline'); } },
149
+ );
150
+ expect(s._resolveModelApiType('codex-mini')).toBeUndefined();
151
+ });
152
+ });
153
+
154
+ // ──────────────────────────────────────────────────────────────────────
155
+ // 2. _pickSystemPromptForModel — happy path + every safety-net path
156
+ // ──────────────────────────────────────────────────────────────────────
157
+
158
+ describe('_pickSystemPromptForModel — back-compat fallbacks', () => {
159
+ const BAKED_PROMPT = 'BAKED: agent persona\n## AVAILABLE TOOLS\n…lots of text…';
160
+ const ORIGINAL_PROMPT = 'Agent persona';
161
+
162
+ const agentFor = (overrides = {}) => ({
163
+ id: 'agent-1',
164
+ systemPrompt: BAKED_PROMPT,
165
+ originalSystemPrompt: ORIGINAL_PROMPT,
166
+ capabilities: ['memory', 'terminal'],
167
+ skills: [],
168
+ ...overrides,
169
+ });
170
+
171
+ test('chat-completion model → returns agent.systemPrompt verbatim', async () => {
172
+ const s = makeScheduler({
173
+ models: [{ name: 'claude', api_type: ['chat_completion'] }],
174
+ registry: makeRegistry(),
175
+ });
176
+ const out = await s._pickSystemPromptForModel(agentFor(), 'claude');
177
+ expect(out).toBe(BAKED_PROMPT);
178
+ });
179
+
180
+ test('unknown model → returns agent.systemPrompt verbatim', async () => {
181
+ const s = makeScheduler({ models: [], registry: makeRegistry() });
182
+ const out = await s._pickSystemPromptForModel(agentFor(), 'never-heard-of');
183
+ expect(out).toBe(BAKED_PROMPT);
184
+ });
185
+
186
+ test('Responses model BUT originalSystemPrompt missing → fallback', async () => {
187
+ // Very old persisted agent (pre-originalSystemPrompt storage).
188
+ const s = makeScheduler({
189
+ models: [{ name: 'codex', api_type: ['responses'] }],
190
+ registry: makeRegistry(),
191
+ });
192
+ const out = await s._pickSystemPromptForModel(
193
+ agentFor({ originalSystemPrompt: undefined }),
194
+ 'codex',
195
+ );
196
+ expect(out).toBe(BAKED_PROMPT);
197
+ });
198
+
199
+ test('Responses model BUT toolsRegistry missing → fallback', async () => {
200
+ const s = makeScheduler({
201
+ models: [{ name: 'codex', api_type: ['responses'] }],
202
+ registry: null,
203
+ });
204
+ const out = await s._pickSystemPromptForModel(agentFor(), 'codex');
205
+ expect(out).toBe(BAKED_PROMPT);
206
+ });
207
+
208
+ test('Responses model AND rebuild throws → fallback (no crash)', async () => {
209
+ const s = makeScheduler({
210
+ models: [{ name: 'codex', api_type: ['responses'] }],
211
+ registry: makeRegistry({ shouldThrow: true }),
212
+ });
213
+ const out = await s._pickSystemPromptForModel(agentFor(), 'codex');
214
+ expect(out).toBe(BAKED_PROMPT);
215
+ });
216
+ });
217
+
218
+ // ──────────────────────────────────────────────────────────────────────
219
+ // 3. _pickSystemPromptForModel — happy path
220
+ // ──────────────────────────────────────────────────────────────────────
221
+
222
+ describe('_pickSystemPromptForModel — Responses model rebuild', () => {
223
+ const agent = {
224
+ id: 'agent-1',
225
+ systemPrompt: 'STALE (chat-completion shape)',
226
+ originalSystemPrompt: 'Agent persona',
227
+ capabilities: ['memory', 'terminal'],
228
+ skills: [],
229
+ };
230
+
231
+ test('rebuilds with apiType:"responses" and returns trimmed prompt', async () => {
232
+ const reg = makeRegistry();
233
+ const s = makeScheduler({
234
+ models: [{ name: 'codex', api_type: ['responses'] }],
235
+ registry: reg,
236
+ });
237
+ const out = await s._pickSystemPromptForModel(agent, 'codex');
238
+ expect(out).toContain('[trimmed]');
239
+ expect(out).toContain('Agent persona');
240
+ // The registry was called with apiType: 'responses' AND the agent's capabilities.
241
+ expect(reg.calls).toHaveLength(1);
242
+ expect(reg.calls[0].options.apiType).toBe('responses');
243
+ expect(reg.calls[0].capabilities).toEqual(['memory', 'terminal']);
244
+ });
245
+
246
+ test('caches per (agentId, modelName) — second call does NOT rebuild', async () => {
247
+ const reg = makeRegistry();
248
+ const s = makeScheduler({
249
+ models: [{ name: 'codex', api_type: ['responses'] }],
250
+ registry: reg,
251
+ });
252
+ await s._pickSystemPromptForModel(agent, 'codex');
253
+ await s._pickSystemPromptForModel(agent, 'codex');
254
+ await s._pickSystemPromptForModel(agent, 'codex');
255
+ expect(reg.calls).toHaveLength(1);
256
+ });
257
+
258
+ test('different models for same agent → SEPARATE cache entries', async () => {
259
+ const reg = makeRegistry();
260
+ const s = makeScheduler({
261
+ models: [
262
+ { name: 'codex', api_type: ['responses'] },
263
+ { name: 'gpt-5-pro', api_type: ['responses'] },
264
+ ],
265
+ registry: reg,
266
+ });
267
+ await s._pickSystemPromptForModel(agent, 'codex');
268
+ await s._pickSystemPromptForModel(agent, 'gpt-5-pro');
269
+ expect(reg.calls).toHaveLength(2);
270
+ });
271
+
272
+ test('switching back to chat-completion mid-session uses the persisted prompt unchanged', async () => {
273
+ const reg = makeRegistry();
274
+ const s = makeScheduler({
275
+ models: [
276
+ { name: 'codex', api_type: ['responses'] },
277
+ { name: 'claude', api_type: ['chat_completion'] },
278
+ ],
279
+ registry: reg,
280
+ });
281
+ const native = await s._pickSystemPromptForModel(agent, 'codex');
282
+ const inline = await s._pickSystemPromptForModel(agent, 'claude');
283
+ expect(native).toContain('[trimmed]');
284
+ expect(inline).toBe('STALE (chat-completion shape)'); // persisted, untouched
285
+ expect(reg.calls).toHaveLength(1); // only the codex rebuild ran
286
+ });
287
+ });
288
+
289
+ // ──────────────────────────────────────────────────────────────────────
290
+ // 4. Cache invalidation
291
+ // ──────────────────────────────────────────────────────────────────────
292
+
293
+ describe('_invalidateNativePromptCache — selective per-agent clear', () => {
294
+ test('clears entries for ONE agent only, leaves others alone', async () => {
295
+ const reg = makeRegistry();
296
+ const s = makeScheduler({
297
+ models: [{ name: 'codex', api_type: ['responses'] }],
298
+ registry: reg,
299
+ });
300
+ const a1 = { id: 'a1', systemPrompt: 'p1', originalSystemPrompt: 'persona-1', capabilities: ['memory'], skills: [] };
301
+ const a2 = { id: 'a2', systemPrompt: 'p2', originalSystemPrompt: 'persona-2', capabilities: ['terminal'], skills: [] };
302
+
303
+ await s._pickSystemPromptForModel(a1, 'codex');
304
+ await s._pickSystemPromptForModel(a2, 'codex');
305
+ expect(reg.calls).toHaveLength(2);
306
+
307
+ s._invalidateNativePromptCache('a1');
308
+
309
+ // Re-fetching a1 → rebuild. a2 → still cached.
310
+ await s._pickSystemPromptForModel(a1, 'codex');
311
+ await s._pickSystemPromptForModel(a2, 'codex');
312
+ expect(reg.calls).toHaveLength(3); // only a1 rebuilt
313
+ });
314
+
315
+ test('invalidating for an agent that never rendered is a no-op (does not throw)', async () => {
316
+ const s = makeScheduler({ models: [], registry: makeRegistry() });
317
+ expect(() => s._invalidateNativePromptCache('never-seen')).not.toThrow();
318
+ });
319
+ });
@@ -380,6 +380,18 @@ class AgentPool {
380
380
  originalLength: baseSystemPrompt?.length || 0,
381
381
  enhancedLength: enhancedSystemPrompt?.length || 0
382
382
  });
383
+
384
+ // The scheduler caches per-(agent, model) Responses-API prompts
385
+ // built from this agent's `originalSystemPrompt` + capabilities.
386
+ // Both inputs just changed, so any cached rebuilds are stale.
387
+ // No-op when the scheduler isn't attached (tests / very-early
388
+ // boot) or when it predates this method (old binaries during
389
+ // a rolling upgrade).
390
+ try {
391
+ this.scheduler?._invalidateNativePromptCache?.(agentId);
392
+ } catch (e) {
393
+ this.logger.debug?.('Failed to invalidate native prompt cache', { agentId, error: e.message });
394
+ }
383
395
  } catch (error) {
384
396
  this.logger.error(`Failed to regenerate system prompt with updated capabilities`, {
385
397
  agentId,
@@ -60,6 +60,19 @@ class AgentScheduler {
60
60
  // Initialize ContextInjectionService for file attachments
61
61
  this.contextInjectionService = new ContextInjectionService({}, logger);
62
62
 
63
+ // Per-turn system-prompt rebuild cache for native-API models.
64
+ // Agents persist a `systemPrompt` baked at create-time for the
65
+ // chat-completion shape (text descriptions of every tool). When a
66
+ // turn targets a Responses-API model (Codex / o-series / gpt-5-pro),
67
+ // we want a TRIMMED prompt that omits text docs for tools whose
68
+ // structured schemas are sent in `tools:`. Rebuilding fresh each
69
+ // turn would be wasteful — agents typically stay on the same model
70
+ // for many turns — so we memoize per (agentId, modelName).
71
+ //
72
+ // Cleared on process restart and on agent updates that change the
73
+ // base prompt or capabilities (see `_invalidateNativePromptCache`).
74
+ this._nativePromptCache = new Map(); // `${agentId}|${modelName}` → string
75
+
63
76
  // Initialize FlowContextService for flow execution context
64
77
  this.flowContextService = new FlowContextService({}, logger);
65
78
 
@@ -1919,8 +1932,17 @@ class AgentScheduler {
1919
1932
  // After compaction, retrieve messages from AgentPool (will use compacted if available)
1920
1933
  const messagesToSend = await this.agentPool.getMessagesForAI(agentId, targetModel);
1921
1934
 
1922
- // Inject TaskManager instructions for AGENT mode
1923
- let enhancedSystemPrompt = agent.systemPrompt;
1935
+ // ── Pick the right system-prompt shape for the target model ──
1936
+ // Default: use the agent's persisted `systemPrompt` (baked at
1937
+ // create-time with full text descriptions for every tool — the
1938
+ // chat-completion shape). For models that use the Responses API
1939
+ // (native function-calling), rebuild a trimmed version that
1940
+ // omits text docs for tools whose structured schemas we send in
1941
+ // `tools:`. Falls back to the persisted prompt whenever the
1942
+ // model's apiType is unknown OR the agent has no stored original
1943
+ // prompt — preserves existing behaviour for old agents and
1944
+ // unknown models. See `_pickSystemPromptForModel`.
1945
+ let enhancedSystemPrompt = await this._pickSystemPromptForModel(agent, targetModel);
1924
1946
  if (agent.mode === AGENT_MODES.AGENT) {
1925
1947
  const taskManagerInstruction = "\n\nIMPORTANT: You are in AGENT mode. The use of TaskManager tool is mandatory.\n\n" +
1926
1948
  "TASK LIFECYCLE (follow this, don't improvise):\n" +
@@ -2169,6 +2191,156 @@ class AgentScheduler {
2169
2191
  }
2170
2192
  }
2171
2193
 
2194
+ /**
2195
+ * Choose the right base system prompt for the target model.
2196
+ *
2197
+ * • If the model's catalog entry says it uses the Responses API
2198
+ * ('responses' in its api_type / capabilities) AND the agent has
2199
+ * an `originalSystemPrompt` we can rebuild from, return a
2200
+ * freshly-built prompt that omits text descriptions for tools
2201
+ * with native function schemas (see baseTool.js — those tools'
2202
+ * structured schemas in `tools:` are the canonical source for
2203
+ * these models, so the text docs are pure duplication).
2204
+ *
2205
+ * • Otherwise return the agent's persisted `systemPrompt` exactly
2206
+ * as it is today. This covers:
2207
+ * – chat-completion models (no native function calling)
2208
+ * – models we can't classify (modelsService offline / catalog
2209
+ * field missing) — fail safe to old behaviour
2210
+ * – very old agents persisted before `originalSystemPrompt`
2211
+ * was stored — fail safe to old behaviour
2212
+ *
2213
+ * Result is memoized per `(agentId, targetModel)` to avoid rebuilding
2214
+ * on every turn. The cache is invalidated whenever the agent's base
2215
+ * prompt or capabilities change (see `_invalidateNativePromptCache`).
2216
+ *
2217
+ * @private
2218
+ * @param {Object} agent - Agent record
2219
+ * @param {string} targetModel - Model name about to be called
2220
+ * @returns {Promise<string>} The prompt to use as the base
2221
+ */
2222
+ async _pickSystemPromptForModel(agent, targetModel) {
2223
+ // 1. Resolve the model's API type. Unknown → use persisted prompt.
2224
+ const apiType = this._resolveModelApiType(targetModel);
2225
+ if (apiType !== 'responses') return agent.systemPrompt;
2226
+
2227
+ // 2. Need the original (un-enhanced) prompt to rebuild from. Without
2228
+ // it we can't safely re-add the trimmed tool docs — fall back
2229
+ // to the persisted shape (which works for chat-completion and
2230
+ // is also accepted by Responses API, just with the duplication
2231
+ // cost). This is the back-compat path for legacy agents.
2232
+ if (!agent.originalSystemPrompt) return agent.systemPrompt;
2233
+
2234
+ // 3. Cache lookup.
2235
+ const cacheKey = `${agent.id}|${targetModel}`;
2236
+ const cached = this._nativePromptCache.get(cacheKey);
2237
+ if (cached) return cached;
2238
+
2239
+ // 4. Rebuild. The agentPool stores the toolsRegistry — reuse it so
2240
+ // we go through the exact same code path that built the original
2241
+ // prompt, just with apiType set. Skills index + the rest of the
2242
+ // augmentation must be reapplied; mirror what createAgent does.
2243
+ try {
2244
+ const registry = this.agentPool?.toolsRegistry;
2245
+ if (!registry) return agent.systemPrompt;
2246
+
2247
+ let rebuilt = registry.enhanceSystemPrompt(
2248
+ agent.originalSystemPrompt,
2249
+ agent.capabilities || [],
2250
+ { apiType: 'responses' },
2251
+ );
2252
+
2253
+ // Re-inject ASSIGNED SKILLS block if present (createAgent appends
2254
+ // this after enhanceSystemPrompt — see agentPool.js:108).
2255
+ if (Array.isArray(agent.skills) && agent.skills.length > 0) {
2256
+ try {
2257
+ const { getSkillsService } = await import('../services/skillsService.js');
2258
+ const skillsService = getSkillsService(this.logger);
2259
+ await skillsService.initialize();
2260
+ const summaries = await skillsService.getSkillSummaries(agent.skills);
2261
+ if (summaries.length > 0) {
2262
+ rebuilt += '\n\n## ASSIGNED SKILLS\n\n';
2263
+ rebuilt += 'Use the skills tool to browse and load skill content. Use "describe" to see sections, "read-section" to load specific parts.\n\n';
2264
+ for (const s of summaries) {
2265
+ const sections = s.sections?.length ? `\n Sections: ${s.sections.map(h => h.replace(/^#+\s*/, '')).join(', ')}` : '';
2266
+ rebuilt += `- **${s.name}** (${s.lineCount} lines): ${s.description}${sections}\n`;
2267
+ }
2268
+ }
2269
+ } catch (e) {
2270
+ this.logger?.debug?.('Failed to re-inject skills index for native prompt', { error: e.message });
2271
+ }
2272
+ }
2273
+
2274
+ this._nativePromptCache.set(cacheKey, rebuilt);
2275
+ this.logger?.debug?.('Built native-API system prompt', {
2276
+ agentId: agent.id,
2277
+ targetModel,
2278
+ originalLength: agent.systemPrompt?.length || 0,
2279
+ rebuiltLength: rebuilt.length,
2280
+ savedTokensApprox: Math.round(((agent.systemPrompt?.length || 0) - rebuilt.length) / 4),
2281
+ });
2282
+ return rebuilt;
2283
+ } catch (err) {
2284
+ // Anything goes wrong → fall back to old behaviour. Failing
2285
+ // closed (no prompt) would break the agent's turn; failing open
2286
+ // (use chat-completion shape) just keeps the duplication.
2287
+ this.logger?.warn?.('Native system-prompt rebuild failed — using persisted prompt', {
2288
+ agentId: agent.id,
2289
+ targetModel,
2290
+ error: err.message,
2291
+ });
2292
+ return agent.systemPrompt;
2293
+ }
2294
+ }
2295
+
2296
+ /**
2297
+ * Look up a model's API type from the catalog. Returns 'responses',
2298
+ * 'chat_completion', or undefined when unknown. The catalog exposes
2299
+ * `api_type` as an array and/or `capabilities.responses`/`capabilities.chatCompletion`
2300
+ * — mirror the backend's _inferRouting precedence so the CLI's
2301
+ * classification matches the backend's routing decision exactly.
2302
+ * @private
2303
+ */
2304
+ _resolveModelApiType(modelName) {
2305
+ try {
2306
+ if (!this.modelsService || typeof this.modelsService.getModels !== 'function') return undefined;
2307
+ const models = this.modelsService.getModels();
2308
+ const m = models.find(x => x.name === modelName);
2309
+ if (!m) return undefined;
2310
+
2311
+ const apiType = Array.isArray(m.api_type) ? m.api_type : (m.api_type ? [m.api_type] : []);
2312
+ const caps = m.capabilities || {};
2313
+
2314
+ // Mirrors backend services/llmServiceFactory.js _inferRouting:
2315
+ // responses if api_type contains 'responses' AND not 'chat_completion'
2316
+ // OR capabilities.responses === 'true' / chatCompletion === 'false'
2317
+ // OR explicit useResponsesApi flag
2318
+ if (apiType.includes('responses') && !apiType.includes('chat_completion')) return 'responses';
2319
+ if (caps.chatCompletion === 'false' && (caps.responses === 'true' || apiType.includes('responses'))) return 'responses';
2320
+ if (m.useResponsesApi) return 'responses';
2321
+ // Name-based fallback (last resort — only when catalog has no routing data)
2322
+ if (/codex/i.test(modelName) || /gpt.*-pro$/i.test(modelName)) return 'responses';
2323
+ return 'chat_completion';
2324
+ } catch (err) {
2325
+ // Defensive — never block the turn on a classification failure.
2326
+ this.logger?.debug?.('Model apiType resolution failed', { modelName, error: err.message });
2327
+ return undefined;
2328
+ }
2329
+ }
2330
+
2331
+ /**
2332
+ * Drop cached native prompts for an agent. Called by agentPool when
2333
+ * the base prompt or capabilities change so the next turn rebuilds.
2334
+ * Exposed so agentPool can call it without poking internal state.
2335
+ */
2336
+ _invalidateNativePromptCache(agentId) {
2337
+ for (const key of this._nativePromptCache.keys()) {
2338
+ if (key.startsWith(`${agentId}|`)) {
2339
+ this._nativePromptCache.delete(key);
2340
+ }
2341
+ }
2342
+ }
2343
+
2172
2344
  /**
2173
2345
  * Get AI response using streaming with WebSocket broadcast
2174
2346
  * @param {string} agentId - Agent ID
@@ -1,47 +1,65 @@
1
1
  /**
2
2
  * Regression tests for the router model name convention.
3
3
  *
4
- * Background: Dynamic Routing silently no-op'd for an entire release
5
- * because the CLI's `ROUTER_MODEL` constant was `'autopilot-model-router'`
6
- * while the catalog (the source of truth that the backend's /llm/chat
7
- * looks up) keys the entry as `'model-router'`. The catalog's regex
8
- * fallback is wrapped with ^…$ at index-build time in the backend
9
- * (services/modelCatalogService.js:109), so the `autopilot-` prefix
10
- * caused every routing-decision call to return 400, the
11
- * ModelRouterService caught + fell back to the current model, and no
12
- * routing ever happened.
4
+ * Background: two rounds of the same shape of bug:
13
5
  *
14
- * These tests pin the post-fix invariant and catch any reintroduction
15
- * of a product-name prefix in the future.
6
+ * Round 1: Dynamic Routing silently no-op'd because ROUTER_MODEL
7
+ * was `'autopilot-model-router'` while the catalog keyed the entry
8
+ * as `'model-router'`. Fixed by changing the constant to the bare
9
+ * form.
10
+ *
11
+ * Round 2: Even the bare `'model-router'` key didn't exist in the
12
+ * live catalog, because the catalog discovers deployments by
13
+ * underlying-model-name (not by Azure deployment-name). The team's
14
+ * actual deployment was named `autopilot-model-router` in Azure but
15
+ * its underlying model is `gpt-4.1-nano` — so the live catalog
16
+ * keys the entry under `gpt-4.1-nano`. The CLI now defaults to
17
+ * that name.
18
+ *
19
+ * These tests pin the new invariant + the override mechanism + catch
20
+ * any reintroduction of a product-name prefix.
16
21
  *
17
22
  * Why a separate test file: the existing modelRouterService.test.js
18
23
  * mocks the constants module at the top of the file, so it cannot
19
24
  * assert anything about the REAL value of MODEL_ROUTER_CONFIG. This
20
25
  * file imports the real constants instead.
21
26
  */
22
- import { describe, test, expect } from '@jest/globals';
27
+ import { describe, test, expect, beforeEach, afterEach } from '@jest/globals';
23
28
  import fs from 'node:fs';
24
29
  import path from 'node:path';
25
30
  import { fileURLToPath } from 'node:url';
26
31
 
27
- import { MODEL_ROUTER_CONFIG } from '../../utilities/constants.js';
28
-
29
32
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
30
33
  const SRC_ROOT = path.resolve(__dirname, '../..');
31
34
 
32
35
  describe('Router model naming — matches catalog convention', () => {
33
- test('ROUTER_MODEL is exactly "model-router"', () => {
34
- // This is the modelKey in autopilot-model-catalog's models_registry.json.
35
- // Any other value would force the backend through the regex fallback,
36
- // which is anchored ^…$ and will reject product-prefixed forms.
37
- expect(MODEL_ROUTER_CONFIG.ROUTER_MODEL).toBe('model-router');
36
+ test('default ROUTER_MODEL is "gpt-4.1-nano" (current live catalog key)', async () => {
37
+ // The autopilot-model-router deployment's underlying model is
38
+ // gpt-4.1-nano. The catalog keys entries by underlying model, so
39
+ // this is the canonical name the CLI must request.
40
+ delete process.env.LOXIA_ROUTER_MODEL;
41
+ // Re-import to pick up the (re-)evaluated default.
42
+ const fresh = await import(`../../utilities/constants.js?nocache=${Date.now()}`);
43
+ expect(fresh.MODEL_ROUTER_CONFIG.ROUTER_MODEL).toBe('gpt-4.1-nano');
44
+ });
45
+
46
+ test('LOXIA_ROUTER_MODEL env var overrides the default (no rebuild needed)', async () => {
47
+ process.env.LOXIA_ROUTER_MODEL = 'gpt-4o-mini';
48
+ try {
49
+ const fresh = await import(`../../utilities/constants.js?nocache=${Date.now()}`);
50
+ expect(fresh.MODEL_ROUTER_CONFIG.ROUTER_MODEL).toBe('gpt-4o-mini');
51
+ } finally {
52
+ delete process.env.LOXIA_ROUTER_MODEL;
53
+ }
38
54
  });
39
55
 
40
- test('ROUTER_MODEL does NOT carry a product/brand prefix', () => {
41
- // Defense-in-depth: even if the catalog modelKey ever changes,
42
- // the name must not start with a product prefix like "autopilot-"
56
+ test('ROUTER_MODEL does NOT carry a product/brand prefix', async () => {
57
+ // Defense-in-depth: even if the canonical name ever changes,
58
+ // it must not start with a product prefix like "autopilot-"
43
59
  // or "onbuzz-". The catalog's canonical names are product-agnostic.
44
- const v = MODEL_ROUTER_CONFIG.ROUTER_MODEL;
60
+ delete process.env.LOXIA_ROUTER_MODEL;
61
+ const fresh = await import(`../../utilities/constants.js?nocache=${Date.now()}`);
62
+ const v = fresh.MODEL_ROUTER_CONFIG.ROUTER_MODEL;
45
63
  expect(v).not.toMatch(/^autopilot[-_]/i);
46
64
  expect(v).not.toMatch(/^onbuzz[-_]/i);
47
65
  expect(v).not.toMatch(/^loxia[-_]/i);