npm - onbuzz - Versions diffs - 4.8.0 → 4.8.2 - Mend

onbuzz 4.8.0 → 4.8.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (17)

package/package.json +1 -1
package/src/core/__tests__/agentPool.test.js +185 -0
package/src/core/__tests__/agentScheduler.nativePromptPick.test.js +319 -0
package/src/core/__tests__/agentScheduler.taskListInjection.test.js +94 -0
package/src/core/agentPool.js +319 -0
package/src/core/agentScheduler.js +216 -2
package/src/services/__tests__/conversationCompactionService.test.js +141 -0
package/src/services/__tests__/modelRouterNaming.test.js +41 -23
package/src/services/conversationCompactionService.js +120 -46
package/src/tools/__tests__/baseTool.test.js +171 -0
package/src/tools/__tests__/codeMapTool.test.js +179 -0
package/src/tools/__tests__/taskManagerTool.test.js +141 -0
package/src/tools/baseTool.js +89 -1
package/src/tools/openaiFunctionSchemas.js +14 -0
package/src/tools/skillsTool.js +282 -277
package/src/tools/taskManagerTool.js +72 -2
package/src/utilities/constants.js +19 -1

package/src/tools/taskManagerTool.js CHANGED Viewed

@@ -219,7 +219,7 @@ ACTIONS:
 EXAMPLES:
-Sync task list (RECOMMENDED):
+Sync task list (RECOMMENDED — but BEWARE: replaces the whole list):
 \`\`\`json
 {
   "toolId": "taskmanager",
@@ -234,6 +234,27 @@ Sync task list (RECOMMENDED):
 }
 \`\`\`
+⚠️ **DESTRUCTIVE-SYNC GUARDRAIL** — sync replaces the entire task list. Any existing pending/in_progress task whose title doesn't match an incoming task is dropped. If you don't intend to drop those tasks, sync will REFUSE the call and tell you which tasks were at risk.
+To proceed with a destructive sync intentionally, add \`"confirmReplace": true\`:
+\`\`\`json
+{
+  "toolId": "taskmanager",
+  "actions": [{
+    "type": "sync",
+    "confirmReplace": true,
+    "tasks": [ /* the full new plan */ ]
+  }]
+}
+\`\`\`
+**Before issuing sync, prefer one of these instead** (they don't risk dropping tasks):
+- \`{"type": "list"}\` first — see what tasks already exist before deciding to replace them.
+- \`{"type": "create", "title": "..."}\` — add a single task without touching the rest.
+- \`{"type": "update", "taskId": "...", "status": "in_progress"}\` — change one task.
+This matters most right after compaction: the conversation history that mentioned your previous sync may have been compressed, but your task list is still there. Always \`list\` before \`sync\` if you suspect context loss.
 Create a task:
 \`\`\`json
 {
@@ -650,7 +671,7 @@ Always use a detailed task description to provide context for the task, and leve
    * @private
    */
   async syncTasks(agent, params, context) {
-    let { tasks } = params;
+    let { tasks, confirmReplace } = params;
     // Parse tasks if provided as JSON string
     if (typeof tasks === 'string') {
@@ -669,6 +690,55 @@ Always use a detailed task description to provide context for the task, and leve
       throw new Error('Tasks array cannot be empty');
     }
+    // ── DESTRUCTIVE-SYNC GUARDRAIL ───────────────────────────────────
+    // Real failure observed in production: an agent (post-compaction)
+    // lost track of its existing 9-task plan and called sync with a
+    // 4-task list of unrelated work. sync silently dropped all 9
+    // → agent built the wrong thing → user had to repeat themselves
+    // multiple times before the agent finally re-read the user request.
+    //
+    // Rule: if this sync would drop ANY pending or in_progress task
+    // whose title doesn't match an incoming task, refuse the call
+    // unless the agent explicitly passes `confirmReplace: true`.
+    // Completed/cancelled tasks can be silently pruned — they're done.
+    //
+    // Why this isn't too strict: matching is fuzzy (case-insensitive
+    // title compare; see findExistingTask below). An honest plan
+    // refinement that renames a few tasks will hit this guard, which
+    // is correct — the agent should acknowledge it's replacing work.
+    const existingTasksForGuard = agent.taskList?.tasks || [];
+    const incomingTitles = new Set(
+      tasks
+        .map(t => (t.title || '').toLowerCase().trim())
+        .filter(Boolean)
+    );
+    const dropped = existingTasksForGuard.filter(t => {
+      // Already terminal — safe to drop.
+      if (t.status === 'completed' || t.status === 'cancelled') return false;
+      // Match against any incoming title — same fuzzy rule used below.
+      return !incomingTitles.has((t.title || '').toLowerCase().trim());
+    });
+    if (dropped.length > 0 && confirmReplace !== true) {
+      const summary = dropped
+        .map(t => `  - [${t.status}] ${t.title}`)
+        .join('\n');
+      const hint = [
+        `Sync would drop ${dropped.length} non-terminal task(s) that don't match any incoming title:`,
+        summary,
+        '',
+        'If this is intentional (you really mean to replace the plan), retry with `confirmReplace: true`.',
+        'If you instead want to ADD tasks without dropping existing ones, use action "create" per task.',
+        'If you want to keep an existing task, include its title verbatim in the incoming list.',
+        '',
+        'This guardrail prevents post-compaction context loss from silently destroying in-flight work.',
+      ].join('\n');
+      const err = new Error(hint);
+      err.code = 'SYNC_WOULD_DROP_OPEN_TASKS';
+      err.droppedTasks = dropped.map(t => ({ id: t.id, title: t.title, status: t.status, priority: t.priority }));
+      throw err;
+    }
     const timestamp = new Date().toISOString();
     const existingTasks = agent.taskList.tasks || [];
     const updatedTasks = [];

package/src/utilities/constants.js CHANGED Viewed

@@ -26,8 +26,26 @@ const SYSTEM_DEFAULTS = {
 };
 // Model Router Configuration
+//
+// ROUTER_MODEL is the model the Dynamic Model Routing feature calls
+// (via a cheap chat-completion request) to decide which "real" model
+// should handle each turn. Resolution order:
+//   1. env LOXIA_ROUTER_MODEL — operator override, no rebuild needed
+//   2. 'gpt-4.1-nano' — current live default. The platform's
+//      autopilot-model-router deployment uses gpt-4.1-nano as its
+//      underlying model, and the model-catalog keys entries by the
+//      underlying model name (NOT the Azure deployment name), so the
+//      CLI must ask for 'gpt-4.1-nano' to be matched. Cheaper than the
+//      retired OpenAI 'model-router' product, same job.
+//
+// Historical note: this used to be the literal string 'model-router',
+// matching an OpenAI product name. That product is no longer in our
+// Azure catalog (no deployment keyed under that name), which caused
+// every routing call to fail with HTTP 400 "Unsupported model:
+// model-router" until the circuit breaker tripped. The fix migrates
+// the default to the underlying model name that IS in the catalog.
 const MODEL_ROUTER_CONFIG = {
-  ROUTER_MODEL: 'model-router', // Autopilot model router deployment
+  ROUTER_MODEL: process.env.LOXIA_ROUTER_MODEL || 'gpt-4.1-nano',
   CONTEXT_MESSAGES_COUNT: 5, // Number of recent messages to include
   BENCHMARK_REFRESH_INTERVAL: 3600000, // 1 hour in milliseconds
   FALLBACK_ON_ERROR: true, // Continue with previous model on router error