onbuzz 4.8.0 → 4.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -219,7 +219,7 @@ ACTIONS:
219
219
 
220
220
  EXAMPLES:
221
221
 
222
- Sync task list (RECOMMENDED):
222
+ Sync task list (RECOMMENDED — but BEWARE: replaces the whole list):
223
223
  \`\`\`json
224
224
  {
225
225
  "toolId": "taskmanager",
@@ -234,6 +234,27 @@ Sync task list (RECOMMENDED):
234
234
  }
235
235
  \`\`\`
236
236
 
237
+ ⚠️ **DESTRUCTIVE-SYNC GUARDRAIL** — sync replaces the entire task list. Any existing pending/in_progress task whose title doesn't match an incoming task is dropped. If you don't intend to drop those tasks, sync will REFUSE the call and tell you which tasks were at risk.
238
+
239
+ To proceed with a destructive sync intentionally, add \`"confirmReplace": true\`:
240
+ \`\`\`json
241
+ {
242
+ "toolId": "taskmanager",
243
+ "actions": [{
244
+ "type": "sync",
245
+ "confirmReplace": true,
246
+ "tasks": [ /* the full new plan */ ]
247
+ }]
248
+ }
249
+ \`\`\`
250
+
251
+ **Before issuing sync, prefer one of these instead** (they don't risk dropping tasks):
252
+ - \`{"type": "list"}\` first — see what tasks already exist before deciding to replace them.
253
+ - \`{"type": "create", "title": "..."}\` — add a single task without touching the rest.
254
+ - \`{"type": "update", "taskId": "...", "status": "in_progress"}\` — change one task.
255
+
256
+ This matters most right after compaction: the conversation history that mentioned your previous sync may have been compressed, but your task list is still there. Always \`list\` before \`sync\` if you suspect context loss.
257
+
237
258
  Create a task:
238
259
  \`\`\`json
239
260
  {
@@ -650,7 +671,7 @@ Always use a detailed task description to provide context for the task, and leve
650
671
  * @private
651
672
  */
652
673
  async syncTasks(agent, params, context) {
653
- let { tasks } = params;
674
+ let { tasks, confirmReplace } = params;
654
675
 
655
676
  // Parse tasks if provided as JSON string
656
677
  if (typeof tasks === 'string') {
@@ -669,6 +690,55 @@ Always use a detailed task description to provide context for the task, and leve
669
690
  throw new Error('Tasks array cannot be empty');
670
691
  }
671
692
 
693
+ // ── DESTRUCTIVE-SYNC GUARDRAIL ───────────────────────────────────
694
+ // Real failure observed in production: an agent (post-compaction)
695
+ // lost track of its existing 9-task plan and called sync with a
696
+ // 4-task list of unrelated work. sync silently dropped all 9
697
+ // → agent built the wrong thing → user had to repeat themselves
698
+ // multiple times before the agent finally re-read the user request.
699
+ //
700
+ // Rule: if this sync would drop ANY pending or in_progress task
701
+ // whose title doesn't match an incoming task, refuse the call
702
+ // unless the agent explicitly passes `confirmReplace: true`.
703
+ // Completed/cancelled tasks can be silently pruned — they're done.
704
+ //
705
+ // Why this isn't too strict: matching ignores case and surrounding
706
+ // whitespace in titles (see findExistingTask below). An honest plan
707
+ // refinement that renames a few tasks will still hit this guard, which
708
+ // is intended — the agent should acknowledge it's replacing work.
709
+ const existingTasksForGuard = agent.taskList?.tasks || [];
710
+ const incomingTitles = new Set(
711
+ tasks
712
+ .map(t => (t.title || '').toLowerCase().trim())
713
+ .filter(Boolean)
714
+ );
715
+ const dropped = existingTasksForGuard.filter(t => {
716
+ // Already terminal — safe to drop.
717
+ if (t.status === 'completed' || t.status === 'cancelled') return false;
718
+ // Match against any incoming title (case-insensitive, trimmed) — same rule used below.
719
+ return !incomingTitles.has((t.title || '').toLowerCase().trim());
720
+ });
721
+
722
+ if (dropped.length > 0 && confirmReplace !== true) {
723
+ const summary = dropped
724
+ .map(t => ` - [${t.status}] ${t.title}`)
725
+ .join('\n');
726
+ const hint = [
727
+ `Sync would drop ${dropped.length} non-terminal task(s) that don't match any incoming title:`,
728
+ summary,
729
+ '',
730
+ 'If this is intentional (you really mean to replace the plan), retry with `confirmReplace: true`.',
731
+ 'If you instead want to ADD tasks without dropping existing ones, use action "create" per task.',
732
+ 'If you want to keep an existing task, include its title verbatim in the incoming list.',
733
+ '',
734
+ 'This guardrail prevents post-compaction context loss from silently destroying in-flight work.',
735
+ ].join('\n');
736
+ const err = new Error(hint);
737
+ err.code = 'SYNC_WOULD_DROP_OPEN_TASKS';
738
+ err.droppedTasks = dropped.map(t => ({ id: t.id, title: t.title, status: t.status, priority: t.priority }));
739
+ throw err;
740
+ }
741
+
672
742
  const timestamp = new Date().toISOString();
673
743
  const existingTasks = agent.taskList.tasks || [];
674
744
  const updatedTasks = [];
@@ -26,8 +26,26 @@ const SYSTEM_DEFAULTS = {
26
26
  };
27
27
 
28
28
  // Model Router Configuration
29
+ //
30
+ // ROUTER_MODEL is the model the Dynamic Model Routing feature calls
31
+ // (via a cheap chat-completion request) to decide which "real" model
32
+ // should handle each turn. Resolution order:
33
+ // 1. env LOXIA_ROUTER_MODEL — operator override, no rebuild needed
34
+ // 2. 'gpt-4.1-nano' — current live default. The platform's
35
+ // autopilot-model-router deployment uses gpt-4.1-nano as its
36
+ // underlying model, and the model-catalog keys entries by the
37
+ // underlying model name (NOT the Azure deployment name), so the
38
+ // CLI must ask for 'gpt-4.1-nano' to be matched. Cheaper than the
39
+ // retired OpenAI 'model-router' product, same job.
40
+ //
41
+ // Historical note: this used to be the literal string 'model-router',
42
+ // matching an OpenAI product name. That product is no longer in our
43
+ // Azure catalog (no deployment keyed under that name), which caused
44
+ // every routing call to fail with HTTP 400 "Unsupported model:
45
+ // model-router" until the circuit breaker tripped. The fix migrates
46
+ // the default to the underlying model name that IS in the catalog.
29
47
  const MODEL_ROUTER_CONFIG = {
30
- ROUTER_MODEL: 'model-router', // Autopilot model router deployment
48
+ ROUTER_MODEL: process.env.LOXIA_ROUTER_MODEL || 'gpt-4.1-nano',
31
49
  CONTEXT_MESSAGES_COUNT: 5, // Number of recent messages to include
32
50
  BENCHMARK_REFRESH_INTERVAL: 3600000, // 1 hour in milliseconds
33
51
  FALLBACK_ON_ERROR: true, // Continue with previous model on router error