onbuzz 4.8.0 → 4.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/__tests__/agentPool.test.js +185 -0
- package/src/core/__tests__/agentScheduler.nativePromptPick.test.js +319 -0
- package/src/core/__tests__/agentScheduler.taskListInjection.test.js +94 -0
- package/src/core/agentPool.js +319 -0
- package/src/core/agentScheduler.js +216 -2
- package/src/services/__tests__/conversationCompactionService.test.js +141 -0
- package/src/services/__tests__/modelRouterNaming.test.js +41 -23
- package/src/services/conversationCompactionService.js +120 -46
- package/src/tools/__tests__/baseTool.test.js +171 -0
- package/src/tools/__tests__/codeMapTool.test.js +179 -0
- package/src/tools/__tests__/taskManagerTool.test.js +141 -0
- package/src/tools/baseTool.js +89 -1
- package/src/tools/openaiFunctionSchemas.js +14 -0
- package/src/tools/skillsTool.js +282 -277
- package/src/tools/taskManagerTool.js +72 -2
- package/src/utilities/constants.js +19 -1
|
@@ -219,7 +219,7 @@ ACTIONS:
|
|
|
219
219
|
|
|
220
220
|
EXAMPLES:
|
|
221
221
|
|
|
222
|
-
Sync task list (RECOMMENDED):
|
|
222
|
+
Sync task list (RECOMMENDED — but BEWARE: replaces the whole list):
|
|
223
223
|
\`\`\`json
|
|
224
224
|
{
|
|
225
225
|
"toolId": "taskmanager",
|
|
@@ -234,6 +234,27 @@ Sync task list (RECOMMENDED):
|
|
|
234
234
|
}
|
|
235
235
|
\`\`\`
|
|
236
236
|
|
|
237
|
+
⚠️ **DESTRUCTIVE-SYNC GUARDRAIL** — sync replaces the entire task list. Any existing pending/in_progress task whose title doesn't match an incoming task is dropped. If you don't intend to drop those tasks, sync will REFUSE the call and tell you which tasks were at risk.
|
|
238
|
+
|
|
239
|
+
To proceed with a destructive sync intentionally, add \`"confirmReplace": true\`:
|
|
240
|
+
\`\`\`json
|
|
241
|
+
{
|
|
242
|
+
"toolId": "taskmanager",
|
|
243
|
+
"actions": [{
|
|
244
|
+
"type": "sync",
|
|
245
|
+
"confirmReplace": true,
|
|
246
|
+
"tasks": [ /* the full new plan */ ]
|
|
247
|
+
}]
|
|
248
|
+
}
|
|
249
|
+
\`\`\`
|
|
250
|
+
|
|
251
|
+
**Before issuing sync, prefer one of these instead** (they don't risk dropping tasks):
|
|
252
|
+
- \`{"type": "list"}\` first — see what tasks already exist before deciding to replace them.
|
|
253
|
+
- \`{"type": "create", "title": "..."}\` — add a single task without touching the rest.
|
|
254
|
+
- \`{"type": "update", "taskId": "...", "status": "in_progress"}\` — change one task.
|
|
255
|
+
|
|
256
|
+
This matters most right after compaction: the conversation history that mentioned your previous sync may have been compressed, but your task list is still there. Always \`list\` before \`sync\` if you suspect context loss.
|
|
257
|
+
|
|
237
258
|
Create a task:
|
|
238
259
|
\`\`\`json
|
|
239
260
|
{
|
|
@@ -650,7 +671,7 @@ Always use a detailed task description to provide context for the task, and leve
|
|
|
650
671
|
* @private
|
|
651
672
|
*/
|
|
652
673
|
async syncTasks(agent, params, context) {
|
|
653
|
-
let { tasks } = params;
|
|
674
|
+
let { tasks, confirmReplace } = params;
|
|
654
675
|
|
|
655
676
|
// Parse tasks if provided as JSON string
|
|
656
677
|
if (typeof tasks === 'string') {
|
|
@@ -669,6 +690,55 @@ Always use a detailed task description to provide context for the task, and leve
|
|
|
669
690
|
throw new Error('Tasks array cannot be empty');
|
|
670
691
|
}
|
|
671
692
|
|
|
693
|
+
// ── DESTRUCTIVE-SYNC GUARDRAIL ───────────────────────────────────
|
|
694
|
+
// Real failure observed in production: an agent (post-compaction)
|
|
695
|
+
// lost track of its existing 9-task plan and called sync with a
|
|
696
|
+
// 4-task list of unrelated work. sync silently dropped all 9
|
|
697
|
+
// → agent built the wrong thing → user had to repeat themselves
|
|
698
|
+
// multiple times before the agent finally re-read the user request.
|
|
699
|
+
//
|
|
700
|
+
// Rule: if this sync would drop ANY pending or in_progress task
|
|
701
|
+
// whose title doesn't match an incoming task, refuse the call
|
|
702
|
+
// unless the agent explicitly passes `confirmReplace: true`.
|
|
703
|
+
// Completed/cancelled tasks can be silently pruned — they're done.
|
|
704
|
+
//
|
|
705
|
+
// Why this isn't too strict: matching is fuzzy (case-insensitive
|
|
706
|
+
// title compare; see findExistingTask below). An honest plan
|
|
707
|
+
// refinement that renames a few tasks will hit this guard, which
|
|
708
|
+
// is correct — the agent should acknowledge it's replacing work.
|
|
709
|
+
const existingTasksForGuard = agent.taskList?.tasks || [];
|
|
710
|
+
const incomingTitles = new Set(
|
|
711
|
+
tasks
|
|
712
|
+
.map(t => (t.title || '').toLowerCase().trim())
|
|
713
|
+
.filter(Boolean)
|
|
714
|
+
);
|
|
715
|
+
const dropped = existingTasksForGuard.filter(t => {
|
|
716
|
+
// Already terminal — safe to drop.
|
|
717
|
+
if (t.status === 'completed' || t.status === 'cancelled') return false;
|
|
718
|
+
// Match against any incoming title — same fuzzy rule used below.
|
|
719
|
+
return !incomingTitles.has((t.title || '').toLowerCase().trim());
|
|
720
|
+
});
|
|
721
|
+
|
|
722
|
+
if (dropped.length > 0 && confirmReplace !== true) {
|
|
723
|
+
const summary = dropped
|
|
724
|
+
.map(t => ` - [${t.status}] ${t.title}`)
|
|
725
|
+
.join('\n');
|
|
726
|
+
const hint = [
|
|
727
|
+
`Sync would drop ${dropped.length} non-terminal task(s) that don't match any incoming title:`,
|
|
728
|
+
summary,
|
|
729
|
+
'',
|
|
730
|
+
'If this is intentional (you really mean to replace the plan), retry with `confirmReplace: true`.',
|
|
731
|
+
'If you instead want to ADD tasks without dropping existing ones, use action "create" per task.',
|
|
732
|
+
'If you want to keep an existing task, include its title verbatim in the incoming list.',
|
|
733
|
+
'',
|
|
734
|
+
'This guardrail prevents post-compaction context loss from silently destroying in-flight work.',
|
|
735
|
+
].join('\n');
|
|
736
|
+
const err = new Error(hint);
|
|
737
|
+
err.code = 'SYNC_WOULD_DROP_OPEN_TASKS';
|
|
738
|
+
err.droppedTasks = dropped.map(t => ({ id: t.id, title: t.title, status: t.status, priority: t.priority }));
|
|
739
|
+
throw err;
|
|
740
|
+
}
|
|
741
|
+
|
|
672
742
|
const timestamp = new Date().toISOString();
|
|
673
743
|
const existingTasks = agent.taskList.tasks || [];
|
|
674
744
|
const updatedTasks = [];
|
|
@@ -26,8 +26,26 @@ const SYSTEM_DEFAULTS = {
|
|
|
26
26
|
};
|
|
27
27
|
|
|
28
28
|
// Model Router Configuration
|
|
29
|
+
//
|
|
30
|
+
// ROUTER_MODEL is the model the Dynamic Model Routing feature calls
|
|
31
|
+
// (via a cheap chat-completion request) to decide which "real" model
|
|
32
|
+
// should handle each turn. Resolution order:
|
|
33
|
+
// 1. env LOXIA_ROUTER_MODEL — operator override, no rebuild needed
|
|
34
|
+
// 2. 'gpt-4.1-nano' — current live default. The platform's
|
|
35
|
+
// autopilot-model-router deployment uses gpt-4.1-nano as its
|
|
36
|
+
// underlying model, and the model-catalog keys entries by the
|
|
37
|
+
// underlying model name (NOT the Azure deployment name), so the
|
|
38
|
+
// CLI must ask for 'gpt-4.1-nano' to be matched. Cheaper than the
|
|
39
|
+
// retired OpenAI 'model-router' product, same job.
|
|
40
|
+
//
|
|
41
|
+
// Historical note: this used to be the literal string 'model-router',
|
|
42
|
+
// matching an OpenAI product name. That product is no longer in our
|
|
43
|
+
// Azure catalog (no deployment keyed under that name), which caused
|
|
44
|
+
// every routing call to fail with HTTP 400 "Unsupported model:
|
|
45
|
+
// model-router" until the circuit breaker tripped. The fix migrates
|
|
46
|
+
// the default to the underlying model name that IS in the catalog.
|
|
29
47
|
const MODEL_ROUTER_CONFIG = {
|
|
30
|
-
ROUTER_MODEL: 'model-router',
|
|
48
|
+
ROUTER_MODEL: process.env.LOXIA_ROUTER_MODEL || 'gpt-4.1-nano',
|
|
31
49
|
CONTEXT_MESSAGES_COUNT: 5, // Number of recent messages to include
|
|
32
50
|
BENCHMARK_REFRESH_INTERVAL: 3600000, // 1 hour in milliseconds
|
|
33
51
|
FALLBACK_ON_ERROR: true, // Continue with previous model on router error
|