onbuzz 4.8.0 → 4.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/__tests__/agentScheduler.nativePromptPick.test.js +319 -0
- package/src/core/agentPool.js +12 -0
- package/src/core/agentScheduler.js +174 -2
- package/src/services/__tests__/modelRouterNaming.test.js +41 -23
- package/src/tools/__tests__/baseTool.test.js +142 -0
- package/src/tools/baseTool.js +83 -1
- package/src/tools/openaiFunctionSchemas.js +14 -0
- package/src/tools/skillsTool.js +282 -277
- package/src/utilities/constants.js +19 -1
|
@@ -364,6 +364,148 @@ describe('ToolsRegistry', () => {
|
|
|
364
364
|
expect(desc).toContain('HOW TO GET TOOL DOCUMENTATION');
|
|
365
365
|
});
|
|
366
366
|
|
|
367
|
+
describe('OPERATING POSTURE section', () => {
  // Stand-ins for the real memory / skills / taskmanager tools.
  // Each stub pins `this.id` immediately after super(), so the id the
  // registry sees is spelled out here rather than depending on whatever
  // BaseTool would derive from the class name. parseParameters() is
  // stubbed too because validateTool() requires it.
  class FakeMemoryTool extends BaseTool {
    constructor() {
      super();
      this.id = 'memory';
    }

    getDescription() {
      return 'Memory tool stub';
    }

    parseParameters() {
      return {};
    }

    async execute() {
      return { ok: true };
    }
  }

  class FakeSkillsTool extends BaseTool {
    constructor() {
      super();
      this.id = 'skills';
    }

    getDescription() {
      return 'Skills tool stub';
    }

    parseParameters() {
      return {};
    }

    async execute() {
      return { ok: true };
    }
  }

  class FakeTaskManagerTool extends BaseTool {
    constructor() {
      super();
      this.id = 'taskmanager';
    }

    getDescription() {
      return 'TaskManager tool stub';
    }

    parseParameters() {
      return {};
    }

    async execute() {
      return { ok: true };
    }
  }

  test('appears when memory tool is in capabilities (proactive memory nudge)', async () => {
    await registry.registerTool(FakeMemoryTool);

    const promptText = registry.generateToolDescriptionsForPrompt(['memory']);

    expect(promptText).toContain('OPERATING POSTURE');
    expect(promptText).toMatch(/memory.*list/i);
    // The plan/* convention must be cross-referenced in this section, so it
    // is not just a tip buried inside the memory tool's own description.
    expect(promptText).toContain('plan/');
  });

  test('appears when skills tool is in capabilities (proactive skills nudge)', async () => {
    await registry.registerTool(FakeSkillsTool);

    const promptText = registry.generateToolDescriptionsForPrompt(['skills']);

    expect(promptText).toContain('OPERATING POSTURE');
    expect(promptText).toMatch(/skills.*list/i);
  });

  test('does NOT appear when neither memory nor skills is in capabilities', async () => {
    await registry.registerTool(TestTool);

    const promptText = registry.generateToolDescriptionsForPrompt(['test']);

    expect(promptText).not.toContain('OPERATING POSTURE');
  });

  test('distinguishes memory vs taskmanager when both are present (so agents know which to use)', async () => {
    await registry.registerTool(FakeMemoryTool);
    await registry.registerTool(FakeTaskManagerTool);

    const promptText = registry.generateToolDescriptionsForPrompt(['memory', 'taskmanager']);

    expect(promptText).toContain('OPERATING POSTURE');
    expect(promptText).toContain('persistent knowledge'); // memory
    expect(promptText).toContain('step-by-step'); // taskmanager
  });
});
|
|
425
|
+
|
|
426
|
+
// ── Per-model prompt shape: skip text docs for tools with native schemas
|
|
427
|
+
// when the target uses the Responses API (codex / o-series / gpt-5-pro).
|
|
428
|
+
describe('apiType="responses" — trims duplication with native function schemas', () => {
  // Stub tools. `memory` and `terminal` are expected to have native function
  // schemas; `web` is not, so its text documentation must always be emitted.
  // Every stub carries a recognisable "LONG description" marker so the tests
  // can tell whether the full text block made it into the prompt.
  class FakeMemoryTool extends BaseTool {
    constructor() { super(); this.id = 'memory'; }
    getDescription() { return 'Memory tool stub with LONG description that would normally take many tokens'; }
    getSummary() { return 'Persistent memory'; }
    parseParameters() { return {}; }
    async execute() { return { ok: true }; }
  }

  class FakeTerminalTool extends BaseTool {
    constructor() { super(); this.id = 'terminal'; }
    getDescription() { return 'Terminal tool LONG description that would normally take many tokens'; }
    getSummary() { return 'Shell access'; }
    parseParameters() { return {}; }
    async execute() { return { ok: true }; }
  }

  class FakeWebTool extends BaseTool {
    // 'web' is NOT in OPENAI_FUNCTION_SCHEMAS — its text doc must always appear.
    constructor() { super(); this.id = 'web'; }
    getDescription() { return 'Web tool LONG description that would normally take many tokens'; }
    getSummary() { return 'Browser automation'; }
    parseParameters() { return {}; }
    async execute() { return { ok: true }; }
  }

  test('omits text description for tools that have native function schemas (memory, terminal)', async () => {
    await registry.registerTool(FakeMemoryTool);
    await registry.registerTool(FakeTerminalTool);

    const responsesDesc = registry.generateToolDescriptionsForPrompt(
      ['memory', 'terminal'],
      { apiType: 'responses' },
    );

    // Header still present + one-line pointer to structured schema.
    expect(responsesDesc).toContain('AVAILABLE TOOLS');
    expect(responsesDesc).toContain('see structured schema');
    // The big multi-line text doc must NOT be repeated.
    expect(responsesDesc).not.toContain('### MEMORY TOOL');
    expect(responsesDesc).not.toContain('### TERMINAL TOOL');
    expect(responsesDesc).not.toContain('LONG description that would normally take many tokens');
  });

  test('keeps text description for tools that do NOT have native function schemas (e.g. web)', async () => {
    await registry.registerTool(FakeWebTool);

    const responsesDesc = registry.generateToolDescriptionsForPrompt(
      ['web'],
      { apiType: 'responses' },
    );

    // 'web' has no native schema → text doc MUST be present.
    expect(responsesDesc).toContain('### WEB TOOL');
    expect(responsesDesc).toContain('LONG description that would normally take many tokens');
  });

  test('BACKWARD COMPAT: without apiType option, behaves exactly as before (full text for everything)', async () => {
    await registry.registerTool(FakeMemoryTool);
    await registry.registerTool(FakeTerminalTool);

    const defaultDesc = registry.generateToolDescriptionsForPrompt(['memory', 'terminal']);

    // No apiType → keep the heavy text docs as today.
    expect(defaultDesc).toContain('### MEMORY TOOL');
    expect(defaultDesc).toContain('### TERMINAL TOOL');
  });

  test('BACKWARD COMPAT: apiType="chat_completion" is equivalent to no apiType', async () => {
    await registry.registerTool(FakeMemoryTool);

    const explicitChatCompletion = registry.generateToolDescriptionsForPrompt(
      ['memory'],
      { apiType: 'chat_completion' },
    );
    const implicitDefault = registry.generateToolDescriptionsForPrompt(['memory']);

    expect(explicitChatCompletion).toBe(implicitDefault);
  });

  test('enhanceSystemPrompt forwards apiType option to the description builder', async () => {
    await registry.registerTool(FakeMemoryTool);

    const native = registry.enhanceSystemPrompt('Base.', ['memory'], { apiType: 'responses' });
    const inline = registry.enhanceSystemPrompt('Base.', ['memory']);

    // Native form is meaningfully shorter (we dropped the per-tool block).
    expect(native.length).toBeLessThan(inline.length);

    // Both variants must keep the original base prompt and the section
    // header. Assert on each one: checking only `native` would let a
    // regression in the default (back-compat) path go unnoticed.
    expect(native).toContain('Base.');
    expect(native).toContain('AVAILABLE TOOLS');
    expect(inline).toContain('Base.');
    expect(inline).toContain('AVAILABLE TOOLS');
  });
});
|
|
508
|
+
|
|
367
509
|
test('enhanceSystemPrompt appends tool docs', async () => {
|
|
368
510
|
await registry.registerTool(TestTool);
|
|
369
511
|
const enhanced = registry.enhanceSystemPrompt('Base prompt.', []);
|
package/src/tools/baseTool.js
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
ERROR_TYPES,
|
|
16
16
|
SYSTEM_DEFAULTS
|
|
17
17
|
} from '../utilities/constants.js';
|
|
18
|
+
import { NATIVE_SCHEMA_TOOL_NAMES } from './openaiFunctionSchemas.js';
|
|
18
19
|
|
|
19
20
|
class BaseTool {
|
|
20
21
|
constructor(config = {}, logger = null) {
|
|
@@ -690,8 +691,20 @@ class ToolsRegistry {
|
|
|
690
691
|
includeUsageGuidelines = true,
|
|
691
692
|
includeSecurityNotes = true,
|
|
692
693
|
compact = false,
|
|
693
|
-
layered = false
|
|
694
|
+
layered = false,
|
|
695
|
+
// 'responses' | 'chat_completion' | undefined.
|
|
696
|
+
// When 'responses', the target model uses native function-calling
|
|
697
|
+
// (Codex / o-series / gpt-5-pro). For tools that have a native
|
|
698
|
+
// schema in openaiFunctionSchemas.js, the structured schema sent
|
|
699
|
+
// in `tools:` IS the canonical source of truth for the model —
|
|
700
|
+
// so we skip baking the same information into the system prompt
|
|
701
|
+
// as text. This eliminates ~3K duplicated tokens per turn on the
|
|
702
|
+
// models that need it most. Defaults to undefined (= 'chat_completion'
|
|
703
|
+
// behaviour: include text descriptions). Old callers that don't
|
|
704
|
+
// pass this option get the previous behaviour verbatim — back-compat.
|
|
705
|
+
apiType = undefined,
|
|
694
706
|
} = options;
|
|
707
|
+
const isNativeApi = apiType === 'responses';
|
|
695
708
|
|
|
696
709
|
// Get tools to include — always inject 'help' so agents can query tool docs
|
|
697
710
|
let toolIds = capabilities.length > 0
|
|
@@ -751,6 +764,19 @@ class ToolsRegistry {
|
|
|
751
764
|
const tool = this.tools.get(toolId);
|
|
752
765
|
if (!tool || !tool.isEnabled) continue;
|
|
753
766
|
|
|
767
|
+
// Skip text descriptions for tools that have a native function
|
|
768
|
+
// schema, when the target model uses the Responses API. The
|
|
769
|
+
// structured schema is the canonical source for these models.
|
|
770
|
+
// We DO still emit a one-line pointer so the agent isn't blind
|
|
771
|
+
// to the tool's existence (its capability list lives in the
|
|
772
|
+
// system prompt elsewhere too, but a single-line mention here
|
|
773
|
+
// costs ~10 tokens and is a useful breadcrumb).
|
|
774
|
+
if (isNativeApi && NATIVE_SCHEMA_TOOL_NAMES.has(toolId.toLowerCase())) {
|
|
775
|
+
const summary = this.toolSummaries.get(toolId) || `${toolId} tool`;
|
|
776
|
+
description += `- **${toolId}** — ${summary} (see structured schema)\n`;
|
|
777
|
+
continue;
|
|
778
|
+
}
|
|
779
|
+
|
|
754
780
|
try {
|
|
755
781
|
if (compact) {
|
|
756
782
|
// Compact format - just tool name and brief description
|
|
@@ -795,6 +821,58 @@ class ToolsRegistry {
|
|
|
795
821
|
description += '- **TOOL RESULTS ARE AVAILABLE ONLY AFTER YOUR MESSAGE ENDS**: Tools execute after your entire message is sent. You will NOT see any tool results until your next turn. This means: if the next tool call depends on results from a previous one, they MUST be in separate messages. You may batch independent tool calls in a single message, but never assume or guess the output of a tool — always wait for the actual result in the next turn before proceeding.\n\n';
|
|
796
822
|
description += 'After invoking a tool, WAIT for the actual response. Do NOT generate imaginary responses.\n\n';
|
|
797
823
|
|
|
824
|
+
// ── Operating posture ────────────────────────────────────────────
// Cross-cutting habits agents should adopt VOLUNTARILY. The tool
// descriptions tell them WHAT each tool does; this block tells them
// WHEN to reach for them without being asked. Without this, agents
// tend to:
//   • skip the memory/skills check at the start of a new task,
//     re-discovering things the team already wrote down
//   • never create a plan/* memory, losing the thread across the
//     first compaction
//   • only invoke `help`/`skills` after a failure, not proactively
// Every bullet below is gated on the tool it talks about actually being
// in the agent's capability set — no point teaching "check skills" (or
// describing `memory`) to an agent that doesn't have that tool.
const hasMemory = toolIds.includes('memory');
const hasSkills = toolIds.includes('skills');
if (hasMemory || hasSkills) {
  description += '## OPERATING POSTURE\n\n';
  description += 'Treat these as habits, not optional extras. Use them proactively, before you need them.\n\n';

  // At least one of memory/skills is present inside this branch, so the
  // heading below always has at least one bullet underneath it.
  description += '**At the start of a new task or topic shift:**\n';
  if (hasMemory) {
    description += '- Run `memory` → `list` (titles only) to scan for relevant context the team or your past self stored. If a title looks relevant, `read` it before improvising.\n';
  }
  if (hasSkills) {
    description += '- Run `skills` → `list` to see if a skill already encodes how to do this task. If yes, follow its checklist instead of inventing one.\n';
  }
  description += '\n';

  // Memory-only guidance: working plans first, then durable facts.
  if (hasMemory) {
    description += '**When you recognize the work is multi-turn or multi-session:**\n';
    description += '- Save a `memory` entry with title starting `plan/` (e.g. `plan/refactor-auth`). The content auto-injects into your system prompt every turn under "AGENT WORKING PLAN" until you delete it. This is how you survive compaction — anything important enough to remember next session belongs here.\n';
    description += '- Update or delete the plan as the situation changes. A stale plan is worse than no plan.\n\n';

    description += '**When you learn something durable** (a user preference, a non-obvious constraint, an architectural fact the next agent will want):\n';
    description += '- Save it as a `memory` (non-`plan/` title). One-shot facts go here, NOT in your reply.\n\n';
  }

  description += '**Distinction:**\n';
  // Only describe `memory` when the agent really has it; a skills-only
  // agent must not be pointed at a tool that is not in its capabilities.
  if (hasMemory) {
    description += '- `memory` = persistent knowledge that survives sessions (why, constraints, durable facts, working plans).\n';
  }
  if (toolIds.includes('taskmanager')) {
    description += '- `taskmanager` = step-by-step checkboxes for the CURRENT task (what to do next, in order).\n';
  }
  if (hasSkills) {
    description += '- `skills` = reusable playbooks the team curated for recurring tasks.\n';
  }
  description += '\n';
}
|
|
875
|
+
|
|
798
876
|
// Add exploration strategy if code-map is available
|
|
799
877
|
if (toolIds.includes('code-map')) {
|
|
800
878
|
description += '## CODE EXPLORATION STRATEGY\n\n';
|
|
@@ -817,6 +895,10 @@ class ToolsRegistry {
|
|
|
817
895
|
* @returns {string} Enhanced system prompt
|
|
818
896
|
*/
|
|
819
897
|
enhanceSystemPrompt(existingPrompt, capabilities = [], options = {}) {
|
|
898
|
+
// `options.apiType` ('responses' | 'chat_completion' | undefined)
|
|
899
|
+
// is forwarded to the description builder so native-API models get
|
|
900
|
+
// a trimmed prompt that doesn't duplicate the structured schemas.
|
|
901
|
+
// Old callers omit it and get pre-existing behaviour unchanged.
|
|
820
902
|
const toolSection = this.generateToolDescriptionsForPrompt(capabilities, options);
|
|
821
903
|
|
|
822
904
|
if (!toolSection.trim()) {
|
|
@@ -322,4 +322,18 @@ export function getToolSchemasForAgent(capabilities = []) {
|
|
|
322
322
|
return OPENAI_FUNCTION_SCHEMAS.filter(s => allowed.has(s.name.toLowerCase()));
|
|
323
323
|
}
|
|
324
324
|
|
|
325
|
+
/**
 * Lower-cased names of all tools that have a native function schema in
 * OPENAI_FUNCTION_SCHEMAS.
 *
 * Exported as a Set so other modules (notably the system-prompt builder in
 * baseTool) can cheaply ask "does this tool already have a structured
 * schema?". For models called through the Responses API, the structured
 * schema passed in `tools:` is treated as the canonical description of the
 * tool; repeating the text description in the system prompt would put the
 * same information into the context window twice.
 */
const lowerCasedSchemaNames = OPENAI_FUNCTION_SCHEMAS.map(({ name }) => name.toLowerCase());

export const NATIVE_SCHEMA_TOOL_NAMES = new Set(lowerCasedSchemaNames);
|
|
338
|
+
|
|
325
339
|
export default OPENAI_FUNCTION_SCHEMAS;
|