@dotsetlabs/dotclaw 2.4.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -10
- package/README.md +8 -4
- package/config-examples/runtime.json +34 -8
- package/config-examples/tool-policy.json +12 -2
- package/container/agent-runner/package-lock.json +2 -2
- package/container/agent-runner/package.json +1 -1
- package/container/agent-runner/src/agent-config.ts +19 -3
- package/container/agent-runner/src/container-protocol.ts +11 -0
- package/container/agent-runner/src/context-overflow-recovery.ts +39 -0
- package/container/agent-runner/src/index.ts +603 -165
- package/container/agent-runner/src/openrouter-input.ts +159 -0
- package/container/agent-runner/src/system-prompt.ts +13 -3
- package/container/agent-runner/src/tool-loop-policy.ts +741 -0
- package/container/agent-runner/src/tools.ts +211 -8
- package/dist/agent-context.d.ts +1 -0
- package/dist/agent-context.d.ts.map +1 -1
- package/dist/agent-context.js +21 -9
- package/dist/agent-context.js.map +1 -1
- package/dist/agent-execution.d.ts +2 -0
- package/dist/agent-execution.d.ts.map +1 -1
- package/dist/agent-execution.js +164 -15
- package/dist/agent-execution.js.map +1 -1
- package/dist/agent-semaphore.d.ts +24 -1
- package/dist/agent-semaphore.d.ts.map +1 -1
- package/dist/agent-semaphore.js +109 -20
- package/dist/agent-semaphore.js.map +1 -1
- package/dist/cli.js +3 -11
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/container-protocol.d.ts +22 -0
- package/dist/container-protocol.d.ts.map +1 -1
- package/dist/container-protocol.js.map +1 -1
- package/dist/container-runner.d.ts +7 -0
- package/dist/container-runner.d.ts.map +1 -1
- package/dist/container-runner.js +417 -143
- package/dist/container-runner.js.map +1 -1
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +46 -12
- package/dist/db.js.map +1 -1
- package/dist/error-messages.d.ts.map +1 -1
- package/dist/error-messages.js +18 -4
- package/dist/error-messages.js.map +1 -1
- package/dist/failover-policy.d.ts +41 -0
- package/dist/failover-policy.d.ts.map +1 -0
- package/dist/failover-policy.js +261 -0
- package/dist/failover-policy.js.map +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/ipc-dispatcher.d.ts.map +1 -1
- package/dist/ipc-dispatcher.js +27 -43
- package/dist/ipc-dispatcher.js.map +1 -1
- package/dist/mcp-config.d.ts +22 -0
- package/dist/mcp-config.d.ts.map +1 -0
- package/dist/mcp-config.js +94 -0
- package/dist/mcp-config.js.map +1 -0
- package/dist/memory-backend.d.ts +27 -0
- package/dist/memory-backend.d.ts.map +1 -0
- package/dist/memory-backend.js +112 -0
- package/dist/memory-backend.js.map +1 -0
- package/dist/memory-recall.d.ts.map +1 -1
- package/dist/memory-recall.js +135 -22
- package/dist/memory-recall.js.map +1 -1
- package/dist/memory-store.d.ts +1 -0
- package/dist/memory-store.d.ts.map +1 -1
- package/dist/memory-store.js +55 -7
- package/dist/memory-store.js.map +1 -1
- package/dist/message-pipeline.d.ts +24 -0
- package/dist/message-pipeline.d.ts.map +1 -1
- package/dist/message-pipeline.js +131 -27
- package/dist/message-pipeline.js.map +1 -1
- package/dist/metrics.d.ts +1 -0
- package/dist/metrics.d.ts.map +1 -1
- package/dist/metrics.js +9 -0
- package/dist/metrics.js.map +1 -1
- package/dist/providers/discord/discord-provider.d.ts.map +1 -1
- package/dist/providers/discord/discord-provider.js +72 -4
- package/dist/providers/discord/discord-provider.js.map +1 -1
- package/dist/providers/telegram/telegram-provider.d.ts.map +1 -1
- package/dist/providers/telegram/telegram-provider.js +65 -3
- package/dist/providers/telegram/telegram-provider.js.map +1 -1
- package/dist/recall-policy.d.ts +12 -0
- package/dist/recall-policy.d.ts.map +1 -0
- package/dist/recall-policy.js +89 -0
- package/dist/recall-policy.js.map +1 -0
- package/dist/runtime-config.d.ts +33 -0
- package/dist/runtime-config.d.ts.map +1 -1
- package/dist/runtime-config.js +109 -9
- package/dist/runtime-config.js.map +1 -1
- package/dist/streaming.d.ts.map +1 -1
- package/dist/streaming.js +125 -33
- package/dist/streaming.js.map +1 -1
- package/dist/task-scheduler.d.ts.map +1 -1
- package/dist/task-scheduler.js +4 -2
- package/dist/task-scheduler.js.map +1 -1
- package/dist/tool-policy.d.ts.map +1 -1
- package/dist/tool-policy.js +26 -4
- package/dist/tool-policy.js.map +1 -1
- package/dist/trace-writer.d.ts +12 -0
- package/dist/trace-writer.d.ts.map +1 -1
- package/dist/trace-writer.js.map +1 -1
- package/dist/turn-hygiene.d.ts +14 -0
- package/dist/turn-hygiene.d.ts.map +1 -0
- package/dist/turn-hygiene.js +214 -0
- package/dist/turn-hygiene.js.map +1 -0
- package/dist/webhook.d.ts.map +1 -1
- package/dist/webhook.js +1 -0
- package/dist/webhook.js.map +1 -1
- package/package.json +15 -1
- package/scripts/benchmark-baseline.js +365 -0
- package/scripts/benchmark-harness.js +1413 -0
- package/scripts/benchmark-scenarios.js +301 -0
- package/scripts/canary-suite.js +123 -0
- package/scripts/generate-controlled-traces.js +230 -0
- package/scripts/release-slo-check.js +214 -0
- package/scripts/run-live-canary.js +339 -0
|
@@ -33,6 +33,27 @@ import {
|
|
|
33
33
|
import { loadPromptPackWithCanary, formatPromptPack, PromptPack } from './prompt-packs.js';
|
|
34
34
|
import { buildSkillCatalog, type SkillCatalog } from './skill-loader.js';
|
|
35
35
|
import { buildSystemPrompt } from './system-prompt.js';
|
|
36
|
+
import { buildContextOverflowRecoveryPlan } from './context-overflow-recovery.js';
|
|
37
|
+
import {
|
|
38
|
+
buildForcedSynthesisPrompt,
|
|
39
|
+
buildToolExecutionNudgePrompt,
|
|
40
|
+
buildToolOutcomeFallback,
|
|
41
|
+
compactToolConversationItems,
|
|
42
|
+
detectToolExecutionRequirement,
|
|
43
|
+
buildMalformedArgumentsRecoveryHint,
|
|
44
|
+
isNonRetryableToolError,
|
|
45
|
+
normalizeToolCallArguments,
|
|
46
|
+
normalizeToolCallSignature,
|
|
47
|
+
normalizeToolRoundSignature,
|
|
48
|
+
parseCreateReadFileInstruction,
|
|
49
|
+
parseListReadNewestInstruction,
|
|
50
|
+
shouldRetryIdempotentToolCall,
|
|
51
|
+
} from './tool-loop-policy.js';
|
|
52
|
+
import {
|
|
53
|
+
injectImagesIntoContextInput,
|
|
54
|
+
loadImageAttachmentsForInput,
|
|
55
|
+
messagesToOpenRouterInput,
|
|
56
|
+
} from './openrouter-input.js';
|
|
36
57
|
|
|
37
58
|
type OpenRouterResult = ReturnType<OpenRouter['callModel']>;
|
|
38
59
|
|
|
@@ -143,6 +164,10 @@ function log(message: string): void {
|
|
|
143
164
|
console.error(`[agent-runner] ${message}`);
|
|
144
165
|
}
|
|
145
166
|
|
|
167
|
+
/**
 * Returns a promise that settles (with no value) once a timer of `ms`
 * milliseconds has fired. Negative delays are clamped to zero before the
 * timer is scheduled.
 */
function sleep(ms: number): Promise<void> {
  const delayMs = Math.max(0, ms);
  return new Promise<void>((done) => {
    setTimeout(done, delayMs);
  });
}
|
|
170
|
+
|
|
146
171
|
function classifyError(err: unknown): 'retryable' | 'context_overflow' | null {
|
|
147
172
|
const msg = err instanceof Error ? err.message : String(err);
|
|
148
173
|
const lower = msg.toLowerCase();
|
|
@@ -494,55 +519,6 @@ function loadClaudeNotes(): { group: string | null; global: string | null } {
|
|
|
494
519
|
};
|
|
495
520
|
}
|
|
496
521
|
|
|
497
|
-
|
|
498
|
-
// ── Image/Vision support ──────────────────────────────────────────────
|
|
499
|
-
|
|
500
|
-
const MAX_IMAGE_BYTES = 5 * 1024 * 1024; // 5MB per image
|
|
501
|
-
const MAX_TOTAL_IMAGE_BYTES = 20 * 1024 * 1024; // 20MB total across all images
|
|
502
|
-
const IMAGE_MIME_TYPES = new Set(['image/jpeg', 'image/png', 'image/gif', 'image/webp']);
|
|
503
|
-
|
|
504
|
-
function loadImageAttachments(attachments?: ContainerInput['attachments']): Array<{
|
|
505
|
-
type: 'image_url';
|
|
506
|
-
image_url: { url: string };
|
|
507
|
-
}> {
|
|
508
|
-
if (!attachments) return [];
|
|
509
|
-
const images: Array<{ type: 'image_url'; image_url: { url: string } }> = [];
|
|
510
|
-
let totalBytes = 0;
|
|
511
|
-
for (const att of attachments) {
|
|
512
|
-
if (att.type !== 'photo') continue;
|
|
513
|
-
const mime = att.mime_type || 'image/jpeg';
|
|
514
|
-
if (!IMAGE_MIME_TYPES.has(mime)) continue;
|
|
515
|
-
try {
|
|
516
|
-
const stat = fs.statSync(att.path);
|
|
517
|
-
if (stat.size > MAX_IMAGE_BYTES) {
|
|
518
|
-
log(`Skipping image ${att.path}: ${stat.size} bytes exceeds ${MAX_IMAGE_BYTES}`);
|
|
519
|
-
continue;
|
|
520
|
-
}
|
|
521
|
-
if (totalBytes + stat.size > MAX_TOTAL_IMAGE_BYTES) {
|
|
522
|
-
log(`Skipping image ${att.path}: cumulative size would exceed ${MAX_TOTAL_IMAGE_BYTES}`);
|
|
523
|
-
break;
|
|
524
|
-
}
|
|
525
|
-
const data = fs.readFileSync(att.path);
|
|
526
|
-
totalBytes += data.length;
|
|
527
|
-
const b64 = data.toString('base64');
|
|
528
|
-
images.push({
|
|
529
|
-
type: 'image_url',
|
|
530
|
-
image_url: { url: `data:${mime};base64,${b64}` }
|
|
531
|
-
});
|
|
532
|
-
} catch (err) {
|
|
533
|
-
log(`Failed to load image ${att.path}: ${err instanceof Error ? err.message : err}`);
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
return images;
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
function messagesToOpenRouter(messages: Message[]) {
|
|
540
|
-
return messages.map(message => ({
|
|
541
|
-
role: message.role,
|
|
542
|
-
content: message.content
|
|
543
|
-
}));
|
|
544
|
-
}
|
|
545
|
-
|
|
546
522
|
function clampContextMessages(messages: Message[], tokensPerChar: number, maxTokens: number): Message[] {
|
|
547
523
|
if (!Number.isFinite(maxTokens) || maxTokens <= 0) return messages;
|
|
548
524
|
const tpc = tokensPerChar > 0 ? tokensPerChar : 0.25;
|
|
@@ -560,6 +536,44 @@ function clampContextMessages(messages: Message[], tokensPerChar: number, maxTok
|
|
|
560
536
|
});
|
|
561
537
|
}
|
|
562
538
|
|
|
539
|
+
/**
 * Decides whether tool schemas should be withheld for a turn because the
 * prompt reads like a question about the current conversation itself.
 *
 * @param prompt - Raw prompt text for the turn (coerced to a string and trimmed).
 * @param toolRequired - When true, tools are never disabled, whatever the wording.
 * @returns `true` when any conversation-recall pattern matches the prompt;
 *          `false` for empty prompts, tool-required turns, or no match.
 */
function shouldDisableToolsForPrompt(prompt: string, toolRequired: boolean): boolean {
  // Wording cues checked in order; the first hit is enough.
  const recallPatterns: RegExp[] = [
    // Explicit scenario tags such as "[memory]" or "[scenario:memory_carryover]".
    /\[(?:scenario:)?memory(?:_carryover)?\]/i,
    // "from this conversation", "from our same chat", ...
    /\bfrom\s+(?:this|our)\s+(?:same\s+)?(?:conversation|chat)\b/i,
    // "what did I just ...", "what did you just ..."
    /\bwhat\s+did\s+(?:i|you)\s+just\b/i,
    // "earlier in this conversation", "earlier in chat", ...
    /\bearlier\s+in\s+(?:this\s+)?(?:conversation|chat)\b/i,
  ];

  if (toolRequired) {
    return false;
  }

  const normalized = String(prompt || '').trim();
  if (normalized.length === 0) {
    return false;
  }

  return recallPatterns.some((pattern) => pattern.test(normalized));
}
|
|
549
|
+
|
|
550
|
+
/**
 * Derives an output-size cap from brevity cues in the prompt wording.
 *
 * Each recognised cue proposes a limit and the smallest proposed limit wins:
 *  - "one word" / "single-word"            -> 48
 *  - "one [concise|short|brief] sentence"  -> 180
 *  - "exactly N bullet(s)/bullet point(s)" -> 140 + 90 * N (N clamped to 1..10),
 *                                             bounded to the range 180..900
 *  - "concise(ly)" / "brief(ly)" / "short" -> 260
 *
 * @param prompt - Raw prompt text (coerced to a string and trimmed).
 * @returns The tightest applicable cap, or `undefined` when the prompt is
 *          empty or contains no brevity cue.
 */
function resolvePromptOutputCap(prompt: string): number | undefined {
  const text = String(prompt || '').trim();
  if (!text) return undefined;
  let cap: number | undefined;

  if (/\b(?:one|single)[-\s]?word\b/i.test(text)) {
    cap = 48;
  }

  if (/\bone\s+(?:concise\s+|short\s+|brief\s+)?sentence\b/i.test(text)) {
    cap = cap !== undefined ? Math.min(cap, 180) : 180;
  }

  const bulletMatch = text.match(/\bexactly\s+(\d+)\s+bullet(?:\s+point)?s?\b/i);
  if (bulletMatch) {
    // Clamp the requested count so absurd numbers cannot inflate the cap.
    const bulletCount = Math.min(10, Math.max(1, Math.floor(Number(bulletMatch[1]) || 0)));
    const bulletCap = Math.max(180, Math.min(900, 140 + (bulletCount * 90)));
    cap = cap !== undefined ? Math.min(cap, bulletCap) : bulletCap;
  }

  // The alternation must be grouped: an ungrouped `\bconcise|brief|short\b`
  // parses as three independent branches, so a bare `brief` would match inside
  // unrelated words such as "debrief". The adverb forms are listed explicitly
  // so "briefly" / "concisely" keep triggering the cap.
  if (/\b(?:concise(?:ly)?|brief(?:ly)?|short)\b/i.test(text)) {
    cap = cap !== undefined ? Math.min(cap, 260) : 260;
  }

  return cap;
}
|
|
576
|
+
|
|
563
577
|
async function updateMemorySummary(params: {
|
|
564
578
|
openrouter: OpenRouter;
|
|
565
579
|
model: string;
|
|
@@ -686,6 +700,13 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
686
700
|
const maxToolSteps = Number.isFinite(input.maxToolSteps)
|
|
687
701
|
? Math.max(1, Math.floor(input.maxToolSteps as number))
|
|
688
702
|
: agent.tools.maxToolSteps;
|
|
703
|
+
const completionGuard = agent.tools.completionGuard;
|
|
704
|
+
const idempotentRetryAttempts = Math.max(1, Math.floor(completionGuard.idempotentRetryAttempts));
|
|
705
|
+
const idempotentRetryBackoffMs = Math.max(0, Math.floor(completionGuard.idempotentRetryBackoffMs));
|
|
706
|
+
const repeatedSignatureThreshold = Math.max(2, Math.floor(completionGuard.repeatedSignatureThreshold));
|
|
707
|
+
const repeatedRoundThreshold = Math.max(2, Math.floor(completionGuard.repeatedRoundThreshold));
|
|
708
|
+
const nonRetryableFailureThreshold = Math.max(1, Math.floor(completionGuard.nonRetryableFailureThreshold || 3));
|
|
709
|
+
const forceSynthesisAfterTools = completionGuard.forceSynthesisAfterTools !== false;
|
|
689
710
|
const memoryExtractionEnabled = agent.memory.extraction.enabled;
|
|
690
711
|
const isDaemon = process.env.DOTCLAW_DAEMON === '1';
|
|
691
712
|
const memoryExtractionMaxMessages = agent.memory.extraction.maxMessages;
|
|
@@ -714,6 +735,27 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
714
735
|
groupFolder: input.groupFolder,
|
|
715
736
|
isMain: input.isMain
|
|
716
737
|
}, agent.ipc);
|
|
738
|
+
const inputToolPolicy = (input.toolPolicy && typeof input.toolPolicy === 'object')
|
|
739
|
+
? input.toolPolicy as { allow?: string[]; deny?: string[] }
|
|
740
|
+
: {};
|
|
741
|
+
const hasAllowPolicy = Array.isArray(inputToolPolicy.allow);
|
|
742
|
+
const allowedToolSet = new Set(
|
|
743
|
+
(hasAllowPolicy ? (inputToolPolicy.allow || []) : [])
|
|
744
|
+
.map((name) => String(name || '').trim().toLowerCase())
|
|
745
|
+
.filter(Boolean)
|
|
746
|
+
);
|
|
747
|
+
const deniedToolSet = new Set(
|
|
748
|
+
(inputToolPolicy.deny || [])
|
|
749
|
+
.map((name) => String(name || '').trim().toLowerCase())
|
|
750
|
+
.filter(Boolean)
|
|
751
|
+
);
|
|
752
|
+
const isToolAllowedByPolicy = (name: string): boolean => {
|
|
753
|
+
const normalized = String(name || '').trim().toLowerCase();
|
|
754
|
+
if (!normalized) return false;
|
|
755
|
+
if (deniedToolSet.has(normalized)) return false;
|
|
756
|
+
if (hasAllowPolicy && !allowedToolSet.has(normalized)) return false;
|
|
757
|
+
return true;
|
|
758
|
+
};
|
|
717
759
|
const tools = createTools({
|
|
718
760
|
chatJid: input.chatJid,
|
|
719
761
|
groupFolder: input.groupFolder,
|
|
@@ -748,16 +790,28 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
748
790
|
};
|
|
749
791
|
};
|
|
750
792
|
const mcp = await discoverMcpTools(agent, wrapMcp);
|
|
751
|
-
tools.
|
|
793
|
+
const filteredMcpTools = mcp.tools.filter(toolEntry => isToolAllowedByPolicy(toolEntry.function.name));
|
|
794
|
+
tools.push(...filteredMcpTools);
|
|
752
795
|
mcpCleanup = mcp.cleanup;
|
|
753
|
-
if (
|
|
754
|
-
log(`MCP: discovered ${
|
|
796
|
+
if (filteredMcpTools.length > 0) {
|
|
797
|
+
log(`MCP: discovered ${filteredMcpTools.length} external tools`);
|
|
755
798
|
}
|
|
756
799
|
} catch (err) {
|
|
757
800
|
log(`MCP discovery failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
758
801
|
}
|
|
759
802
|
}
|
|
760
803
|
|
|
804
|
+
const cleanupMcpConnections = async () => {
|
|
805
|
+
if (!mcpCleanup) return;
|
|
806
|
+
const cleanup = mcpCleanup;
|
|
807
|
+
mcpCleanup = null;
|
|
808
|
+
try {
|
|
809
|
+
await cleanup();
|
|
810
|
+
} catch {
|
|
811
|
+
// ignore cleanup errors
|
|
812
|
+
}
|
|
813
|
+
};
|
|
814
|
+
|
|
761
815
|
// Build schema-only tools (no execute functions) for SDK — prevents the SDK from
|
|
762
816
|
// auto-executing tools in its internal loop, which drops conversation context in
|
|
763
817
|
// follow-up API calls (makeFollowupRequest only sends model output + tool results,
|
|
@@ -782,6 +836,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
782
836
|
if (process.env.DOTCLAW_SELF_CHECK === '1') {
|
|
783
837
|
try {
|
|
784
838
|
const details = await runSelfCheck({ model });
|
|
839
|
+
await cleanupMcpConnections();
|
|
785
840
|
return {
|
|
786
841
|
status: 'success',
|
|
787
842
|
result: `Self-check passed: ${details.join(', ')}`,
|
|
@@ -790,6 +845,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
790
845
|
} catch (err) {
|
|
791
846
|
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
792
847
|
log(`Self-check failed: ${errorMessage}`);
|
|
848
|
+
await cleanupMcpConnections();
|
|
793
849
|
return {
|
|
794
850
|
status: 'error',
|
|
795
851
|
result: null,
|
|
@@ -821,6 +877,22 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
821
877
|
}).join('\n');
|
|
822
878
|
prompt = `${prompt}\n\n<latest_attachments>\n${attachmentSummary}\n</latest_attachments>`;
|
|
823
879
|
}
|
|
880
|
+
const toolExecutionRequirement = detectToolExecutionRequirement(prompt);
|
|
881
|
+
const disableToolsForTurn = shouldDisableToolsForPrompt(prompt, toolExecutionRequirement.required);
|
|
882
|
+
const promptOutputCap = resolvePromptOutputCap(prompt);
|
|
883
|
+
const effectiveMaxOutputTokens = promptOutputCap
|
|
884
|
+
? (
|
|
885
|
+
(typeof resolvedMaxOutputTokens === 'number' && Number.isFinite(resolvedMaxOutputTokens))
|
|
886
|
+
? Math.max(64, Math.min(resolvedMaxOutputTokens, promptOutputCap))
|
|
887
|
+
: promptOutputCap
|
|
888
|
+
)
|
|
889
|
+
: resolvedMaxOutputTokens;
|
|
890
|
+
if (typeof effectiveMaxOutputTokens === 'number' && effectiveMaxOutputTokens !== resolvedMaxOutputTokens) {
|
|
891
|
+
log(`Applying prompt output cap: ${effectiveMaxOutputTokens} tokens`);
|
|
892
|
+
}
|
|
893
|
+
if (disableToolsForTurn) {
|
|
894
|
+
log('Prompt classified as conversation-recall: disabling tool schema for this turn');
|
|
895
|
+
}
|
|
824
896
|
|
|
825
897
|
appendHistory(sessionCtx, 'user', prompt);
|
|
826
898
|
let history = loadHistory(sessionCtx);
|
|
@@ -829,11 +901,11 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
829
901
|
history = limitHistoryTurns(history, agent.context.maxHistoryTurns);
|
|
830
902
|
}
|
|
831
903
|
|
|
832
|
-
// Dynamic context budget: if recentContextTokens is 0 (auto), allocate
|
|
833
|
-
// conversation history
|
|
904
|
+
// Dynamic context budget: if recentContextTokens is 0 (auto), allocate 35% of context to
|
|
905
|
+
// conversation history, capped at 24K tokens for latency/throughput stability.
|
|
834
906
|
const effectiveRecentTokens = config.recentContextTokens > 0
|
|
835
907
|
? config.recentContextTokens
|
|
836
|
-
: Math.floor(config.maxContextTokens * 0.
|
|
908
|
+
: Math.min(24_000, Math.floor(config.maxContextTokens * 0.35));
|
|
837
909
|
const tokenRatio = tokenEstimate.tokensPerChar > 0 ? (0.25 / tokenEstimate.tokensPerChar) : 1;
|
|
838
910
|
const adjustedRecentTokens = Math.max(1000, Math.floor(effectiveRecentTokens * tokenRatio));
|
|
839
911
|
|
|
@@ -970,7 +1042,8 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
970
1042
|
// Long-term memory is now tool-based (agent calls mcp__dotclaw__memory_search on demand).
|
|
971
1043
|
// Session recall removed — redundant with summary + facts + recent messages.
|
|
972
1044
|
const sessionRecallCount = 0;
|
|
973
|
-
const memoryRecallCount = input.memoryRecall ? input.memoryRecall.length : 0;
|
|
1045
|
+
const memoryRecallCount = Array.isArray(input.memoryRecall) ? input.memoryRecall.length : 0;
|
|
1046
|
+
const memoryRecallCountForOutput = input.memoryRecallAttempted ? memoryRecallCount : undefined;
|
|
974
1047
|
|
|
975
1048
|
const sharedPromptDir = fs.existsSync(PROMPTS_DIR) ? PROMPTS_DIR : undefined;
|
|
976
1049
|
const taskPackResult = PROMPT_PACKS_ENABLED
|
|
@@ -1012,38 +1085,48 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1012
1085
|
if (memoryPolicyResult) promptPackVersions['memory-policy'] = memoryPolicyResult.pack.version;
|
|
1013
1086
|
if (memoryRecallResult) promptPackVersions['memory-recall'] = memoryRecallResult.pack.version;
|
|
1014
1087
|
|
|
1015
|
-
const resolveInstructions = (trimLevel = 0) =>
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1088
|
+
const resolveInstructions = (trimLevel = 0) => {
|
|
1089
|
+
const base = buildInstructions({
|
|
1090
|
+
assistantName,
|
|
1091
|
+
groupNotes: claudeNotes.group,
|
|
1092
|
+
globalNotes: claudeNotes.global,
|
|
1093
|
+
skillCatalog,
|
|
1094
|
+
memorySummary: sessionCtx.state.summary,
|
|
1095
|
+
memoryFacts: sessionCtx.state.facts,
|
|
1096
|
+
sessionRecall: [],
|
|
1097
|
+
longTermRecall: [],
|
|
1098
|
+
userProfile: input.userProfile ?? null,
|
|
1099
|
+
memoryStats: input.memoryStats,
|
|
1100
|
+
availableGroups,
|
|
1101
|
+
toolReliability: input.toolReliability,
|
|
1102
|
+
behaviorConfig: input.behaviorConfig,
|
|
1103
|
+
isScheduledTask: !!input.isScheduledTask,
|
|
1104
|
+
taskId: input.taskId,
|
|
1105
|
+
timezone: typeof input.timezone === 'string' ? input.timezone : undefined,
|
|
1106
|
+
hostPlatform: typeof input.hostPlatform === 'string' ? input.hostPlatform : undefined,
|
|
1107
|
+
messagingPlatform: input.chatJid?.includes(':') ? input.chatJid.split(':')[0] : undefined,
|
|
1108
|
+
taskExtractionPack: taskPackResult?.pack || null,
|
|
1109
|
+
responseQualityPack: responseQualityResult?.pack || null,
|
|
1110
|
+
toolCallingPack: toolCallingResult?.pack || null,
|
|
1111
|
+
toolOutcomePack: toolOutcomeResult?.pack || null,
|
|
1112
|
+
memoryPolicyPack: memoryPolicyResult?.pack || null,
|
|
1113
|
+
memoryRecallPack: memoryRecallResult?.pack || null,
|
|
1114
|
+
maxToolSteps,
|
|
1115
|
+
trimLevel
|
|
1116
|
+
});
|
|
1117
|
+
if (!toolExecutionRequirement.required) return base;
|
|
1118
|
+
const reason = toolExecutionRequirement.reason || 'required_tool_execution';
|
|
1119
|
+
return `${base}\n\n[Tool Execution Requirement]\nThis request requires real tool execution (${reason}). Do not claim file/system/web actions unless matching tool calls in this turn succeeded. If tools fail, state the failure clearly and provide the best next action.`;
|
|
1120
|
+
};
|
|
1043
1121
|
|
|
1044
1122
|
const buildContext = () => {
|
|
1045
|
-
// System prompt budget:
|
|
1046
|
-
|
|
1123
|
+
// System prompt budget: keep prompt lean for lower p95 latency.
|
|
1124
|
+
// Cap absolute size to avoid over-spending tokens on instructions.
|
|
1125
|
+
const systemPromptShare = input.isScheduledTask ? 0.1 : 0.12;
|
|
1126
|
+
const maxSystemPromptTokens = Math.max(
|
|
1127
|
+
1200,
|
|
1128
|
+
Math.min(6000, Math.floor(config.maxContextTokens * systemPromptShare))
|
|
1129
|
+
);
|
|
1047
1130
|
const MAX_TRIM_LEVEL = 4;
|
|
1048
1131
|
|
|
1049
1132
|
let resolvedInstructions = '';
|
|
@@ -1062,7 +1145,10 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1062
1145
|
|
|
1063
1146
|
const outputReserve = resolvedMaxOutputTokens || Math.floor(config.maxContextTokens * 0.25);
|
|
1064
1147
|
const resolvedMaxContext = Math.max(config.maxContextTokens - outputReserve - resolvedInstructionTokens, 2000);
|
|
1065
|
-
const resolvedAdjusted = Math.max(
|
|
1148
|
+
const resolvedAdjusted = Math.max(
|
|
1149
|
+
1000,
|
|
1150
|
+
Math.min(adjustedRecentTokens, Math.floor(resolvedMaxContext * tokenRatio))
|
|
1151
|
+
);
|
|
1066
1152
|
let { recentMessages: contextMessages } = splitRecentHistory(recentMessages, resolvedAdjusted, 6);
|
|
1067
1153
|
contextMessages = clampContextMessages(contextMessages, tokenEstimate.tokensPerChar, resolvedMaxContextMessageTokens);
|
|
1068
1154
|
contextMessages = pruneContextMessages(contextMessages, agent.context.contextPruning);
|
|
@@ -1077,9 +1163,65 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1077
1163
|
let completionTokens = 0;
|
|
1078
1164
|
let promptTokens = 0;
|
|
1079
1165
|
let latencyMs: number | undefined;
|
|
1166
|
+
let toolRetryAttempts = 0;
|
|
1167
|
+
let toolOutcomeVerificationForced = false;
|
|
1168
|
+
let toolLoopBreakerTriggered = false;
|
|
1169
|
+
let toolLoopBreakerReason: string | undefined;
|
|
1080
1170
|
|
|
1081
1171
|
const modelChain = [model, ...(input.modelFallbacks || [])].slice(0, 3);
|
|
1082
1172
|
let currentModel = model;
|
|
1173
|
+
const toolTrimConfig = agent.context.contextPruning;
|
|
1174
|
+
const toolSoftTrimMaxChars = Math.max(500, Math.floor(toolTrimConfig.softTrimMaxChars || 4000));
|
|
1175
|
+
const toolSoftTrimHead = Math.max(100, Math.floor(toolTrimConfig.softTrimHeadChars || 1500));
|
|
1176
|
+
const toolSoftTrimTail = Math.max(100, Math.floor(toolTrimConfig.softTrimTailChars || 1500));
|
|
1177
|
+
const followupOutputMaxChars = Math.max(900, Math.floor(toolSoftTrimMaxChars * 0.75));
|
|
1178
|
+
const followupArgumentMaxChars = Math.max(300, Math.floor(toolSoftTrimMaxChars * 0.25));
|
|
1179
|
+
let streamSeq = 0;
|
|
1180
|
+
|
|
1181
|
+
if (input.streamDir) {
|
|
1182
|
+
try {
|
|
1183
|
+
fs.mkdirSync(input.streamDir, { recursive: true });
|
|
1184
|
+
} catch {
|
|
1185
|
+
// ignore stream dir creation failure; normal response still works
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
const writeStreamChunk = (text: string) => {
|
|
1190
|
+
if (!input.streamDir) return;
|
|
1191
|
+
streamSeq += 1;
|
|
1192
|
+
const chunkFile = path.join(input.streamDir, `chunk_${String(streamSeq).padStart(6, '0')}.txt`);
|
|
1193
|
+
const tmpFile = `${chunkFile}.tmp`;
|
|
1194
|
+
try {
|
|
1195
|
+
fs.writeFileSync(tmpFile, text);
|
|
1196
|
+
fs.renameSync(tmpFile, chunkFile);
|
|
1197
|
+
} catch (writeErr) {
|
|
1198
|
+
log(`Stream write error at seq ${streamSeq}: ${writeErr instanceof Error ? writeErr.message : String(writeErr)}`);
|
|
1199
|
+
}
|
|
1200
|
+
};
|
|
1201
|
+
|
|
1202
|
+
const finalizeStream = () => {
|
|
1203
|
+
if (!input.streamDir) return;
|
|
1204
|
+
try {
|
|
1205
|
+
const donePath = path.join(input.streamDir, 'done');
|
|
1206
|
+
if (!fs.existsSync(donePath)) {
|
|
1207
|
+
fs.writeFileSync(donePath, '');
|
|
1208
|
+
}
|
|
1209
|
+
} catch {
|
|
1210
|
+
// ignore
|
|
1211
|
+
}
|
|
1212
|
+
};
|
|
1213
|
+
|
|
1214
|
+
const markStreamError = (errorMessage: string) => {
|
|
1215
|
+
if (!input.streamDir) return;
|
|
1216
|
+
try {
|
|
1217
|
+
const donePath = path.join(input.streamDir, 'done');
|
|
1218
|
+
if (!fs.existsSync(donePath)) {
|
|
1219
|
+
fs.writeFileSync(path.join(input.streamDir, 'error'), errorMessage);
|
|
1220
|
+
}
|
|
1221
|
+
} catch {
|
|
1222
|
+
// ignore
|
|
1223
|
+
}
|
|
1224
|
+
};
|
|
1083
1225
|
|
|
1084
1226
|
try {
|
|
1085
1227
|
const { instructions: resolvedInstructions, instructionsTokens: resolvedInstructionTokens, contextMessages } = buildContext();
|
|
@@ -1102,21 +1244,12 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1102
1244
|
}
|
|
1103
1245
|
}
|
|
1104
1246
|
|
|
1105
|
-
const contextInput =
|
|
1247
|
+
const contextInput = messagesToOpenRouterInput(contextMessages);
|
|
1106
1248
|
|
|
1107
|
-
// Inject vision content into the last user message if images are present
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
if (lastMsg.role === 'user') {
|
|
1112
|
-
// Convert string content to multi-modal content array
|
|
1113
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1114
|
-
(lastMsg as any).content = [
|
|
1115
|
-
{ type: 'text', text: typeof lastMsg.content === 'string' ? lastMsg.content : '' },
|
|
1116
|
-
...imageContent
|
|
1117
|
-
];
|
|
1118
|
-
}
|
|
1119
|
-
}
|
|
1249
|
+
// Inject vision content into the last user message if images are present.
|
|
1250
|
+
// Uses OpenRouter Responses API content part types (input_text/input_image).
|
|
1251
|
+
const imageContent = loadImageAttachmentsForInput(input.attachments, { log });
|
|
1252
|
+
injectImagesIntoContextInput(contextInput, imageContent);
|
|
1120
1253
|
|
|
1121
1254
|
let lastError: unknown = null;
|
|
1122
1255
|
for (let attempt = 0; attempt < modelChain.length; attempt++) {
|
|
@@ -1128,9 +1261,9 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1128
1261
|
}
|
|
1129
1262
|
if (attempt > 0) log(`Fallback ${attempt}: trying ${currentModel}`);
|
|
1130
1263
|
|
|
1264
|
+
const startedAt = Date.now();
|
|
1131
1265
|
try {
|
|
1132
1266
|
log(`Starting OpenRouter call (${currentModel})...`);
|
|
1133
|
-
const startedAt = Date.now();
|
|
1134
1267
|
// ── Custom tool execution loop ──────────────────────────────────
|
|
1135
1268
|
// The SDK's built-in tool loop (executeToolsIfNeeded) drops conversation
|
|
1136
1269
|
// context in follow-up API calls — it only sends [function_calls, function_call_outputs]
|
|
@@ -1142,39 +1275,14 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1142
1275
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1143
1276
|
let conversationInput: any[] = [...contextInput];
|
|
1144
1277
|
let step = 0;
|
|
1145
|
-
let streamSeq = 0;
|
|
1146
|
-
|
|
1147
|
-
// Helper to write a stream chunk
|
|
1148
|
-
const writeStreamChunk = (text: string) => {
|
|
1149
|
-
if (!input.streamDir) return;
|
|
1150
|
-
streamSeq++;
|
|
1151
|
-
const chunkFile = path.join(input.streamDir, `chunk_${String(streamSeq).padStart(6, '0')}.txt`);
|
|
1152
|
-
const tmpFile = chunkFile + '.tmp';
|
|
1153
|
-
try {
|
|
1154
|
-
fs.writeFileSync(tmpFile, text);
|
|
1155
|
-
fs.renameSync(tmpFile, chunkFile);
|
|
1156
|
-
} catch (writeErr) {
|
|
1157
|
-
log(`Stream write error at seq ${streamSeq}: ${writeErr instanceof Error ? writeErr.message : String(writeErr)}`);
|
|
1158
|
-
}
|
|
1159
|
-
};
|
|
1160
|
-
|
|
1161
|
-
// Helper to finalize streaming
|
|
1162
|
-
const finalizeStream = () => {
|
|
1163
|
-
if (!input.streamDir) return;
|
|
1164
|
-
try {
|
|
1165
|
-
if (!fs.existsSync(path.join(input.streamDir, 'done'))) {
|
|
1166
|
-
fs.writeFileSync(path.join(input.streamDir, 'done'), '');
|
|
1167
|
-
}
|
|
1168
|
-
} catch { /* ignore */ }
|
|
1169
|
-
};
|
|
1170
1278
|
|
|
1171
1279
|
// Initial call — uses streaming for real-time delivery
|
|
1172
1280
|
const initialResult = openrouter.callModel({
|
|
1173
1281
|
model: currentModel,
|
|
1174
1282
|
instructions: resolvedInstructions,
|
|
1175
1283
|
input: conversationInput,
|
|
1176
|
-
tools: schemaTools,
|
|
1177
|
-
maxOutputTokens:
|
|
1284
|
+
tools: disableToolsForTurn ? undefined : schemaTools,
|
|
1285
|
+
maxOutputTokens: effectiveMaxOutputTokens,
|
|
1178
1286
|
temperature: config.temperature,
|
|
1179
1287
|
reasoning: resolvedReasoning
|
|
1180
1288
|
});
|
|
@@ -1182,13 +1290,12 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1182
1290
|
// Stream text from initial response
|
|
1183
1291
|
if (input.streamDir) {
|
|
1184
1292
|
try {
|
|
1185
|
-
fs.mkdirSync(input.streamDir, { recursive: true });
|
|
1186
1293
|
for await (const delta of initialResult.getTextStream()) {
|
|
1187
1294
|
writeStreamChunk(delta);
|
|
1188
1295
|
}
|
|
1189
1296
|
} catch (streamErr) {
|
|
1190
1297
|
log(`Stream error: ${streamErr instanceof Error ? streamErr.message : String(streamErr)}`);
|
|
1191
|
-
|
|
1298
|
+
markStreamError(streamErr instanceof Error ? streamErr.message : String(streamErr));
|
|
1192
1299
|
}
|
|
1193
1300
|
}
|
|
1194
1301
|
|
|
@@ -1200,15 +1307,188 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1200
1307
|
} catch (err) {
|
|
1201
1308
|
const message = err instanceof Error ? err.message : String(err);
|
|
1202
1309
|
log(`Initial getResponse failed: ${message}`);
|
|
1203
|
-
finalizeStream();
|
|
1204
1310
|
throw err;
|
|
1205
1311
|
}
|
|
1206
1312
|
|
|
1207
1313
|
responseText = extractTextFromApiResponse(lastResponse);
|
|
1208
1314
|
let pendingCalls = extractFunctionCalls(lastResponse);
|
|
1315
|
+
const callSignatureCounts = new Map<string, number>();
|
|
1316
|
+
let previousRoundSignature = '';
|
|
1317
|
+
let repeatedRoundCount = 0;
|
|
1318
|
+
let runToolLoopBreakerTriggered = false;
|
|
1319
|
+
let runToolLoopBreakerReason: string | undefined;
|
|
1320
|
+
let runToolRetryAttempts = 0;
|
|
1321
|
+
let runNonRetryableFailures = 0;
|
|
1322
|
+
let runOutcomeVerificationForced = false;
|
|
1323
|
+
|
|
1324
|
+
const maxToolRequirementNudges = 2;
|
|
1325
|
+
let toolRequirementNudgeAttempt = 0;
|
|
1326
|
+
const nudgeReason = toolExecutionRequirement.reason || 'required_tool_execution';
|
|
1327
|
+
const runDeterministicToolRequirementFallback = async (phase: 'pre_loop' | 'post_loop'): Promise<boolean> => {
|
|
1328
|
+
const createReadInstruction = parseCreateReadFileInstruction(prompt);
|
|
1329
|
+
if (createReadInstruction) {
|
|
1330
|
+
const writeExecutor = toolExecutors.get('Write');
|
|
1331
|
+
const readExecutor = toolExecutors.get('Read');
|
|
1332
|
+
if (!writeExecutor || !readExecutor) return false;
|
|
1333
|
+
runOutcomeVerificationForced = true;
|
|
1334
|
+
log(`Tool requirement fallback (${phase}): deterministic create+read for ${createReadInstruction.path}`);
|
|
1335
|
+
try {
|
|
1336
|
+
await writeExecutor({
|
|
1337
|
+
path: createReadInstruction.path,
|
|
1338
|
+
content: createReadInstruction.lines.join('\n')
|
|
1339
|
+
});
|
|
1340
|
+
await readExecutor({ path: createReadInstruction.path });
|
|
1341
|
+
responseText = `Created file "${createReadInstruction.path}" with ${createReadInstruction.lines.length} lines and verified it by reading it back.`;
|
|
1342
|
+
writeStreamChunk(responseText);
|
|
1343
|
+
return true;
|
|
1344
|
+
} catch (fallbackErr) {
|
|
1345
|
+
log(`Deterministic create+read fallback failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
|
|
1346
|
+
return false;
|
|
1347
|
+
}
|
|
1348
|
+
}
|
|
1349
|
+
|
|
1350
|
+
const listReadInstruction = parseListReadNewestInstruction(prompt);
|
|
1351
|
+
if (!listReadInstruction) return false;
|
|
1352
|
+
const globExecutor = toolExecutors.get('Glob');
|
|
1353
|
+
const readExecutor = toolExecutors.get('Read');
|
|
1354
|
+
if (!globExecutor || !readExecutor) return false;
|
|
1355
|
+
runOutcomeVerificationForced = true;
|
|
1356
|
+
log(`Tool requirement fallback (${phase}): deterministic list+read for ${listReadInstruction.directory}`);
|
|
1357
|
+
try {
|
|
1358
|
+
const normalizedDir = listReadInstruction.directory.replace(/\/+$/, '');
|
|
1359
|
+
const globPattern = normalizedDir ? `${normalizedDir}/**/*` : '**/*';
|
|
1360
|
+
const maxResults = Math.max(50, listReadInstruction.count * 20);
|
|
1361
|
+
const globResult = await globExecutor({ pattern: globPattern, maxResults });
|
|
1362
|
+
const matches = (
|
|
1363
|
+
globResult &&
|
|
1364
|
+
typeof globResult === 'object' &&
|
|
1365
|
+
Array.isArray((globResult as { matches?: unknown }).matches)
|
|
1366
|
+
? (globResult as { matches: unknown[] }).matches
|
|
1367
|
+
: []
|
|
1368
|
+
).map((item) => String(item || '').trim()).filter(Boolean);
|
|
1369
|
+
|
|
1370
|
+
const rankedFiles = Array.from(new Set(matches))
|
|
1371
|
+
.map((candidatePath) => {
|
|
1372
|
+
try {
|
|
1373
|
+
const stat = fs.statSync(candidatePath);
|
|
1374
|
+
if (!stat.isFile()) return null;
|
|
1375
|
+
return { path: candidatePath, mtimeMs: stat.mtimeMs };
|
|
1376
|
+
} catch {
|
|
1377
|
+
return null;
|
|
1378
|
+
}
|
|
1379
|
+
})
|
|
1380
|
+
.filter((entry): entry is { path: string; mtimeMs: number } => !!entry)
|
|
1381
|
+
.sort((a, b) => b.mtimeMs - a.mtimeMs)
|
|
1382
|
+
.slice(0, listReadInstruction.count);
|
|
1383
|
+
|
|
1384
|
+
if (rankedFiles.length === 0) {
|
|
1385
|
+
responseText = [
|
|
1386
|
+
`- No files were found under \`${listReadInstruction.directory}\`.`,
|
|
1387
|
+
'- I could not read a newest file because the directory appears empty.'
|
|
1388
|
+
].join('\n');
|
|
1389
|
+
writeStreamChunk(responseText);
|
|
1390
|
+
return true;
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1393
|
+
const newest = rankedFiles[0];
|
|
1394
|
+
const readResult = await readExecutor({ path: newest.path });
|
|
1395
|
+
const readContent = (
|
|
1396
|
+
readResult &&
|
|
1397
|
+
typeof readResult === 'object' &&
|
|
1398
|
+
typeof (readResult as { content?: unknown }).content === 'string'
|
|
1399
|
+
? (readResult as { content: string }).content
|
|
1400
|
+
: ''
|
|
1401
|
+
).trim();
|
|
1402
|
+
const preview = readContent
|
|
1403
|
+
? readContent.split(/\r?\n/).map(line => line.trim()).filter(Boolean).slice(0, 3).join(' | ')
|
|
1404
|
+
: '[empty file]';
|
|
1405
|
+
const relativePath = newest.path.startsWith(`${GROUP_DIR}/`)
|
|
1406
|
+
? newest.path.slice(GROUP_DIR.length + 1)
|
|
1407
|
+
: newest.path;
|
|
1408
|
+
const newestBasenames = rankedFiles.map(entry => path.basename(entry.path)).join(', ');
|
|
1409
|
+
const bulletCount = listReadInstruction.bulletCount || 2;
|
|
1410
|
+
const bulletLines = [
|
|
1411
|
+
`- Newest file: \`${relativePath}\` (top ${rankedFiles.length} files from \`${listReadInstruction.directory}\`).`,
|
|
1412
|
+
`- Preview: ${preview}.`,
|
|
1413
|
+
`- Newest set: ${newestBasenames}.`
|
|
1414
|
+
].slice(0, bulletCount);
|
|
1415
|
+
|
|
1416
|
+
responseText = bulletLines.join('\n');
|
|
1417
|
+
writeStreamChunk(responseText);
|
|
1418
|
+
return true;
|
|
1419
|
+
} catch (fallbackErr) {
|
|
1420
|
+
log(`Deterministic list+read fallback failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
|
|
1421
|
+
return false;
|
|
1422
|
+
}
|
|
1423
|
+
};
|
|
1424
|
+
|
|
1425
|
+
while (toolExecutionRequirement.required && pendingCalls.length === 0 && toolCalls.length === 0 && toolRequirementNudgeAttempt < maxToolRequirementNudges) {
|
|
1426
|
+
toolRequirementNudgeAttempt += 1;
|
|
1427
|
+
runOutcomeVerificationForced = true;
|
|
1428
|
+
log(`Tool requirement nudge triggered (${nudgeReason}, attempt ${toolRequirementNudgeAttempt}/${maxToolRequirementNudges})`);
|
|
1429
|
+
const nudgePrompt = buildToolExecutionNudgePrompt({
|
|
1430
|
+
reason: nudgeReason,
|
|
1431
|
+
attempt: toolRequirementNudgeAttempt
|
|
1432
|
+
});
|
|
1433
|
+
const responseItems = Array.isArray(lastResponse?.output) ? lastResponse.output : [];
|
|
1434
|
+
conversationInput = [...conversationInput, ...responseItems, { role: 'user', content: nudgePrompt }];
|
|
1435
|
+
try {
|
|
1436
|
+
const nudgeResult = openrouter.callModel({
|
|
1437
|
+
model: currentModel,
|
|
1438
|
+
instructions: resolvedInstructions,
|
|
1439
|
+
input: conversationInput,
|
|
1440
|
+
tools: schemaTools,
|
|
1441
|
+
maxOutputTokens: effectiveMaxOutputTokens,
|
|
1442
|
+
temperature: Math.min(config.temperature, 0.1),
|
|
1443
|
+
reasoning: { effort: 'low' as const }
|
|
1444
|
+
});
|
|
1445
|
+
lastResponse = await nudgeResult.getResponse();
|
|
1446
|
+
const nudgeText = extractTextFromApiResponse(lastResponse);
|
|
1447
|
+
if (nudgeText) {
|
|
1448
|
+
responseText = nudgeText;
|
|
1449
|
+
writeStreamChunk(nudgeText);
|
|
1450
|
+
}
|
|
1451
|
+
pendingCalls = extractFunctionCalls(lastResponse);
|
|
1452
|
+
} catch (nudgeErr) {
|
|
1453
|
+
log(`Tool requirement nudge failed: ${nudgeErr instanceof Error ? nudgeErr.message : String(nudgeErr)}`);
|
|
1454
|
+
break;
|
|
1455
|
+
}
|
|
1456
|
+
}
|
|
1457
|
+
|
|
1458
|
+
if (toolExecutionRequirement.required && pendingCalls.length === 0 && toolCalls.length === 0) {
|
|
1459
|
+
await runDeterministicToolRequirementFallback('pre_loop');
|
|
1460
|
+
}
|
|
1209
1461
|
|
|
1210
1462
|
// Tool execution loop — execute tools ourselves, include full context in follow-ups
|
|
1211
1463
|
while (pendingCalls.length > 0 && step < maxToolSteps) {
|
|
1464
|
+
const roundSignature = normalizeToolRoundSignature(pendingCalls);
|
|
1465
|
+
if (roundSignature && roundSignature === previousRoundSignature) {
|
|
1466
|
+
repeatedRoundCount += 1;
|
|
1467
|
+
} else {
|
|
1468
|
+
repeatedRoundCount = 1;
|
|
1469
|
+
previousRoundSignature = roundSignature;
|
|
1470
|
+
}
|
|
1471
|
+
if (roundSignature && repeatedRoundCount >= repeatedRoundThreshold) {
|
|
1472
|
+
runToolLoopBreakerTriggered = true;
|
|
1473
|
+
runToolLoopBreakerReason = `repeated_round_signature(${repeatedRoundCount})`;
|
|
1474
|
+
log(`Tool loop breaker triggered: ${runToolLoopBreakerReason}`);
|
|
1475
|
+
break;
|
|
1476
|
+
}
|
|
1477
|
+
for (const fc of pendingCalls) {
|
|
1478
|
+
const signature = normalizeToolCallSignature(fc);
|
|
1479
|
+
const nextCount = (callSignatureCounts.get(signature) || 0) + 1;
|
|
1480
|
+
callSignatureCounts.set(signature, nextCount);
|
|
1481
|
+
if (nextCount >= repeatedSignatureThreshold) {
|
|
1482
|
+
runToolLoopBreakerTriggered = true;
|
|
1483
|
+
runToolLoopBreakerReason = `repeated_call_signature(${nextCount}): ${fc.name}`;
|
|
1484
|
+
break;
|
|
1485
|
+
}
|
|
1486
|
+
}
|
|
1487
|
+
if (runToolLoopBreakerTriggered) {
|
|
1488
|
+
log(`Tool loop breaker triggered: ${runToolLoopBreakerReason || 'unknown_reason'}`);
|
|
1489
|
+
break;
|
|
1490
|
+
}
|
|
1491
|
+
|
|
1212
1492
|
log(`Step ${step}: executing ${pendingCalls.length} tool call(s): ${pendingCalls.map(c => c.name).join(', ')}`);
|
|
1213
1493
|
|
|
1214
1494
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
@@ -1226,50 +1506,120 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1226
1506
|
continue;
|
|
1227
1507
|
}
|
|
1228
1508
|
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1509
|
+
const normalizedArgs = normalizeToolCallArguments({
|
|
1510
|
+
toolName: fc.name,
|
|
1511
|
+
rawArguments: fc.arguments
|
|
1512
|
+
});
|
|
1513
|
+
if (normalizedArgs.malformedReason) {
|
|
1514
|
+
const recoveryHint = buildMalformedArgumentsRecoveryHint({
|
|
1515
|
+
toolName: fc.name,
|
|
1516
|
+
malformedReason: normalizedArgs.malformedReason
|
|
1236
1517
|
});
|
|
1237
|
-
|
|
1238
|
-
|
|
1518
|
+
const error = recoveryHint
|
|
1519
|
+
? `Malformed arguments for ${fc.name}: ${normalizedArgs.malformedReason}. ${recoveryHint}`
|
|
1520
|
+
: `Malformed arguments for ${fc.name}: ${normalizedArgs.malformedReason}`;
|
|
1239
1521
|
toolResults.push({
|
|
1240
1522
|
type: 'function_call_output',
|
|
1241
1523
|
callId: fc.id,
|
|
1242
1524
|
output: JSON.stringify({ error })
|
|
1243
1525
|
});
|
|
1526
|
+
toolOutputs.push({ name: fc.name, ok: false, error });
|
|
1527
|
+
runNonRetryableFailures += 1;
|
|
1528
|
+
if (runNonRetryableFailures >= nonRetryableFailureThreshold) {
|
|
1529
|
+
runToolLoopBreakerTriggered = true;
|
|
1530
|
+
runToolLoopBreakerReason = `non_retryable_failures(${runNonRetryableFailures})`;
|
|
1531
|
+
}
|
|
1532
|
+
step++;
|
|
1533
|
+
if (runToolLoopBreakerTriggered) break;
|
|
1534
|
+
continue;
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
let attemptNumber = 1;
|
|
1538
|
+
// Retry only read/idempotent tools on transient failures.
|
|
1539
|
+
for (;;) {
|
|
1540
|
+
try {
|
|
1541
|
+
// Calling the wrapped execute fires onToolCall/onToolResult callbacks.
|
|
1542
|
+
const result = await executor(normalizedArgs.arguments);
|
|
1543
|
+
toolResults.push({
|
|
1544
|
+
type: 'function_call_output',
|
|
1545
|
+
callId: fc.id,
|
|
1546
|
+
output: JSON.stringify(result)
|
|
1547
|
+
});
|
|
1548
|
+
break;
|
|
1549
|
+
} catch (err) {
|
|
1550
|
+
if (shouldRetryIdempotentToolCall({
|
|
1551
|
+
toolName: fc.name,
|
|
1552
|
+
error: err,
|
|
1553
|
+
attempt: attemptNumber,
|
|
1554
|
+
maxAttempts: idempotentRetryAttempts
|
|
1555
|
+
})) {
|
|
1556
|
+
runToolRetryAttempts += 1;
|
|
1557
|
+
const delayMs = Math.min(2_000, idempotentRetryBackoffMs * attemptNumber);
|
|
1558
|
+
log(`Retrying idempotent tool ${fc.name} after transient error (attempt ${attemptNumber + 1}/${idempotentRetryAttempts})`);
|
|
1559
|
+
if (delayMs > 0) {
|
|
1560
|
+
await sleep(delayMs);
|
|
1561
|
+
}
|
|
1562
|
+
attemptNumber += 1;
|
|
1563
|
+
continue;
|
|
1564
|
+
}
|
|
1565
|
+
const error = err instanceof Error ? err.message : String(err);
|
|
1566
|
+
toolResults.push({
|
|
1567
|
+
type: 'function_call_output',
|
|
1568
|
+
callId: fc.id,
|
|
1569
|
+
output: JSON.stringify({ error })
|
|
1570
|
+
});
|
|
1571
|
+
if (isNonRetryableToolError(error)) {
|
|
1572
|
+
runNonRetryableFailures += 1;
|
|
1573
|
+
if (runNonRetryableFailures >= nonRetryableFailureThreshold) {
|
|
1574
|
+
runToolLoopBreakerTriggered = true;
|
|
1575
|
+
runToolLoopBreakerReason = `non_retryable_failures(${runNonRetryableFailures})`;
|
|
1576
|
+
}
|
|
1577
|
+
}
|
|
1578
|
+
break;
|
|
1579
|
+
}
|
|
1244
1580
|
}
|
|
1245
1581
|
step++;
|
|
1582
|
+
if (runToolLoopBreakerTriggered) break;
|
|
1583
|
+
}
|
|
1584
|
+
if (runToolLoopBreakerTriggered) {
|
|
1585
|
+
log(`Tool loop breaker triggered: ${runToolLoopBreakerReason || 'unknown_reason'}`);
|
|
1586
|
+
break;
|
|
1246
1587
|
}
|
|
1247
1588
|
|
|
1248
1589
|
// Build follow-up input with FULL conversation context:
|
|
1249
1590
|
// original messages + model output + tool results (accumulated each round)
|
|
1250
1591
|
conversationInput = [...conversationInput, ...lastResponse.output, ...toolResults];
|
|
1251
1592
|
|
|
1593
|
+
// Compact oversized tool payloads before follow-up calls to reduce context bloat.
|
|
1594
|
+
const compactedConversation = compactToolConversationItems(conversationInput, {
|
|
1595
|
+
maxOutputChars: followupOutputMaxChars,
|
|
1596
|
+
outputHeadChars: Math.min(toolSoftTrimHead, Math.floor(followupOutputMaxChars * 0.6)),
|
|
1597
|
+
outputTailChars: Math.min(toolSoftTrimTail, Math.floor(followupOutputMaxChars * 0.3)),
|
|
1598
|
+
maxArgumentChars: followupArgumentMaxChars,
|
|
1599
|
+
});
|
|
1600
|
+
conversationInput = compactedConversation.items as typeof conversationInput;
|
|
1601
|
+
if (compactedConversation.compacted > 0) {
|
|
1602
|
+
log(`Tool loop: compacted ${compactedConversation.compacted} oversized payload(s)`);
|
|
1603
|
+
}
|
|
1604
|
+
|
|
1252
1605
|
// Phase 1: Soft-trim oversized tool results (like OpenClaw's context-pruning extension).
|
|
1253
1606
|
// Replace large tool result content with head+tail, preserving pair integrity.
|
|
1254
1607
|
// PROTECT the most recent round's tool results — only trim older ones.
|
|
1255
|
-
const SOFT_TRIM_MAX_CHARS = 4000;
|
|
1256
|
-
const SOFT_TRIM_HEAD = 1500;
|
|
1257
|
-
const SOFT_TRIM_TAIL = 1500;
|
|
1258
1608
|
const protectedStart = conversationInput.length - toolResults.length;
|
|
1259
1609
|
for (let idx = 0; idx < protectedStart; idx++) {
|
|
1260
1610
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1261
1611
|
const anyItem = conversationInput[idx] as any;
|
|
1262
|
-
if (anyItem?.type === 'function_call_output' && typeof anyItem.output === 'string' && anyItem.output.length > SOFT_TRIM_MAX_CHARS) {
|
|
1612
|
+
if (anyItem?.type === 'function_call_output' && typeof anyItem.output === 'string' && anyItem.output.length > toolSoftTrimMaxChars) {
|
|
1263
1613
|
const orig = anyItem.output;
|
|
1264
|
-
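anyItem.output = orig.slice(0, SOFT_TRIM_HEAD) + '\n...\n' + orig.slice(-SOFT_TRIM_TAIL)
|
|
1265
|
-
+ `\n[Tool result trimmed: kept first ${SOFT_TRIM_HEAD} and last ${SOFT_TRIM_TAIL} of ${orig.length} chars.]`;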
|
|
1614
|
+
anyItem.output = orig.slice(0, toolSoftTrimHead) + '\n...\n' + orig.slice(-toolSoftTrimTail)
|
|
1615
|
+
+ `\n[Tool result trimmed: kept first ${toolSoftTrimHead} and last ${toolSoftTrimTail} of ${orig.length} chars.]`;
|
|
1266
1616
|
}
|
|
1267
1617
|
}
|
|
1268
1618
|
|
|
1269
1619
|
// Phase 2: If still over budget, remove only initial context messages (role/content items
|
|
1270
1620
|
// without a 'type' field). NEVER remove function_call or function_call_output items —
|
|
1271
1621
|
// orphaning either side of a pair causes API 400 errors.
|
|
1272
|
-
const followupTokenLimit = Math.floor(config.maxContextTokens * 0.
|
|
1622
|
+
const followupTokenLimit = Math.floor(config.maxContextTokens * 0.45);
|
|
1273
1623
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1274
1624
|
const estimateInputTokens = (items: any[]) => items.reduce((sum: number, item: any) => {
|
|
1275
1625
|
const content = typeof item === 'string' ? item : JSON.stringify(item);
|
|
@@ -1298,7 +1648,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1298
1648
|
instructions: resolvedInstructions,
|
|
1299
1649
|
input: conversationInput,
|
|
1300
1650
|
tools: schemaTools,
|
|
1301
|
-
maxOutputTokens:
|
|
1651
|
+
maxOutputTokens: effectiveMaxOutputTokens,
|
|
1302
1652
|
temperature: config.temperature,
|
|
1303
1653
|
reasoning: resolvedReasoning
|
|
1304
1654
|
});
|
|
@@ -1335,7 +1685,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1335
1685
|
instructions: resolvedInstructions,
|
|
1336
1686
|
input: conversationInput,
|
|
1337
1687
|
tools: schemaTools,
|
|
1338
|
-
maxOutputTokens:
|
|
1688
|
+
maxOutputTokens: effectiveMaxOutputTokens,
|
|
1339
1689
|
temperature: config.temperature,
|
|
1340
1690
|
reasoning: resolvedReasoning
|
|
1341
1691
|
});
|
|
@@ -1368,8 +1718,78 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1368
1718
|
pendingCalls = extractFunctionCalls(lastResponse);
|
|
1369
1719
|
}
|
|
1370
1720
|
|
|
1721
|
+
if (toolExecutionRequirement.required && toolCalls.length === 0) {
|
|
1722
|
+
const fallbackApplied = await runDeterministicToolRequirementFallback('post_loop');
|
|
1723
|
+
if (fallbackApplied) {
|
|
1724
|
+
pendingCalls = [];
|
|
1725
|
+
}
|
|
1726
|
+
}
|
|
1727
|
+
|
|
1728
|
+
if (toolExecutionRequirement.required && toolCalls.length === 0) {
|
|
1729
|
+
runOutcomeVerificationForced = true;
|
|
1730
|
+
responseText = 'I could not execute the required tools for this request, so I cannot safely claim completion.';
|
|
1731
|
+
writeStreamChunk(responseText);
|
|
1732
|
+
}
|
|
1733
|
+
|
|
1734
|
+
const unresolvedCalls = pendingCalls.slice();
|
|
1735
|
+
if (forceSynthesisAfterTools && toolCalls.length > 0 && (runToolLoopBreakerTriggered || unresolvedCalls.length > 0 || !responseText.trim())) {
|
|
1736
|
+
runOutcomeVerificationForced = true;
|
|
1737
|
+
const synthesisReason = runToolLoopBreakerTriggered
|
|
1738
|
+
? `stuck_loop:${runToolLoopBreakerReason || 'unknown'}`
|
|
1739
|
+
: (unresolvedCalls.length > 0 ? 'unresolved_tool_calls' : 'empty_after_tools');
|
|
1740
|
+
log(`Tool outcome verifier forcing synthesis (${synthesisReason})`);
|
|
1741
|
+
const continuationPrompt = buildForcedSynthesisPrompt({
|
|
1742
|
+
reason: synthesisReason,
|
|
1743
|
+
pendingCalls: unresolvedCalls,
|
|
1744
|
+
toolOutputs
|
|
1745
|
+
});
|
|
1746
|
+
conversationInput = [...conversationInput, { role: 'user', content: continuationPrompt }];
|
|
1747
|
+
try {
|
|
1748
|
+
const synthesisResult = openrouter.callModel({
|
|
1749
|
+
model: currentModel,
|
|
1750
|
+
instructions: resolvedInstructions,
|
|
1751
|
+
input: conversationInput,
|
|
1752
|
+
maxOutputTokens: effectiveMaxOutputTokens,
|
|
1753
|
+
temperature: config.temperature,
|
|
1754
|
+
reasoning: resolvedReasoning
|
|
1755
|
+
});
|
|
1756
|
+
const synthesisResponse = await synthesisResult.getResponse();
|
|
1757
|
+
const synthesisText = extractTextFromApiResponse(synthesisResponse);
|
|
1758
|
+
if (synthesisText && synthesisText.trim()) {
|
|
1759
|
+
responseText = synthesisText;
|
|
1760
|
+
writeStreamChunk(synthesisText);
|
|
1761
|
+
}
|
|
1762
|
+
} catch (synthesisErr) {
|
|
1763
|
+
log(`Forced synthesis failed: ${synthesisErr instanceof Error ? synthesisErr.message : String(synthesisErr)}`);
|
|
1764
|
+
}
|
|
1765
|
+
|
|
1766
|
+
if (!responseText || !responseText.trim()) {
|
|
1767
|
+
responseText = buildToolOutcomeFallback({
|
|
1768
|
+
reason: synthesisReason,
|
|
1769
|
+
toolOutputs,
|
|
1770
|
+
pendingCalls: unresolvedCalls
|
|
1771
|
+
});
|
|
1772
|
+
writeStreamChunk(responseText);
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
1775
|
+
|
|
1776
|
+
if (!responseText || !responseText.trim()) {
|
|
1777
|
+
responseText = toolCalls.length > 0
|
|
1778
|
+
? 'I completed tool execution but received an empty model response. Please retry, and I will continue from this context.'
|
|
1779
|
+
: 'I could not produce a response for that request. Please retry, and I will continue from this context.';
|
|
1780
|
+
writeStreamChunk(responseText);
|
|
1781
|
+
}
|
|
1782
|
+
|
|
1371
1783
|
finalizeStream();
|
|
1372
1784
|
latencyMs = Date.now() - startedAt;
|
|
1785
|
+
toolRetryAttempts += runToolRetryAttempts;
|
|
1786
|
+
if (runOutcomeVerificationForced) {
|
|
1787
|
+
toolOutcomeVerificationForced = true;
|
|
1788
|
+
}
|
|
1789
|
+
if (runToolLoopBreakerTriggered) {
|
|
1790
|
+
toolLoopBreakerTriggered = true;
|
|
1791
|
+
toolLoopBreakerReason = runToolLoopBreakerReason;
|
|
1792
|
+
}
|
|
1373
1793
|
|
|
1374
1794
|
if (responseText && responseText.trim()) {
|
|
1375
1795
|
log(`Model returned text response (${responseText.length} chars, ${step} tool steps)`);
|
|
@@ -1391,10 +1811,13 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1391
1811
|
// rebuild system prompt at max trim level, then retry.
|
|
1392
1812
|
if (errClass === 'context_overflow' && contextMessages.length > 4) {
|
|
1393
1813
|
log(`Context overflow on ${currentModel}, emergency compaction + max trim`);
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1814
|
+
const recoveryPlan = buildContextOverflowRecoveryPlan({
|
|
1815
|
+
contextMessages: contextMessages.map(msg => ({ role: msg.role, content: msg.content })),
|
|
1816
|
+
emergencySummary: null,
|
|
1817
|
+
keepRecentCount: 4
|
|
1818
|
+
});
|
|
1819
|
+
const toCompact = recoveryPlan.toCompact;
|
|
1820
|
+
const toKeep = recoveryPlan.toKeep;
|
|
1398
1821
|
let emergencySummary = '';
|
|
1399
1822
|
if (toCompact.length > 0) {
|
|
1400
1823
|
try {
|
|
@@ -1420,22 +1843,30 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1420
1843
|
}
|
|
1421
1844
|
// Rebuild system prompt at max trim level (includes updated summary)
|
|
1422
1845
|
const minInstructions = resolveInstructions(4);
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
:
|
|
1846
|
+
const compactedInput = buildContextOverflowRecoveryPlan({
|
|
1847
|
+
contextMessages: toKeep,
|
|
1848
|
+
emergencySummary,
|
|
1849
|
+
keepRecentCount: Math.max(1, toKeep.length)
|
|
1850
|
+
}).retryInput;
|
|
1427
1851
|
try {
|
|
1428
1852
|
const retryResult = openrouter.callModel({
|
|
1429
1853
|
model: currentModel,
|
|
1430
1854
|
instructions: minInstructions,
|
|
1431
1855
|
input: compactedInput,
|
|
1432
1856
|
tools: schemaTools,
|
|
1433
|
-
maxOutputTokens:
|
|
1857
|
+
maxOutputTokens: effectiveMaxOutputTokens,
|
|
1434
1858
|
temperature: config.temperature,
|
|
1435
1859
|
reasoning: resolvedReasoning
|
|
1436
1860
|
});
|
|
1437
1861
|
const retryResponse = await retryResult.getResponse();
|
|
1438
1862
|
responseText = extractTextFromApiResponse(retryResponse) || '';
|
|
1863
|
+
if (responseText) {
|
|
1864
|
+
writeStreamChunk(responseText);
|
|
1865
|
+
}
|
|
1866
|
+
finalizeStream();
|
|
1867
|
+
latencyMs = Date.now() - startedAt;
|
|
1868
|
+
completionTokens = estimateTokensForModel(responseText || '', tokenEstimate.tokensPerChar);
|
|
1869
|
+
promptTokens = resolvedPromptTokens;
|
|
1439
1870
|
lastError = null;
|
|
1440
1871
|
break;
|
|
1441
1872
|
} catch (retryErr) {
|
|
@@ -1459,6 +1890,8 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1459
1890
|
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
1460
1891
|
const allFailed = modelChain.length > 1 ? `All models failed. Last error: ${errorMessage}` : errorMessage;
|
|
1461
1892
|
log(`Agent error: ${allFailed}`);
|
|
1893
|
+
markStreamError(allFailed);
|
|
1894
|
+
await cleanupMcpConnections();
|
|
1462
1895
|
return {
|
|
1463
1896
|
status: 'error',
|
|
1464
1897
|
result: null,
|
|
@@ -1470,12 +1903,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1470
1903
|
memory_facts: sessionCtx.state.facts,
|
|
1471
1904
|
tokens_prompt: promptTokens,
|
|
1472
1905
|
tokens_completion: completionTokens,
|
|
1473
|
-
memory_recall_count:
|
|
1906
|
+
memory_recall_count: memoryRecallCountForOutput,
|
|
1474
1907
|
session_recall_count: sessionRecallCount,
|
|
1475
1908
|
memory_items_upserted: memoryItemsUpserted,
|
|
1476
1909
|
memory_items_extracted: memoryItemsExtracted,
|
|
1477
1910
|
timings: Object.keys(timings).length > 0 ? timings : undefined,
|
|
1478
1911
|
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
1912
|
+
tool_retry_attempts: toolRetryAttempts || undefined,
|
|
1913
|
+
tool_outcome_verification_forced: toolOutcomeVerificationForced || undefined,
|
|
1914
|
+
tool_loop_breaker_triggered: toolLoopBreakerTriggered || undefined,
|
|
1915
|
+
tool_loop_breaker_reason: toolLoopBreakerReason,
|
|
1479
1916
|
latency_ms: latencyMs
|
|
1480
1917
|
};
|
|
1481
1918
|
}
|
|
@@ -1569,10 +2006,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1569
2006
|
}
|
|
1570
2007
|
}
|
|
1571
2008
|
|
|
1572
|
-
|
|
1573
|
-
if (mcpCleanup) {
|
|
1574
|
-
try { await mcpCleanup(); } catch { /* ignore cleanup errors */ }
|
|
1575
|
-
}
|
|
2009
|
+
await cleanupMcpConnections();
|
|
1576
2010
|
|
|
1577
2011
|
return {
|
|
1578
2012
|
status: 'success',
|
|
@@ -1584,12 +2018,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
|
|
|
1584
2018
|
memory_facts: sessionCtx.state.facts,
|
|
1585
2019
|
tokens_prompt: promptTokens,
|
|
1586
2020
|
tokens_completion: completionTokens,
|
|
1587
|
-
memory_recall_count:
|
|
2021
|
+
memory_recall_count: memoryRecallCountForOutput,
|
|
1588
2022
|
session_recall_count: sessionRecallCount,
|
|
1589
2023
|
memory_items_upserted: memoryItemsUpserted,
|
|
1590
2024
|
memory_items_extracted: memoryItemsExtracted,
|
|
1591
2025
|
timings: Object.keys(timings).length > 0 ? timings : undefined,
|
|
1592
2026
|
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
2027
|
+
tool_retry_attempts: toolRetryAttempts || undefined,
|
|
2028
|
+
tool_outcome_verification_forced: toolOutcomeVerificationForced || undefined,
|
|
2029
|
+
tool_loop_breaker_triggered: toolLoopBreakerTriggered || undefined,
|
|
2030
|
+
tool_loop_breaker_reason: toolLoopBreakerReason,
|
|
1593
2031
|
latency_ms: latencyMs,
|
|
1594
2032
|
replyToId
|
|
1595
2033
|
};
|