lynkr 7.2.5 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +4 -2
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +76 -3
- package/src/config/index.js +113 -35
- package/src/context/toon.js +173 -0
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +686 -211
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const config = require("../config");
|
|
2
2
|
const { invokeModel } = require("../clients/databricks");
|
|
3
3
|
const { appendTurnToSession } = require("../sessions/record");
|
|
4
|
+
const { upsertSession } = require("../sessions/store");
|
|
4
5
|
const { executeToolCall } = require("../tools");
|
|
5
6
|
const policy = require("../policy");
|
|
6
7
|
const logger = require("../logger");
|
|
@@ -10,6 +11,7 @@ const tokens = require("../utils/tokens");
|
|
|
10
11
|
const systemPrompt = require("../prompts/system");
|
|
11
12
|
const historyCompression = require("../context/compression");
|
|
12
13
|
const tokenBudget = require("../context/budget");
|
|
14
|
+
const { applyToonCompression } = require("../context/toon");
|
|
13
15
|
const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection");
|
|
14
16
|
const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom");
|
|
15
17
|
const { createAuditLogger } = require("../logger/audit-logger");
|
|
@@ -19,6 +21,8 @@ const crypto = require("crypto");
|
|
|
19
21
|
const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers");
|
|
20
22
|
const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic");
|
|
21
23
|
const lazyLoader = require("../tools/lazy-loader");
|
|
24
|
+
const { areSimilarToolCalls } = require("../clients/gpt-utils");
|
|
25
|
+
const { getModelRegistrySync } = require("../routing/model-registry");
|
|
22
26
|
|
|
23
27
|
/**
|
|
24
28
|
* Get destination URL for audit logging based on provider type
|
|
@@ -49,6 +53,8 @@ function getDestinationUrl(providerType) {
|
|
|
49
53
|
return config.zai?.endpoint ?? 'unknown';
|
|
50
54
|
case 'vertex':
|
|
51
55
|
return config.vertex?.endpoint ?? 'unknown';
|
|
56
|
+
case 'moonshot':
|
|
57
|
+
return config.moonshot?.endpoint ?? 'unknown';
|
|
52
58
|
default:
|
|
53
59
|
return 'unknown';
|
|
54
60
|
}
|
|
@@ -455,6 +461,192 @@ function injectToolLoopStopInstruction(messages, threshold = 5) {
|
|
|
455
461
|
return messages;
|
|
456
462
|
}
|
|
457
463
|
|
|
464
|
+
// === CROSS-REQUEST TOOL CALL DEDUP TRACKING ===
|
|
465
|
+
// These helpers track tool call signatures across multiple HTTP requests within
|
|
466
|
+
// the same session (client/passthrough mode). The inner-loop detection in
|
|
467
|
+
// runAgentLoop() only sees one request at a time, so repeated calls across
|
|
468
|
+
// requests escape it.
|
|
469
|
+
|
|
470
|
+
const DEDUP_MAX_SIGNATURES = 50;
|
|
471
|
+
const DEDUP_WARN_THRESHOLD = 2;
|
|
472
|
+
const DEDUP_TERMINATE_THRESHOLD = 3;
|
|
473
|
+
|
|
474
|
+
/**
 * Make sure session.metadata.toolCallDedup exists and carries every field the
 * cross-request dedup helpers read.
 *
 * - Missing (or non-object) tracker: a fresh one is created.
 * - Present but partial tracker: repaired in place. Fields that already hold
 *   values are kept; only missing ones receive defaults. Without this repair,
 *   recordCrossRequestToolCall() throws when it indexes `similarGroups` or
 *   `signatures` on a tracker that lacks them.
 *
 * Does nothing when the session or its metadata is absent.
 *
 * @param {Object} session - Session whose `metadata` is mutated in place.
 * @returns {void}
 */
function ensureDedupStructure(session) {
  if (!session || !session.metadata) return;

  const isRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value);

  const existing = session.metadata.toolCallDedup;
  if (!isRecord(existing)) {
    session.metadata.toolCallDedup = {
      signatures: {},
      similarGroups: {},
      lastResetAt: Date.now(),
      warningInjected: false,
    };
    return;
  }

  // Repair only what is missing so accumulated counts survive.
  if (!isRecord(existing.signatures)) existing.signatures = {};
  if (!isRecord(existing.similarGroups)) existing.similarGroups = {};
  existing.lastResetAt = existing.lastResetAt ?? Date.now();
  existing.warningInjected = existing.warningInjected ?? false;
}
|
|
489
|
+
|
|
490
|
+
/**
 * Register one tool call in the session's cross-request dedup tracker.
 *
 * The call is counted against an existing entry when its signature is already
 * known (directly or through a similarity alias), or when areSimilarToolCalls()
 * matches it to a stored entry; otherwise a new entry is created. Afterwards
 * the tracker is trimmed back to DEDUP_MAX_SIGNATURES entries, dropping the
 * ones with the oldest `lastSeen` together with any aliases that point at them.
 *
 * @param {Object} session - Session whose metadata.toolCallDedup is updated.
 * @param {Object} toolCall - Tool call; the name is read from `function.name`
 *   or `name`, the arguments from `function.arguments` or `input`.
 */
function recordCrossRequestToolCall(session, toolCall) {
  if (!session?.metadata) return;
  ensureDedupStructure(session);

  const tracker = session.metadata.toolCallDedup;
  const signature = getToolCallSignature(toolCall);
  const toolName = toolCall.function?.name ?? toolCall.name ?? 'unknown';
  const rawArgs = toolCall.function?.arguments ?? toolCall.input;

  let argsText;
  if (typeof rawArgs === 'string') {
    argsText = rawArgs;
  } else {
    argsText = JSON.stringify(rawArgs ?? {});
  }
  // Only the first 200 characters are stored with the entry.
  const argsPreview = argsText.substring(0, 200);
  const timestamp = Date.now();

  const { signatures: seen, similarGroups: aliases } = tracker;

  // Count another occurrence against an existing entry.
  const bump = (key) => {
    const entry = seen[key];
    entry.count += 1;
    entry.lastSeen = timestamp;
    return entry;
  };

  // An alias recorded earlier redirects this signature to its canonical entry.
  const canonicalSig = aliases[signature] || signature;

  if (seen[canonicalSig]) {
    bump(canonicalSig);
  } else {
    // Look for the first stored entry that areSimilarToolCalls() accepts,
    // comparing against a stand-in call rebuilt from the stored fields.
    const hit = Object.entries(seen).find(([, stored]) =>
      areSimilarToolCalls(toolCall, {
        name: stored.toolName,
        input: stored.argsPreview,
      })
    );
    const mergedInto = hit ? hit[0] : null;

    if (hit) {
      aliases[signature] = mergedInto;
      const merged = bump(mergedInto);
      logger.debug({
        newSignature: signature,
        canonicalSignature: mergedInto,
        toolName,
        count: merged.count,
      }, "Cross-request tool dedup: merged similar call");
    }

    // Falsy check (not `!hit`) mirrors the established behaviour.
    if (!mergedInto) {
      seen[signature] = {
        count: 1,
        toolName,
        firstSeen: timestamp,
        lastSeen: timestamp,
        argsPreview,
      };
    }
  }

  // Trim to the cap, least recently seen first.
  const keys = Object.keys(seen);
  if (keys.length > DEDUP_MAX_SIGNATURES) {
    const overflow = keys.length - DEDUP_MAX_SIGNATURES;
    keys.sort((a, b) => seen[a].lastSeen - seen[b].lastSeen);
    for (const stale of keys.slice(0, overflow)) {
      delete seen[stale];
      // Drop aliases that would now point at a removed entry.
      for (const alias of Object.keys(aliases)) {
        if (aliases[alias] === stale) delete aliases[alias];
      }
    }
  }
}
|
|
566
|
+
|
|
567
|
+
/**
 * Find the most-repeated tool call recorded in the session's dedup tracker.
 *
 * When several entries share the highest count, the one encountered first in
 * the tracker's key order is reported. A session without a tracker yields a
 * zero count with null name and signature.
 *
 * @param {Object} session
 * @returns {{ maxCount: number, toolName: string|null, signature: string|null }}
 */
function getMaxDedupCount(session) {
  const tracked = session?.metadata?.toolCallDedup?.signatures;
  const nothingSeen = { maxCount: 0, toolName: null, signature: null };
  if (!tracked) return nothingSeen;

  return Object.entries(tracked).reduce(
    (best, [sig, entry]) =>
      entry.count > best.maxCount
        ? { maxCount: entry.count, toolName: entry.toolName, signature: sig }
        : best,
    nothingSeen
  );
}
|
|
589
|
+
|
|
590
|
+
/**
 * Collect the tool_use blocks that belong to the current turn.
 *
 * The current turn starts at the last user message that contains real text:
 * a non-blank string, a non-blank `text` block, or a non-blank `input_text`
 * block. Every tool_use block found in assistant messages from that point to
 * the end of the conversation is returned, in order. If no user message with
 * text exists, the whole conversation is scanned.
 *
 * `input_text` blocks are accepted with their text in either `text` (the field
 * OpenAI-style blocks use) or the legacy `input_text` field. Reading only
 * `input_text` missed the turn boundary for such messages, so tool calls from
 * earlier turns were returned as well.
 *
 * @param {Array} messages - Conversation messages ({ role, content }).
 * @returns {Array} tool_use blocks (same object references as in `messages`);
 *   empty when `messages` is not an array.
 */
function extractToolUseFromCurrentTurn(messages) {
  if (!Array.isArray(messages)) return [];

  const hasNonBlankText = (value) =>
    typeof value === 'string' && value.trim().length > 0;

  const isUserTextBlock = (block) => {
    if (block?.type === 'text') return hasNonBlankText(block.text);
    if (block?.type === 'input_text') {
      return hasNonBlankText(block.text) || hasNonBlankText(block.input_text);
    }
    return false;
  };

  const carriesUserText = (msg) => {
    if (msg?.role !== 'user') return false;
    if (typeof msg.content === 'string') return msg.content.trim().length > 0;
    return Array.isArray(msg.content) && msg.content.some(isUserTextBlock);
  };

  // Walk backwards to the most recent user message with actual text;
  // user messages holding only tool results do not start a new turn.
  let lastUserTextIndex = -1;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (carriesUserText(messages[i])) {
      lastUserTextIndex = i;
      break;
    }
  }

  const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
  return messages
    .slice(startIndex)
    .filter((msg) => msg?.role === 'assistant' && Array.isArray(msg.content))
    .flatMap((msg) => msg.content.filter((block) => block?.type === 'tool_use'));
}
|
|
634
|
+
|
|
635
|
+
/**
 * Discard all cross-request dedup state for a session and start over with an
 * empty tracker stamped with the current time. Intended for the moment a new
 * user question is detected. No-op when the session has no metadata.
 *
 * @param {Object} session
 */
function resetDedupTracking(session) {
  const metadata = session?.metadata;
  if (!metadata) return;

  const emptyTracker = {
    signatures: {},
    similarGroups: {},
    lastResetAt: Date.now(),
    warningInjected: false,
  };
  metadata.toolCallDedup = emptyTracker;

  const sessionId = session.id ?? null;
  logger.debug({ sessionId }, "Cross-request tool dedup: reset tracking for new user question");
}
|
|
649
|
+
|
|
458
650
|
function sanitiseAzureTools(tools) {
|
|
459
651
|
if (!Array.isArray(tools) || tools.length === 0) return undefined;
|
|
460
652
|
const allowed = new Set([
|
|
@@ -516,13 +708,51 @@ function parseExecutionContent(content) {
|
|
|
516
708
|
const trimmed = content.trim();
|
|
517
709
|
if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
|
|
518
710
|
try {
|
|
519
|
-
|
|
711
|
+
const parsed = JSON.parse(trimmed);
|
|
712
|
+
// Handle Anthropic content blocks array - extract text
|
|
713
|
+
if (Array.isArray(parsed)) {
|
|
714
|
+
const textParts = parsed
|
|
715
|
+
.filter(block => block && typeof block === 'object')
|
|
716
|
+
.map(block => {
|
|
717
|
+
if (block.type === 'text' && typeof block.text === 'string') {
|
|
718
|
+
return block.text;
|
|
719
|
+
}
|
|
720
|
+
// Handle other block types gracefully
|
|
721
|
+
if (block.text) return block.text;
|
|
722
|
+
if (block.content) return typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
723
|
+
return null;
|
|
724
|
+
})
|
|
725
|
+
.filter(text => text !== null);
|
|
726
|
+
|
|
727
|
+
if (textParts.length > 0) {
|
|
728
|
+
return textParts.join('\n');
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
return parsed;
|
|
520
732
|
} catch {
|
|
521
733
|
return content;
|
|
522
734
|
}
|
|
523
735
|
}
|
|
524
736
|
return content;
|
|
525
737
|
}
|
|
738
|
+
// Handle content that's already an array (content blocks)
|
|
739
|
+
if (Array.isArray(content)) {
|
|
740
|
+
const textParts = content
|
|
741
|
+
.filter(block => block && typeof block === 'object')
|
|
742
|
+
.map(block => {
|
|
743
|
+
if (block.type === 'text' && typeof block.text === 'string') {
|
|
744
|
+
return block.text;
|
|
745
|
+
}
|
|
746
|
+
if (block.text) return block.text;
|
|
747
|
+
if (block.content) return typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
748
|
+
return null;
|
|
749
|
+
})
|
|
750
|
+
.filter(text => text !== null);
|
|
751
|
+
|
|
752
|
+
if (textParts.length > 0) {
|
|
753
|
+
return textParts.join('\n');
|
|
754
|
+
}
|
|
755
|
+
}
|
|
526
756
|
return content;
|
|
527
757
|
}
|
|
528
758
|
|
|
@@ -718,19 +948,17 @@ function stripThinkingBlocks(text) {
|
|
|
718
948
|
return cleanedLines.join("\n").trim();
|
|
719
949
|
}
|
|
720
950
|
|
|
951
|
+
/**
|
|
952
|
+
* Convert legacy Ollama /api/chat response to Anthropic Messages format.
|
|
953
|
+
* Used when Ollama < v0.14.0 (no native Anthropic endpoint).
|
|
954
|
+
*/
|
|
721
955
|
function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
|
|
722
|
-
// Ollama response format:
|
|
723
|
-
// { model, created_at, message: { role, content, tool_calls }, done, total_duration, ... }
|
|
724
|
-
// { eval_count, prompt_eval_count, ... }
|
|
725
|
-
|
|
726
956
|
const message = ollamaResponse?.message ?? {};
|
|
727
957
|
const rawContent = message.content || "";
|
|
728
958
|
const toolCalls = message.tool_calls || [];
|
|
729
959
|
|
|
730
|
-
// Build content blocks
|
|
731
960
|
const contentItems = [];
|
|
732
961
|
|
|
733
|
-
// Add text content if present, after stripping thinking blocks
|
|
734
962
|
if (typeof rawContent === "string" && rawContent.trim()) {
|
|
735
963
|
const cleanedContent = stripThinkingBlocks(rawContent);
|
|
736
964
|
if (cleanedContent) {
|
|
@@ -738,18 +966,31 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
|
|
|
738
966
|
}
|
|
739
967
|
}
|
|
740
968
|
|
|
741
|
-
//
|
|
969
|
+
// Convert tool calls from OpenAI function-calling format to Anthropic tool_use
|
|
742
970
|
if (Array.isArray(toolCalls) && toolCalls.length > 0) {
|
|
743
|
-
const
|
|
744
|
-
|
|
745
|
-
|
|
971
|
+
for (const toolCall of toolCalls) {
|
|
972
|
+
const func = toolCall.function || {};
|
|
973
|
+
let input = {};
|
|
974
|
+
if (func.arguments) {
|
|
975
|
+
if (typeof func.arguments === "string") {
|
|
976
|
+
try { input = JSON.parse(func.arguments); } catch { input = {}; }
|
|
977
|
+
} else if (typeof func.arguments === "object") {
|
|
978
|
+
input = func.arguments;
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
contentItems.push({
|
|
982
|
+
type: "tool_use",
|
|
983
|
+
id: toolCall.id || `toolu_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
984
|
+
name: func.name || "unknown",
|
|
985
|
+
input,
|
|
986
|
+
});
|
|
987
|
+
}
|
|
746
988
|
}
|
|
747
989
|
|
|
748
990
|
if (contentItems.length === 0) {
|
|
749
991
|
contentItems.push({ type: "text", text: "" });
|
|
750
992
|
}
|
|
751
993
|
|
|
752
|
-
// Ollama uses different token count fields
|
|
753
994
|
const inputTokens = ollamaResponse.prompt_eval_count ?? 0;
|
|
754
995
|
const outputTokens = ollamaResponse.eval_count ?? 0;
|
|
755
996
|
|
|
@@ -759,7 +1000,8 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
|
|
|
759
1000
|
role: "assistant",
|
|
760
1001
|
model: requestedModel,
|
|
761
1002
|
content: contentItems,
|
|
762
|
-
stop_reason:
|
|
1003
|
+
stop_reason: toolCalls.length > 0 ? "tool_use" :
|
|
1004
|
+
ollamaResponse.done ? "end_turn" : "max_tokens",
|
|
763
1005
|
stop_sequence: null,
|
|
764
1006
|
usage: {
|
|
765
1007
|
input_tokens: inputTokens,
|
|
@@ -851,6 +1093,9 @@ function sanitizePayload(payload) {
|
|
|
851
1093
|
config.modelProvider?.defaultModel ??
|
|
852
1094
|
"databricks-claude-sonnet-4-5";
|
|
853
1095
|
clean.model = requestedModel;
|
|
1096
|
+
if (!clean.max_tokens) {
|
|
1097
|
+
clean.max_tokens = 16384;
|
|
1098
|
+
}
|
|
854
1099
|
const providerType = config.modelProvider?.type ?? "databricks";
|
|
855
1100
|
const flattenContent = providerType !== "azure-anthropic";
|
|
856
1101
|
clean.messages = normaliseMessages(clean, { flattenContent }).filter((msg) => {
|
|
@@ -995,12 +1240,10 @@ function sanitizePayload(payload) {
|
|
|
995
1240
|
// Check if this is a simple conversational message (no tools needed)
|
|
996
1241
|
const isConversational = (() => {
|
|
997
1242
|
if (!Array.isArray(clean.messages) || clean.messages.length === 0) {
|
|
998
|
-
logger.debug({ reason: "No messages array" }, "Ollama conversational check");
|
|
999
1243
|
return false;
|
|
1000
1244
|
}
|
|
1001
1245
|
const lastMessage = clean.messages[clean.messages.length - 1];
|
|
1002
1246
|
if (lastMessage?.role !== "user") {
|
|
1003
|
-
logger.debug({ role: lastMessage?.role }, "Ollama conversational check - not user");
|
|
1004
1247
|
return false;
|
|
1005
1248
|
}
|
|
1006
1249
|
|
|
@@ -1008,28 +1251,18 @@ function sanitizePayload(payload) {
|
|
|
1008
1251
|
? lastMessage.content
|
|
1009
1252
|
: "";
|
|
1010
1253
|
|
|
1011
|
-
logger.debug({
|
|
1012
|
-
contentType: typeof lastMessage.content,
|
|
1013
|
-
isString: typeof lastMessage.content === "string",
|
|
1014
|
-
contentLength: typeof lastMessage.content === "string" ? lastMessage.content.length : "N/A",
|
|
1015
|
-
actualContent: typeof lastMessage.content === "string" ? lastMessage.content.substring(0, 100) : JSON.stringify(lastMessage.content).substring(0, 100)
|
|
1016
|
-
}, "Ollama conversational check - analyzing content");
|
|
1017
|
-
|
|
1018
1254
|
const trimmed = content.trim().toLowerCase();
|
|
1019
1255
|
|
|
1020
1256
|
// Simple greetings
|
|
1021
1257
|
if (/^(hi|hello|hey|good morning|good afternoon|good evening|howdy|greetings)[\s\.\!\?]*$/.test(trimmed)) {
|
|
1022
|
-
|
|
1023
|
-
return true;
|
|
1258
|
+
return "greeting";
|
|
1024
1259
|
}
|
|
1025
1260
|
|
|
1026
|
-
//
|
|
1027
|
-
if (
|
|
1028
|
-
|
|
1029
|
-
return true;
|
|
1261
|
+
// Conversational phrases that don't need tools (thanks, farewells, acknowledgements)
|
|
1262
|
+
if (/^(thanks|thank you|thx|ty|bye|goodbye|see you|ok|okay|cool|nice|great|awesome|sure|got it|sounds good|no worries|np|cheers)[\s\.\!\?]*$/.test(trimmed)) {
|
|
1263
|
+
return "conversational";
|
|
1030
1264
|
}
|
|
1031
1265
|
|
|
1032
|
-
logger.debug({ trimmed: trimmed.substring(0, 50), length: trimmed.length }, "Ollama conversational check - not matched");
|
|
1033
1266
|
return false;
|
|
1034
1267
|
})();
|
|
1035
1268
|
|
|
@@ -1039,37 +1272,12 @@ function sanitizePayload(payload) {
|
|
|
1039
1272
|
delete clean.tool_choice;
|
|
1040
1273
|
logger.debug({
|
|
1041
1274
|
model: config.ollama?.model,
|
|
1042
|
-
|
|
1043
|
-
}, "Ollama conversational mode");
|
|
1275
|
+
reason: isConversational,
|
|
1276
|
+
}, "Ollama conversational mode - tools removed");
|
|
1044
1277
|
} else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) {
|
|
1045
|
-
// Ollama
|
|
1046
|
-
//
|
|
1047
|
-
|
|
1048
|
-
"Bash",
|
|
1049
|
-
"Read",
|
|
1050
|
-
"Write",
|
|
1051
|
-
"Edit",
|
|
1052
|
-
"Glob",
|
|
1053
|
-
"Grep",
|
|
1054
|
-
"WebSearch",
|
|
1055
|
-
"WebFetch"
|
|
1056
|
-
]);
|
|
1057
|
-
|
|
1058
|
-
const limitedTools = clean.tools.filter(tool =>
|
|
1059
|
-
OLLAMA_ESSENTIAL_TOOLS.has(tool.name)
|
|
1060
|
-
);
|
|
1061
|
-
|
|
1062
|
-
logger.debug({
|
|
1063
|
-
model: config.ollama?.model,
|
|
1064
|
-
originalToolCount: clean.tools.length,
|
|
1065
|
-
limitedToolCount: limitedTools.length,
|
|
1066
|
-
keptTools: limitedTools.map(t => t.name)
|
|
1067
|
-
}, "Ollama tools limited for performance");
|
|
1068
|
-
|
|
1069
|
-
clean.tools = limitedTools.length > 0 ? limitedTools : undefined;
|
|
1070
|
-
if (!clean.tools) {
|
|
1071
|
-
delete clean.tools;
|
|
1072
|
-
}
|
|
1278
|
+
// Keep all tools — Ollama receives them in Anthropic format (native API)
|
|
1279
|
+
// or they get converted to OpenAI format in invokeOllama (legacy API)
|
|
1280
|
+
clean.tools = ensureAnthropicToolFormat(clean.tools);
|
|
1073
1281
|
} else {
|
|
1074
1282
|
// Remove tools for models without tool support
|
|
1075
1283
|
delete clean.tools;
|
|
@@ -1097,6 +1305,14 @@ function sanitizePayload(payload) {
|
|
|
1097
1305
|
} else {
|
|
1098
1306
|
clean.tools = ensureAnthropicToolFormat(clean.tools);
|
|
1099
1307
|
}
|
|
1308
|
+
} else if (providerType === "moonshot") {
|
|
1309
|
+
// Moonshot supports tools - keep them in Anthropic format
|
|
1310
|
+
// They will be converted to OpenAI format in invokeMoonshot
|
|
1311
|
+
if (!Array.isArray(clean.tools) || clean.tools.length === 0) {
|
|
1312
|
+
delete clean.tools;
|
|
1313
|
+
} else {
|
|
1314
|
+
clean.tools = ensureAnthropicToolFormat(clean.tools);
|
|
1315
|
+
}
|
|
1100
1316
|
} else if (Array.isArray(clean.tools)) {
|
|
1101
1317
|
// Unknown provider - remove tools for safety
|
|
1102
1318
|
delete clean.tools;
|
|
@@ -1172,6 +1388,10 @@ function sanitizePayload(payload) {
|
|
|
1172
1388
|
}
|
|
1173
1389
|
}
|
|
1174
1390
|
|
|
1391
|
+
// Optional TOON conversion for large JSON message payloads (prompt context only).
|
|
1392
|
+
// Run this BEFORE message coalescing to preserve parseable JSON boundaries.
|
|
1393
|
+
applyToonCompression(clean, config.toon, { logger });
|
|
1394
|
+
|
|
1175
1395
|
// FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
|
|
1176
1396
|
// Strategy: Merge all consecutive messages, add instruction to focus on last request
|
|
1177
1397
|
if (Array.isArray(clean.messages) && clean.messages.length > 0) {
|
|
@@ -1210,7 +1430,7 @@ function sanitizePayload(payload) {
|
|
|
1210
1430
|
}
|
|
1211
1431
|
|
|
1212
1432
|
if (merged.length !== clean.messages.length) {
|
|
1213
|
-
logger.
|
|
1433
|
+
logger.debug({
|
|
1214
1434
|
originalCount: clean.messages.length,
|
|
1215
1435
|
mergedCount: merged.length,
|
|
1216
1436
|
reduced: clean.messages.length - merged.length
|
|
@@ -1220,19 +1440,11 @@ function sanitizePayload(payload) {
|
|
|
1220
1440
|
clean.messages = merged;
|
|
1221
1441
|
}
|
|
1222
1442
|
|
|
1223
|
-
// [CONTEXT_FLOW] Log payload after sanitization
|
|
1224
1443
|
logger.debug({
|
|
1225
1444
|
providerType: config.modelProvider?.type ?? "databricks",
|
|
1226
|
-
phase: "after_sanitize",
|
|
1227
|
-
systemField: typeof clean.system === 'string'
|
|
1228
|
-
? { type: 'string', length: clean.system.length }
|
|
1229
|
-
: clean.system
|
|
1230
|
-
? { type: typeof clean.system, value: clean.system }
|
|
1231
|
-
: undefined,
|
|
1232
1445
|
messageCount: clean.messages?.length ?? 0,
|
|
1233
|
-
firstMessageHasSystem: clean.messages?.[0]?.content?.includes?.('You are Claude Code') ?? false,
|
|
1234
1446
|
toolCount: clean.tools?.length ?? 0
|
|
1235
|
-
}, '
|
|
1447
|
+
}, 'After sanitizePayload');
|
|
1236
1448
|
|
|
1237
1449
|
// === Suggestion mode: tag request and override model if configured ===
|
|
1238
1450
|
const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages);
|
|
@@ -1339,8 +1551,7 @@ async function runAgentLoop({
|
|
|
1339
1551
|
providerType,
|
|
1340
1552
|
headers,
|
|
1341
1553
|
}) {
|
|
1342
|
-
|
|
1343
|
-
logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED');
|
|
1554
|
+
logger.debug({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop entered');
|
|
1344
1555
|
const settings = resolveLoopOptions(options);
|
|
1345
1556
|
// Initialize audit logger (no-op if disabled)
|
|
1346
1557
|
const auditLogger = createAuditLogger(config.audit);
|
|
@@ -1387,7 +1598,6 @@ async function runAgentLoop({
|
|
|
1387
1598
|
}
|
|
1388
1599
|
|
|
1389
1600
|
steps += 1;
|
|
1390
|
-
console.log('[LOOP DEBUG] Entered while loop - step:', steps);
|
|
1391
1601
|
logger.debug(
|
|
1392
1602
|
{
|
|
1393
1603
|
sessionId: session?.id ?? null,
|
|
@@ -1397,6 +1607,19 @@ async function runAgentLoop({
|
|
|
1397
1607
|
"Agent loop step",
|
|
1398
1608
|
);
|
|
1399
1609
|
|
|
1610
|
+
// Trim messages when they grow too large to prevent OOM.
|
|
1611
|
+
// Keep the first message (system/user) and the last MAX_LOOP_MESSAGES.
|
|
1612
|
+
const MAX_LOOP_MESSAGES = 40;
|
|
1613
|
+
if (cleanPayload.messages && cleanPayload.messages.length > MAX_LOOP_MESSAGES) {
|
|
1614
|
+
const excess = cleanPayload.messages.length - MAX_LOOP_MESSAGES;
|
|
1615
|
+
// Keep first 2 messages (system context + initial user) and trim from the middle
|
|
1616
|
+
cleanPayload.messages.splice(2, excess);
|
|
1617
|
+
logger.debug(
|
|
1618
|
+
{ trimmed: excess, remaining: cleanPayload.messages.length },
|
|
1619
|
+
"Trimmed intermediate messages to prevent memory growth",
|
|
1620
|
+
);
|
|
1621
|
+
}
|
|
1622
|
+
|
|
1400
1623
|
// Debug: Log payload before sending to Azure
|
|
1401
1624
|
if (providerType === "azure-anthropic") {
|
|
1402
1625
|
logger.debug(
|
|
@@ -1481,14 +1704,11 @@ async function runAgentLoop({
|
|
|
1481
1704
|
}
|
|
1482
1705
|
}
|
|
1483
1706
|
|
|
1484
|
-
// [CONTEXT_FLOW] Log after memory injection
|
|
1485
1707
|
logger.debug({
|
|
1486
1708
|
sessionId: session?.id ?? null,
|
|
1487
|
-
phase: "after_memory",
|
|
1488
|
-
systemPromptLength: cleanPayload.system?.length ?? 0,
|
|
1489
1709
|
messageCount: cleanPayload.messages?.length ?? 0,
|
|
1490
1710
|
toolCount: cleanPayload.tools?.length ?? 0
|
|
1491
|
-
}, '
|
|
1711
|
+
}, 'After memory injection');
|
|
1492
1712
|
|
|
1493
1713
|
if (steps === 1 && (config.systemPrompt?.mode === 'dynamic' || config.systemPrompt?.toolDescriptions === 'minimal')) {
|
|
1494
1714
|
try {
|
|
@@ -1577,9 +1797,26 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1577
1797
|
logger.debug({ sessionId: session?.id ?? null }, 'Tool termination instructions injected for non-Claude model');
|
|
1578
1798
|
}
|
|
1579
1799
|
|
|
1800
|
+
// Compute model-aware token budget thresholds
|
|
1801
|
+
const registry = getModelRegistrySync();
|
|
1802
|
+
const modelInfo = registry.getCost(requestedModel);
|
|
1803
|
+
const modelContextWindow = modelInfo?.context || config.tokenBudget?.max || 180000;
|
|
1804
|
+
const modelMax = Math.floor(modelContextWindow * 0.85);
|
|
1805
|
+
const effectiveMax = Math.min(modelMax, config.tokenBudget?.max || 180000);
|
|
1806
|
+
const effectiveWarning = Math.floor(effectiveMax * 0.65);
|
|
1807
|
+
|
|
1808
|
+
logger.debug({
|
|
1809
|
+
sessionId: session?.id ?? null,
|
|
1810
|
+
requestedModel,
|
|
1811
|
+
modelContextWindow,
|
|
1812
|
+
effectiveWarning,
|
|
1813
|
+
effectiveMax,
|
|
1814
|
+
source: modelInfo?.source || 'default',
|
|
1815
|
+
}, 'Model-aware token budget computed');
|
|
1816
|
+
|
|
1580
1817
|
if (steps === 1 && config.tokenBudget?.enforcement !== false) {
|
|
1581
1818
|
try {
|
|
1582
|
-
const budgetCheck = tokenBudget.checkBudget(cleanPayload);
|
|
1819
|
+
const budgetCheck = tokenBudget.checkBudget(cleanPayload, effectiveWarning, effectiveMax);
|
|
1583
1820
|
|
|
1584
1821
|
if (budgetCheck.atWarning) {
|
|
1585
1822
|
logger.warn({
|
|
@@ -1593,8 +1830,8 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1593
1830
|
if (budgetCheck.overMax) {
|
|
1594
1831
|
// Apply adaptive compression to fit within budget
|
|
1595
1832
|
const enforcement = tokenBudget.enforceBudget(cleanPayload, {
|
|
1596
|
-
warningThreshold:
|
|
1597
|
-
maxThreshold:
|
|
1833
|
+
warningThreshold: effectiveWarning,
|
|
1834
|
+
maxThreshold: effectiveMax,
|
|
1598
1835
|
enforcement: true
|
|
1599
1836
|
});
|
|
1600
1837
|
|
|
@@ -1618,7 +1855,6 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1618
1855
|
}
|
|
1619
1856
|
|
|
1620
1857
|
// Track estimated token usage before model call
|
|
1621
|
-
console.log('[TOKEN DEBUG] About to track token usage - step:', steps);
|
|
1622
1858
|
const estimatedTokens = config.tokenTracking?.enabled !== false
|
|
1623
1859
|
? tokens.countPayloadTokens(cleanPayload)
|
|
1624
1860
|
: null;
|
|
@@ -1632,15 +1868,6 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1632
1868
|
}
|
|
1633
1869
|
|
|
1634
1870
|
// Apply Headroom compression if enabled
|
|
1635
|
-
const headroomEstTokens = Math.ceil(JSON.stringify(cleanPayload.messages || []).length / 4);
|
|
1636
|
-
logger.info({
|
|
1637
|
-
headroomEnabled: isHeadroomEnabled(),
|
|
1638
|
-
messageCount: cleanPayload.messages?.length ?? 0,
|
|
1639
|
-
estimatedTokens: headroomEstTokens,
|
|
1640
|
-
threshold: config.headroom?.minTokens || 500,
|
|
1641
|
-
willCompress: isHeadroomEnabled() && headroomEstTokens >= (config.headroom?.minTokens || 500),
|
|
1642
|
-
}, 'Headroom compression check');
|
|
1643
|
-
|
|
1644
1871
|
if (isHeadroomEnabled() && cleanPayload.messages && cleanPayload.messages.length > 0) {
|
|
1645
1872
|
try {
|
|
1646
1873
|
const compressionResult = await headroomCompress(
|
|
@@ -1649,36 +1876,27 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1649
1876
|
{
|
|
1650
1877
|
mode: config.headroom?.mode,
|
|
1651
1878
|
queryContext: cleanPayload.messages[cleanPayload.messages.length - 1]?.content,
|
|
1879
|
+
model: requestedModel,
|
|
1880
|
+
modelLimit: modelContextWindow,
|
|
1881
|
+
tokenBudget: effectiveMax,
|
|
1652
1882
|
}
|
|
1653
1883
|
);
|
|
1654
1884
|
|
|
1655
|
-
logger.info({
|
|
1656
|
-
compressed: compressionResult.compressed,
|
|
1657
|
-
tokensBefore: compressionResult.stats?.tokens_before,
|
|
1658
|
-
tokensAfter: compressionResult.stats?.tokens_after,
|
|
1659
|
-
savings: compressionResult.stats?.savings_percent ? `${compressionResult.stats.savings_percent}%` : 'N/A',
|
|
1660
|
-
reason: compressionResult.stats?.reason || compressionResult.stats?.transforms_applied?.join(', ') || 'none',
|
|
1661
|
-
}, 'Headroom compression result');
|
|
1662
|
-
|
|
1663
1885
|
if (compressionResult.compressed) {
|
|
1664
1886
|
cleanPayload.messages = compressionResult.messages;
|
|
1665
1887
|
if (compressionResult.tools) {
|
|
1666
1888
|
cleanPayload.tools = compressionResult.tools;
|
|
1667
1889
|
}
|
|
1668
|
-
logger.info({
|
|
1669
|
-
sessionId: session?.id ?? null,
|
|
1670
|
-
tokensBefore: compressionResult.stats?.tokens_before,
|
|
1671
|
-
tokensAfter: compressionResult.stats?.tokens_after,
|
|
1672
|
-
saved: compressionResult.stats?.tokens_saved,
|
|
1673
|
-
savingsPercent: compressionResult.stats?.savings_percent,
|
|
1674
|
-
transforms: compressionResult.stats?.transforms_applied,
|
|
1675
|
-
}, 'Headroom compression applied to request');
|
|
1676
|
-
} else {
|
|
1677
|
-
logger.debug({
|
|
1678
|
-
sessionId: session?.id ?? null,
|
|
1679
|
-
reason: compressionResult.stats?.reason,
|
|
1680
|
-
}, 'Headroom compression skipped');
|
|
1681
1890
|
}
|
|
1891
|
+
|
|
1892
|
+
logger.debug({
|
|
1893
|
+
sessionId: session?.id ?? null,
|
|
1894
|
+
outcome: compressionResult.compressed ? 'applied' : 'skipped',
|
|
1895
|
+
tokensBefore: compressionResult.stats?.tokens_before,
|
|
1896
|
+
tokensAfter: compressionResult.stats?.tokens_after,
|
|
1897
|
+
savingsPercent: compressionResult.stats?.savings_percent,
|
|
1898
|
+
reason: compressionResult.stats?.reason || compressionResult.stats?.transforms_applied?.join(', ') || 'none',
|
|
1899
|
+
}, 'Headroom compression');
|
|
1682
1900
|
} catch (headroomErr) {
|
|
1683
1901
|
logger.warn({ err: headroomErr, sessionId: session?.id ?? null }, 'Headroom compression failed, using original messages');
|
|
1684
1902
|
}
|
|
@@ -1990,9 +2208,10 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1990
2208
|
});
|
|
1991
2209
|
|
|
1992
2210
|
let assistantToolMessage;
|
|
1993
|
-
if (providerType === "azure-anthropic") {
|
|
1994
|
-
// For
|
|
1995
|
-
//
|
|
2211
|
+
if (providerType === "azure-anthropic" || isAnthropicFormat) {
|
|
2212
|
+
// For Anthropic-format responses (azure-anthropic, Ollama native API,
|
|
2213
|
+
// azure-openai Responses API), use the content array directly —
|
|
2214
|
+
// it already contains both text and tool_use blocks in the correct format
|
|
1996
2215
|
assistantToolMessage = {
|
|
1997
2216
|
role: "assistant",
|
|
1998
2217
|
content: databricksResponse.json?.content ?? [],
|
|
@@ -2005,9 +2224,9 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2005
2224
|
};
|
|
2006
2225
|
}
|
|
2007
2226
|
|
|
2008
|
-
// Only add fallback content for
|
|
2227
|
+
// Only add fallback content for OpenAI-format responses (Anthropic format already has content)
|
|
2009
2228
|
if (
|
|
2010
|
-
providerType !== "azure-anthropic" &&
|
|
2229
|
+
providerType !== "azure-anthropic" && !isAnthropicFormat &&
|
|
2011
2230
|
(!assistantToolMessage.content ||
|
|
2012
2231
|
(typeof assistantToolMessage.content === "string" &&
|
|
2013
2232
|
assistantToolMessage.content.trim().length === 0)) &&
|
|
@@ -2043,7 +2262,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2043
2262
|
// If in passthrough/client mode and there are client-side tools, return them to client
|
|
2044
2263
|
// Server-side tools (Task, Web) will be executed below
|
|
2045
2264
|
if ((executionMode === "passthrough" || executionMode === "client") && clientSideToolCalls.length > 0) {
|
|
2046
|
-
logger.
|
|
2265
|
+
logger.debug(
|
|
2047
2266
|
{
|
|
2048
2267
|
sessionId: session?.id ?? null,
|
|
2049
2268
|
totalToolCount: toolCalls.length,
|
|
@@ -2068,7 +2287,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2068
2287
|
type: "message",
|
|
2069
2288
|
role: "assistant",
|
|
2070
2289
|
content: clientContent,
|
|
2071
|
-
model: databricksResponse.json?.model ||
|
|
2290
|
+
model: databricksResponse.json?.model || cleanPayload.model,
|
|
2072
2291
|
stop_reason: "tool_use",
|
|
2073
2292
|
usage: databricksResponse.json?.usage || {
|
|
2074
2293
|
input_tokens: 0,
|
|
@@ -2089,6 +2308,27 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2089
2308
|
// then continue the conversation loop. For now, let's fall through to execute server-side tools.
|
|
2090
2309
|
if (serverSideToolCalls.length === 0) {
|
|
2091
2310
|
// No server-side tools - pure passthrough
|
|
2311
|
+
// Record outbound client-side tool calls into cross-request dedup tracker
|
|
2312
|
+
if (session && clientSideToolCalls.length > 0) {
|
|
2313
|
+
ensureDedupStructure(session);
|
|
2314
|
+
for (const call of clientSideToolCalls) {
|
|
2315
|
+
recordCrossRequestToolCall(session, call);
|
|
2316
|
+
}
|
|
2317
|
+
// Persist dedup state (non-ephemeral sessions only)
|
|
2318
|
+
if (session.id && !session._ephemeral) {
|
|
2319
|
+
try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
|
|
2320
|
+
logger.debug({ err: e.message }, "Failed to persist outbound dedup state");
|
|
2321
|
+
}
|
|
2322
|
+
}
|
|
2323
|
+
const { maxCount, toolName: dedupTool } = getMaxDedupCount(session);
|
|
2324
|
+
logger.debug({
|
|
2325
|
+
sessionId: session?.id ?? null,
|
|
2326
|
+
clientToolCount: clientSideToolCalls.length,
|
|
2327
|
+
maxDedupCount: maxCount,
|
|
2328
|
+
maxDedupTool: dedupTool,
|
|
2329
|
+
}, "Cross-request tool dedup: recorded outbound tool calls");
|
|
2330
|
+
}
|
|
2331
|
+
|
|
2092
2332
|
return {
|
|
2093
2333
|
response: {
|
|
2094
2334
|
status: 200,
|
|
@@ -2105,7 +2345,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2105
2345
|
// Override toolCalls to only include Server-side tools for server execution
|
|
2106
2346
|
toolCalls = serverSideToolCalls;
|
|
2107
2347
|
|
|
2108
|
-
logger.
|
|
2348
|
+
logger.debug(
|
|
2109
2349
|
{
|
|
2110
2350
|
sessionId: session?.id ?? null,
|
|
2111
2351
|
serverToolCount: serverSideToolCalls.length,
|
|
@@ -2114,7 +2354,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2114
2354
|
);
|
|
2115
2355
|
} else if (executionMode === "passthrough" || executionMode === "client") {
|
|
2116
2356
|
// Only Server-side tools, no Client-side tools - execute all server-side
|
|
2117
|
-
logger.
|
|
2357
|
+
logger.debug(
|
|
2118
2358
|
{
|
|
2119
2359
|
sessionId: session?.id ?? null,
|
|
2120
2360
|
serverToolCount: serverSideToolCalls.length,
|
|
@@ -2179,6 +2419,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2179
2419
|
session,
|
|
2180
2420
|
cwd,
|
|
2181
2421
|
requestMessages: cleanPayload.messages,
|
|
2422
|
+
provider: providerType, // Pass provider for GPT-specific formatting
|
|
2182
2423
|
}))
|
|
2183
2424
|
);
|
|
2184
2425
|
|
|
@@ -2412,10 +2653,14 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2412
2653
|
session,
|
|
2413
2654
|
cwd,
|
|
2414
2655
|
requestMessages: cleanPayload.messages,
|
|
2656
|
+
provider: providerType, // Pass provider for GPT-specific formatting
|
|
2415
2657
|
});
|
|
2416
2658
|
|
|
2417
2659
|
let toolMessage;
|
|
2418
|
-
if (providerType === "azure-anthropic") {
|
|
2660
|
+
if (providerType === "azure-anthropic" || isAnthropicFormat) {
|
|
2661
|
+
// Anthropic-format tool result for providers whose responses use
|
|
2662
|
+
// Anthropic tool_use blocks (azure-anthropic, Ollama native API,
|
|
2663
|
+
// azure-openai Responses API)
|
|
2419
2664
|
const parsedContent = parseExecutionContent(execution.content);
|
|
2420
2665
|
const serialisedContent =
|
|
2421
2666
|
typeof parsedContent === "string" || parsedContent === null
|
|
@@ -2526,34 +2771,54 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2526
2771
|
|
|
2527
2772
|
// === TOOL CALL LOOP DETECTION ===
|
|
2528
2773
|
// Track tool calls to detect infinite loops where the model calls the same tool
|
|
2529
|
-
// repeatedly with identical parameters
|
|
2774
|
+
// repeatedly with identical or similar parameters
|
|
2775
|
+
// All providers use threshold 2 and similarity-based detection
|
|
2776
|
+
const loopThreshold = 2;
|
|
2777
|
+
|
|
2530
2778
|
for (const call of toolCalls) {
|
|
2531
2779
|
const signature = getToolCallSignature(call);
|
|
2532
|
-
const
|
|
2533
|
-
|
|
2780
|
+
const existingEntry = toolCallHistory.get(signature);
|
|
2781
|
+
let count = (existingEntry?.count || 0) + 1;
|
|
2782
|
+
toolCallHistory.set(signature, { count, call });
|
|
2534
2783
|
|
|
2535
2784
|
const toolName = call.function?.name ?? call.name ?? 'unknown';
|
|
2536
2785
|
|
|
2537
|
-
|
|
2786
|
+
// Check for similar (not just identical) tool calls across all providers
|
|
2787
|
+
// This catches cases where the model slightly varies parameters but is essentially looping
|
|
2788
|
+
for (const [existingSig, existingData] of toolCallHistory.entries()) {
|
|
2789
|
+
if (existingSig !== signature && areSimilarToolCalls(call, existingData.call)) {
|
|
2790
|
+
// Found a similar call - increase count to trigger loop detection earlier
|
|
2791
|
+
count = Math.max(count, existingData.count + 1);
|
|
2792
|
+
logger.debug({
|
|
2793
|
+
tool: toolName,
|
|
2794
|
+
currentSignature: signature,
|
|
2795
|
+
similarSignature: existingSig,
|
|
2796
|
+
combinedCount: count,
|
|
2797
|
+
}, "Similar tool call detected - combining counts");
|
|
2798
|
+
}
|
|
2799
|
+
}
|
|
2800
|
+
|
|
2801
|
+
if (count === loopThreshold && !loopWarningInjected) {
|
|
2538
2802
|
logger.warn(
|
|
2539
2803
|
{
|
|
2540
2804
|
sessionId: session?.id ?? null,
|
|
2541
2805
|
correlationId: options?.correlationId,
|
|
2542
2806
|
tool: toolName,
|
|
2543
2807
|
loopCount: count,
|
|
2808
|
+
loopThreshold,
|
|
2544
2809
|
signature: signature,
|
|
2545
2810
|
action: 'warning_injected',
|
|
2546
2811
|
totalSteps: steps,
|
|
2547
2812
|
remainingSteps: settings.maxSteps - steps,
|
|
2548
2813
|
},
|
|
2549
|
-
|
|
2814
|
+
`Tool call loop detected - same tool called ${loopThreshold} times with identical/similar parameters`,
|
|
2550
2815
|
);
|
|
2551
2816
|
|
|
2552
2817
|
// Inject warning message to model
|
|
2553
2818
|
loopWarningInjected = true;
|
|
2554
2819
|
const warningMessage = {
|
|
2555
2820
|
role: "user",
|
|
2556
|
-
content:
|
|
2821
|
+
content: `⚠️ CRITICAL SYSTEM WARNING: You have called the "${toolName}" tool ${count} times with identical or similar parameters. This IS an infinite loop. STOP calling this tool immediately. You MUST now provide a direct text response to the user based on the results you have received. If the tool returned "no results" or empty output, that IS the final answer - do not retry. Summarize your findings and respond.`,
|
|
2557
2822
|
};
|
|
2558
2823
|
|
|
2559
2824
|
cleanPayload.messages.push(warningMessage);
|
|
@@ -2568,11 +2833,12 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2568
2833
|
reason: "tool_call_loop_warning",
|
|
2569
2834
|
toolName,
|
|
2570
2835
|
loopCount: count,
|
|
2836
|
+
loopThreshold,
|
|
2571
2837
|
},
|
|
2572
2838
|
});
|
|
2573
2839
|
}
|
|
2574
|
-
} else if (count >
|
|
2575
|
-
// Force termination after
|
|
2840
|
+
} else if (count > loopThreshold) {
|
|
2841
|
+
// Force termination after threshold exceeded
|
|
2576
2842
|
// Log FULL context for debugging why the loop occurred
|
|
2577
2843
|
logger.error(
|
|
2578
2844
|
{
|
|
@@ -2580,6 +2846,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2580
2846
|
correlationId: options?.correlationId,
|
|
2581
2847
|
tool: toolName,
|
|
2582
2848
|
loopCount: count,
|
|
2849
|
+
loopThreshold,
|
|
2583
2850
|
signature: signature,
|
|
2584
2851
|
action: 'request_terminated',
|
|
2585
2852
|
totalSteps: steps,
|
|
@@ -2600,7 +2867,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2600
2867
|
body: {
|
|
2601
2868
|
error: {
|
|
2602
2869
|
type: "tool_call_loop_detected",
|
|
2603
|
-
message: `Tool call loop detected: The model called the same tool ("${toolName}") with identical parameters ${count} times. This indicates an infinite loop and execution has been terminated. Please try rephrasing your request or provide different parameters.`,
|
|
2870
|
+
message: `Tool call loop detected: The model called the same tool ("${toolName}") with identical parameters ${count} times (threshold: ${loopThreshold}). This indicates an infinite loop and execution has been terminated. Please try rephrasing your request or provide different parameters.`,
|
|
2604
2871
|
},
|
|
2605
2872
|
},
|
|
2606
2873
|
terminationReason: "tool_call_loop",
|
|
@@ -2632,11 +2899,19 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2632
2899
|
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
2633
2900
|
}
|
|
2634
2901
|
} else if (actualProvider === "ollama") {
|
|
2635
|
-
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
)
|
|
2639
|
-
|
|
2902
|
+
const ollamaJson = databricksResponse.json;
|
|
2903
|
+
// Detect response format: Anthropic API (v0.14.0+) has type:"message",
|
|
2904
|
+
// legacy /api/chat has message.role + message.content
|
|
2905
|
+
if (ollamaJson?.type === "message" && Array.isArray(ollamaJson?.content)) {
|
|
2906
|
+
// Anthropic-native response — passthrough
|
|
2907
|
+
anthropicPayload = ollamaJson;
|
|
2908
|
+
} else {
|
|
2909
|
+
// Legacy Ollama response — convert to Anthropic format
|
|
2910
|
+
anthropicPayload = ollamaToAnthropicResponse(ollamaJson, requestedModel);
|
|
2911
|
+
}
|
|
2912
|
+
if (Array.isArray(anthropicPayload?.content)) {
|
|
2913
|
+
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
2914
|
+
}
|
|
2640
2915
|
} else if (actualProvider === "openrouter") {
|
|
2641
2916
|
const { convertOpenRouterResponseToAnthropic } = require("../clients/openrouter-utils");
|
|
2642
2917
|
|
|
@@ -2865,6 +3140,16 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2865
3140
|
if (Array.isArray(anthropicPayload?.content)) {
|
|
2866
3141
|
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
2867
3142
|
}
|
|
3143
|
+
} else if (actualProvider === "moonshot") {
|
|
3144
|
+
// Moonshot responses are already converted to Anthropic format in invokeMoonshot
|
|
3145
|
+
logger.info({
|
|
3146
|
+
hasJson: !!databricksResponse.json,
|
|
3147
|
+
jsonContent: JSON.stringify(databricksResponse.json?.content)?.substring(0, 300),
|
|
3148
|
+
}, "=== MOONSHOT ORCHESTRATOR DEBUG ===");
|
|
3149
|
+
anthropicPayload = databricksResponse.json;
|
|
3150
|
+
if (Array.isArray(anthropicPayload?.content)) {
|
|
3151
|
+
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
3152
|
+
}
|
|
2868
3153
|
} else {
|
|
2869
3154
|
anthropicPayload = toAnthropicResponse(
|
|
2870
3155
|
databricksResponse.json,
|
|
@@ -3059,6 +3344,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
3059
3344
|
session,
|
|
3060
3345
|
cwd,
|
|
3061
3346
|
requestMessages: cleanPayload.messages,
|
|
3347
|
+
provider: providerType, // Pass provider for GPT-specific formatting
|
|
3062
3348
|
});
|
|
3063
3349
|
|
|
3064
3350
|
const toolResultMessage = createFallbackToolResultMessage(providerType, {
|
|
@@ -3332,100 +3618,289 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
3332
3618
|
|
|
3333
3619
|
// === TOOL LOOP GUARD (EARLY CHECK) ===
|
|
3334
3620
|
// Check BEFORE sanitization since sanitizePayload removes conversation history
|
|
3335
|
-
|
|
3621
|
+
// All providers use threshold 2 to catch loops early
|
|
3622
|
+
const providerType = config.modelProvider?.type ?? "databricks";
|
|
3623
|
+
const toolLoopThreshold = 2;
|
|
3336
3624
|
const { toolResultCount, toolUseCount } = countToolCallsInHistory(payload?.messages);
|
|
3337
3625
|
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
|
|
3626
|
+
const executionMode = config.toolExecutionMode || "server";
|
|
3627
|
+
const isClientMode = executionMode === "client" || executionMode === "passthrough";
|
|
3628
|
+
|
|
3629
|
+
if (isClientMode && session) {
|
|
3630
|
+
// === CROSS-REQUEST DEDUP (CLIENT/PASSTHROUGH MODE) ===
|
|
3631
|
+
// The inner-loop guard resets each HTTP request so repeated calls across
|
|
3632
|
+
// requests escape detection. Track signatures in session metadata instead.
|
|
3633
|
+
ensureDedupStructure(session);
|
|
3634
|
+
|
|
3635
|
+
// Detect new user question → reset dedup tracking
|
|
3636
|
+
const dedup = session.metadata.toolCallDedup;
|
|
3637
|
+
const incomingToolUse = extractToolUseFromCurrentTurn(payload?.messages);
|
|
3638
|
+
// A user text message with no preceding tool_use means a brand-new question
|
|
3639
|
+
const hasNewUserText = (() => {
|
|
3640
|
+
const msgs = payload?.messages || [];
|
|
3641
|
+
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
3642
|
+
const msg = msgs[i];
|
|
3643
|
+
if (msg?.role === 'user') {
|
|
3644
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) return true;
|
|
3645
|
+
if (Array.isArray(msg.content)) {
|
|
3646
|
+
return msg.content.some(block =>
|
|
3647
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
3648
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
3649
|
+
);
|
|
3650
|
+
}
|
|
3651
|
+
}
|
|
3652
|
+
break; // Only check the very last message
|
|
3653
|
+
}
|
|
3654
|
+
return false;
|
|
3655
|
+
})();
|
|
3344
3656
|
|
|
3345
|
-
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3657
|
+
if (hasNewUserText && incomingToolUse.length === 0) {
|
|
3658
|
+
// Pure user text with no tool results → new question
|
|
3659
|
+
resetDedupTracking(session);
|
|
3660
|
+
} else {
|
|
3661
|
+
// Record each tool_use from the incoming messages into the dedup tracker
|
|
3662
|
+
for (const toolUseBlock of incomingToolUse) {
|
|
3663
|
+
recordCrossRequestToolCall(session, toolUseBlock);
|
|
3664
|
+
}
|
|
3352
3665
|
|
|
3353
|
-
|
|
3354
|
-
// This prevents showing old results from previous questions
|
|
3355
|
-
let toolResultsSummary = "";
|
|
3356
|
-
const messages = payload?.messages || [];
|
|
3666
|
+
const { maxCount, toolName: dedupToolName, signature: dedupSig } = getMaxDedupCount(session);
|
|
3357
3667
|
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
|
|
3365
|
-
|
|
3668
|
+
if (maxCount >= DEDUP_TERMINATE_THRESHOLD) {
|
|
3669
|
+
// Force-terminate: same pattern as existing tool_loop_guard
|
|
3670
|
+
logger.error({
|
|
3671
|
+
toolName: dedupToolName,
|
|
3672
|
+
count: maxCount,
|
|
3673
|
+
threshold: DEDUP_TERMINATE_THRESHOLD,
|
|
3674
|
+
signature: dedupSig,
|
|
3675
|
+
sessionId: session?.id ?? null,
|
|
3676
|
+
}, "[CrossRequestDedup] FORCE TERMINATING - repeated tool call across requests");
|
|
3677
|
+
|
|
3678
|
+
// Extract tool results summary from current turn
|
|
3679
|
+
let toolResultsSummary = "";
|
|
3680
|
+
const messages = payload?.messages || [];
|
|
3681
|
+
const { lastUserTextIndex: luIdx } = countToolCallsInHistory(messages);
|
|
3682
|
+
const startIdx = luIdx >= 0 ? luIdx : 0;
|
|
3683
|
+
for (let i = startIdx; i < messages.length; i++) {
|
|
3684
|
+
const msg = messages[i];
|
|
3685
|
+
if (!msg || !Array.isArray(msg.content)) continue;
|
|
3686
|
+
for (const block of msg.content) {
|
|
3687
|
+
if (block?.type === 'tool_result' && block?.content) {
|
|
3688
|
+
const content = typeof block.content === 'string'
|
|
3689
|
+
? block.content
|
|
3690
|
+
: JSON.stringify(block.content);
|
|
3691
|
+
if (content && !content.includes('Found 0')) {
|
|
3692
|
+
toolResultsSummary += content + "\n";
|
|
3693
|
+
}
|
|
3694
|
+
}
|
|
3695
|
+
}
|
|
3696
|
+
}
|
|
3697
|
+
|
|
3698
|
+
let responseText = `Based on the tool results, here's what I found:\n\n`;
|
|
3699
|
+
if (toolResultsSummary.trim()) {
|
|
3700
|
+
responseText += toolResultsSummary.trim();
|
|
3701
|
+
} else {
|
|
3702
|
+
responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
|
|
3703
|
+
}
|
|
3704
|
+
|
|
3705
|
+
const forcedResponse = {
|
|
3706
|
+
id: `msg_forced_${Date.now()}`,
|
|
3707
|
+
type: "message",
|
|
3708
|
+
role: "assistant",
|
|
3709
|
+
content: [{ type: "text", text: responseText }],
|
|
3710
|
+
model: requestedModel || "unknown",
|
|
3711
|
+
stop_reason: "end_turn",
|
|
3712
|
+
stop_sequence: null,
|
|
3713
|
+
usage: { input_tokens: 0, output_tokens: 100 },
|
|
3714
|
+
};
|
|
3715
|
+
|
|
3716
|
+
// Reset dedup after termination so next question starts fresh
|
|
3717
|
+
resetDedupTracking(session);
|
|
3718
|
+
// Persist to DB (non-ephemeral sessions only)
|
|
3719
|
+
if (session.id && !session._ephemeral) {
|
|
3720
|
+
try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
|
|
3721
|
+
logger.debug({ err: e.message }, "Failed to persist dedup reset");
|
|
3722
|
+
}
|
|
3723
|
+
}
|
|
3724
|
+
|
|
3725
|
+
return {
|
|
3726
|
+
status: 200,
|
|
3727
|
+
body: forcedResponse,
|
|
3728
|
+
terminationReason: "tool_loop_guard",
|
|
3729
|
+
};
|
|
3366
3730
|
}
|
|
3367
|
-
|
|
3368
|
-
|
|
3369
|
-
|
|
3370
|
-
|
|
3371
|
-
|
|
3372
|
-
|
|
3373
|
-
|
|
3374
|
-
|
|
3731
|
+
|
|
3732
|
+
if (maxCount >= DEDUP_WARN_THRESHOLD && !dedup.warningInjected) {
|
|
3733
|
+
logger.warn({
|
|
3734
|
+
toolName: dedupToolName,
|
|
3735
|
+
count: maxCount,
|
|
3736
|
+
threshold: DEDUP_WARN_THRESHOLD,
|
|
3737
|
+
signature: dedupSig,
|
|
3738
|
+
sessionId: session?.id ?? null,
|
|
3739
|
+
}, "[CrossRequestDedup] Warning - repeated tool call detected across requests");
|
|
3740
|
+
|
|
3741
|
+
dedup.warningInjected = true;
|
|
3742
|
+
|
|
3743
|
+
// Inject a strict warning into the payload so the model sees it
|
|
3744
|
+
if (Array.isArray(payload?.messages)) {
|
|
3745
|
+
payload.messages.push({
|
|
3746
|
+
role: "user",
|
|
3747
|
+
content: `⚠️ CRITICAL SYSTEM WARNING: You have called the "${dedupToolName}" tool ${maxCount} times with identical or similar parameters across multiple requests. This IS an infinite loop. STOP calling this tool immediately. You MUST now provide a direct text response based on the results you have received. If the tool returned "no results" or empty output, that IS the final answer - do not retry. Summarize your findings and respond.`,
|
|
3748
|
+
});
|
|
3749
|
+
}
|
|
3750
|
+
}
|
|
3751
|
+
|
|
3752
|
+
// Persist dedup state (non-ephemeral sessions only)
|
|
3753
|
+
if (session.id && !session._ephemeral) {
|
|
3754
|
+
try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
|
|
3755
|
+
logger.debug({ err: e.message }, "Failed to persist dedup state");
|
|
3375
3756
|
}
|
|
3376
3757
|
}
|
|
3377
3758
|
}
|
|
3378
3759
|
|
|
3379
|
-
//
|
|
3380
|
-
const
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3760
|
+
// Client mode still uses the relaxed per-request threshold for the count-based guard
|
|
3761
|
+
const effectiveThreshold = 10;
|
|
3762
|
+
if (toolResultCount >= effectiveThreshold) {
|
|
3763
|
+
logger.error({
|
|
3764
|
+
toolResultCount,
|
|
3765
|
+
toolUseCount,
|
|
3766
|
+
threshold: effectiveThreshold,
|
|
3767
|
+
sessionId: session?.id ?? null,
|
|
3768
|
+
}, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
|
|
3769
|
+
|
|
3770
|
+
let toolResultsSummary = "";
|
|
3771
|
+
const messages = payload?.messages || [];
|
|
3772
|
+
let lastUserTextIndex = -1;
|
|
3773
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3774
|
+
const msg = messages[i];
|
|
3775
|
+
if (msg?.role !== 'user') continue;
|
|
3776
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
|
|
3777
|
+
lastUserTextIndex = i;
|
|
3778
|
+
break;
|
|
3779
|
+
}
|
|
3780
|
+
if (Array.isArray(msg.content)) {
|
|
3781
|
+
const hasText = msg.content.some(block =>
|
|
3782
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
3783
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
3784
|
+
);
|
|
3785
|
+
if (hasText) {
|
|
3786
|
+
lastUserTextIndex = i;
|
|
3787
|
+
break;
|
|
3788
|
+
}
|
|
3789
|
+
}
|
|
3790
|
+
}
|
|
3791
|
+
const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
|
|
3792
|
+
for (let i = startIndex; i < messages.length; i++) {
|
|
3793
|
+
const msg = messages[i];
|
|
3794
|
+
if (!msg || !Array.isArray(msg.content)) continue;
|
|
3795
|
+
for (const block of msg.content) {
|
|
3796
|
+
if (block?.type === 'tool_result' && block?.content) {
|
|
3797
|
+
const content = typeof block.content === 'string'
|
|
3798
|
+
? block.content
|
|
3799
|
+
: JSON.stringify(block.content);
|
|
3800
|
+
if (content && !content.includes('Found 0')) {
|
|
3801
|
+
toolResultsSummary += content + "\n";
|
|
3802
|
+
}
|
|
3391
3803
|
}
|
|
3392
3804
|
}
|
|
3393
3805
|
}
|
|
3394
|
-
}
|
|
3395
3806
|
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3401
|
-
|
|
3807
|
+
let responseText = `Based on the tool results, here's what I found:\n\n`;
|
|
3808
|
+
if (toolResultsSummary.trim()) {
|
|
3809
|
+
responseText += toolResultsSummary.trim();
|
|
3810
|
+
} else {
|
|
3811
|
+
responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
|
|
3812
|
+
}
|
|
3813
|
+
|
|
3814
|
+
const forcedResponse = {
|
|
3815
|
+
id: `msg_forced_${Date.now()}`,
|
|
3816
|
+
type: "message",
|
|
3817
|
+
role: "assistant",
|
|
3818
|
+
content: [{ type: "text", text: responseText }],
|
|
3819
|
+
model: requestedModel || "unknown",
|
|
3820
|
+
stop_reason: "end_turn",
|
|
3821
|
+
stop_sequence: null,
|
|
3822
|
+
usage: { input_tokens: 0, output_tokens: 100 },
|
|
3823
|
+
};
|
|
3824
|
+
|
|
3825
|
+
return {
|
|
3826
|
+
status: 200,
|
|
3827
|
+
body: forcedResponse,
|
|
3828
|
+
terminationReason: "tool_loop_guard",
|
|
3829
|
+
};
|
|
3402
3830
|
}
|
|
3831
|
+
} else {
|
|
3832
|
+
// Server mode: use existing threshold 2 with countToolCallsInHistory
|
|
3833
|
+
const effectiveThreshold = toolLoopThreshold;
|
|
3834
|
+
|
|
3835
|
+
if (toolResultCount >= effectiveThreshold) {
|
|
3836
|
+
logger.error({
|
|
3837
|
+
toolResultCount,
|
|
3838
|
+
toolUseCount,
|
|
3839
|
+
threshold: effectiveThreshold,
|
|
3840
|
+
sessionId: session?.id ?? null,
|
|
3841
|
+
}, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
|
|
3842
|
+
|
|
3843
|
+
let toolResultsSummary = "";
|
|
3844
|
+
const messages = payload?.messages || [];
|
|
3845
|
+
let lastUserTextIndex = -1;
|
|
3846
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3847
|
+
const msg = messages[i];
|
|
3848
|
+
if (msg?.role !== 'user') continue;
|
|
3849
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
|
|
3850
|
+
lastUserTextIndex = i;
|
|
3851
|
+
break;
|
|
3852
|
+
}
|
|
3853
|
+
if (Array.isArray(msg.content)) {
|
|
3854
|
+
const hasText = msg.content.some(block =>
|
|
3855
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
3856
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
3857
|
+
);
|
|
3858
|
+
if (hasText) {
|
|
3859
|
+
lastUserTextIndex = i;
|
|
3860
|
+
break;
|
|
3861
|
+
}
|
|
3862
|
+
}
|
|
3863
|
+
}
|
|
3864
|
+
const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
|
|
3865
|
+
for (let i = startIndex; i < messages.length; i++) {
|
|
3866
|
+
const msg = messages[i];
|
|
3867
|
+
if (!msg || !Array.isArray(msg.content)) continue;
|
|
3868
|
+
for (const block of msg.content) {
|
|
3869
|
+
if (block?.type === 'tool_result' && block?.content) {
|
|
3870
|
+
const content = typeof block.content === 'string'
|
|
3871
|
+
? block.content
|
|
3872
|
+
: JSON.stringify(block.content);
|
|
3873
|
+
if (content && !content.includes('Found 0')) {
|
|
3874
|
+
toolResultsSummary += content + "\n";
|
|
3875
|
+
}
|
|
3876
|
+
}
|
|
3877
|
+
}
|
|
3878
|
+
}
|
|
3403
3879
|
|
|
3404
|
-
|
|
3405
|
-
|
|
3406
|
-
|
|
3407
|
-
|
|
3408
|
-
|
|
3409
|
-
|
|
3410
|
-
{
|
|
3411
|
-
type: "text",
|
|
3412
|
-
text: responseText,
|
|
3413
|
-
},
|
|
3414
|
-
],
|
|
3415
|
-
model: requestedModel || "unknown",
|
|
3416
|
-
stop_reason: "end_turn",
|
|
3417
|
-
stop_sequence: null,
|
|
3418
|
-
usage: {
|
|
3419
|
-
input_tokens: 0,
|
|
3420
|
-
output_tokens: 100,
|
|
3421
|
-
},
|
|
3422
|
-
};
|
|
3880
|
+
let responseText = `Based on the tool results, here's what I found:\n\n`;
|
|
3881
|
+
if (toolResultsSummary.trim()) {
|
|
3882
|
+
responseText += toolResultsSummary.trim();
|
|
3883
|
+
} else {
|
|
3884
|
+
responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
|
|
3885
|
+
}
|
|
3423
3886
|
|
|
3424
|
-
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
|
|
3887
|
+
const forcedResponse = {
|
|
3888
|
+
id: `msg_forced_${Date.now()}`,
|
|
3889
|
+
type: "message",
|
|
3890
|
+
role: "assistant",
|
|
3891
|
+
content: [{ type: "text", text: responseText }],
|
|
3892
|
+
model: requestedModel || "unknown",
|
|
3893
|
+
stop_reason: "end_turn",
|
|
3894
|
+
stop_sequence: null,
|
|
3895
|
+
usage: { input_tokens: 0, output_tokens: 100 },
|
|
3896
|
+
};
|
|
3897
|
+
|
|
3898
|
+
return {
|
|
3899
|
+
status: 200,
|
|
3900
|
+
body: forcedResponse,
|
|
3901
|
+
terminationReason: "tool_loop_guard",
|
|
3902
|
+
};
|
|
3903
|
+
}
|
|
3429
3904
|
}
|
|
3430
3905
|
|
|
3431
3906
|
const cleanPayload = sanitizePayload(payload);
|