lynkr 7.2.4 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +5 -3
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +82 -5
- package/src/config/index.js +119 -35
- package/src/context/toon.js +173 -0
- package/src/headroom/launcher.js +8 -3
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +765 -212
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-registry.js +437 -0
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/src/tools/workspace.js +35 -4
- package/src/workspace/index.js +30 -0
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
const config = require("../config");
|
|
2
2
|
const { invokeModel } = require("../clients/databricks");
|
|
3
3
|
const { appendTurnToSession } = require("../sessions/record");
|
|
4
|
+
const { upsertSession } = require("../sessions/store");
|
|
4
5
|
const { executeToolCall } = require("../tools");
|
|
5
6
|
const policy = require("../policy");
|
|
6
7
|
const logger = require("../logger");
|
|
@@ -10,6 +11,7 @@ const tokens = require("../utils/tokens");
|
|
|
10
11
|
const systemPrompt = require("../prompts/system");
|
|
11
12
|
const historyCompression = require("../context/compression");
|
|
12
13
|
const tokenBudget = require("../context/budget");
|
|
14
|
+
const { applyToonCompression } = require("../context/toon");
|
|
13
15
|
const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection");
|
|
14
16
|
const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom");
|
|
15
17
|
const { createAuditLogger } = require("../logger/audit-logger");
|
|
@@ -19,6 +21,8 @@ const crypto = require("crypto");
|
|
|
19
21
|
const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers");
|
|
20
22
|
const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic");
|
|
21
23
|
const lazyLoader = require("../tools/lazy-loader");
|
|
24
|
+
const { areSimilarToolCalls } = require("../clients/gpt-utils");
|
|
25
|
+
const { getModelRegistrySync } = require("../routing/model-registry");
|
|
22
26
|
|
|
23
27
|
/**
|
|
24
28
|
* Get destination URL for audit logging based on provider type
|
|
@@ -49,6 +53,8 @@ function getDestinationUrl(providerType) {
|
|
|
49
53
|
return config.zai?.endpoint ?? 'unknown';
|
|
50
54
|
case 'vertex':
|
|
51
55
|
return config.vertex?.endpoint ?? 'unknown';
|
|
56
|
+
case 'moonshot':
|
|
57
|
+
return config.moonshot?.endpoint ?? 'unknown';
|
|
52
58
|
default:
|
|
53
59
|
return 'unknown';
|
|
54
60
|
}
|
|
@@ -455,6 +461,192 @@ function injectToolLoopStopInstruction(messages, threshold = 5) {
|
|
|
455
461
|
return messages;
|
|
456
462
|
}
|
|
457
463
|
|
|
464
|
+
// === CROSS-REQUEST TOOL CALL DEDUP TRACKING ===
|
|
465
|
+
// These helpers track tool call signatures across multiple HTTP requests within
|
|
466
|
+
// the same session (client/passthrough mode). The inner-loop detection in
|
|
467
|
+
// runAgentLoop() only sees one request at a time, so repeated calls across
|
|
468
|
+
// requests escape it.
|
|
469
|
+
|
|
470
|
+
const DEDUP_MAX_SIGNATURES = 50;
|
|
471
|
+
const DEDUP_WARN_THRESHOLD = 2;
|
|
472
|
+
const DEDUP_TERMINATE_THRESHOLD = 3;
|
|
473
|
+
|
|
474
|
+
/**
|
|
475
|
+
* Initialise session.metadata.toolCallDedup if missing.
|
|
476
|
+
* @param {Object} session
|
|
477
|
+
*/
|
|
478
|
+
function ensureDedupStructure(session) {
|
|
479
|
+
if (!session || !session.metadata) return;
|
|
480
|
+
if (!session.metadata.toolCallDedup) {
|
|
481
|
+
session.metadata.toolCallDedup = {
|
|
482
|
+
signatures: {},
|
|
483
|
+
similarGroups: {},
|
|
484
|
+
lastResetAt: Date.now(),
|
|
485
|
+
warningInjected: false,
|
|
486
|
+
};
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
/**
|
|
491
|
+
* Record a tool call into the cross-request dedup tracker.
|
|
492
|
+
* Handles similarity merging and enforces the 50-entry cap.
|
|
493
|
+
* @param {Object} session
|
|
494
|
+
* @param {Object} toolCall - tool_use block (Anthropic format: { name, input, id })
|
|
495
|
+
*/
|
|
496
|
+
function recordCrossRequestToolCall(session, toolCall) {
|
|
497
|
+
if (!session?.metadata) return;
|
|
498
|
+
ensureDedupStructure(session);
|
|
499
|
+
|
|
500
|
+
const dedup = session.metadata.toolCallDedup;
|
|
501
|
+
const signature = getToolCallSignature(toolCall);
|
|
502
|
+
const toolName = toolCall.function?.name ?? toolCall.name ?? 'unknown';
|
|
503
|
+
const args = toolCall.function?.arguments ?? toolCall.input;
|
|
504
|
+
const argsPreview = (typeof args === 'string' ? args : JSON.stringify(args ?? {})).substring(0, 200);
|
|
505
|
+
const now = Date.now();
|
|
506
|
+
|
|
507
|
+
// Check if this signature maps to a canonical via similarity groups
|
|
508
|
+
const canonicalSig = dedup.similarGroups[signature] || signature;
|
|
509
|
+
|
|
510
|
+
if (dedup.signatures[canonicalSig]) {
|
|
511
|
+
dedup.signatures[canonicalSig].count += 1;
|
|
512
|
+
dedup.signatures[canonicalSig].lastSeen = now;
|
|
513
|
+
} else {
|
|
514
|
+
// Check for similar existing entries before creating a new one
|
|
515
|
+
let mergedInto = null;
|
|
516
|
+
for (const [existingSig, existingData] of Object.entries(dedup.signatures)) {
|
|
517
|
+
// Build a fake call object from stored data to compare with areSimilarToolCalls
|
|
518
|
+
const existingCall = {
|
|
519
|
+
name: existingData.toolName,
|
|
520
|
+
input: existingData.argsPreview,
|
|
521
|
+
};
|
|
522
|
+
if (areSimilarToolCalls(toolCall, existingCall)) {
|
|
523
|
+
// Merge: map this signature to the existing canonical
|
|
524
|
+
dedup.similarGroups[signature] = existingSig;
|
|
525
|
+
dedup.signatures[existingSig].count += 1;
|
|
526
|
+
dedup.signatures[existingSig].lastSeen = now;
|
|
527
|
+
mergedInto = existingSig;
|
|
528
|
+
logger.debug({
|
|
529
|
+
newSignature: signature,
|
|
530
|
+
canonicalSignature: existingSig,
|
|
531
|
+
toolName,
|
|
532
|
+
count: dedup.signatures[existingSig].count,
|
|
533
|
+
}, "Cross-request tool dedup: merged similar call");
|
|
534
|
+
break;
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
if (!mergedInto) {
|
|
539
|
+
// New unique signature
|
|
540
|
+
dedup.signatures[signature] = {
|
|
541
|
+
count: 1,
|
|
542
|
+
toolName,
|
|
543
|
+
firstSeen: now,
|
|
544
|
+
lastSeen: now,
|
|
545
|
+
argsPreview,
|
|
546
|
+
};
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
// Enforce cap: evict oldest entries if over limit
|
|
551
|
+
const sigKeys = Object.keys(dedup.signatures);
|
|
552
|
+
if (sigKeys.length > DEDUP_MAX_SIGNATURES) {
|
|
553
|
+
const sorted = sigKeys.sort(
|
|
554
|
+
(a, b) => dedup.signatures[a].lastSeen - dedup.signatures[b].lastSeen
|
|
555
|
+
);
|
|
556
|
+
const toRemove = sorted.slice(0, sigKeys.length - DEDUP_MAX_SIGNATURES);
|
|
557
|
+
for (const key of toRemove) {
|
|
558
|
+
delete dedup.signatures[key];
|
|
559
|
+
// Also clean up any similarGroups pointing to this key
|
|
560
|
+
for (const [groupSig, canonical] of Object.entries(dedup.similarGroups)) {
|
|
561
|
+
if (canonical === key) delete dedup.similarGroups[groupSig];
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
/**
|
|
568
|
+
* Return the highest dedup count, the associated tool name, and signature.
|
|
569
|
+
* @param {Object} session
|
|
570
|
+
* @returns {{ maxCount: number, toolName: string|null, signature: string|null }}
|
|
571
|
+
*/
|
|
572
|
+
function getMaxDedupCount(session) {
|
|
573
|
+
if (!session?.metadata?.toolCallDedup?.signatures) {
|
|
574
|
+
return { maxCount: 0, toolName: null, signature: null };
|
|
575
|
+
}
|
|
576
|
+
const sigs = session.metadata.toolCallDedup.signatures;
|
|
577
|
+
let maxCount = 0;
|
|
578
|
+
let toolName = null;
|
|
579
|
+
let signature = null;
|
|
580
|
+
for (const [sig, data] of Object.entries(sigs)) {
|
|
581
|
+
if (data.count > maxCount) {
|
|
582
|
+
maxCount = data.count;
|
|
583
|
+
toolName = data.toolName;
|
|
584
|
+
signature = sig;
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
return { maxCount, toolName, signature };
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
/**
|
|
591
|
+
* Extract tool_use blocks from messages that appear after the last user text message.
|
|
592
|
+
* These are the tool calls from the current assistant turn that the client is sending back.
|
|
593
|
+
* @param {Array} messages
|
|
594
|
+
* @returns {Array} - Array of tool_use-like objects
|
|
595
|
+
*/
|
|
596
|
+
function extractToolUseFromCurrentTurn(messages) {
|
|
597
|
+
if (!Array.isArray(messages)) return [];
|
|
598
|
+
|
|
599
|
+
// Find last user text message
|
|
600
|
+
let lastUserTextIndex = -1;
|
|
601
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
602
|
+
const msg = messages[i];
|
|
603
|
+
if (msg?.role !== 'user') continue;
|
|
604
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
|
|
605
|
+
lastUserTextIndex = i;
|
|
606
|
+
break;
|
|
607
|
+
}
|
|
608
|
+
if (Array.isArray(msg.content)) {
|
|
609
|
+
const hasText = msg.content.some(block =>
|
|
610
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
611
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
612
|
+
);
|
|
613
|
+
if (hasText) {
|
|
614
|
+
lastUserTextIndex = i;
|
|
615
|
+
break;
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
const toolUseBlocks = [];
|
|
621
|
+
const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
|
|
622
|
+
for (let i = startIndex; i < messages.length; i++) {
|
|
623
|
+
const msg = messages[i];
|
|
624
|
+
if (msg?.role !== 'assistant') continue;
|
|
625
|
+
if (!Array.isArray(msg.content)) continue;
|
|
626
|
+
for (const block of msg.content) {
|
|
627
|
+
if (block?.type === 'tool_use') {
|
|
628
|
+
toolUseBlocks.push(block);
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
return toolUseBlocks;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
/**
|
|
636
|
+
* Reset dedup tracking. Called when a new user question is detected.
|
|
637
|
+
* @param {Object} session
|
|
638
|
+
*/
|
|
639
|
+
function resetDedupTracking(session) {
|
|
640
|
+
if (!session?.metadata) return;
|
|
641
|
+
session.metadata.toolCallDedup = {
|
|
642
|
+
signatures: {},
|
|
643
|
+
similarGroups: {},
|
|
644
|
+
lastResetAt: Date.now(),
|
|
645
|
+
warningInjected: false,
|
|
646
|
+
};
|
|
647
|
+
logger.debug({ sessionId: session?.id ?? null }, "Cross-request tool dedup: reset tracking for new user question");
|
|
648
|
+
}
|
|
649
|
+
|
|
458
650
|
function sanitiseAzureTools(tools) {
|
|
459
651
|
if (!Array.isArray(tools) || tools.length === 0) return undefined;
|
|
460
652
|
const allowed = new Set([
|
|
@@ -516,13 +708,51 @@ function parseExecutionContent(content) {
|
|
|
516
708
|
const trimmed = content.trim();
|
|
517
709
|
if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
|
|
518
710
|
try {
|
|
519
|
-
|
|
711
|
+
const parsed = JSON.parse(trimmed);
|
|
712
|
+
// Handle Anthropic content blocks array - extract text
|
|
713
|
+
if (Array.isArray(parsed)) {
|
|
714
|
+
const textParts = parsed
|
|
715
|
+
.filter(block => block && typeof block === 'object')
|
|
716
|
+
.map(block => {
|
|
717
|
+
if (block.type === 'text' && typeof block.text === 'string') {
|
|
718
|
+
return block.text;
|
|
719
|
+
}
|
|
720
|
+
// Handle other block types gracefully
|
|
721
|
+
if (block.text) return block.text;
|
|
722
|
+
if (block.content) return typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
723
|
+
return null;
|
|
724
|
+
})
|
|
725
|
+
.filter(text => text !== null);
|
|
726
|
+
|
|
727
|
+
if (textParts.length > 0) {
|
|
728
|
+
return textParts.join('\n');
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
return parsed;
|
|
520
732
|
} catch {
|
|
521
733
|
return content;
|
|
522
734
|
}
|
|
523
735
|
}
|
|
524
736
|
return content;
|
|
525
737
|
}
|
|
738
|
+
// Handle content that's already an array (content blocks)
|
|
739
|
+
if (Array.isArray(content)) {
|
|
740
|
+
const textParts = content
|
|
741
|
+
.filter(block => block && typeof block === 'object')
|
|
742
|
+
.map(block => {
|
|
743
|
+
if (block.type === 'text' && typeof block.text === 'string') {
|
|
744
|
+
return block.text;
|
|
745
|
+
}
|
|
746
|
+
if (block.text) return block.text;
|
|
747
|
+
if (block.content) return typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
|
|
748
|
+
return null;
|
|
749
|
+
})
|
|
750
|
+
.filter(text => text !== null);
|
|
751
|
+
|
|
752
|
+
if (textParts.length > 0) {
|
|
753
|
+
return textParts.join('\n');
|
|
754
|
+
}
|
|
755
|
+
}
|
|
526
756
|
return content;
|
|
527
757
|
}
|
|
528
758
|
|
|
@@ -718,19 +948,17 @@ function stripThinkingBlocks(text) {
|
|
|
718
948
|
return cleanedLines.join("\n").trim();
|
|
719
949
|
}
|
|
720
950
|
|
|
951
|
+
/**
|
|
952
|
+
* Convert legacy Ollama /api/chat response to Anthropic Messages format.
|
|
953
|
+
* Used when Ollama < v0.14.0 (no native Anthropic endpoint).
|
|
954
|
+
*/
|
|
721
955
|
function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
|
|
722
|
-
// Ollama response format:
|
|
723
|
-
// { model, created_at, message: { role, content, tool_calls }, done, total_duration, ... }
|
|
724
|
-
// { eval_count, prompt_eval_count, ... }
|
|
725
|
-
|
|
726
956
|
const message = ollamaResponse?.message ?? {};
|
|
727
957
|
const rawContent = message.content || "";
|
|
728
958
|
const toolCalls = message.tool_calls || [];
|
|
729
959
|
|
|
730
|
-
// Build content blocks
|
|
731
960
|
const contentItems = [];
|
|
732
961
|
|
|
733
|
-
// Add text content if present, after stripping thinking blocks
|
|
734
962
|
if (typeof rawContent === "string" && rawContent.trim()) {
|
|
735
963
|
const cleanedContent = stripThinkingBlocks(rawContent);
|
|
736
964
|
if (cleanedContent) {
|
|
@@ -738,18 +966,31 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
|
|
|
738
966
|
}
|
|
739
967
|
}
|
|
740
968
|
|
|
741
|
-
//
|
|
969
|
+
// Convert tool calls from OpenAI function-calling format to Anthropic tool_use
|
|
742
970
|
if (Array.isArray(toolCalls) && toolCalls.length > 0) {
|
|
743
|
-
const
|
|
744
|
-
|
|
745
|
-
|
|
971
|
+
for (const toolCall of toolCalls) {
|
|
972
|
+
const func = toolCall.function || {};
|
|
973
|
+
let input = {};
|
|
974
|
+
if (func.arguments) {
|
|
975
|
+
if (typeof func.arguments === "string") {
|
|
976
|
+
try { input = JSON.parse(func.arguments); } catch { input = {}; }
|
|
977
|
+
} else if (typeof func.arguments === "object") {
|
|
978
|
+
input = func.arguments;
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
contentItems.push({
|
|
982
|
+
type: "tool_use",
|
|
983
|
+
id: toolCall.id || `toolu_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
984
|
+
name: func.name || "unknown",
|
|
985
|
+
input,
|
|
986
|
+
});
|
|
987
|
+
}
|
|
746
988
|
}
|
|
747
989
|
|
|
748
990
|
if (contentItems.length === 0) {
|
|
749
991
|
contentItems.push({ type: "text", text: "" });
|
|
750
992
|
}
|
|
751
993
|
|
|
752
|
-
// Ollama uses different token count fields
|
|
753
994
|
const inputTokens = ollamaResponse.prompt_eval_count ?? 0;
|
|
754
995
|
const outputTokens = ollamaResponse.eval_count ?? 0;
|
|
755
996
|
|
|
@@ -759,7 +1000,8 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
|
|
|
759
1000
|
role: "assistant",
|
|
760
1001
|
model: requestedModel,
|
|
761
1002
|
content: contentItems,
|
|
762
|
-
stop_reason:
|
|
1003
|
+
stop_reason: toolCalls.length > 0 ? "tool_use" :
|
|
1004
|
+
ollamaResponse.done ? "end_turn" : "max_tokens",
|
|
763
1005
|
stop_sequence: null,
|
|
764
1006
|
usage: {
|
|
765
1007
|
input_tokens: inputTokens,
|
|
@@ -851,6 +1093,9 @@ function sanitizePayload(payload) {
|
|
|
851
1093
|
config.modelProvider?.defaultModel ??
|
|
852
1094
|
"databricks-claude-sonnet-4-5";
|
|
853
1095
|
clean.model = requestedModel;
|
|
1096
|
+
if (!clean.max_tokens) {
|
|
1097
|
+
clean.max_tokens = 16384;
|
|
1098
|
+
}
|
|
854
1099
|
const providerType = config.modelProvider?.type ?? "databricks";
|
|
855
1100
|
const flattenContent = providerType !== "azure-anthropic";
|
|
856
1101
|
clean.messages = normaliseMessages(clean, { flattenContent }).filter((msg) => {
|
|
@@ -995,12 +1240,10 @@ function sanitizePayload(payload) {
|
|
|
995
1240
|
// Check if this is a simple conversational message (no tools needed)
|
|
996
1241
|
const isConversational = (() => {
|
|
997
1242
|
if (!Array.isArray(clean.messages) || clean.messages.length === 0) {
|
|
998
|
-
logger.debug({ reason: "No messages array" }, "Ollama conversational check");
|
|
999
1243
|
return false;
|
|
1000
1244
|
}
|
|
1001
1245
|
const lastMessage = clean.messages[clean.messages.length - 1];
|
|
1002
1246
|
if (lastMessage?.role !== "user") {
|
|
1003
|
-
logger.debug({ role: lastMessage?.role }, "Ollama conversational check - not user");
|
|
1004
1247
|
return false;
|
|
1005
1248
|
}
|
|
1006
1249
|
|
|
@@ -1008,28 +1251,18 @@ function sanitizePayload(payload) {
|
|
|
1008
1251
|
? lastMessage.content
|
|
1009
1252
|
: "";
|
|
1010
1253
|
|
|
1011
|
-
logger.debug({
|
|
1012
|
-
contentType: typeof lastMessage.content,
|
|
1013
|
-
isString: typeof lastMessage.content === "string",
|
|
1014
|
-
contentLength: typeof lastMessage.content === "string" ? lastMessage.content.length : "N/A",
|
|
1015
|
-
actualContent: typeof lastMessage.content === "string" ? lastMessage.content.substring(0, 100) : JSON.stringify(lastMessage.content).substring(0, 100)
|
|
1016
|
-
}, "Ollama conversational check - analyzing content");
|
|
1017
|
-
|
|
1018
1254
|
const trimmed = content.trim().toLowerCase();
|
|
1019
1255
|
|
|
1020
1256
|
// Simple greetings
|
|
1021
1257
|
if (/^(hi|hello|hey|good morning|good afternoon|good evening|howdy|greetings)[\s\.\!\?]*$/.test(trimmed)) {
|
|
1022
|
-
|
|
1023
|
-
return true;
|
|
1258
|
+
return "greeting";
|
|
1024
1259
|
}
|
|
1025
1260
|
|
|
1026
|
-
//
|
|
1027
|
-
if (
|
|
1028
|
-
|
|
1029
|
-
return true;
|
|
1261
|
+
// Conversational phrases that don't need tools (thanks, farewells, acknowledgements)
|
|
1262
|
+
if (/^(thanks|thank you|thx|ty|bye|goodbye|see you|ok|okay|cool|nice|great|awesome|sure|got it|sounds good|no worries|np|cheers)[\s\.\!\?]*$/.test(trimmed)) {
|
|
1263
|
+
return "conversational";
|
|
1030
1264
|
}
|
|
1031
1265
|
|
|
1032
|
-
logger.debug({ trimmed: trimmed.substring(0, 50), length: trimmed.length }, "Ollama conversational check - not matched");
|
|
1033
1266
|
return false;
|
|
1034
1267
|
})();
|
|
1035
1268
|
|
|
@@ -1039,37 +1272,12 @@ function sanitizePayload(payload) {
|
|
|
1039
1272
|
delete clean.tool_choice;
|
|
1040
1273
|
logger.debug({
|
|
1041
1274
|
model: config.ollama?.model,
|
|
1042
|
-
|
|
1043
|
-
}, "Ollama conversational mode");
|
|
1275
|
+
reason: isConversational,
|
|
1276
|
+
}, "Ollama conversational mode - tools removed");
|
|
1044
1277
|
} else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) {
|
|
1045
|
-
// Ollama
|
|
1046
|
-
//
|
|
1047
|
-
|
|
1048
|
-
"Bash",
|
|
1049
|
-
"Read",
|
|
1050
|
-
"Write",
|
|
1051
|
-
"Edit",
|
|
1052
|
-
"Glob",
|
|
1053
|
-
"Grep",
|
|
1054
|
-
"WebSearch",
|
|
1055
|
-
"WebFetch"
|
|
1056
|
-
]);
|
|
1057
|
-
|
|
1058
|
-
const limitedTools = clean.tools.filter(tool =>
|
|
1059
|
-
OLLAMA_ESSENTIAL_TOOLS.has(tool.name)
|
|
1060
|
-
);
|
|
1061
|
-
|
|
1062
|
-
logger.debug({
|
|
1063
|
-
model: config.ollama?.model,
|
|
1064
|
-
originalToolCount: clean.tools.length,
|
|
1065
|
-
limitedToolCount: limitedTools.length,
|
|
1066
|
-
keptTools: limitedTools.map(t => t.name)
|
|
1067
|
-
}, "Ollama tools limited for performance");
|
|
1068
|
-
|
|
1069
|
-
clean.tools = limitedTools.length > 0 ? limitedTools : undefined;
|
|
1070
|
-
if (!clean.tools) {
|
|
1071
|
-
delete clean.tools;
|
|
1072
|
-
}
|
|
1278
|
+
// Keep all tools — Ollama receives them in Anthropic format (native API)
|
|
1279
|
+
// or they get converted to OpenAI format in invokeOllama (legacy API)
|
|
1280
|
+
clean.tools = ensureAnthropicToolFormat(clean.tools);
|
|
1073
1281
|
} else {
|
|
1074
1282
|
// Remove tools for models without tool support
|
|
1075
1283
|
delete clean.tools;
|
|
@@ -1097,6 +1305,14 @@ function sanitizePayload(payload) {
|
|
|
1097
1305
|
} else {
|
|
1098
1306
|
clean.tools = ensureAnthropicToolFormat(clean.tools);
|
|
1099
1307
|
}
|
|
1308
|
+
} else if (providerType === "moonshot") {
|
|
1309
|
+
// Moonshot supports tools - keep them in Anthropic format
|
|
1310
|
+
// They will be converted to OpenAI format in invokeMoonshot
|
|
1311
|
+
if (!Array.isArray(clean.tools) || clean.tools.length === 0) {
|
|
1312
|
+
delete clean.tools;
|
|
1313
|
+
} else {
|
|
1314
|
+
clean.tools = ensureAnthropicToolFormat(clean.tools);
|
|
1315
|
+
}
|
|
1100
1316
|
} else if (Array.isArray(clean.tools)) {
|
|
1101
1317
|
// Unknown provider - remove tools for safety
|
|
1102
1318
|
delete clean.tools;
|
|
@@ -1172,6 +1388,10 @@ function sanitizePayload(payload) {
|
|
|
1172
1388
|
}
|
|
1173
1389
|
}
|
|
1174
1390
|
|
|
1391
|
+
// Optional TOON conversion for large JSON message payloads (prompt context only).
|
|
1392
|
+
// Run this BEFORE message coalescing to preserve parseable JSON boundaries.
|
|
1393
|
+
applyToonCompression(clean, config.toon, { logger });
|
|
1394
|
+
|
|
1175
1395
|
// FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
|
|
1176
1396
|
// Strategy: Merge all consecutive messages, add instruction to focus on last request
|
|
1177
1397
|
if (Array.isArray(clean.messages) && clean.messages.length > 0) {
|
|
@@ -1210,7 +1430,7 @@ function sanitizePayload(payload) {
|
|
|
1210
1430
|
}
|
|
1211
1431
|
|
|
1212
1432
|
if (merged.length !== clean.messages.length) {
|
|
1213
|
-
logger.
|
|
1433
|
+
logger.debug({
|
|
1214
1434
|
originalCount: clean.messages.length,
|
|
1215
1435
|
mergedCount: merged.length,
|
|
1216
1436
|
reduced: clean.messages.length - merged.length
|
|
@@ -1220,19 +1440,20 @@ function sanitizePayload(payload) {
|
|
|
1220
1440
|
clean.messages = merged;
|
|
1221
1441
|
}
|
|
1222
1442
|
|
|
1223
|
-
// [CONTEXT_FLOW] Log payload after sanitization
|
|
1224
1443
|
logger.debug({
|
|
1225
1444
|
providerType: config.modelProvider?.type ?? "databricks",
|
|
1226
|
-
phase: "after_sanitize",
|
|
1227
|
-
systemField: typeof clean.system === 'string'
|
|
1228
|
-
? { type: 'string', length: clean.system.length }
|
|
1229
|
-
: clean.system
|
|
1230
|
-
? { type: typeof clean.system, value: clean.system }
|
|
1231
|
-
: undefined,
|
|
1232
1445
|
messageCount: clean.messages?.length ?? 0,
|
|
1233
|
-
firstMessageHasSystem: clean.messages?.[0]?.content?.includes?.('You are Claude Code') ?? false,
|
|
1234
1446
|
toolCount: clean.tools?.length ?? 0
|
|
1235
|
-
}, '
|
|
1447
|
+
}, 'After sanitizePayload');
|
|
1448
|
+
|
|
1449
|
+
// === Suggestion mode: tag request and override model if configured ===
|
|
1450
|
+
const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages);
|
|
1451
|
+
clean._requestMode = isSuggestion ? "suggestion" : "main";
|
|
1452
|
+
const smConfig = config.modelProvider?.suggestionModeModel ?? "default";
|
|
1453
|
+
if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") {
|
|
1454
|
+
clean.model = smConfig;
|
|
1455
|
+
clean._suggestionModeModel = smConfig;
|
|
1456
|
+
}
|
|
1236
1457
|
|
|
1237
1458
|
return clean;
|
|
1238
1459
|
}
|
|
@@ -1330,8 +1551,7 @@ async function runAgentLoop({
|
|
|
1330
1551
|
providerType,
|
|
1331
1552
|
headers,
|
|
1332
1553
|
}) {
|
|
1333
|
-
|
|
1334
|
-
logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED');
|
|
1554
|
+
logger.debug({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop entered');
|
|
1335
1555
|
const settings = resolveLoopOptions(options);
|
|
1336
1556
|
// Initialize audit logger (no-op if disabled)
|
|
1337
1557
|
const auditLogger = createAuditLogger(config.audit);
|
|
@@ -1378,7 +1598,6 @@ async function runAgentLoop({
|
|
|
1378
1598
|
}
|
|
1379
1599
|
|
|
1380
1600
|
steps += 1;
|
|
1381
|
-
console.log('[LOOP DEBUG] Entered while loop - step:', steps);
|
|
1382
1601
|
logger.debug(
|
|
1383
1602
|
{
|
|
1384
1603
|
sessionId: session?.id ?? null,
|
|
@@ -1388,6 +1607,19 @@ async function runAgentLoop({
|
|
|
1388
1607
|
"Agent loop step",
|
|
1389
1608
|
);
|
|
1390
1609
|
|
|
1610
|
+
// Trim messages when they grow too large to prevent OOM.
|
|
1611
|
+
// Keep the first message (system/user) and the last MAX_LOOP_MESSAGES.
|
|
1612
|
+
const MAX_LOOP_MESSAGES = 40;
|
|
1613
|
+
if (cleanPayload.messages && cleanPayload.messages.length > MAX_LOOP_MESSAGES) {
|
|
1614
|
+
const excess = cleanPayload.messages.length - MAX_LOOP_MESSAGES;
|
|
1615
|
+
// Keep first 2 messages (system context + initial user) and trim from the middle
|
|
1616
|
+
cleanPayload.messages.splice(2, excess);
|
|
1617
|
+
logger.debug(
|
|
1618
|
+
{ trimmed: excess, remaining: cleanPayload.messages.length },
|
|
1619
|
+
"Trimmed intermediate messages to prevent memory growth",
|
|
1620
|
+
);
|
|
1621
|
+
}
|
|
1622
|
+
|
|
1391
1623
|
// Debug: Log payload before sending to Azure
|
|
1392
1624
|
if (providerType === "azure-anthropic") {
|
|
1393
1625
|
logger.debug(
|
|
@@ -1472,14 +1704,11 @@ async function runAgentLoop({
|
|
|
1472
1704
|
}
|
|
1473
1705
|
}
|
|
1474
1706
|
|
|
1475
|
-
// [CONTEXT_FLOW] Log after memory injection
|
|
1476
1707
|
logger.debug({
|
|
1477
1708
|
sessionId: session?.id ?? null,
|
|
1478
|
-
phase: "after_memory",
|
|
1479
|
-
systemPromptLength: cleanPayload.system?.length ?? 0,
|
|
1480
1709
|
messageCount: cleanPayload.messages?.length ?? 0,
|
|
1481
1710
|
toolCount: cleanPayload.tools?.length ?? 0
|
|
1482
|
-
}, '
|
|
1711
|
+
}, 'After memory injection');
|
|
1483
1712
|
|
|
1484
1713
|
if (steps === 1 && (config.systemPrompt?.mode === 'dynamic' || config.systemPrompt?.toolDescriptions === 'minimal')) {
|
|
1485
1714
|
try {
|
|
@@ -1568,9 +1797,26 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1568
1797
|
logger.debug({ sessionId: session?.id ?? null }, 'Tool termination instructions injected for non-Claude model');
|
|
1569
1798
|
}
|
|
1570
1799
|
|
|
1800
|
+
// Compute model-aware token budget thresholds
|
|
1801
|
+
const registry = getModelRegistrySync();
|
|
1802
|
+
const modelInfo = registry.getCost(requestedModel);
|
|
1803
|
+
const modelContextWindow = modelInfo?.context || config.tokenBudget?.max || 180000;
|
|
1804
|
+
const modelMax = Math.floor(modelContextWindow * 0.85);
|
|
1805
|
+
const effectiveMax = Math.min(modelMax, config.tokenBudget?.max || 180000);
|
|
1806
|
+
const effectiveWarning = Math.floor(effectiveMax * 0.65);
|
|
1807
|
+
|
|
1808
|
+
logger.debug({
|
|
1809
|
+
sessionId: session?.id ?? null,
|
|
1810
|
+
requestedModel,
|
|
1811
|
+
modelContextWindow,
|
|
1812
|
+
effectiveWarning,
|
|
1813
|
+
effectiveMax,
|
|
1814
|
+
source: modelInfo?.source || 'default',
|
|
1815
|
+
}, 'Model-aware token budget computed');
|
|
1816
|
+
|
|
1571
1817
|
if (steps === 1 && config.tokenBudget?.enforcement !== false) {
|
|
1572
1818
|
try {
|
|
1573
|
-
const budgetCheck = tokenBudget.checkBudget(cleanPayload);
|
|
1819
|
+
const budgetCheck = tokenBudget.checkBudget(cleanPayload, effectiveWarning, effectiveMax);
|
|
1574
1820
|
|
|
1575
1821
|
if (budgetCheck.atWarning) {
|
|
1576
1822
|
logger.warn({
|
|
@@ -1584,8 +1830,8 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1584
1830
|
if (budgetCheck.overMax) {
|
|
1585
1831
|
// Apply adaptive compression to fit within budget
|
|
1586
1832
|
const enforcement = tokenBudget.enforceBudget(cleanPayload, {
|
|
1587
|
-
warningThreshold:
|
|
1588
|
-
maxThreshold:
|
|
1833
|
+
warningThreshold: effectiveWarning,
|
|
1834
|
+
maxThreshold: effectiveMax,
|
|
1589
1835
|
enforcement: true
|
|
1590
1836
|
});
|
|
1591
1837
|
|
|
@@ -1609,7 +1855,6 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1609
1855
|
}
|
|
1610
1856
|
|
|
1611
1857
|
// Track estimated token usage before model call
|
|
1612
|
-
console.log('[TOKEN DEBUG] About to track token usage - step:', steps);
|
|
1613
1858
|
const estimatedTokens = config.tokenTracking?.enabled !== false
|
|
1614
1859
|
? tokens.countPayloadTokens(cleanPayload)
|
|
1615
1860
|
: null;
|
|
@@ -1623,15 +1868,6 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1623
1868
|
}
|
|
1624
1869
|
|
|
1625
1870
|
// Apply Headroom compression if enabled
|
|
1626
|
-
const headroomEstTokens = Math.ceil(JSON.stringify(cleanPayload.messages || []).length / 4);
|
|
1627
|
-
logger.info({
|
|
1628
|
-
headroomEnabled: isHeadroomEnabled(),
|
|
1629
|
-
messageCount: cleanPayload.messages?.length ?? 0,
|
|
1630
|
-
estimatedTokens: headroomEstTokens,
|
|
1631
|
-
threshold: config.headroom?.minTokens || 500,
|
|
1632
|
-
willCompress: isHeadroomEnabled() && headroomEstTokens >= (config.headroom?.minTokens || 500),
|
|
1633
|
-
}, 'Headroom compression check');
|
|
1634
|
-
|
|
1635
1871
|
if (isHeadroomEnabled() && cleanPayload.messages && cleanPayload.messages.length > 0) {
|
|
1636
1872
|
try {
|
|
1637
1873
|
const compressionResult = await headroomCompress(
|
|
@@ -1640,36 +1876,27 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1640
1876
|
{
|
|
1641
1877
|
mode: config.headroom?.mode,
|
|
1642
1878
|
queryContext: cleanPayload.messages[cleanPayload.messages.length - 1]?.content,
|
|
1879
|
+
model: requestedModel,
|
|
1880
|
+
modelLimit: modelContextWindow,
|
|
1881
|
+
tokenBudget: effectiveMax,
|
|
1643
1882
|
}
|
|
1644
1883
|
);
|
|
1645
1884
|
|
|
1646
|
-
logger.info({
|
|
1647
|
-
compressed: compressionResult.compressed,
|
|
1648
|
-
tokensBefore: compressionResult.stats?.tokens_before,
|
|
1649
|
-
tokensAfter: compressionResult.stats?.tokens_after,
|
|
1650
|
-
savings: compressionResult.stats?.savings_percent ? `${compressionResult.stats.savings_percent}%` : 'N/A',
|
|
1651
|
-
reason: compressionResult.stats?.reason || compressionResult.stats?.transforms_applied?.join(', ') || 'none',
|
|
1652
|
-
}, 'Headroom compression result');
|
|
1653
|
-
|
|
1654
1885
|
if (compressionResult.compressed) {
|
|
1655
1886
|
cleanPayload.messages = compressionResult.messages;
|
|
1656
1887
|
if (compressionResult.tools) {
|
|
1657
1888
|
cleanPayload.tools = compressionResult.tools;
|
|
1658
1889
|
}
|
|
1659
|
-
logger.info({
|
|
1660
|
-
sessionId: session?.id ?? null,
|
|
1661
|
-
tokensBefore: compressionResult.stats?.tokens_before,
|
|
1662
|
-
tokensAfter: compressionResult.stats?.tokens_after,
|
|
1663
|
-
saved: compressionResult.stats?.tokens_saved,
|
|
1664
|
-
savingsPercent: compressionResult.stats?.savings_percent,
|
|
1665
|
-
transforms: compressionResult.stats?.transforms_applied,
|
|
1666
|
-
}, 'Headroom compression applied to request');
|
|
1667
|
-
} else {
|
|
1668
|
-
logger.debug({
|
|
1669
|
-
sessionId: session?.id ?? null,
|
|
1670
|
-
reason: compressionResult.stats?.reason,
|
|
1671
|
-
}, 'Headroom compression skipped');
|
|
1672
1890
|
}
|
|
1891
|
+
|
|
1892
|
+
logger.debug({
|
|
1893
|
+
sessionId: session?.id ?? null,
|
|
1894
|
+
outcome: compressionResult.compressed ? 'applied' : 'skipped',
|
|
1895
|
+
tokensBefore: compressionResult.stats?.tokens_before,
|
|
1896
|
+
tokensAfter: compressionResult.stats?.tokens_after,
|
|
1897
|
+
savingsPercent: compressionResult.stats?.savings_percent,
|
|
1898
|
+
reason: compressionResult.stats?.reason || compressionResult.stats?.transforms_applied?.join(', ') || 'none',
|
|
1899
|
+
}, 'Headroom compression');
|
|
1673
1900
|
} catch (headroomErr) {
|
|
1674
1901
|
logger.warn({ err: headroomErr, sessionId: session?.id ?? null }, 'Headroom compression failed, using original messages');
|
|
1675
1902
|
}
|
|
@@ -1902,11 +2129,26 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1902
2129
|
toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
|
|
1903
2130
|
}
|
|
1904
2131
|
|
|
2132
|
+
// Guard: drop hallucinated tool calls when no tools were sent to the model.
|
|
2133
|
+
// Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation
|
|
2134
|
+
// history even when the request contained zero tool definitions.
|
|
2135
|
+
const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0;
|
|
2136
|
+
if (toolCalls.length > 0 && !toolsWereSent) {
|
|
2137
|
+
logger.warn({
|
|
2138
|
+
sessionId: session?.id ?? null,
|
|
2139
|
+
step: steps,
|
|
2140
|
+
hallucinated: toolCalls.map(tc => tc.function?.name || tc.name),
|
|
2141
|
+
noToolInjection: !!cleanPayload._noToolInjection,
|
|
2142
|
+
}, "Dropped hallucinated tool calls (no tools were sent to model)");
|
|
2143
|
+
toolCalls = [];
|
|
2144
|
+
// If there's also no text content, treat as empty response (handled below)
|
|
2145
|
+
}
|
|
2146
|
+
|
|
1905
2147
|
if (toolCalls.length > 0) {
|
|
1906
2148
|
// Convert OpenAI/OpenRouter format to Anthropic format for session storage
|
|
1907
2149
|
let sessionContent;
|
|
1908
2150
|
if (providerType === "azure-anthropic") {
|
|
1909
|
-
// Azure Anthropic already returns content in Anthropic
|
|
2151
|
+
// Azure Anthropic already returns content in Anthropic
|
|
1910
2152
|
sessionContent = databricksResponse.json?.content ?? [];
|
|
1911
2153
|
} else {
|
|
1912
2154
|
// Convert OpenAI/OpenRouter format to Anthropic content blocks
|
|
@@ -1966,9 +2208,10 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1966
2208
|
});
|
|
1967
2209
|
|
|
1968
2210
|
let assistantToolMessage;
|
|
1969
|
-
if (providerType === "azure-anthropic") {
|
|
1970
|
-
// For
|
|
1971
|
-
//
|
|
2211
|
+
if (providerType === "azure-anthropic" || isAnthropicFormat) {
|
|
2212
|
+
// For Anthropic-format responses (azure-anthropic, Ollama native API,
|
|
2213
|
+
// azure-openai Responses API), use the content array directly —
|
|
2214
|
+
// it already contains both text and tool_use blocks in the correct format
|
|
1972
2215
|
assistantToolMessage = {
|
|
1973
2216
|
role: "assistant",
|
|
1974
2217
|
content: databricksResponse.json?.content ?? [],
|
|
@@ -1981,9 +2224,9 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
1981
2224
|
};
|
|
1982
2225
|
}
|
|
1983
2226
|
|
|
1984
|
-
// Only add fallback content for
|
|
2227
|
+
// Only add fallback content for OpenAI-format responses (Anthropic format already has content)
|
|
1985
2228
|
if (
|
|
1986
|
-
providerType !== "azure-anthropic" &&
|
|
2229
|
+
providerType !== "azure-anthropic" && !isAnthropicFormat &&
|
|
1987
2230
|
(!assistantToolMessage.content ||
|
|
1988
2231
|
(typeof assistantToolMessage.content === "string" &&
|
|
1989
2232
|
assistantToolMessage.content.trim().length === 0)) &&
|
|
@@ -2019,7 +2262,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2019
2262
|
// If in passthrough/client mode and there are client-side tools, return them to client
|
|
2020
2263
|
// Server-side tools (Task, Web) will be executed below
|
|
2021
2264
|
if ((executionMode === "passthrough" || executionMode === "client") && clientSideToolCalls.length > 0) {
|
|
2022
|
-
logger.
|
|
2265
|
+
logger.debug(
|
|
2023
2266
|
{
|
|
2024
2267
|
sessionId: session?.id ?? null,
|
|
2025
2268
|
totalToolCount: toolCalls.length,
|
|
@@ -2044,7 +2287,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2044
2287
|
type: "message",
|
|
2045
2288
|
role: "assistant",
|
|
2046
2289
|
content: clientContent,
|
|
2047
|
-
model: databricksResponse.json?.model ||
|
|
2290
|
+
model: databricksResponse.json?.model || cleanPayload.model,
|
|
2048
2291
|
stop_reason: "tool_use",
|
|
2049
2292
|
usage: databricksResponse.json?.usage || {
|
|
2050
2293
|
input_tokens: 0,
|
|
@@ -2065,6 +2308,27 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2065
2308
|
// then continue the conversation loop. For now, let's fall through to execute server-side tools.
|
|
2066
2309
|
if (serverSideToolCalls.length === 0) {
|
|
2067
2310
|
// No server-side tools - pure passthrough
|
|
2311
|
+
// Record outbound client-side tool calls into cross-request dedup tracker
|
|
2312
|
+
if (session && clientSideToolCalls.length > 0) {
|
|
2313
|
+
ensureDedupStructure(session);
|
|
2314
|
+
for (const call of clientSideToolCalls) {
|
|
2315
|
+
recordCrossRequestToolCall(session, call);
|
|
2316
|
+
}
|
|
2317
|
+
// Persist dedup state (non-ephemeral sessions only)
|
|
2318
|
+
if (session.id && !session._ephemeral) {
|
|
2319
|
+
try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
|
|
2320
|
+
logger.debug({ err: e.message }, "Failed to persist outbound dedup state");
|
|
2321
|
+
}
|
|
2322
|
+
}
|
|
2323
|
+
const { maxCount, toolName: dedupTool } = getMaxDedupCount(session);
|
|
2324
|
+
logger.debug({
|
|
2325
|
+
sessionId: session?.id ?? null,
|
|
2326
|
+
clientToolCount: clientSideToolCalls.length,
|
|
2327
|
+
maxDedupCount: maxCount,
|
|
2328
|
+
maxDedupTool: dedupTool,
|
|
2329
|
+
}, "Cross-request tool dedup: recorded outbound tool calls");
|
|
2330
|
+
}
|
|
2331
|
+
|
|
2068
2332
|
return {
|
|
2069
2333
|
response: {
|
|
2070
2334
|
status: 200,
|
|
@@ -2081,7 +2345,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2081
2345
|
// Override toolCalls to only include Server-side tools for server execution
|
|
2082
2346
|
toolCalls = serverSideToolCalls;
|
|
2083
2347
|
|
|
2084
|
-
logger.
|
|
2348
|
+
logger.debug(
|
|
2085
2349
|
{
|
|
2086
2350
|
sessionId: session?.id ?? null,
|
|
2087
2351
|
serverToolCount: serverSideToolCalls.length,
|
|
@@ -2090,7 +2354,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2090
2354
|
);
|
|
2091
2355
|
} else if (executionMode === "passthrough" || executionMode === "client") {
|
|
2092
2356
|
// Only Server-side tools, no Client-side tools - execute all server-side
|
|
2093
|
-
logger.
|
|
2357
|
+
logger.debug(
|
|
2094
2358
|
{
|
|
2095
2359
|
sessionId: session?.id ?? null,
|
|
2096
2360
|
serverToolCount: serverSideToolCalls.length,
|
|
@@ -2155,6 +2419,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2155
2419
|
session,
|
|
2156
2420
|
cwd,
|
|
2157
2421
|
requestMessages: cleanPayload.messages,
|
|
2422
|
+
provider: providerType, // Pass provider for GPT-specific formatting
|
|
2158
2423
|
}))
|
|
2159
2424
|
);
|
|
2160
2425
|
|
|
@@ -2388,10 +2653,14 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2388
2653
|
session,
|
|
2389
2654
|
cwd,
|
|
2390
2655
|
requestMessages: cleanPayload.messages,
|
|
2656
|
+
provider: providerType, // Pass provider for GPT-specific formatting
|
|
2391
2657
|
});
|
|
2392
2658
|
|
|
2393
2659
|
let toolMessage;
|
|
2394
|
-
if (providerType === "azure-anthropic") {
|
|
2660
|
+
if (providerType === "azure-anthropic" || isAnthropicFormat) {
|
|
2661
|
+
// Anthropic-format tool result for providers whose responses use
|
|
2662
|
+
// Anthropic tool_use blocks (azure-anthropic, Ollama native API,
|
|
2663
|
+
// azure-openai Responses API)
|
|
2395
2664
|
const parsedContent = parseExecutionContent(execution.content);
|
|
2396
2665
|
const serialisedContent =
|
|
2397
2666
|
typeof parsedContent === "string" || parsedContent === null
|
|
@@ -2502,34 +2771,54 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2502
2771
|
|
|
2503
2772
|
// === TOOL CALL LOOP DETECTION ===
|
|
2504
2773
|
// Track tool calls to detect infinite loops where the model calls the same tool
|
|
2505
|
-
// repeatedly with identical parameters
|
|
2774
|
+
// repeatedly with identical or similar parameters
|
|
2775
|
+
// All providers use threshold 2 and similarity-based detection
|
|
2776
|
+
const loopThreshold = 2;
|
|
2777
|
+
|
|
2506
2778
|
for (const call of toolCalls) {
|
|
2507
2779
|
const signature = getToolCallSignature(call);
|
|
2508
|
-
const
|
|
2509
|
-
|
|
2780
|
+
const existingEntry = toolCallHistory.get(signature);
|
|
2781
|
+
let count = (existingEntry?.count || 0) + 1;
|
|
2782
|
+
toolCallHistory.set(signature, { count, call });
|
|
2510
2783
|
|
|
2511
2784
|
const toolName = call.function?.name ?? call.name ?? 'unknown';
|
|
2512
2785
|
|
|
2513
|
-
|
|
2786
|
+
// Check for similar (not just identical) tool calls across all providers
|
|
2787
|
+
// This catches cases where the model slightly varies parameters but is essentially looping
|
|
2788
|
+
for (const [existingSig, existingData] of toolCallHistory.entries()) {
|
|
2789
|
+
if (existingSig !== signature && areSimilarToolCalls(call, existingData.call)) {
|
|
2790
|
+
// Found a similar call - increase count to trigger loop detection earlier
|
|
2791
|
+
count = Math.max(count, existingData.count + 1);
|
|
2792
|
+
logger.debug({
|
|
2793
|
+
tool: toolName,
|
|
2794
|
+
currentSignature: signature,
|
|
2795
|
+
similarSignature: existingSig,
|
|
2796
|
+
combinedCount: count,
|
|
2797
|
+
}, "Similar tool call detected - combining counts");
|
|
2798
|
+
}
|
|
2799
|
+
}
|
|
2800
|
+
|
|
2801
|
+
if (count === loopThreshold && !loopWarningInjected) {
|
|
2514
2802
|
logger.warn(
|
|
2515
2803
|
{
|
|
2516
2804
|
sessionId: session?.id ?? null,
|
|
2517
2805
|
correlationId: options?.correlationId,
|
|
2518
2806
|
tool: toolName,
|
|
2519
2807
|
loopCount: count,
|
|
2808
|
+
loopThreshold,
|
|
2520
2809
|
signature: signature,
|
|
2521
2810
|
action: 'warning_injected',
|
|
2522
2811
|
totalSteps: steps,
|
|
2523
2812
|
remainingSteps: settings.maxSteps - steps,
|
|
2524
2813
|
},
|
|
2525
|
-
|
|
2814
|
+
`Tool call loop detected - same tool called ${loopThreshold} times with identical/similar parameters`,
|
|
2526
2815
|
);
|
|
2527
2816
|
|
|
2528
2817
|
// Inject warning message to model
|
|
2529
2818
|
loopWarningInjected = true;
|
|
2530
2819
|
const warningMessage = {
|
|
2531
2820
|
role: "user",
|
|
2532
|
-
content:
|
|
2821
|
+
content: `⚠️ CRITICAL SYSTEM WARNING: You have called the "${toolName}" tool ${count} times with identical or similar parameters. This IS an infinite loop. STOP calling this tool immediately. You MUST now provide a direct text response to the user based on the results you have received. If the tool returned "no results" or empty output, that IS the final answer - do not retry. Summarize your findings and respond.`,
|
|
2533
2822
|
};
|
|
2534
2823
|
|
|
2535
2824
|
cleanPayload.messages.push(warningMessage);
|
|
@@ -2544,11 +2833,12 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2544
2833
|
reason: "tool_call_loop_warning",
|
|
2545
2834
|
toolName,
|
|
2546
2835
|
loopCount: count,
|
|
2836
|
+
loopThreshold,
|
|
2547
2837
|
},
|
|
2548
2838
|
});
|
|
2549
2839
|
}
|
|
2550
|
-
} else if (count >
|
|
2551
|
-
// Force termination after
|
|
2840
|
+
} else if (count > loopThreshold) {
|
|
2841
|
+
// Force termination after threshold exceeded
|
|
2552
2842
|
// Log FULL context for debugging why the loop occurred
|
|
2553
2843
|
logger.error(
|
|
2554
2844
|
{
|
|
@@ -2556,6 +2846,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2556
2846
|
correlationId: options?.correlationId,
|
|
2557
2847
|
tool: toolName,
|
|
2558
2848
|
loopCount: count,
|
|
2849
|
+
loopThreshold,
|
|
2559
2850
|
signature: signature,
|
|
2560
2851
|
action: 'request_terminated',
|
|
2561
2852
|
totalSteps: steps,
|
|
@@ -2576,7 +2867,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2576
2867
|
body: {
|
|
2577
2868
|
error: {
|
|
2578
2869
|
type: "tool_call_loop_detected",
|
|
2579
|
-
message: `Tool call loop detected: The model called the same tool ("${toolName}") with identical parameters ${count} times. This indicates an infinite loop and execution has been terminated. Please try rephrasing your request or provide different parameters.`,
|
|
2870
|
+
message: `Tool call loop detected: The model called the same tool ("${toolName}") with identical parameters ${count} times (threshold: ${loopThreshold}). This indicates an infinite loop and execution has been terminated. Please try rephrasing your request or provide different parameters.`,
|
|
2580
2871
|
},
|
|
2581
2872
|
},
|
|
2582
2873
|
terminationReason: "tool_call_loop",
|
|
@@ -2608,11 +2899,19 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2608
2899
|
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
2609
2900
|
}
|
|
2610
2901
|
} else if (actualProvider === "ollama") {
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
)
|
|
2615
|
-
|
|
2902
|
+
const ollamaJson = databricksResponse.json;
|
|
2903
|
+
// Detect response format: Anthropic API (v0.14.0+) has type:"message",
|
|
2904
|
+
// legacy /api/chat has message.role + message.content
|
|
2905
|
+
if (ollamaJson?.type === "message" && Array.isArray(ollamaJson?.content)) {
|
|
2906
|
+
// Anthropic-native response — passthrough
|
|
2907
|
+
anthropicPayload = ollamaJson;
|
|
2908
|
+
} else {
|
|
2909
|
+
// Legacy Ollama response — convert to Anthropic format
|
|
2910
|
+
anthropicPayload = ollamaToAnthropicResponse(ollamaJson, requestedModel);
|
|
2911
|
+
}
|
|
2912
|
+
if (Array.isArray(anthropicPayload?.content)) {
|
|
2913
|
+
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
2914
|
+
}
|
|
2616
2915
|
} else if (actualProvider === "openrouter") {
|
|
2617
2916
|
const { convertOpenRouterResponseToAnthropic } = require("../clients/openrouter-utils");
|
|
2618
2917
|
|
|
@@ -2841,6 +3140,16 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
2841
3140
|
if (Array.isArray(anthropicPayload?.content)) {
|
|
2842
3141
|
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
2843
3142
|
}
|
|
3143
|
+
} else if (actualProvider === "moonshot") {
|
|
3144
|
+
// Moonshot responses are already converted to Anthropic format in invokeMoonshot
|
|
3145
|
+
logger.info({
|
|
3146
|
+
hasJson: !!databricksResponse.json,
|
|
3147
|
+
jsonContent: JSON.stringify(databricksResponse.json?.content)?.substring(0, 300),
|
|
3148
|
+
}, "=== MOONSHOT ORCHESTRATOR DEBUG ===");
|
|
3149
|
+
anthropicPayload = databricksResponse.json;
|
|
3150
|
+
if (Array.isArray(anthropicPayload?.content)) {
|
|
3151
|
+
anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content);
|
|
3152
|
+
}
|
|
2844
3153
|
} else {
|
|
2845
3154
|
anthropicPayload = toAnthropicResponse(
|
|
2846
3155
|
databricksResponse.json,
|
|
@@ -3035,6 +3344,7 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
3035
3344
|
session,
|
|
3036
3345
|
cwd,
|
|
3037
3346
|
requestMessages: cleanPayload.messages,
|
|
3347
|
+
provider: providerType, // Pass provider for GPT-specific formatting
|
|
3038
3348
|
});
|
|
3039
3349
|
|
|
3040
3350
|
const toolResultMessage = createFallbackToolResultMessage(providerType, {
|
|
@@ -3243,6 +3553,34 @@ IMPORTANT TOOL USAGE RULES:
|
|
|
3243
3553
|
};
|
|
3244
3554
|
}
|
|
3245
3555
|
|
|
3556
|
+
/**
 * Detect whether the current request is a suggestion-mode call.
 *
 * Only the most recent message whose role is "user" is inspected; earlier
 * user messages are ignored even if they contain the marker. The message
 * text is either the string content itself or, for array content, the
 * `text` fields of its blocks joined with single spaces.
 *
 * @param {Array} messages - The conversation messages
 * @returns {{ isSuggestionMode: boolean }} `true` only when the last user
 *   message contains the literal `[SUGGESTION MODE:` marker
 */
function detectSuggestionMode(messages) {
  if (!Array.isArray(messages) || messages.length === 0) {
    return { isSuggestionMode: false };
  }

  // Walk backwards to locate the last user message; nothing older is checked.
  let lastUserMessage = null;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role === 'user') {
      lastUserMessage = messages[i];
      break;
    }
  }
  if (lastUserMessage === null) {
    return { isSuggestionMode: false };
  }

  const { content } = lastUserMessage;
  let text = '';
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    // Blocks come from the raw request payload, so tolerate null/undefined
    // entries and blocks without a string `text` (e.g. tool_result blocks)
    // instead of throwing before the request is even processed.
    text = content
      .map((block) => (typeof block?.text === 'string' ? block.text : ''))
      .join(' ');
  }

  return { isSuggestionMode: text.includes('[SUGGESTION MODE:') };
}
|
|
3583
|
+
|
|
3246
3584
|
async function processMessage({ payload, headers, session, cwd, options = {} }) {
|
|
3247
3585
|
const requestedModel =
|
|
3248
3586
|
payload?.model ??
|
|
@@ -3252,102 +3590,317 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
3252
3590
|
typeof headers?.["anthropic-beta"] === "string" &&
|
|
3253
3591
|
headers["anthropic-beta"].includes("interleaved-thinking");
|
|
3254
3592
|
|
|
3593
|
+
// === SUGGESTION MODE: Early return when SUGGESTION_MODE_MODEL=none ===
|
|
3594
|
+
const { isSuggestionMode } = detectSuggestionMode(payload?.messages);
|
|
3595
|
+
const suggestionModelConfig = config.modelProvider?.suggestionModeModel ?? "default";
|
|
3596
|
+
if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") {
|
|
3597
|
+
logger.info('Suggestion mode: skipping LLM call (SUGGESTION_MODE_MODEL=none)');
|
|
3598
|
+
return {
|
|
3599
|
+
response: {
|
|
3600
|
+
json: {
|
|
3601
|
+
id: `msg_suggestion_skip_${Date.now()}`,
|
|
3602
|
+
type: "message",
|
|
3603
|
+
role: "assistant",
|
|
3604
|
+
content: [{ type: "text", text: "" }],
|
|
3605
|
+
model: requestedModel,
|
|
3606
|
+
stop_reason: "end_turn",
|
|
3607
|
+
stop_sequence: null,
|
|
3608
|
+
usage: { input_tokens: 0, output_tokens: 0 },
|
|
3609
|
+
},
|
|
3610
|
+
ok: true,
|
|
3611
|
+
status: 200,
|
|
3612
|
+
},
|
|
3613
|
+
steps: 0,
|
|
3614
|
+
durationMs: 0,
|
|
3615
|
+
terminationReason: "suggestion_mode_skip",
|
|
3616
|
+
};
|
|
3617
|
+
}
|
|
3618
|
+
|
|
3255
3619
|
// === TOOL LOOP GUARD (EARLY CHECK) ===
|
|
3256
3620
|
// Check BEFORE sanitization since sanitizePayload removes conversation history
|
|
3257
|
-
|
|
3621
|
+
// All providers use threshold 2 to catch loops early
|
|
3622
|
+
const providerType = config.modelProvider?.type ?? "databricks";
|
|
3623
|
+
const toolLoopThreshold = 2;
|
|
3258
3624
|
const { toolResultCount, toolUseCount } = countToolCallsInHistory(payload?.messages);
|
|
3259
3625
|
|
|
3260
|
-
|
|
3261
|
-
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3626
|
+
const executionMode = config.toolExecutionMode || "server";
|
|
3627
|
+
const isClientMode = executionMode === "client" || executionMode === "passthrough";
|
|
3628
|
+
|
|
3629
|
+
if (isClientMode && session) {
|
|
3630
|
+
// === CROSS-REQUEST DEDUP (CLIENT/PASSTHROUGH MODE) ===
|
|
3631
|
+
// The inner-loop guard resets each HTTP request so repeated calls across
|
|
3632
|
+
// requests escape detection. Track signatures in session metadata instead.
|
|
3633
|
+
ensureDedupStructure(session);
|
|
3634
|
+
|
|
3635
|
+
// Detect new user question → reset dedup tracking
|
|
3636
|
+
const dedup = session.metadata.toolCallDedup;
|
|
3637
|
+
const incomingToolUse = extractToolUseFromCurrentTurn(payload?.messages);
|
|
3638
|
+
// A user text message with no preceding tool_use means a brand-new question
|
|
3639
|
+
const hasNewUserText = (() => {
|
|
3640
|
+
const msgs = payload?.messages || [];
|
|
3641
|
+
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
3642
|
+
const msg = msgs[i];
|
|
3643
|
+
if (msg?.role === 'user') {
|
|
3644
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) return true;
|
|
3645
|
+
if (Array.isArray(msg.content)) {
|
|
3646
|
+
return msg.content.some(block =>
|
|
3647
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
3648
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
3649
|
+
);
|
|
3650
|
+
}
|
|
3651
|
+
}
|
|
3652
|
+
break; // Only check the very last message
|
|
3653
|
+
}
|
|
3654
|
+
return false;
|
|
3655
|
+
})();
|
|
3266
3656
|
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3271
|
-
|
|
3272
|
-
|
|
3273
|
-
|
|
3657
|
+
if (hasNewUserText && incomingToolUse.length === 0) {
|
|
3658
|
+
// Pure user text with no tool results → new question
|
|
3659
|
+
resetDedupTracking(session);
|
|
3660
|
+
} else {
|
|
3661
|
+
// Record each tool_use from the incoming messages into the dedup tracker
|
|
3662
|
+
for (const toolUseBlock of incomingToolUse) {
|
|
3663
|
+
recordCrossRequestToolCall(session, toolUseBlock);
|
|
3664
|
+
}
|
|
3274
3665
|
|
|
3275
|
-
|
|
3276
|
-
// This prevents showing old results from previous questions
|
|
3277
|
-
let toolResultsSummary = "";
|
|
3278
|
-
const messages = payload?.messages || [];
|
|
3666
|
+
const { maxCount, toolName: dedupToolName, signature: dedupSig } = getMaxDedupCount(session);
|
|
3279
3667
|
|
|
3280
|
-
|
|
3281
|
-
|
|
3282
|
-
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
|
|
3286
|
-
|
|
3287
|
-
|
|
3668
|
+
if (maxCount >= DEDUP_TERMINATE_THRESHOLD) {
|
|
3669
|
+
// Force-terminate: same pattern as existing tool_loop_guard
|
|
3670
|
+
logger.error({
|
|
3671
|
+
toolName: dedupToolName,
|
|
3672
|
+
count: maxCount,
|
|
3673
|
+
threshold: DEDUP_TERMINATE_THRESHOLD,
|
|
3674
|
+
signature: dedupSig,
|
|
3675
|
+
sessionId: session?.id ?? null,
|
|
3676
|
+
}, "[CrossRequestDedup] FORCE TERMINATING - repeated tool call across requests");
|
|
3677
|
+
|
|
3678
|
+
// Extract tool results summary from current turn
|
|
3679
|
+
let toolResultsSummary = "";
|
|
3680
|
+
const messages = payload?.messages || [];
|
|
3681
|
+
const { lastUserTextIndex: luIdx } = countToolCallsInHistory(messages);
|
|
3682
|
+
const startIdx = luIdx >= 0 ? luIdx : 0;
|
|
3683
|
+
for (let i = startIdx; i < messages.length; i++) {
|
|
3684
|
+
const msg = messages[i];
|
|
3685
|
+
if (!msg || !Array.isArray(msg.content)) continue;
|
|
3686
|
+
for (const block of msg.content) {
|
|
3687
|
+
if (block?.type === 'tool_result' && block?.content) {
|
|
3688
|
+
const content = typeof block.content === 'string'
|
|
3689
|
+
? block.content
|
|
3690
|
+
: JSON.stringify(block.content);
|
|
3691
|
+
if (content && !content.includes('Found 0')) {
|
|
3692
|
+
toolResultsSummary += content + "\n";
|
|
3693
|
+
}
|
|
3694
|
+
}
|
|
3695
|
+
}
|
|
3696
|
+
}
|
|
3697
|
+
|
|
3698
|
+
let responseText = `Based on the tool results, here's what I found:\n\n`;
|
|
3699
|
+
if (toolResultsSummary.trim()) {
|
|
3700
|
+
responseText += toolResultsSummary.trim();
|
|
3701
|
+
} else {
|
|
3702
|
+
responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
|
|
3703
|
+
}
|
|
3704
|
+
|
|
3705
|
+
const forcedResponse = {
|
|
3706
|
+
id: `msg_forced_${Date.now()}`,
|
|
3707
|
+
type: "message",
|
|
3708
|
+
role: "assistant",
|
|
3709
|
+
content: [{ type: "text", text: responseText }],
|
|
3710
|
+
model: requestedModel || "unknown",
|
|
3711
|
+
stop_reason: "end_turn",
|
|
3712
|
+
stop_sequence: null,
|
|
3713
|
+
usage: { input_tokens: 0, output_tokens: 100 },
|
|
3714
|
+
};
|
|
3715
|
+
|
|
3716
|
+
// Reset dedup after termination so next question starts fresh
|
|
3717
|
+
resetDedupTracking(session);
|
|
3718
|
+
// Persist to DB (non-ephemeral sessions only)
|
|
3719
|
+
if (session.id && !session._ephemeral) {
|
|
3720
|
+
try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
|
|
3721
|
+
logger.debug({ err: e.message }, "Failed to persist dedup reset");
|
|
3722
|
+
}
|
|
3723
|
+
}
|
|
3724
|
+
|
|
3725
|
+
return {
|
|
3726
|
+
status: 200,
|
|
3727
|
+
body: forcedResponse,
|
|
3728
|
+
terminationReason: "tool_loop_guard",
|
|
3729
|
+
};
|
|
3288
3730
|
}
|
|
3289
|
-
|
|
3290
|
-
|
|
3291
|
-
|
|
3292
|
-
|
|
3293
|
-
|
|
3294
|
-
|
|
3295
|
-
|
|
3296
|
-
|
|
3731
|
+
|
|
3732
|
+
if (maxCount >= DEDUP_WARN_THRESHOLD && !dedup.warningInjected) {
|
|
3733
|
+
logger.warn({
|
|
3734
|
+
toolName: dedupToolName,
|
|
3735
|
+
count: maxCount,
|
|
3736
|
+
threshold: DEDUP_WARN_THRESHOLD,
|
|
3737
|
+
signature: dedupSig,
|
|
3738
|
+
sessionId: session?.id ?? null,
|
|
3739
|
+
}, "[CrossRequestDedup] Warning - repeated tool call detected across requests");
|
|
3740
|
+
|
|
3741
|
+
dedup.warningInjected = true;
|
|
3742
|
+
|
|
3743
|
+
// Inject a strict warning into the payload so the model sees it
|
|
3744
|
+
if (Array.isArray(payload?.messages)) {
|
|
3745
|
+
payload.messages.push({
|
|
3746
|
+
role: "user",
|
|
3747
|
+
content: `⚠️ CRITICAL SYSTEM WARNING: You have called the "${dedupToolName}" tool ${maxCount} times with identical or similar parameters across multiple requests. This IS an infinite loop. STOP calling this tool immediately. You MUST now provide a direct text response based on the results you have received. If the tool returned "no results" or empty output, that IS the final answer - do not retry. Summarize your findings and respond.`,
|
|
3748
|
+
});
|
|
3749
|
+
}
|
|
3750
|
+
}
|
|
3751
|
+
|
|
3752
|
+
// Persist dedup state (non-ephemeral sessions only)
|
|
3753
|
+
if (session.id && !session._ephemeral) {
|
|
3754
|
+
try { upsertSession(session.id, { metadata: session.metadata }); } catch (e) {
|
|
3755
|
+
logger.debug({ err: e.message }, "Failed to persist dedup state");
|
|
3297
3756
|
}
|
|
3298
3757
|
}
|
|
3299
3758
|
}
|
|
3300
3759
|
|
|
3301
|
-
//
|
|
3302
|
-
const
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3312
|
-
|
|
3760
|
+
// Client mode still uses the relaxed per-request threshold for the count-based guard
|
|
3761
|
+
const effectiveThreshold = 10;
|
|
3762
|
+
if (toolResultCount >= effectiveThreshold) {
|
|
3763
|
+
logger.error({
|
|
3764
|
+
toolResultCount,
|
|
3765
|
+
toolUseCount,
|
|
3766
|
+
threshold: effectiveThreshold,
|
|
3767
|
+
sessionId: session?.id ?? null,
|
|
3768
|
+
}, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
|
|
3769
|
+
|
|
3770
|
+
let toolResultsSummary = "";
|
|
3771
|
+
const messages = payload?.messages || [];
|
|
3772
|
+
let lastUserTextIndex = -1;
|
|
3773
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3774
|
+
const msg = messages[i];
|
|
3775
|
+
if (msg?.role !== 'user') continue;
|
|
3776
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
|
|
3777
|
+
lastUserTextIndex = i;
|
|
3778
|
+
break;
|
|
3779
|
+
}
|
|
3780
|
+
if (Array.isArray(msg.content)) {
|
|
3781
|
+
const hasText = msg.content.some(block =>
|
|
3782
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
3783
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
3784
|
+
);
|
|
3785
|
+
if (hasText) {
|
|
3786
|
+
lastUserTextIndex = i;
|
|
3787
|
+
break;
|
|
3788
|
+
}
|
|
3789
|
+
}
|
|
3790
|
+
}
|
|
3791
|
+
const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
|
|
3792
|
+
for (let i = startIndex; i < messages.length; i++) {
|
|
3793
|
+
const msg = messages[i];
|
|
3794
|
+
if (!msg || !Array.isArray(msg.content)) continue;
|
|
3795
|
+
for (const block of msg.content) {
|
|
3796
|
+
if (block?.type === 'tool_result' && block?.content) {
|
|
3797
|
+
const content = typeof block.content === 'string'
|
|
3798
|
+
? block.content
|
|
3799
|
+
: JSON.stringify(block.content);
|
|
3800
|
+
if (content && !content.includes('Found 0')) {
|
|
3801
|
+
toolResultsSummary += content + "\n";
|
|
3802
|
+
}
|
|
3313
3803
|
}
|
|
3314
3804
|
}
|
|
3315
3805
|
}
|
|
3316
|
-
}
|
|
3317
3806
|
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
|
|
3807
|
+
let responseText = `Based on the tool results, here's what I found:\n\n`;
|
|
3808
|
+
if (toolResultsSummary.trim()) {
|
|
3809
|
+
responseText += toolResultsSummary.trim();
|
|
3810
|
+
} else {
|
|
3811
|
+
responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
|
|
3812
|
+
}
|
|
3813
|
+
|
|
3814
|
+
const forcedResponse = {
|
|
3815
|
+
id: `msg_forced_${Date.now()}`,
|
|
3816
|
+
type: "message",
|
|
3817
|
+
role: "assistant",
|
|
3818
|
+
content: [{ type: "text", text: responseText }],
|
|
3819
|
+
model: requestedModel || "unknown",
|
|
3820
|
+
stop_reason: "end_turn",
|
|
3821
|
+
stop_sequence: null,
|
|
3822
|
+
usage: { input_tokens: 0, output_tokens: 100 },
|
|
3823
|
+
};
|
|
3824
|
+
|
|
3825
|
+
return {
|
|
3826
|
+
status: 200,
|
|
3827
|
+
body: forcedResponse,
|
|
3828
|
+
terminationReason: "tool_loop_guard",
|
|
3829
|
+
};
|
|
3324
3830
|
}
|
|
3831
|
+
} else {
|
|
3832
|
+
// Server mode: use existing threshold 2 with countToolCallsInHistory
|
|
3833
|
+
const effectiveThreshold = toolLoopThreshold;
|
|
3834
|
+
|
|
3835
|
+
if (toolResultCount >= effectiveThreshold) {
|
|
3836
|
+
logger.error({
|
|
3837
|
+
toolResultCount,
|
|
3838
|
+
toolUseCount,
|
|
3839
|
+
threshold: effectiveThreshold,
|
|
3840
|
+
sessionId: session?.id ?? null,
|
|
3841
|
+
}, "[ToolLoopGuard] FORCE TERMINATING - too many tool calls in conversation");
|
|
3842
|
+
|
|
3843
|
+
let toolResultsSummary = "";
|
|
3844
|
+
const messages = payload?.messages || [];
|
|
3845
|
+
let lastUserTextIndex = -1;
|
|
3846
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3847
|
+
const msg = messages[i];
|
|
3848
|
+
if (msg?.role !== 'user') continue;
|
|
3849
|
+
if (typeof msg.content === 'string' && msg.content.trim().length > 0) {
|
|
3850
|
+
lastUserTextIndex = i;
|
|
3851
|
+
break;
|
|
3852
|
+
}
|
|
3853
|
+
if (Array.isArray(msg.content)) {
|
|
3854
|
+
const hasText = msg.content.some(block =>
|
|
3855
|
+
(block?.type === 'text' && block?.text?.trim?.().length > 0) ||
|
|
3856
|
+
(block?.type === 'input_text' && block?.input_text?.trim?.().length > 0)
|
|
3857
|
+
);
|
|
3858
|
+
if (hasText) {
|
|
3859
|
+
lastUserTextIndex = i;
|
|
3860
|
+
break;
|
|
3861
|
+
}
|
|
3862
|
+
}
|
|
3863
|
+
}
|
|
3864
|
+
const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0;
|
|
3865
|
+
for (let i = startIndex; i < messages.length; i++) {
|
|
3866
|
+
const msg = messages[i];
|
|
3867
|
+
if (!msg || !Array.isArray(msg.content)) continue;
|
|
3868
|
+
for (const block of msg.content) {
|
|
3869
|
+
if (block?.type === 'tool_result' && block?.content) {
|
|
3870
|
+
const content = typeof block.content === 'string'
|
|
3871
|
+
? block.content
|
|
3872
|
+
: JSON.stringify(block.content);
|
|
3873
|
+
if (content && !content.includes('Found 0')) {
|
|
3874
|
+
toolResultsSummary += content + "\n";
|
|
3875
|
+
}
|
|
3876
|
+
}
|
|
3877
|
+
}
|
|
3878
|
+
}
|
|
3325
3879
|
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
{
|
|
3333
|
-
type: "text",
|
|
3334
|
-
text: responseText,
|
|
3335
|
-
},
|
|
3336
|
-
],
|
|
3337
|
-
model: requestedModel || "unknown",
|
|
3338
|
-
stop_reason: "end_turn",
|
|
3339
|
-
stop_sequence: null,
|
|
3340
|
-
usage: {
|
|
3341
|
-
input_tokens: 0,
|
|
3342
|
-
output_tokens: 100,
|
|
3343
|
-
},
|
|
3344
|
-
};
|
|
3880
|
+
let responseText = `Based on the tool results, here's what I found:\n\n`;
|
|
3881
|
+
if (toolResultsSummary.trim()) {
|
|
3882
|
+
responseText += toolResultsSummary.trim();
|
|
3883
|
+
} else {
|
|
3884
|
+
responseText += `The tools executed but didn't return clear results. Please check the tool output above or try a different command.`;
|
|
3885
|
+
}
|
|
3345
3886
|
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3887
|
+
const forcedResponse = {
|
|
3888
|
+
id: `msg_forced_${Date.now()}`,
|
|
3889
|
+
type: "message",
|
|
3890
|
+
role: "assistant",
|
|
3891
|
+
content: [{ type: "text", text: responseText }],
|
|
3892
|
+
model: requestedModel || "unknown",
|
|
3893
|
+
stop_reason: "end_turn",
|
|
3894
|
+
stop_sequence: null,
|
|
3895
|
+
usage: { input_tokens: 0, output_tokens: 100 },
|
|
3896
|
+
};
|
|
3897
|
+
|
|
3898
|
+
return {
|
|
3899
|
+
status: 200,
|
|
3900
|
+
body: forcedResponse,
|
|
3901
|
+
terminationReason: "tool_loop_guard",
|
|
3902
|
+
};
|
|
3903
|
+
}
|
|
3351
3904
|
}
|
|
3352
3905
|
|
|
3353
3906
|
const cleanPayload = sanitizePayload(payload);
|