@juspay/neurolink 9.10.1 → 9.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. package/CHANGELOG.md +16 -0
  2. package/dist/agent/directTools.d.ts +3 -3
  3. package/dist/cli/commands/config.d.ts +9 -9
  4. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  5. package/dist/constants/contextWindows.d.ts +6 -3
  6. package/dist/constants/contextWindows.js +30 -3
  7. package/dist/constants/index.d.ts +3 -3
  8. package/dist/constants/retry.d.ts +4 -4
  9. package/dist/constants/retry.js +1 -1
  10. package/dist/context/contextCompactor.d.ts +1 -1
  11. package/dist/context/contextCompactor.js +59 -1
  12. package/dist/context/summarizationEngine.d.ts +2 -2
  13. package/dist/context/summarizationEngine.js +44 -18
  14. package/dist/context/toolOutputLimits.d.ts +22 -13
  15. package/dist/context/toolOutputLimits.js +58 -64
  16. package/dist/core/baseProvider.d.ts +11 -2
  17. package/dist/core/baseProvider.js +16 -1
  18. package/dist/core/conversationMemoryManager.d.ts +13 -1
  19. package/dist/core/conversationMemoryManager.js +36 -5
  20. package/dist/core/modules/GenerationHandler.d.ts +6 -0
  21. package/dist/core/modules/GenerationHandler.js +192 -7
  22. package/dist/core/modules/MessageBuilder.js +42 -4
  23. package/dist/core/modules/TelemetryHandler.js +4 -1
  24. package/dist/core/redisConversationMemoryManager.d.ts +19 -3
  25. package/dist/core/redisConversationMemoryManager.js +253 -58
  26. package/dist/index.d.ts +2 -0
  27. package/dist/index.js +3 -0
  28. package/dist/lib/agent/directTools.d.ts +7 -7
  29. package/dist/lib/constants/contextWindows.d.ts +6 -3
  30. package/dist/lib/constants/contextWindows.js +30 -3
  31. package/dist/lib/constants/index.d.ts +3 -3
  32. package/dist/lib/constants/retry.d.ts +4 -4
  33. package/dist/lib/constants/retry.js +1 -1
  34. package/dist/lib/context/contextCompactor.d.ts +1 -1
  35. package/dist/lib/context/contextCompactor.js +59 -1
  36. package/dist/lib/context/summarizationEngine.d.ts +2 -2
  37. package/dist/lib/context/summarizationEngine.js +44 -18
  38. package/dist/lib/context/toolOutputLimits.d.ts +22 -13
  39. package/dist/lib/context/toolOutputLimits.js +58 -64
  40. package/dist/lib/core/baseProvider.d.ts +11 -2
  41. package/dist/lib/core/baseProvider.js +16 -1
  42. package/dist/lib/core/conversationMemoryManager.d.ts +13 -1
  43. package/dist/lib/core/conversationMemoryManager.js +36 -5
  44. package/dist/lib/core/modules/GenerationHandler.d.ts +6 -0
  45. package/dist/lib/core/modules/GenerationHandler.js +192 -7
  46. package/dist/lib/core/modules/MessageBuilder.js +42 -4
  47. package/dist/lib/core/modules/TelemetryHandler.js +4 -1
  48. package/dist/lib/core/redisConversationMemoryManager.d.ts +19 -3
  49. package/dist/lib/core/redisConversationMemoryManager.js +253 -58
  50. package/dist/lib/files/fileTools.d.ts +3 -3
  51. package/dist/lib/index.d.ts +2 -0
  52. package/dist/lib/index.js +3 -0
  53. package/dist/lib/mcp/externalServerManager.js +46 -10
  54. package/dist/lib/memory/memoryRetrievalTools.d.ts +166 -0
  55. package/dist/lib/memory/memoryRetrievalTools.js +145 -0
  56. package/dist/lib/neurolink.d.ts +35 -1
  57. package/dist/lib/neurolink.js +476 -16
  58. package/dist/lib/providers/amazonBedrock.d.ts +1 -1
  59. package/dist/lib/providers/amazonBedrock.js +78 -45
  60. package/dist/lib/providers/amazonSagemaker.d.ts +1 -1
  61. package/dist/lib/providers/amazonSagemaker.js +1 -1
  62. package/dist/lib/providers/anthropic.d.ts +1 -1
  63. package/dist/lib/providers/anthropic.js +7 -7
  64. package/dist/lib/providers/anthropicBaseProvider.d.ts +1 -1
  65. package/dist/lib/providers/anthropicBaseProvider.js +7 -6
  66. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  67. package/dist/lib/providers/azureOpenai.js +1 -1
  68. package/dist/lib/providers/googleAiStudio.d.ts +1 -1
  69. package/dist/lib/providers/googleAiStudio.js +5 -5
  70. package/dist/lib/providers/googleVertex.d.ts +1 -1
  71. package/dist/lib/providers/googleVertex.js +74 -17
  72. package/dist/lib/providers/huggingFace.d.ts +1 -1
  73. package/dist/lib/providers/huggingFace.js +1 -1
  74. package/dist/lib/providers/litellm.d.ts +1 -1
  75. package/dist/lib/providers/litellm.js +18 -16
  76. package/dist/lib/providers/mistral.d.ts +1 -1
  77. package/dist/lib/providers/mistral.js +1 -1
  78. package/dist/lib/providers/ollama.d.ts +1 -1
  79. package/dist/lib/providers/ollama.js +8 -7
  80. package/dist/lib/providers/openAI.d.ts +1 -1
  81. package/dist/lib/providers/openAI.js +6 -6
  82. package/dist/lib/providers/openRouter.d.ts +1 -1
  83. package/dist/lib/providers/openRouter.js +6 -2
  84. package/dist/lib/providers/openaiCompatible.d.ts +1 -1
  85. package/dist/lib/providers/openaiCompatible.js +1 -1
  86. package/dist/lib/proxy/proxyFetch.js +291 -65
  87. package/dist/lib/server/utils/validation.d.ts +4 -4
  88. package/dist/lib/services/server/ai/observability/instrumentation.js +12 -3
  89. package/dist/lib/telemetry/telemetryService.d.ts +2 -1
  90. package/dist/lib/telemetry/telemetryService.js +8 -1
  91. package/dist/lib/types/contextTypes.d.ts +26 -2
  92. package/dist/lib/types/conversation.d.ts +72 -40
  93. package/dist/lib/types/conversationMemoryInterface.d.ts +5 -1
  94. package/dist/lib/types/generateTypes.d.ts +26 -0
  95. package/dist/lib/types/modelTypes.d.ts +2 -2
  96. package/dist/lib/types/multimodal.d.ts +2 -0
  97. package/dist/lib/types/observability.d.ts +10 -0
  98. package/dist/lib/types/sdkTypes.d.ts +1 -1
  99. package/dist/lib/utils/conversationMemory.d.ts +4 -3
  100. package/dist/lib/utils/conversationMemory.js +44 -6
  101. package/dist/lib/utils/errorHandling.d.ts +5 -0
  102. package/dist/lib/utils/errorHandling.js +7 -2
  103. package/dist/lib/utils/logger.d.ts +8 -0
  104. package/dist/lib/utils/logger.js +56 -1
  105. package/dist/lib/utils/messageBuilder.js +74 -4
  106. package/dist/lib/utils/redis.js +6 -1
  107. package/dist/lib/utils/tokenEstimation.d.ts +2 -2
  108. package/dist/lib/utils/tokenEstimation.js +16 -1
  109. package/dist/lib/workflow/config.d.ts +110 -110
  110. package/dist/mcp/externalServerManager.js +46 -10
  111. package/dist/memory/memoryRetrievalTools.d.ts +166 -0
  112. package/dist/memory/memoryRetrievalTools.js +144 -0
  113. package/dist/neurolink.d.ts +35 -1
  114. package/dist/neurolink.js +476 -16
  115. package/dist/providers/amazonBedrock.d.ts +1 -1
  116. package/dist/providers/amazonBedrock.js +78 -45
  117. package/dist/providers/amazonSagemaker.d.ts +1 -1
  118. package/dist/providers/amazonSagemaker.js +1 -1
  119. package/dist/providers/anthropic.d.ts +1 -1
  120. package/dist/providers/anthropic.js +7 -7
  121. package/dist/providers/anthropicBaseProvider.d.ts +1 -1
  122. package/dist/providers/anthropicBaseProvider.js +7 -6
  123. package/dist/providers/azureOpenai.d.ts +1 -1
  124. package/dist/providers/azureOpenai.js +1 -1
  125. package/dist/providers/googleAiStudio.d.ts +1 -1
  126. package/dist/providers/googleAiStudio.js +5 -5
  127. package/dist/providers/googleVertex.d.ts +1 -1
  128. package/dist/providers/googleVertex.js +74 -17
  129. package/dist/providers/huggingFace.d.ts +1 -1
  130. package/dist/providers/huggingFace.js +1 -1
  131. package/dist/providers/litellm.d.ts +1 -1
  132. package/dist/providers/litellm.js +18 -16
  133. package/dist/providers/mistral.d.ts +1 -1
  134. package/dist/providers/mistral.js +1 -1
  135. package/dist/providers/ollama.d.ts +1 -1
  136. package/dist/providers/ollama.js +8 -7
  137. package/dist/providers/openAI.d.ts +1 -1
  138. package/dist/providers/openAI.js +6 -6
  139. package/dist/providers/openRouter.d.ts +1 -1
  140. package/dist/providers/openRouter.js +6 -2
  141. package/dist/providers/openaiCompatible.d.ts +1 -1
  142. package/dist/providers/openaiCompatible.js +1 -1
  143. package/dist/proxy/proxyFetch.js +291 -65
  144. package/dist/services/server/ai/observability/instrumentation.js +12 -3
  145. package/dist/telemetry/telemetryService.d.ts +2 -1
  146. package/dist/telemetry/telemetryService.js +8 -1
  147. package/dist/types/contextTypes.d.ts +26 -2
  148. package/dist/types/conversation.d.ts +72 -40
  149. package/dist/types/conversationMemoryInterface.d.ts +5 -1
  150. package/dist/types/generateTypes.d.ts +26 -0
  151. package/dist/types/modelTypes.d.ts +10 -10
  152. package/dist/types/multimodal.d.ts +2 -0
  153. package/dist/types/observability.d.ts +10 -0
  154. package/dist/types/sdkTypes.d.ts +1 -1
  155. package/dist/utils/conversationMemory.d.ts +4 -3
  156. package/dist/utils/conversationMemory.js +44 -6
  157. package/dist/utils/errorHandling.d.ts +5 -0
  158. package/dist/utils/errorHandling.js +7 -2
  159. package/dist/utils/logger.d.ts +8 -0
  160. package/dist/utils/logger.js +56 -1
  161. package/dist/utils/messageBuilder.js +74 -4
  162. package/dist/utils/redis.js +6 -1
  163. package/dist/utils/tokenEstimation.d.ts +2 -2
  164. package/dist/utils/tokenEstimation.js +16 -1
  165. package/dist/workflow/config.d.ts +12 -12
  166. package/package.json +1 -1
@@ -10,6 +10,8 @@
10
10
  * - Google: https://ai.google.dev/gemini-api/docs/models
11
11
  * - Others: Provider documentation as of Feb 2026
12
12
  */
13
+ import { DynamicModelProvider } from "../core/dynamicModels.js";
14
+ import { logger } from "../utils/logger.js";
13
15
  /** Default context window when provider/model is unknown */
14
16
  export const DEFAULT_CONTEXT_WINDOW = 128_000;
15
17
  /** Maximum output reserve when maxTokens not specified */
@@ -67,6 +69,10 @@ export const MODEL_CONTEXT_WINDOWS = {
67
69
  "gemini-2.0-flash": 1_048_576,
68
70
  "gemini-1.5-pro": 2_097_152,
69
71
  "gemini-1.5-flash": 1_048_576,
72
+ "claude-sonnet-4-5": 200_000,
73
+ "claude-sonnet-4-20250514": 200_000,
74
+ "claude-opus-4": 200_000,
75
+ "claude-opus-4-20250514": 200_000,
70
76
  },
71
77
  bedrock: {
72
78
  _default: 200_000,
@@ -109,11 +115,32 @@ export const MODEL_CONTEXT_WINDOWS = {
109
115
  * Resolve context window size for a provider/model combination.
110
116
  *
111
117
  * Priority:
112
- * 1. Exact model match under provider
113
- * 2. Provider's _default
114
- * 3. Global DEFAULT_CONTEXT_WINDOW
118
+ * 0. Dynamic model registry (DynamicModelProvider) — resolves cross-provider
119
+ * models (e.g. Claude on Vertex) that the static table cannot handle
120
+ * 1. Exact model match under provider in static registry
121
+ * 2. Prefix match under provider in static registry
122
+ * 3. Provider's _default in static registry
123
+ * 4. Global DEFAULT_CONTEXT_WINDOW
115
124
  */
116
125
  export function getContextWindowSize(provider, model) {
126
+ // Step 0: Check dynamic model registry first.
127
+ // This resolves cases where the runtime provider differs from the model's
128
+ // origin (e.g. Claude running via Vertex would hit Vertex's Gemini default
129
+ // in the static table). The dynamic registry knows the actual model metadata.
130
+ if (model) {
131
+ try {
132
+ const dynamicProvider = DynamicModelProvider.getInstance();
133
+ const modelConfig = dynamicProvider.resolveModel(provider, model);
134
+ if (modelConfig?.contextWindow) {
135
+ logger.debug(`[ContextWindow] Resolved via dynamic registry: provider=${provider}, model=${model}, contextWindow=${modelConfig.contextWindow}`);
136
+ return modelConfig.contextWindow;
137
+ }
138
+ }
139
+ catch {
140
+ // Dynamic registry not initialized yet — fall through to static lookup
141
+ }
142
+ }
143
+ // Static fallback chain
117
144
  const providerWindows = MODEL_CONTEXT_WINDOWS[provider];
118
145
  if (!providerWindows) {
119
146
  return DEFAULT_CONTEXT_WINDOW;
@@ -58,7 +58,7 @@ export declare const PROVIDER_OPERATION_CONFIGS: {
58
58
  };
59
59
  readonly OLLAMA: {
60
60
  readonly timeout: 10000;
61
- readonly maxRetries: 2;
61
+ readonly maxRetries: 1;
62
62
  readonly retryDelay: 200;
63
63
  };
64
64
  };
@@ -83,7 +83,7 @@ export declare const MCP_OPERATION_CONFIGS: {
83
83
  };
84
84
  readonly HEALTH_CHECK: {
85
85
  readonly timeout: 5000;
86
- readonly maxRetries: 2;
86
+ readonly maxRetries: 1;
87
87
  readonly retryDelay: 200;
88
88
  };
89
89
  };
@@ -140,7 +140,7 @@ export declare function getProviderRetryConfig(provider: string): {
140
140
  readonly maxDelay: 30000;
141
141
  readonly multiplier: 1.5;
142
142
  } | {
143
- readonly maxAttempts: 2;
143
+ readonly maxAttempts: 1;
144
144
  readonly baseDelay: 200;
145
145
  readonly maxDelay: 5000;
146
146
  readonly multiplier: 1.5;
@@ -18,7 +18,7 @@ export declare const RETRY_ATTEMPTS: {
18
18
  /** Critical operations that must succeed */
19
19
  readonly CRITICAL: 5;
20
20
  /** Quick operations that should fail fast */
21
- readonly QUICK: 2;
21
+ readonly QUICK: 1;
22
22
  /** Network operations prone to transient failures */
23
23
  readonly NETWORK: 4;
24
24
  /** Authentication operations */
@@ -122,7 +122,7 @@ export declare const PROVIDER_RETRY: {
122
122
  };
123
123
  /** Ollama retry configuration (local service) */
124
124
  readonly OLLAMA: {
125
- readonly maxAttempts: 2;
125
+ readonly maxAttempts: 1;
126
126
  readonly baseDelay: 200;
127
127
  readonly maxDelay: 5000;
128
128
  readonly multiplier: 1.5;
@@ -141,7 +141,7 @@ export declare const OPERATION_RETRY: {
141
141
  };
142
142
  /** MCP operation retry config */
143
143
  readonly MCP_OPERATION: {
144
- readonly maxAttempts: 2;
144
+ readonly maxAttempts: 1;
145
145
  readonly baseDelay: 200;
146
146
  readonly circuitBreaker: false;
147
147
  };
@@ -210,7 +210,7 @@ export declare const RetryUtils: {
210
210
  readonly maxDelay: 30000;
211
211
  readonly multiplier: 1.5;
212
212
  } | {
213
- readonly maxAttempts: 2;
213
+ readonly maxAttempts: 1;
214
214
  readonly baseDelay: 200;
215
215
  readonly maxDelay: 5000;
216
216
  readonly multiplier: 1.5;
@@ -18,7 +18,7 @@ export const RETRY_ATTEMPTS = {
18
18
  /** Critical operations that must succeed */
19
19
  CRITICAL: 5, // 5 attempts - High-importance operations
20
20
  /** Quick operations that should fail fast */
21
- QUICK: 2, // 2 attempts - Fast operations, minimal retry
21
+ QUICK: 1, // 1 attempt, no retries fail fast
22
22
  /** Network operations prone to transient failures */
23
23
  NETWORK: 4, // 4 attempts - Network operations
24
24
  /** Authentication operations */
@@ -18,5 +18,5 @@ export declare class ContextCompactor {
18
18
  /**
19
19
  * Run the multi-stage compaction pipeline until messages fit within budget.
20
20
  */
21
- compact(messages: ChatMessage[], targetTokens: number, memoryConfig?: Partial<ConversationMemoryConfig>): Promise<CompactionResult>;
21
+ compact(messages: ChatMessage[], targetTokens: number, memoryConfig?: Partial<ConversationMemoryConfig>, requestId?: string): Promise<CompactionResult>;
22
22
  }
@@ -9,6 +9,7 @@
9
9
  * Stage 4: Sliding Window Truncation (fallback -- no LLM call)
10
10
  */
11
11
  import { estimateMessagesTokens } from "../utils/tokenEstimation.js";
12
+ import { logger } from "../utils/logger.js";
12
13
  import { pruneToolOutputs } from "./stages/toolOutputPruner.js";
13
14
  import { deduplicateFileReads } from "./stages/fileReadDeduplicator.js";
14
15
  import { summarizeMessages } from "./stages/structuredSummarizer.js";
@@ -35,14 +36,21 @@ export class ContextCompactor {
35
36
  /**
36
37
  * Run the multi-stage compaction pipeline until messages fit within budget.
37
38
  */
38
- async compact(messages, targetTokens, memoryConfig) {
39
+ async compact(messages, targetTokens, memoryConfig, requestId) {
40
+ const compactionStartTime = Date.now();
39
41
  const provider = this.config.provider || undefined;
40
42
  const tokensBefore = estimateMessagesTokens(messages, provider);
41
43
  const stagesUsed = [];
42
44
  let currentMessages = [...messages];
45
+ logger.info("[Compaction] Starting", {
46
+ requestId,
47
+ estimatedTokens: tokensBefore,
48
+ budgetTokens: targetTokens,
49
+ });
43
50
  // Stage 1: Tool Output Pruning
44
51
  if (this.config.enablePrune &&
45
52
  estimateMessagesTokens(currentMessages, provider) > targetTokens) {
53
+ const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
46
54
  const pruneResult = pruneToolOutputs(currentMessages, {
47
55
  protectTokens: this.config.pruneProtectTokens,
48
56
  minimumSavings: this.config.pruneMinimumSavings,
@@ -53,19 +61,37 @@ export class ContextCompactor {
53
61
  currentMessages = pruneResult.messages;
54
62
  stagesUsed.push("prune");
55
63
  }
64
+ const stageTokensAfter = estimateMessagesTokens(currentMessages, provider);
65
+ logger.info("[Compaction] Stage 1 (prune)", {
66
+ requestId,
67
+ ran: pruneResult.pruned,
68
+ tokensBefore: stageTokensBefore,
69
+ tokensAfter: stageTokensAfter,
70
+ saved: stageTokensBefore - stageTokensAfter,
71
+ });
56
72
  }
57
73
  // Stage 2: File Read Deduplication
58
74
  if (this.config.enableDeduplicate &&
59
75
  estimateMessagesTokens(currentMessages, provider) > targetTokens) {
76
+ const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
60
77
  const dedupResult = deduplicateFileReads(currentMessages);
61
78
  if (dedupResult.deduplicated) {
62
79
  currentMessages = dedupResult.messages;
63
80
  stagesUsed.push("deduplicate");
64
81
  }
82
+ const stageTokensAfter = estimateMessagesTokens(currentMessages, provider);
83
+ logger.info("[Compaction] Stage 2 (deduplicate)", {
84
+ requestId,
85
+ ran: dedupResult.deduplicated,
86
+ tokensBefore: stageTokensBefore,
87
+ tokensAfter: stageTokensAfter,
88
+ saved: stageTokensBefore - stageTokensAfter,
89
+ });
65
90
  }
66
91
  // Stage 3: LLM Summarization
67
92
  if (this.config.enableSummarize &&
68
93
  estimateMessagesTokens(currentMessages, provider) > targetTokens) {
94
+ const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
69
95
  try {
70
96
  const summarizeResult = await summarizeMessages(currentMessages, {
71
97
  provider: this.config.summarizationProvider,
@@ -77,14 +103,30 @@ export class ContextCompactor {
77
103
  currentMessages = summarizeResult.messages;
78
104
  stagesUsed.push("summarize");
79
105
  }
106
+ const stageTokensAfter = estimateMessagesTokens(currentMessages, provider);
107
+ logger.info("[Compaction] Stage 3 (summarize)", {
108
+ requestId,
109
+ ran: summarizeResult.summarized,
110
+ tokensBefore: stageTokensBefore,
111
+ tokensAfter: stageTokensAfter,
112
+ saved: stageTokensBefore - stageTokensAfter,
113
+ });
80
114
  }
81
115
  catch {
116
+ logger.info("[Compaction] Stage 3 (summarize)", {
117
+ requestId,
118
+ ran: false,
119
+ tokensBefore: stageTokensBefore,
120
+ tokensAfter: stageTokensBefore,
121
+ saved: 0,
122
+ });
82
123
  // Summarization failed, fall through to truncation
83
124
  }
84
125
  }
85
126
  // Stage 4: Sliding Window Truncation (fallback)
86
127
  if (this.config.enableTruncate &&
87
128
  estimateMessagesTokens(currentMessages, provider) > targetTokens) {
129
+ const stageTokensBefore = estimateMessagesTokens(currentMessages, provider);
88
130
  const truncResult = truncateWithSlidingWindow(currentMessages, {
89
131
  fraction: this.config.truncationFraction,
90
132
  });
@@ -92,8 +134,24 @@ export class ContextCompactor {
92
134
  currentMessages = truncResult.messages;
93
135
  stagesUsed.push("truncate");
94
136
  }
137
+ const stageTokensAfter = estimateMessagesTokens(currentMessages, provider);
138
+ logger.info("[Compaction] Stage 4 (truncate)", {
139
+ requestId,
140
+ ran: truncResult.truncated,
141
+ tokensBefore: stageTokensBefore,
142
+ tokensAfter: stageTokensAfter,
143
+ saved: stageTokensBefore - stageTokensAfter,
144
+ });
95
145
  }
96
146
  const tokensAfter = estimateMessagesTokens(currentMessages, provider);
147
+ logger.info("[Compaction] Complete", {
148
+ requestId,
149
+ tokensBefore,
150
+ tokensAfter,
151
+ totalSaved: tokensBefore - tokensAfter,
152
+ stagesUsed,
153
+ durationMs: Date.now() - compactionStartTime,
154
+ });
97
155
  return {
98
156
  compacted: stagesUsed.length > 0,
99
157
  stagesUsed,
@@ -18,7 +18,7 @@ export declare class SummarizationEngine {
18
18
  * @param logPrefix - Prefix for log messages
19
19
  * @returns True if summarization was performed
20
20
  */
21
- checkAndSummarize(session: SessionMemory, threshold: number, config: Partial<ConversationMemoryConfig>, logPrefix?: string): Promise<boolean>;
21
+ checkAndSummarize(session: SessionMemory, threshold: number, config: Partial<ConversationMemoryConfig>, logPrefix?: string, requestId?: string): Promise<boolean>;
22
22
  /**
23
23
  * Perform token-based summarization on a session.
24
24
  * Uses pointer-based, non-destructive approach.
@@ -27,7 +27,7 @@ export declare class SummarizationEngine {
27
27
  * @param config - Conversation memory configuration (partial allowed)
28
28
  * @param logPrefix - Prefix for log messages
29
29
  */
30
- summarizeSession(session: SessionMemory, threshold: number, config: Partial<ConversationMemoryConfig>, logPrefix?: string): Promise<void>;
30
+ summarizeSession(session: SessionMemory, threshold: number, config: Partial<ConversationMemoryConfig>, logPrefix?: string, requestId?: string): Promise<void>;
31
31
  /**
32
32
  * Estimate total tokens for a message array.
33
33
  * @param messages - Array of chat messages
@@ -21,19 +21,20 @@ export class SummarizationEngine {
21
21
  * @param logPrefix - Prefix for log messages
22
22
  * @returns True if summarization was performed
23
23
  */
24
- async checkAndSummarize(session, threshold, config, logPrefix = "[SummarizationEngine]") {
25
- const contextMessages = buildContextFromPointer(session);
24
+ async checkAndSummarize(session, threshold, config, logPrefix = "[SummarizationEngine]", requestId) {
25
+ const contextMessages = buildContextFromPointer(session, requestId);
26
26
  const tokenCount = this.estimateTokens(contextMessages);
27
27
  session.lastTokenCount = tokenCount;
28
28
  session.lastCountedAt = Date.now();
29
- logger.debug(`${logPrefix} Token count check`, {
29
+ logger.info("[Summarization] Check", {
30
+ requestId,
30
31
  sessionId: session.sessionId,
31
32
  tokenCount,
32
33
  threshold,
33
- needsSummarization: tokenCount >= threshold,
34
+ willSummarize: tokenCount >= threshold,
34
35
  });
35
36
  if (tokenCount >= threshold) {
36
- await this.summarizeSession(session, threshold, config, logPrefix);
37
+ await this.summarizeSession(session, threshold, config, logPrefix, requestId);
37
38
  return true;
38
39
  }
39
40
  return false;
@@ -46,7 +47,8 @@ export class SummarizationEngine {
46
47
  * @param config - Conversation memory configuration (partial allowed)
47
48
  * @param logPrefix - Prefix for log messages
48
49
  */
49
- async summarizeSession(session, threshold, config, logPrefix = "[SummarizationEngine]") {
50
+ async summarizeSession(session, threshold, config, logPrefix = "[SummarizationEngine]", requestId) {
51
+ const startTime = Date.now();
50
52
  const startIndex = session.summarizedUpToMessageId
51
53
  ? session.messages.findIndex((m) => m.id === session.summarizedUpToMessageId) + 1
52
54
  : 0;
@@ -60,21 +62,45 @@ export class SummarizationEngine {
60
62
  if (messagesToSummarize.length === 0) {
61
63
  return;
62
64
  }
63
- const summary = await generateSummary(messagesToSummarize, config, logPrefix, session.summarizedMessage);
64
- if (!summary) {
65
- logger.warn(`${logPrefix} Summary generation failed`, {
65
+ const recentToKeep = recentMessages.length - messagesToSummarize.length;
66
+ logger.info("[Summarization] Starting", {
67
+ requestId,
68
+ sessionId: session.sessionId,
69
+ messagesToSummarize: messagesToSummarize.length,
70
+ recentToKeep,
71
+ hasPreviousSummary: !!session.summarizedMessage,
72
+ });
73
+ try {
74
+ const summary = await generateSummary(messagesToSummarize, config, logPrefix, session.summarizedMessage, requestId);
75
+ if (!summary) {
76
+ logger.warn(`${logPrefix} Summary generation failed`, {
77
+ requestId,
78
+ sessionId: session.sessionId,
79
+ durationMs: Date.now() - startTime,
80
+ });
81
+ return;
82
+ }
83
+ const lastSummarized = messagesToSummarize[messagesToSummarize.length - 1];
84
+ session.summarizedUpToMessageId = lastSummarized.id;
85
+ session.summarizedMessage = summary;
86
+ logger.info("[Summarization] Complete", {
87
+ requestId,
66
88
  sessionId: session.sessionId,
89
+ summaryChars: summary.length,
90
+ newPointerId: lastSummarized.id,
91
+ durationMs: Date.now() - startTime,
67
92
  });
68
- return;
69
93
  }
70
- const lastSummarized = messagesToSummarize[messagesToSummarize.length - 1];
71
- session.summarizedUpToMessageId = lastSummarized.id;
72
- session.summarizedMessage = summary;
73
- logger.info(`${logPrefix} Summarization complete`, {
74
- sessionId: session.sessionId,
75
- summarizedCount: messagesToSummarize.length,
76
- totalMessages: session.messages.length,
77
- });
94
+ catch (err) {
95
+ const errorMessage = err instanceof Error ? err.message : String(err);
96
+ logger.error("[Summarization] Error", {
97
+ requestId,
98
+ sessionId: session.sessionId,
99
+ error: errorMessage,
100
+ durationMs: Date.now() - startTime,
101
+ });
102
+ throw err;
103
+ }
78
104
  }
79
105
  /**
80
106
  * Estimate total tokens for a message array.
@@ -1,17 +1,26 @@
1
1
  /**
2
- * Tool Output Size Limits
3
- *
4
- * Truncates tool outputs exceeding size limits.
5
- * Can save full output to disk with a pointer.
6
- * Modeled on OpenCode's approach.
2
+ * Tool output preview generation.
3
+ * Generates head/tail previews of large tool outputs for context-efficient LLM calls.
4
+ * @module
7
5
  */
8
- /** Maximum tool output size in bytes (50KB) */
9
- export declare const MAX_TOOL_OUTPUT_BYTES: number;
10
- /** Maximum tool output lines */
11
- export declare const MAX_TOOL_OUTPUT_LINES = 2000;
12
- import type { TruncateOptions, TruncateResult } from "../types/contextTypes.js";
13
- export type { TruncateOptions, TruncateResult } from "../types/contextTypes.js";
6
+ import type { ToolOutputPreviewOptions, ToolOutputPreviewResult } from "../types/contextTypes.js";
7
+ export type { ToolOutputPreviewOptions, ToolOutputPreviewResult, } from "../types/contextTypes.js";
8
+ /** Default maximum preview size in bytes (50KB) */
9
+ export declare const DEFAULT_MAX_PREVIEW_BYTES: number;
10
+ /** Default maximum preview lines */
11
+ export declare const DEFAULT_MAX_PREVIEW_LINES = 2000;
12
+ /** Default head ratio (25% of preview budget) */
13
+ export declare const DEFAULT_HEAD_RATIO = 0.25;
14
+ /** Tool name referenced in truncation notices for on-demand full-output access */
15
+ export declare const RETRIEVE_CONTEXT_TOOL_NAME = "retrieve_context";
16
+ /** Default tail ratio (75% of preview budget) */
17
+ export declare const DEFAULT_TAIL_RATIO = 0.75;
14
18
  /**
15
- * Truncate tool output if it exceeds size limits.
19
+ * Generate a head/tail preview of a tool output string.
20
+ * If the output is within limits, returns it unchanged with truncated: false.
21
+ * If over limits, keeps the first 25% and last 75% with an omission notice.
22
+ *
23
+ * Industry pattern: 25/75 head/tail split. Head captures schema/headers/structure,
24
+ * tail captures the most recent and typically most relevant data.
16
25
  */
17
- export declare function truncateToolOutput(output: string, options?: TruncateOptions): TruncateResult;
26
+ export declare function generateToolOutputPreview(output: string, options?: ToolOutputPreviewOptions): ToolOutputPreviewResult;
@@ -1,84 +1,78 @@
1
1
  /**
2
- * Tool Output Size Limits
3
- *
4
- * Truncates tool outputs exceeding size limits.
5
- * Can save full output to disk with a pointer.
6
- * Modeled on OpenCode's approach.
2
+ * Tool output preview generation.
3
+ * Generates head/tail previews of large tool outputs for context-efficient LLM calls.
4
+ * @module
7
5
  */
8
- import { writeFileSync, mkdirSync } from "fs";
9
- import { join } from "path";
10
- import { randomUUID } from "crypto";
11
- import { tmpdir } from "os";
12
- /** Maximum tool output size in bytes (50KB) */
13
- export const MAX_TOOL_OUTPUT_BYTES = 50 * 1024;
14
- /** Maximum tool output lines */
15
- export const MAX_TOOL_OUTPUT_LINES = 2_000;
6
+ /** Default maximum preview size in bytes (50KB) */
7
+ export const DEFAULT_MAX_PREVIEW_BYTES = 50 * 1024;
8
+ /** Default maximum preview lines */
9
+ export const DEFAULT_MAX_PREVIEW_LINES = 2_000;
10
+ /** Default head ratio (25% of preview budget) */
11
+ export const DEFAULT_HEAD_RATIO = 0.25;
12
+ /** Tool name referenced in truncation notices for on-demand full-output access */
13
+ export const RETRIEVE_CONTEXT_TOOL_NAME = "retrieve_context";
14
+ /** Default tail ratio (75% of preview budget) */
15
+ export const DEFAULT_TAIL_RATIO = 0.75;
16
16
  /**
17
- * Truncate tool output if it exceeds size limits.
17
+ * Generate a head/tail preview of a tool output string.
18
+ * If the output is within limits, returns it unchanged with truncated: false.
19
+ * If over limits, keeps the first 25% and last 75% with an omission notice.
20
+ *
21
+ * Industry pattern: 25/75 head/tail split. Head captures schema/headers/structure,
22
+ * tail captures the most recent and typically most relevant data.
18
23
  */
19
- export function truncateToolOutput(output, options) {
20
- const maxBytes = options?.maxBytes ?? MAX_TOOL_OUTPUT_BYTES;
21
- const maxLines = options?.maxLines ?? MAX_TOOL_OUTPUT_LINES;
22
- const direction = options?.direction ?? "tail";
23
- const saveToDisk = options?.saveToDisk ?? false;
24
+ export function generateToolOutputPreview(output, options) {
25
+ const maxBytes = options?.maxBytes ?? DEFAULT_MAX_PREVIEW_BYTES;
26
+ const maxLines = options?.maxLines ?? DEFAULT_MAX_PREVIEW_LINES;
27
+ const rawHeadRatio = options?.headRatio ?? DEFAULT_HEAD_RATIO;
28
+ const rawTailRatio = options?.tailRatio ?? DEFAULT_TAIL_RATIO;
29
+ // Clamp ratios to valid range to avoid negative omittedBytes
30
+ const headRatio = Math.max(0, Math.min(1, rawHeadRatio));
31
+ const tailRatio = Math.max(0, Math.min(1, rawTailRatio));
24
32
  const originalSize = Buffer.byteLength(output, "utf-8");
25
- // Check byte limit
26
- const exceedsBytes = originalSize > maxBytes;
27
- // Check line limit
28
33
  const lines = output.split("\n");
34
+ const exceedsBytes = originalSize > maxBytes;
29
35
  const exceedsLines = lines.length > maxLines;
30
36
  if (!exceedsBytes && !exceedsLines) {
31
- return { content: output, truncated: false, originalSize };
37
+ return { preview: output, truncated: false, originalSize };
32
38
  }
33
- // Save to disk if requested
34
- let savedPath;
35
- if (saveToDisk) {
36
- try {
37
- const saveDir = options?.saveDir ?? join(tmpdir(), "neurolink-tool-output");
38
- mkdirSync(saveDir, { recursive: true });
39
- savedPath = join(saveDir, `tool-output-${randomUUID()}.txt`);
40
- writeFileSync(savedPath, output, "utf-8");
41
- }
42
- catch {
43
- // Silently fail disk save
44
- }
45
- }
46
- // Apply truncation
47
- let truncated;
39
+ // Line-based split
40
+ const headLineCount = Math.max(1, Math.floor(maxLines * headRatio));
41
+ const tailLineCount = Math.max(1, maxLines - headLineCount);
42
+ let head;
43
+ let tail;
48
44
  if (exceedsLines) {
49
- if (direction === "head") {
50
- truncated = lines.slice(0, maxLines).join("\n");
51
- }
52
- else {
53
- truncated = lines.slice(-maxLines).join("\n");
54
- }
45
+ head = lines.slice(0, headLineCount).join("\n");
46
+ tail = lines.slice(-tailLineCount).join("\n");
55
47
  }
56
48
  else {
57
- truncated = output;
58
- }
59
- // Apply byte limit
60
- if (Buffer.byteLength(truncated, "utf-8") > maxBytes) {
61
- if (direction === "head") {
62
- truncated = truncated.slice(0, maxBytes);
63
- }
64
- else {
65
- truncated = truncated.slice(-maxBytes);
66
- }
49
+ head = lines
50
+ .slice(0, Math.max(1, Math.floor(lines.length * headRatio)))
51
+ .join("\n");
52
+ tail = lines
53
+ .slice(-Math.max(1, Math.ceil(lines.length * tailRatio)))
54
+ .join("\n");
67
55
  }
68
- // Add truncation notice
69
- const notice = savedPath
70
- ? `\n\n[Output truncated from ${originalSize} bytes to ${Buffer.byteLength(truncated, "utf-8")} bytes. Full output saved to: ${savedPath}]`
71
- : `\n\n[Output truncated from ${originalSize} bytes to ${Buffer.byteLength(truncated, "utf-8")} bytes]`;
72
- if (direction === "head") {
73
- truncated = truncated + notice;
56
+ // Byte-based cap on each portion
57
+ const headMaxBytes = Math.floor(maxBytes * headRatio);
58
+ const tailMaxBytes = maxBytes - headMaxBytes;
59
+ if (Buffer.byteLength(head, "utf-8") > headMaxBytes) {
60
+ head = Buffer.from(head, "utf-8")
61
+ .subarray(0, headMaxBytes)
62
+ .toString("utf-8");
74
63
  }
75
- else {
76
- truncated = notice + "\n" + truncated;
64
+ if (Buffer.byteLength(tail, "utf-8") > tailMaxBytes) {
65
+ const tailBuf = Buffer.from(tail, "utf-8");
66
+ tail = tailBuf.subarray(tailBuf.length - tailMaxBytes).toString("utf-8");
77
67
  }
68
+ const omittedBytes = Math.max(0, originalSize -
69
+ Buffer.byteLength(head, "utf-8") -
70
+ Buffer.byteLength(tail, "utf-8"));
71
+ const notice = `\n\n[... ${omittedBytes} bytes omitted. ` +
72
+ `Use ${RETRIEVE_CONTEXT_TOOL_NAME} tool to access full output ...]\n\n`;
78
73
  return {
79
- content: truncated,
74
+ preview: head + notice + tail,
80
75
  truncated: true,
81
- savedPath,
82
76
  originalSize,
83
77
  };
84
78
  }
@@ -218,9 +218,18 @@ export declare abstract class BaseProvider implements AIProvider {
218
218
  */
219
219
  setSessionContext(sessionId?: string, userId?: string): void;
220
220
  /**
221
- * Provider-specific error handling
221
+ * Provider-specific error formatting.
222
+ * Subclasses implement this to produce human-readable error messages
223
+ * (e.g., "❌ Google Vertex AI Provider Error\n\n...").
222
224
  */
223
- protected abstract handleProviderError(error: unknown): Error;
225
+ protected abstract formatProviderError(error: unknown): Error;
226
+ /**
227
+ * Handle provider errors with abort passthrough.
228
+ * AbortErrors are never wrapped — they must propagate with their
229
+ * original identity so that isAbortError() can detect them in
230
+ * retry/fallback loops (directProviderGeneration, performMCPGenerationRetries).
231
+ */
232
+ protected handleProviderError(error: unknown): Error;
224
233
  /**
225
234
  * Image generation method. Providers that support it should override this.
226
235
  * By default, it throws an error indicating that the functionality is not supported.
@@ -174,7 +174,7 @@ export class BaseProvider {
174
174
  temperature: options.temperature,
175
175
  maxTokens: options.maxTokens,
176
176
  tools: options.tools, // 🔧 FIX: Pass user-provided tools (including RAG tools) to generation pipeline
177
- disableTools: false,
177
+ disableTools: !!options.disableTools,
178
178
  maxSteps: options.maxSteps || 5,
179
179
  provider: options.provider,
180
180
  model: options.model,
@@ -767,6 +767,21 @@ export class BaseProvider {
767
767
  this.userId = userId;
768
768
  this.toolsManager.setSessionContext(sessionId, userId);
769
769
  }
770
+ /**
771
+ * Handle provider errors with abort passthrough.
772
+ * AbortErrors are never wrapped — they must propagate with their
773
+ * original identity so that isAbortError() can detect them in
774
+ * retry/fallback loops (directProviderGeneration, performMCPGenerationRetries).
775
+ */
776
+ handleProviderError(error) {
777
+ if (isAbortError(error)) {
778
+ // Preserve AbortError identity — never wrap in provider-specific formatting
779
+ return error instanceof Error
780
+ ? error
781
+ : new DOMException("The operation was aborted", "AbortError");
782
+ }
783
+ return this.formatProviderError(error);
784
+ }
770
785
  /**
771
786
  * Image generation method. Providers that support it should override this.
772
787
  * By default, it throws an error indicating that the functionality is not supported.
@@ -50,7 +50,7 @@ export declare class ConversationMemoryManager implements IConversationMemoryMan
50
50
  * Returns messages from pointer onwards (or all if no pointer)
51
51
  * Now consistently async to match Redis implementation
52
52
  */
53
- buildContextMessages(sessionId: string): Promise<ChatMessage[]>;
53
+ buildContextMessages(sessionId: string, _userId?: string, _enableSummarization?: boolean, requestId?: string): Promise<ChatMessage[]>;
54
54
  getSession(sessionId: string, _userId?: string): SessionMemory | undefined;
55
55
  createSummarySystemMessage(content: string, summarizesFrom?: string, summarizesTo?: string): ChatMessage;
56
56
  private ensureInitialized;
@@ -59,4 +59,16 @@ export declare class ConversationMemoryManager implements IConversationMemoryMan
59
59
  getStats(): Promise<ConversationMemoryStats>;
60
60
  clearSession(sessionId: string): Promise<boolean>;
61
61
  clearAllSessions(): Promise<void>;
62
+ /**
63
+ * Get the raw messages array for a session.
64
+ * Returns the full messages list without context filtering or summarization.
65
+ * Returns a deep copy to prevent external mutation of internal state.
66
+ */
67
+ getSessionMessages(sessionId: string, _userId?: string): Promise<ChatMessage[]>;
68
+ /**
69
+ * Replace the entire messages array for a session.
70
+ * Creates the session if it does not exist.
71
+ * Resets summary pointers since old pointers may reference messages that no longer exist.
72
+ */
73
+ setSessionMessages(sessionId: string, messages: ChatMessage[], userId?: string): Promise<void>;
62
74
  }