@juspay/neurolink 9.10.1 → 9.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. package/CHANGELOG.md +16 -0
  2. package/dist/agent/directTools.d.ts +3 -3
  3. package/dist/cli/commands/config.d.ts +9 -9
  4. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  5. package/dist/constants/contextWindows.d.ts +6 -3
  6. package/dist/constants/contextWindows.js +30 -3
  7. package/dist/constants/index.d.ts +3 -3
  8. package/dist/constants/retry.d.ts +4 -4
  9. package/dist/constants/retry.js +1 -1
  10. package/dist/context/contextCompactor.d.ts +1 -1
  11. package/dist/context/contextCompactor.js +59 -1
  12. package/dist/context/summarizationEngine.d.ts +2 -2
  13. package/dist/context/summarizationEngine.js +44 -18
  14. package/dist/context/toolOutputLimits.d.ts +22 -13
  15. package/dist/context/toolOutputLimits.js +58 -64
  16. package/dist/core/baseProvider.d.ts +11 -2
  17. package/dist/core/baseProvider.js +16 -1
  18. package/dist/core/conversationMemoryManager.d.ts +13 -1
  19. package/dist/core/conversationMemoryManager.js +36 -5
  20. package/dist/core/modules/GenerationHandler.d.ts +6 -0
  21. package/dist/core/modules/GenerationHandler.js +192 -7
  22. package/dist/core/modules/MessageBuilder.js +42 -4
  23. package/dist/core/modules/TelemetryHandler.js +4 -1
  24. package/dist/core/redisConversationMemoryManager.d.ts +19 -3
  25. package/dist/core/redisConversationMemoryManager.js +253 -58
  26. package/dist/index.d.ts +2 -0
  27. package/dist/index.js +3 -0
  28. package/dist/lib/agent/directTools.d.ts +7 -7
  29. package/dist/lib/constants/contextWindows.d.ts +6 -3
  30. package/dist/lib/constants/contextWindows.js +30 -3
  31. package/dist/lib/constants/index.d.ts +3 -3
  32. package/dist/lib/constants/retry.d.ts +4 -4
  33. package/dist/lib/constants/retry.js +1 -1
  34. package/dist/lib/context/contextCompactor.d.ts +1 -1
  35. package/dist/lib/context/contextCompactor.js +59 -1
  36. package/dist/lib/context/summarizationEngine.d.ts +2 -2
  37. package/dist/lib/context/summarizationEngine.js +44 -18
  38. package/dist/lib/context/toolOutputLimits.d.ts +22 -13
  39. package/dist/lib/context/toolOutputLimits.js +58 -64
  40. package/dist/lib/core/baseProvider.d.ts +11 -2
  41. package/dist/lib/core/baseProvider.js +16 -1
  42. package/dist/lib/core/conversationMemoryManager.d.ts +13 -1
  43. package/dist/lib/core/conversationMemoryManager.js +36 -5
  44. package/dist/lib/core/modules/GenerationHandler.d.ts +6 -0
  45. package/dist/lib/core/modules/GenerationHandler.js +192 -7
  46. package/dist/lib/core/modules/MessageBuilder.js +42 -4
  47. package/dist/lib/core/modules/TelemetryHandler.js +4 -1
  48. package/dist/lib/core/redisConversationMemoryManager.d.ts +19 -3
  49. package/dist/lib/core/redisConversationMemoryManager.js +253 -58
  50. package/dist/lib/files/fileTools.d.ts +3 -3
  51. package/dist/lib/index.d.ts +2 -0
  52. package/dist/lib/index.js +3 -0
  53. package/dist/lib/mcp/externalServerManager.js +46 -10
  54. package/dist/lib/memory/memoryRetrievalTools.d.ts +166 -0
  55. package/dist/lib/memory/memoryRetrievalTools.js +145 -0
  56. package/dist/lib/neurolink.d.ts +35 -1
  57. package/dist/lib/neurolink.js +476 -16
  58. package/dist/lib/providers/amazonBedrock.d.ts +1 -1
  59. package/dist/lib/providers/amazonBedrock.js +78 -45
  60. package/dist/lib/providers/amazonSagemaker.d.ts +1 -1
  61. package/dist/lib/providers/amazonSagemaker.js +1 -1
  62. package/dist/lib/providers/anthropic.d.ts +1 -1
  63. package/dist/lib/providers/anthropic.js +7 -7
  64. package/dist/lib/providers/anthropicBaseProvider.d.ts +1 -1
  65. package/dist/lib/providers/anthropicBaseProvider.js +7 -6
  66. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  67. package/dist/lib/providers/azureOpenai.js +1 -1
  68. package/dist/lib/providers/googleAiStudio.d.ts +1 -1
  69. package/dist/lib/providers/googleAiStudio.js +5 -5
  70. package/dist/lib/providers/googleVertex.d.ts +1 -1
  71. package/dist/lib/providers/googleVertex.js +74 -17
  72. package/dist/lib/providers/huggingFace.d.ts +1 -1
  73. package/dist/lib/providers/huggingFace.js +1 -1
  74. package/dist/lib/providers/litellm.d.ts +1 -1
  75. package/dist/lib/providers/litellm.js +18 -16
  76. package/dist/lib/providers/mistral.d.ts +1 -1
  77. package/dist/lib/providers/mistral.js +1 -1
  78. package/dist/lib/providers/ollama.d.ts +1 -1
  79. package/dist/lib/providers/ollama.js +8 -7
  80. package/dist/lib/providers/openAI.d.ts +1 -1
  81. package/dist/lib/providers/openAI.js +6 -6
  82. package/dist/lib/providers/openRouter.d.ts +1 -1
  83. package/dist/lib/providers/openRouter.js +6 -2
  84. package/dist/lib/providers/openaiCompatible.d.ts +1 -1
  85. package/dist/lib/providers/openaiCompatible.js +1 -1
  86. package/dist/lib/proxy/proxyFetch.js +291 -65
  87. package/dist/lib/server/utils/validation.d.ts +4 -4
  88. package/dist/lib/services/server/ai/observability/instrumentation.js +12 -3
  89. package/dist/lib/telemetry/telemetryService.d.ts +2 -1
  90. package/dist/lib/telemetry/telemetryService.js +8 -1
  91. package/dist/lib/types/contextTypes.d.ts +26 -2
  92. package/dist/lib/types/conversation.d.ts +72 -40
  93. package/dist/lib/types/conversationMemoryInterface.d.ts +5 -1
  94. package/dist/lib/types/generateTypes.d.ts +26 -0
  95. package/dist/lib/types/modelTypes.d.ts +2 -2
  96. package/dist/lib/types/multimodal.d.ts +2 -0
  97. package/dist/lib/types/observability.d.ts +10 -0
  98. package/dist/lib/types/sdkTypes.d.ts +1 -1
  99. package/dist/lib/utils/conversationMemory.d.ts +4 -3
  100. package/dist/lib/utils/conversationMemory.js +44 -6
  101. package/dist/lib/utils/errorHandling.d.ts +5 -0
  102. package/dist/lib/utils/errorHandling.js +7 -2
  103. package/dist/lib/utils/logger.d.ts +8 -0
  104. package/dist/lib/utils/logger.js +56 -1
  105. package/dist/lib/utils/messageBuilder.js +74 -4
  106. package/dist/lib/utils/redis.js +6 -1
  107. package/dist/lib/utils/tokenEstimation.d.ts +2 -2
  108. package/dist/lib/utils/tokenEstimation.js +16 -1
  109. package/dist/lib/workflow/config.d.ts +110 -110
  110. package/dist/mcp/externalServerManager.js +46 -10
  111. package/dist/memory/memoryRetrievalTools.d.ts +166 -0
  112. package/dist/memory/memoryRetrievalTools.js +144 -0
  113. package/dist/neurolink.d.ts +35 -1
  114. package/dist/neurolink.js +476 -16
  115. package/dist/providers/amazonBedrock.d.ts +1 -1
  116. package/dist/providers/amazonBedrock.js +78 -45
  117. package/dist/providers/amazonSagemaker.d.ts +1 -1
  118. package/dist/providers/amazonSagemaker.js +1 -1
  119. package/dist/providers/anthropic.d.ts +1 -1
  120. package/dist/providers/anthropic.js +7 -7
  121. package/dist/providers/anthropicBaseProvider.d.ts +1 -1
  122. package/dist/providers/anthropicBaseProvider.js +7 -6
  123. package/dist/providers/azureOpenai.d.ts +1 -1
  124. package/dist/providers/azureOpenai.js +1 -1
  125. package/dist/providers/googleAiStudio.d.ts +1 -1
  126. package/dist/providers/googleAiStudio.js +5 -5
  127. package/dist/providers/googleVertex.d.ts +1 -1
  128. package/dist/providers/googleVertex.js +74 -17
  129. package/dist/providers/huggingFace.d.ts +1 -1
  130. package/dist/providers/huggingFace.js +1 -1
  131. package/dist/providers/litellm.d.ts +1 -1
  132. package/dist/providers/litellm.js +18 -16
  133. package/dist/providers/mistral.d.ts +1 -1
  134. package/dist/providers/mistral.js +1 -1
  135. package/dist/providers/ollama.d.ts +1 -1
  136. package/dist/providers/ollama.js +8 -7
  137. package/dist/providers/openAI.d.ts +1 -1
  138. package/dist/providers/openAI.js +6 -6
  139. package/dist/providers/openRouter.d.ts +1 -1
  140. package/dist/providers/openRouter.js +6 -2
  141. package/dist/providers/openaiCompatible.d.ts +1 -1
  142. package/dist/providers/openaiCompatible.js +1 -1
  143. package/dist/proxy/proxyFetch.js +291 -65
  144. package/dist/services/server/ai/observability/instrumentation.js +12 -3
  145. package/dist/telemetry/telemetryService.d.ts +2 -1
  146. package/dist/telemetry/telemetryService.js +8 -1
  147. package/dist/types/contextTypes.d.ts +26 -2
  148. package/dist/types/conversation.d.ts +72 -40
  149. package/dist/types/conversationMemoryInterface.d.ts +5 -1
  150. package/dist/types/generateTypes.d.ts +26 -0
  151. package/dist/types/modelTypes.d.ts +10 -10
  152. package/dist/types/multimodal.d.ts +2 -0
  153. package/dist/types/observability.d.ts +10 -0
  154. package/dist/types/sdkTypes.d.ts +1 -1
  155. package/dist/utils/conversationMemory.d.ts +4 -3
  156. package/dist/utils/conversationMemory.js +44 -6
  157. package/dist/utils/errorHandling.d.ts +5 -0
  158. package/dist/utils/errorHandling.js +7 -2
  159. package/dist/utils/logger.d.ts +8 -0
  160. package/dist/utils/logger.js +56 -1
  161. package/dist/utils/messageBuilder.js +74 -4
  162. package/dist/utils/redis.js +6 -1
  163. package/dist/utils/tokenEstimation.d.ts +2 -2
  164. package/dist/utils/tokenEstimation.js +16 -1
  165. package/dist/workflow/config.d.ts +12 -12
  166. package/package.json +1 -1
@@ -1,84 +1,78 @@
1
1
  /**
2
- * Tool Output Size Limits
3
- *
4
- * Truncates tool outputs exceeding size limits.
5
- * Can save full output to disk with a pointer.
6
- * Modeled on OpenCode's approach.
2
+ * Tool output preview generation.
3
+ * Generates head/tail previews of large tool outputs for context-efficient LLM calls.
4
+ * @module
7
5
  */
8
- import { writeFileSync, mkdirSync } from "fs";
9
- import { join } from "path";
10
- import { randomUUID } from "crypto";
11
- import { tmpdir } from "os";
12
- /** Maximum tool output size in bytes (50KB) */
13
- export const MAX_TOOL_OUTPUT_BYTES = 50 * 1024;
14
- /** Maximum tool output lines */
15
- export const MAX_TOOL_OUTPUT_LINES = 2_000;
6
+ /** Default maximum preview size in bytes (50KB) */
7
+ export const DEFAULT_MAX_PREVIEW_BYTES = 50 * 1024;
8
+ /** Default maximum preview lines */
9
+ export const DEFAULT_MAX_PREVIEW_LINES = 2_000;
10
+ /** Default head ratio (25% of preview budget) */
11
+ export const DEFAULT_HEAD_RATIO = 0.25;
12
+ /** Tool name referenced in truncation notices for on-demand full-output access */
13
+ export const RETRIEVE_CONTEXT_TOOL_NAME = "retrieve_context";
14
+ /** Default tail ratio (75% of preview budget) */
15
+ export const DEFAULT_TAIL_RATIO = 0.75;
16
16
  /**
17
- * Truncate tool output if it exceeds size limits.
17
+ * Generate a head/tail preview of a tool output string.
18
+ * If the output is within limits, returns it unchanged with truncated: false.
19
+ * If over limits, keeps the first 25% and last 75% with an omission notice.
20
+ *
21
+ * Industry pattern: 25/75 head/tail split. Head captures schema/headers/structure,
22
+ * tail captures the most recent and typically most relevant data.
18
23
  */
19
- export function truncateToolOutput(output, options) {
20
- const maxBytes = options?.maxBytes ?? MAX_TOOL_OUTPUT_BYTES;
21
- const maxLines = options?.maxLines ?? MAX_TOOL_OUTPUT_LINES;
22
- const direction = options?.direction ?? "tail";
23
- const saveToDisk = options?.saveToDisk ?? false;
24
+ export function generateToolOutputPreview(output, options) {
25
+ const maxBytes = options?.maxBytes ?? DEFAULT_MAX_PREVIEW_BYTES;
26
+ const maxLines = options?.maxLines ?? DEFAULT_MAX_PREVIEW_LINES;
27
+ const rawHeadRatio = options?.headRatio ?? DEFAULT_HEAD_RATIO;
28
+ const rawTailRatio = options?.tailRatio ?? DEFAULT_TAIL_RATIO;
29
+ // Clamp ratios to valid range to avoid negative omittedBytes
30
+ const headRatio = Math.max(0, Math.min(1, rawHeadRatio));
31
+ const tailRatio = Math.max(0, Math.min(1, rawTailRatio));
24
32
  const originalSize = Buffer.byteLength(output, "utf-8");
25
- // Check byte limit
26
- const exceedsBytes = originalSize > maxBytes;
27
- // Check line limit
28
33
  const lines = output.split("\n");
34
+ const exceedsBytes = originalSize > maxBytes;
29
35
  const exceedsLines = lines.length > maxLines;
30
36
  if (!exceedsBytes && !exceedsLines) {
31
- return { content: output, truncated: false, originalSize };
37
+ return { preview: output, truncated: false, originalSize };
32
38
  }
33
- // Save to disk if requested
34
- let savedPath;
35
- if (saveToDisk) {
36
- try {
37
- const saveDir = options?.saveDir ?? join(tmpdir(), "neurolink-tool-output");
38
- mkdirSync(saveDir, { recursive: true });
39
- savedPath = join(saveDir, `tool-output-${randomUUID()}.txt`);
40
- writeFileSync(savedPath, output, "utf-8");
41
- }
42
- catch {
43
- // Silently fail disk save
44
- }
45
- }
46
- // Apply truncation
47
- let truncated;
39
+ // Line-based split
40
+ const headLineCount = Math.max(1, Math.floor(maxLines * headRatio));
41
+ const tailLineCount = Math.max(1, maxLines - headLineCount);
42
+ let head;
43
+ let tail;
48
44
  if (exceedsLines) {
49
- if (direction === "head") {
50
- truncated = lines.slice(0, maxLines).join("\n");
51
- }
52
- else {
53
- truncated = lines.slice(-maxLines).join("\n");
54
- }
45
+ head = lines.slice(0, headLineCount).join("\n");
46
+ tail = lines.slice(-tailLineCount).join("\n");
55
47
  }
56
48
  else {
57
- truncated = output;
58
- }
59
- // Apply byte limit
60
- if (Buffer.byteLength(truncated, "utf-8") > maxBytes) {
61
- if (direction === "head") {
62
- truncated = truncated.slice(0, maxBytes);
63
- }
64
- else {
65
- truncated = truncated.slice(-maxBytes);
66
- }
49
+ head = lines
50
+ .slice(0, Math.max(1, Math.floor(lines.length * headRatio)))
51
+ .join("\n");
52
+ tail = lines
53
+ .slice(-Math.max(1, Math.ceil(lines.length * tailRatio)))
54
+ .join("\n");
67
55
  }
68
- // Add truncation notice
69
- const notice = savedPath
70
- ? `\n\n[Output truncated from ${originalSize} bytes to ${Buffer.byteLength(truncated, "utf-8")} bytes. Full output saved to: ${savedPath}]`
71
- : `\n\n[Output truncated from ${originalSize} bytes to ${Buffer.byteLength(truncated, "utf-8")} bytes]`;
72
- if (direction === "head") {
73
- truncated = truncated + notice;
56
+ // Byte-based cap on each portion
57
+ const headMaxBytes = Math.floor(maxBytes * headRatio);
58
+ const tailMaxBytes = maxBytes - headMaxBytes;
59
+ if (Buffer.byteLength(head, "utf-8") > headMaxBytes) {
60
+ head = Buffer.from(head, "utf-8")
61
+ .subarray(0, headMaxBytes)
62
+ .toString("utf-8");
74
63
  }
75
- else {
76
- truncated = notice + "\n" + truncated;
64
+ if (Buffer.byteLength(tail, "utf-8") > tailMaxBytes) {
65
+ const tailBuf = Buffer.from(tail, "utf-8");
66
+ tail = tailBuf.subarray(tailBuf.length - tailMaxBytes).toString("utf-8");
77
67
  }
68
+ const omittedBytes = Math.max(0, originalSize -
69
+ Buffer.byteLength(head, "utf-8") -
70
+ Buffer.byteLength(tail, "utf-8"));
71
+ const notice = `\n\n[... ${omittedBytes} bytes omitted. ` +
72
+ `Use ${RETRIEVE_CONTEXT_TOOL_NAME} tool to access full output ...]\n\n`;
78
73
  return {
79
- content: truncated,
74
+ preview: head + notice + tail,
80
75
  truncated: true,
81
- savedPath,
82
76
  originalSize,
83
77
  };
84
78
  }
@@ -218,9 +218,18 @@ export declare abstract class BaseProvider implements AIProvider {
218
218
  */
219
219
  setSessionContext(sessionId?: string, userId?: string): void;
220
220
  /**
221
- * Provider-specific error handling
221
+ * Provider-specific error formatting.
222
+ * Subclasses implement this to produce human-readable error messages
223
+ * (e.g., "❌ Google Vertex AI Provider Error\n\n...").
222
224
  */
223
- protected abstract handleProviderError(error: unknown): Error;
225
+ protected abstract formatProviderError(error: unknown): Error;
226
+ /**
227
+ * Handle provider errors with abort passthrough.
228
+ * AbortErrors are never wrapped — they must propagate with their
229
+ * original identity so that isAbortError() can detect them in
230
+ * retry/fallback loops (directProviderGeneration, performMCPGenerationRetries).
231
+ */
232
+ protected handleProviderError(error: unknown): Error;
224
233
  /**
225
234
  * Image generation method. Providers that support it should override this.
226
235
  * By default, it throws an error indicating that the functionality is not supported.
@@ -174,7 +174,7 @@ export class BaseProvider {
174
174
  temperature: options.temperature,
175
175
  maxTokens: options.maxTokens,
176
176
  tools: options.tools, // 🔧 FIX: Pass user-provided tools (including RAG tools) to generation pipeline
177
- disableTools: false,
177
+ disableTools: !!options.disableTools,
178
178
  maxSteps: options.maxSteps || 5,
179
179
  provider: options.provider,
180
180
  model: options.model,
@@ -767,6 +767,21 @@ export class BaseProvider {
767
767
  this.userId = userId;
768
768
  this.toolsManager.setSessionContext(sessionId, userId);
769
769
  }
770
+ /**
771
+ * Handle provider errors with abort passthrough.
772
+ * AbortErrors are never wrapped — they must propagate with their
773
+ * original identity so that isAbortError() can detect them in
774
+ * retry/fallback loops (directProviderGeneration, performMCPGenerationRetries).
775
+ */
776
+ handleProviderError(error) {
777
+ if (isAbortError(error)) {
778
+ // Preserve AbortError identity — never wrap in provider-specific formatting
779
+ return error instanceof Error
780
+ ? error
781
+ : new DOMException("The operation was aborted", "AbortError");
782
+ }
783
+ return this.formatProviderError(error);
784
+ }
770
785
  /**
771
786
  * Image generation method. Providers that support it should override this.
772
787
  * By default, it throws an error indicating that the functionality is not supported.
@@ -50,7 +50,7 @@ export declare class ConversationMemoryManager implements IConversationMemoryMan
50
50
  * Returns messages from pointer onwards (or all if no pointer)
51
51
  * Now consistently async to match Redis implementation
52
52
  */
53
- buildContextMessages(sessionId: string): Promise<ChatMessage[]>;
53
+ buildContextMessages(sessionId: string, _userId?: string, _enableSummarization?: boolean, requestId?: string): Promise<ChatMessage[]>;
54
54
  getSession(sessionId: string, _userId?: string): SessionMemory | undefined;
55
55
  createSummarySystemMessage(content: string, summarizesFrom?: string, summarizesTo?: string): ChatMessage;
56
56
  private ensureInitialized;
@@ -59,4 +59,16 @@ export declare class ConversationMemoryManager implements IConversationMemoryMan
59
59
  getStats(): Promise<ConversationMemoryStats>;
60
60
  clearSession(sessionId: string): Promise<boolean>;
61
61
  clearAllSessions(): Promise<void>;
62
+ /**
63
+ * Get the raw messages array for a session.
64
+ * Returns the full messages list without context filtering or summarization.
65
+ * Returns a deep copy to prevent external mutation of internal state.
66
+ */
67
+ getSessionMessages(sessionId: string, _userId?: string): Promise<ChatMessage[]>;
68
+ /**
69
+ * Replace the entire messages array for a session.
70
+ * Creates the session if it does not exist.
71
+ * Resets summary pointers since old pointers may reference messages that no longer exist.
72
+ */
73
+ setSessionMessages(sessionId: string, messages: ChatMessage[], userId?: string): Promise<void>;
62
74
  }
@@ -92,11 +92,12 @@ export class ConversationMemoryManager {
92
92
  if (!this.summarizationInProgress.has(options.sessionId)) {
93
93
  setImmediate(async () => {
94
94
  try {
95
- await this.checkAndSummarize(session, tokenThreshold);
95
+ await this.checkAndSummarize(session, tokenThreshold, options.requestId);
96
96
  }
97
97
  catch (error) {
98
98
  logger.error("Background summarization failed", {
99
99
  sessionId: session.sessionId,
100
+ requestId: options.requestId,
100
101
  error: error instanceof Error ? error.message : String(error),
101
102
  });
102
103
  }
@@ -154,7 +155,7 @@ export class ConversationMemoryManager {
154
155
  /**
155
156
  * Check if summarization is needed based on token count
156
157
  */
157
- async checkAndSummarize(session, threshold) {
158
+ async checkAndSummarize(session, threshold, requestId) {
158
159
  // Acquire lock - if already in progress, skip
159
160
  if (this.summarizationInProgress.has(session.sessionId)) {
160
161
  logger.debug("[ConversationMemoryManager] Summarization already in progress, skipping", {
@@ -164,7 +165,7 @@ export class ConversationMemoryManager {
164
165
  }
165
166
  this.summarizationInProgress.add(session.sessionId);
166
167
  try {
167
- await this.summarizationEngine.checkAndSummarize(session, threshold, this.config, "[ConversationMemory]");
168
+ await this.summarizationEngine.checkAndSummarize(session, threshold, this.config, "[ConversationMemory]", requestId);
168
169
  }
169
170
  catch (error) {
170
171
  logger.error("Token counting or summarization failed", {
@@ -195,9 +196,9 @@ export class ConversationMemoryManager {
195
196
  * Returns messages from pointer onwards (or all if no pointer)
196
197
  * Now consistently async to match Redis implementation
197
198
  */
198
- async buildContextMessages(sessionId) {
199
+ async buildContextMessages(sessionId, _userId, _enableSummarization, requestId) {
199
200
  const session = this.sessions.get(sessionId);
200
- return session ? buildContextFromPointer(session) : [];
201
+ return session ? buildContextFromPointer(session, requestId) : [];
201
202
  }
202
203
  getSession(sessionId, _userId) {
203
204
  return this.sessions.get(sessionId);
@@ -263,4 +264,34 @@ export class ConversationMemoryManager {
263
264
  this.sessions.clear();
264
265
  logger.info("All sessions cleared", { clearedCount: sessionIds.length });
265
266
  }
267
+ /**
268
+ * Get the raw messages array for a session.
269
+ * Returns the full messages list without context filtering or summarization.
270
+ * Returns a deep copy to prevent external mutation of internal state.
271
+ */
272
+ async getSessionMessages(sessionId, _userId) {
273
+ await this.ensureInitialized();
274
+ const session = this.sessions.get(sessionId);
275
+ return session ? session.messages.map((msg) => ({ ...msg })) : [];
276
+ }
277
+ /**
278
+ * Replace the entire messages array for a session.
279
+ * Creates the session if it does not exist.
280
+ * Resets summary pointers since old pointers may reference messages that no longer exist.
281
+ */
282
+ async setSessionMessages(sessionId, messages, userId) {
283
+ await this.ensureInitialized();
284
+ let session = this.sessions.get(sessionId);
285
+ if (!session) {
286
+ session = this.createNewSession(sessionId, userId);
287
+ this.sessions.set(sessionId, session);
288
+ this.enforceSessionLimit();
289
+ }
290
+ session.messages = [...messages];
291
+ session.summarizedUpToMessageId = undefined;
292
+ session.summarizedMessage = undefined;
293
+ session.lastTokenCount = undefined;
294
+ session.lastCountedAt = undefined;
295
+ session.lastActivity = Date.now();
296
+ }
266
297
  }
@@ -38,6 +38,12 @@ export declare class GenerationHandler {
38
38
  * Execute the generation with AI SDK
39
39
  */
40
40
  executeGeneration(model: LanguageModelV1, messages: CoreMessage[], tools: Record<string, Tool>, options: TextGenerationOptions): Promise<Awaited<ReturnType<typeof generateText>>>;
41
+ /**
42
+ * Extract cache metrics from provider metadata (e.g. Anthropic's providerMetadata.anthropic)
43
+ * The Vercel AI SDK's LanguageModelUsage only has promptTokens/completionTokens/totalTokens.
44
+ * Cache metrics are surfaced via providerMetadata by provider-specific SDK adapters.
45
+ */
46
+ private extractCacheMetricsFromProviderMetadata;
41
47
  /**
42
48
  * Log generation completion information
43
49
  */
@@ -14,8 +14,24 @@
14
14
  */
15
15
  import { generateText, Output, NoObjectGeneratedError } from "ai";
16
16
  import { logger } from "../../utils/logger.js";
17
- import { extractTokenUsage } from "../../utils/tokenUtils.js";
17
+ import { extractTokenUsage, extractCacheCreationTokens, extractCacheReadTokens, calculateCacheSavingsPercent, } from "../../utils/tokenUtils.js";
18
18
  import { DEFAULT_MAX_STEPS } from "../constants.js";
19
+ /**
20
+ * Safely preview-serialize a value for debug logging.
21
+ * Handles undefined, circular references, and non-serializable values.
22
+ */
23
+ function safePreview(v) {
24
+ if (v === undefined) {
25
+ return "";
26
+ }
27
+ try {
28
+ const text = typeof v === "string" ? v : JSON.stringify(v);
29
+ return (text ?? "").substring(0, 200);
30
+ }
31
+ catch {
32
+ return "[unserializable]";
33
+ }
34
+ }
19
35
  /**
20
36
  * GenerationHandler class - Handles text generation operations for AI providers
21
37
  */
@@ -39,16 +55,41 @@ export class GenerationHandler {
39
55
  async callGenerateText(model, messages, tools, options, shouldUseTools, includeStructuredOutput) {
40
56
  // Check if this is a Google provider (for provider-specific options)
41
57
  const isGoogleProvider = this.providerName === "google-ai" || this.providerName === "vertex";
42
- // Check if this is an Anthropic provider
43
- const isAnthropicProvider = this.providerName === "anthropic" || this.providerName === "bedrock";
58
+ // Check if this is an Anthropic provider (includes Vertex+Claude)
59
+ const isAnthropicProvider = this.providerName === "anthropic" ||
60
+ this.providerName === "bedrock" ||
61
+ (this.providerName === "vertex" && this.modelName?.startsWith("claude-"));
44
62
  const useStructuredOutput = includeStructuredOutput &&
45
63
  !!options.schema &&
46
64
  (options.output?.format === "json" ||
47
65
  options.output?.format === "structured");
66
+ // Annotate the last tool with cache_control so the full tool-definition
67
+ // block becomes a cache breakpoint for Anthropic-family providers.
68
+ // Non-Anthropic providers harmlessly ignore unknown providerOptions.
69
+ // Note: The AI SDK Tool type doesn't yet include providerOptions, so we
70
+ // use a type assertion. The Anthropic adapter reads this at runtime.
71
+ const toolsWithCache = { ...tools };
72
+ if (isAnthropicProvider &&
73
+ shouldUseTools &&
74
+ Object.keys(toolsWithCache).length > 0) {
75
+ const toolNames = Object.keys(toolsWithCache);
76
+ const lastToolName = toolNames[toolNames.length - 1];
77
+ if (lastToolName && toolsWithCache[lastToolName]) {
78
+ const lastTool = toolsWithCache[lastToolName];
79
+ toolsWithCache[lastToolName] = {
80
+ ...lastTool,
81
+ providerOptions: {
82
+ ...(lastTool.providerOptions ?? {}),
83
+ anthropic: { cacheControl: { type: "ephemeral" } },
84
+ },
85
+ };
86
+ }
87
+ }
48
88
  return await generateText({
49
89
  model,
50
90
  messages,
51
- ...(shouldUseTools && Object.keys(tools).length > 0 && { tools }),
91
+ ...(shouldUseTools &&
92
+ Object.keys(toolsWithCache).length > 0 && { tools: toolsWithCache }),
52
93
  maxSteps: options.maxSteps ?? DEFAULT_MAX_STEPS,
53
94
  ...(shouldUseTools &&
54
95
  options.toolChoice && { toolChoice: options.toolChoice }),
@@ -116,8 +157,90 @@ export class GenerationHandler {
116
157
  const useStructuredOutput = !!options.schema &&
117
158
  (options.output?.format === "json" ||
118
159
  options.output?.format === "structured");
160
+ const requestId = options.requestId ||
161
+ options.context?.requestId ||
162
+ "unknown";
163
+ logger.info("[GenerationHandler] Calling generateText", {
164
+ requestId,
165
+ model: model.modelId || "unknown",
166
+ messageCount: messages.length,
167
+ toolCount: Object.keys(tools || {}).length,
168
+ maxSteps: options.maxSteps,
169
+ temperature: options.temperature,
170
+ });
171
+ if (logger.shouldLog("debug")) {
172
+ try {
173
+ logger.debug("[Observability] Full generateText parameters", {
174
+ requestId,
175
+ model: model.modelId || "unknown",
176
+ messageCount: messages.length,
177
+ messages: messages.map((msg, i) => ({
178
+ index: i,
179
+ role: msg.role,
180
+ contentLength: typeof msg.content === "string"
181
+ ? msg.content.length
182
+ : safePreview(msg.content).length,
183
+ contentPreview: typeof msg.content === "string"
184
+ ? msg.content.substring(0, 200)
185
+ : "[multimodal]",
186
+ })),
187
+ toolNames: Object.keys(tools || {}),
188
+ toolCount: Object.keys(tools || {}).length,
189
+ maxSteps: options.maxSteps,
190
+ temperature: options.temperature,
191
+ maxTokens: options.maxTokens,
192
+ });
193
+ }
194
+ catch {
195
+ // Ignore serialization errors in debug logging
196
+ }
197
+ }
198
+ const genStartTime = Date.now();
119
199
  try {
120
- return await this.callGenerateText(model, messages, tools, options, shouldUseTools, true);
200
+ const result = await this.callGenerateText(model, messages, tools, options, shouldUseTools, true);
201
+ logger.info("[GenerationHandler] generateText returned", {
202
+ requestId,
203
+ durationMs: Date.now() - genStartTime,
204
+ finishReason: result.finishReason,
205
+ steps: result.steps?.length || 1,
206
+ toolCallsTotal: result.toolCalls?.length || 0,
207
+ responseChars: result.text?.length || 0,
208
+ });
209
+ if (logger.shouldLog("debug")) {
210
+ logger.debug("[Observability] Full LLM response", {
211
+ requestId,
212
+ finishReason: result.finishReason,
213
+ responseTextPreview: result.text?.substring(0, 200) || "",
214
+ responseTextLength: result.text?.length || 0,
215
+ toolCalls: result.toolCalls?.map((tc) => ({
216
+ toolName: tc.toolName,
217
+ argsPreview: safePreview(tc.args),
218
+ })),
219
+ toolResults: result.toolResults?.map((tr) => ({
220
+ toolName: tr.toolName,
221
+ resultPreview: safePreview(tr.result),
222
+ })),
223
+ steps: result.steps?.map((step, i) => ({
224
+ stepIndex: i,
225
+ stepType: step.stepType,
226
+ textPreview: step.text?.substring(0, 200),
227
+ textLength: step.text?.length || 0,
228
+ toolCalls: step.toolCalls?.map((tc) => ({
229
+ toolName: tc.toolName,
230
+ argsPreview: safePreview(tc.args),
231
+ })),
232
+ toolResults: step.toolResults?.map((tr) => ({
233
+ toolName: tr.toolName,
234
+ resultPreview: safePreview(tr.result),
235
+ })),
236
+ finishReason: step.finishReason,
237
+ })),
238
+ usage: result.usage,
239
+ providerMetadata: result.experimental_providerMetadata ||
240
+ result.providerMetadata,
241
+ });
242
+ }
243
+ return result;
121
244
  }
122
245
  catch (error) {
123
246
  // If NoObjectGeneratedError is thrown when using schema + tools together,
@@ -130,16 +253,50 @@ export class GenerationHandler {
130
253
  });
131
254
  // Retry without experimental_output - the formatEnhancedResult method
132
255
  // will extract JSON from the text response
133
- return await this.callGenerateText(model, messages, tools, options, shouldUseTools, false);
256
+ const result = await this.callGenerateText(model, messages, tools, options, shouldUseTools, false);
257
+ logger.info("[GenerationHandler] generateText returned (fallback)", {
258
+ requestId,
259
+ durationMs: Date.now() - genStartTime,
260
+ finishReason: result.finishReason,
261
+ steps: result.steps?.length || 1,
262
+ toolCallsTotal: result.toolCalls?.length || 0,
263
+ responseChars: result.text?.length || 0,
264
+ });
265
+ return result;
134
266
  }
135
267
  // Re-throw other errors
136
268
  throw error;
137
269
  }
138
270
  }
271
+ /**
272
+ * Extract cache metrics from provider metadata (e.g. Anthropic's providerMetadata.anthropic)
273
+ * The Vercel AI SDK's LanguageModelUsage only has promptTokens/completionTokens/totalTokens.
274
+ * Cache metrics are surfaced via providerMetadata by provider-specific SDK adapters.
275
+ */
276
+ extractCacheMetricsFromProviderMetadata(generateResult) {
277
+ const providerMeta = generateResult
278
+ .providerMetadata ||
279
+ generateResult.experimental_providerMetadata;
280
+ if (!providerMeta) {
281
+ return {};
282
+ }
283
+ // Anthropic surfaces cache metrics under providerMetadata.anthropic
284
+ const anthropicMeta = providerMeta.anthropic;
285
+ if (anthropicMeta) {
286
+ const cacheCreationTokens = extractCacheCreationTokens(anthropicMeta);
287
+ const cacheReadTokens = extractCacheReadTokens(anthropicMeta);
288
+ return {
289
+ ...(cacheCreationTokens !== undefined && { cacheCreationTokens }),
290
+ ...(cacheReadTokens !== undefined && { cacheReadTokens }),
291
+ };
292
+ }
293
+ return {};
294
+ }
139
295
  /**
140
296
  * Log generation completion information
141
297
  */
142
298
  logGenerationComplete(generateResult) {
299
+ const cacheMetrics = this.extractCacheMetricsFromProviderMetadata(generateResult);
143
300
  logger.debug(`generateText completed`, {
144
301
  provider: this.providerName,
145
302
  model: this.modelName,
@@ -147,6 +304,12 @@ export class GenerationHandler {
147
304
  toolResultsCount: generateResult.toolResults?.length || 0,
148
305
  finishReason: generateResult.finishReason,
149
306
  usage: generateResult.usage,
307
+ ...(cacheMetrics.cacheCreationTokens !== undefined && {
308
+ cacheCreationTokens: cacheMetrics.cacheCreationTokens,
309
+ }),
310
+ ...(cacheMetrics.cacheReadTokens !== undefined && {
311
+ cacheReadTokens: cacheMetrics.cacheReadTokens,
312
+ }),
150
313
  timestamp: Date.now(),
151
314
  });
152
315
  }
@@ -274,6 +437,28 @@ export class GenerationHandler {
274
437
  // Note: The AI SDK bundles thinking tokens into promptTokens for Google models.
275
438
  // Separate reasoningTokens tracking will work when/if the AI SDK adds support.
276
439
  const usage = extractTokenUsage(generateResult.usage);
440
+ // Merge cache metrics from providerMetadata if not already present in usage
441
+ // The AI SDK's LanguageModelUsage doesn't include cache tokens; they come from
442
+ // provider-specific metadata (e.g. Anthropic's providerMetadata.anthropic)
443
+ if (usage.cacheCreationTokens === undefined ||
444
+ usage.cacheReadTokens === undefined) {
445
+ const cacheMetrics = this.extractCacheMetricsFromProviderMetadata(generateResult);
446
+ if (usage.cacheCreationTokens === undefined &&
447
+ cacheMetrics.cacheCreationTokens !== undefined) {
448
+ usage.cacheCreationTokens = cacheMetrics.cacheCreationTokens;
449
+ }
450
+ if (usage.cacheReadTokens === undefined &&
451
+ cacheMetrics.cacheReadTokens !== undefined) {
452
+ usage.cacheReadTokens = cacheMetrics.cacheReadTokens;
453
+ }
454
+ // Recalculate cache savings if we added cache metrics
455
+ if (usage.cacheReadTokens !== undefined) {
456
+ const savingsPercent = calculateCacheSavingsPercent(usage.cacheReadTokens, usage.input);
457
+ if (savingsPercent !== undefined) {
458
+ usage.cacheSavingsPercent = savingsPercent;
459
+ }
460
+ }
461
+ }
277
462
  return {
278
463
  content,
279
464
  usage,
@@ -309,7 +494,7 @@ export class GenerationHandler {
309
494
  provider: this.providerName,
310
495
  model: this.modelName,
311
496
  responseTextLength: result.text?.length || 0,
312
- responsePreview: result.text?.substring(0, 500) + "...",
497
+ responsePreview: result.text?.substring(0, 500) ?? "",
313
498
  finishReason: result.finishReason,
314
499
  usage: result.usage,
315
500
  });