@juspay/neurolink 9.26.2 → 9.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +59 -9
  3. package/dist/cli/commands/config.d.ts +4 -4
  4. package/dist/cli/commands/mcp.d.ts +87 -0
  5. package/dist/cli/commands/mcp.js +1524 -0
  6. package/dist/cli/loop/optionsSchema.js +4 -0
  7. package/dist/core/modules/ToolsManager.js +29 -2
  8. package/dist/index.d.ts +2 -1
  9. package/dist/index.js +27 -1
  10. package/dist/lib/core/modules/ToolsManager.js +29 -2
  11. package/dist/lib/index.d.ts +2 -1
  12. package/dist/lib/index.js +27 -1
  13. package/dist/lib/mcp/agentExposure.d.ts +228 -0
  14. package/dist/lib/mcp/agentExposure.js +357 -0
  15. package/dist/lib/mcp/batching/index.d.ts +11 -0
  16. package/dist/lib/mcp/batching/index.js +11 -0
  17. package/dist/lib/mcp/batching/requestBatcher.d.ts +202 -0
  18. package/dist/lib/mcp/batching/requestBatcher.js +442 -0
  19. package/dist/lib/mcp/caching/index.d.ts +11 -0
  20. package/dist/lib/mcp/caching/index.js +11 -0
  21. package/dist/lib/mcp/caching/toolCache.d.ts +221 -0
  22. package/dist/lib/mcp/caching/toolCache.js +434 -0
  23. package/dist/lib/mcp/elicitation/elicitationManager.d.ts +169 -0
  24. package/dist/lib/mcp/elicitation/elicitationManager.js +377 -0
  25. package/dist/lib/mcp/elicitation/index.d.ts +11 -0
  26. package/dist/lib/mcp/elicitation/index.js +12 -0
  27. package/dist/lib/mcp/elicitation/types.d.ts +278 -0
  28. package/dist/lib/mcp/elicitation/types.js +11 -0
  29. package/dist/lib/mcp/elicitationProtocol.d.ts +228 -0
  30. package/dist/lib/mcp/elicitationProtocol.js +376 -0
  31. package/dist/lib/mcp/enhancedToolDiscovery.d.ts +205 -0
  32. package/dist/lib/mcp/enhancedToolDiscovery.js +482 -0
  33. package/dist/lib/mcp/index.d.ts +38 -1
  34. package/dist/lib/mcp/index.js +36 -3
  35. package/dist/lib/mcp/mcpRegistryClient.d.ts +332 -0
  36. package/dist/lib/mcp/mcpRegistryClient.js +489 -0
  37. package/dist/lib/mcp/mcpServerBase.d.ts +227 -0
  38. package/dist/lib/mcp/mcpServerBase.js +374 -0
  39. package/dist/lib/mcp/multiServerManager.d.ts +310 -0
  40. package/dist/lib/mcp/multiServerManager.js +580 -0
  41. package/dist/lib/mcp/routing/index.d.ts +11 -0
  42. package/dist/lib/mcp/routing/index.js +11 -0
  43. package/dist/lib/mcp/routing/toolRouter.d.ts +219 -0
  44. package/dist/lib/mcp/routing/toolRouter.js +417 -0
  45. package/dist/lib/mcp/serverCapabilities.d.ts +341 -0
  46. package/dist/lib/mcp/serverCapabilities.js +503 -0
  47. package/dist/lib/mcp/toolAnnotations.d.ts +154 -0
  48. package/dist/lib/mcp/toolAnnotations.js +240 -0
  49. package/dist/lib/mcp/toolConverter.d.ts +178 -0
  50. package/dist/lib/mcp/toolConverter.js +259 -0
  51. package/dist/lib/mcp/toolIntegration.d.ts +136 -0
  52. package/dist/lib/mcp/toolIntegration.js +335 -0
  53. package/dist/lib/memory/hippocampusInitializer.d.ts +2 -2
  54. package/dist/lib/memory/hippocampusInitializer.js +1 -1
  55. package/dist/lib/neurolink.d.ts +275 -2
  56. package/dist/lib/neurolink.js +596 -56
  57. package/dist/lib/providers/litellm.d.ts +10 -0
  58. package/dist/lib/providers/litellm.js +104 -2
  59. package/dist/lib/types/configTypes.d.ts +56 -0
  60. package/dist/lib/types/conversation.d.ts +2 -2
  61. package/dist/lib/types/generateTypes.d.ts +4 -0
  62. package/dist/lib/types/index.d.ts +2 -1
  63. package/dist/lib/types/modelTypes.d.ts +6 -6
  64. package/dist/lib/types/streamTypes.d.ts +2 -0
  65. package/dist/lib/types/tools.d.ts +2 -0
  66. package/dist/lib/utils/pricing.js +177 -17
  67. package/dist/lib/utils/schemaConversion.d.ts +6 -1
  68. package/dist/lib/utils/schemaConversion.js +50 -28
  69. package/dist/lib/workflow/config.d.ts +16 -16
  70. package/dist/mcp/agentExposure.d.ts +228 -0
  71. package/dist/mcp/agentExposure.js +356 -0
  72. package/dist/mcp/batching/index.d.ts +11 -0
  73. package/dist/mcp/batching/index.js +10 -0
  74. package/dist/mcp/batching/requestBatcher.d.ts +202 -0
  75. package/dist/mcp/batching/requestBatcher.js +441 -0
  76. package/dist/mcp/caching/index.d.ts +11 -0
  77. package/dist/mcp/caching/index.js +10 -0
  78. package/dist/mcp/caching/toolCache.d.ts +221 -0
  79. package/dist/mcp/caching/toolCache.js +433 -0
  80. package/dist/mcp/elicitation/elicitationManager.d.ts +169 -0
  81. package/dist/mcp/elicitation/elicitationManager.js +376 -0
  82. package/dist/mcp/elicitation/index.d.ts +11 -0
  83. package/dist/mcp/elicitation/index.js +11 -0
  84. package/dist/mcp/elicitation/types.d.ts +278 -0
  85. package/dist/mcp/elicitation/types.js +10 -0
  86. package/dist/mcp/elicitationProtocol.d.ts +228 -0
  87. package/dist/mcp/elicitationProtocol.js +375 -0
  88. package/dist/mcp/enhancedToolDiscovery.d.ts +205 -0
  89. package/dist/mcp/enhancedToolDiscovery.js +481 -0
  90. package/dist/mcp/index.d.ts +38 -1
  91. package/dist/mcp/index.js +36 -3
  92. package/dist/mcp/mcpRegistryClient.d.ts +332 -0
  93. package/dist/mcp/mcpRegistryClient.js +488 -0
  94. package/dist/mcp/mcpServerBase.d.ts +227 -0
  95. package/dist/mcp/mcpServerBase.js +373 -0
  96. package/dist/mcp/multiServerManager.d.ts +310 -0
  97. package/dist/mcp/multiServerManager.js +579 -0
  98. package/dist/mcp/routing/index.d.ts +11 -0
  99. package/dist/mcp/routing/index.js +10 -0
  100. package/dist/mcp/routing/toolRouter.d.ts +219 -0
  101. package/dist/mcp/routing/toolRouter.js +416 -0
  102. package/dist/mcp/serverCapabilities.d.ts +341 -0
  103. package/dist/mcp/serverCapabilities.js +502 -0
  104. package/dist/mcp/toolAnnotations.d.ts +154 -0
  105. package/dist/mcp/toolAnnotations.js +239 -0
  106. package/dist/mcp/toolConverter.d.ts +178 -0
  107. package/dist/mcp/toolConverter.js +258 -0
  108. package/dist/mcp/toolIntegration.d.ts +136 -0
  109. package/dist/mcp/toolIntegration.js +334 -0
  110. package/dist/memory/hippocampusInitializer.d.ts +2 -2
  111. package/dist/memory/hippocampusInitializer.js +1 -1
  112. package/dist/neurolink.d.ts +275 -2
  113. package/dist/neurolink.js +596 -56
  114. package/dist/providers/litellm.d.ts +10 -0
  115. package/dist/providers/litellm.js +104 -2
  116. package/dist/types/configTypes.d.ts +56 -0
  117. package/dist/types/conversation.d.ts +2 -2
  118. package/dist/types/generateTypes.d.ts +4 -0
  119. package/dist/types/index.d.ts +2 -1
  120. package/dist/types/streamTypes.d.ts +2 -0
  121. package/dist/types/tools.d.ts +2 -0
  122. package/dist/utils/pricing.js +177 -17
  123. package/dist/utils/schemaConversion.d.ts +6 -1
  124. package/dist/utils/schemaConversion.js +50 -28
  125. package/package.json +2 -2
@@ -29,6 +29,16 @@ export declare class LiteLLMProvider extends BaseProvider {
29
29
  * Note: This is only used when tools are disabled
30
30
  */
31
31
  protected executeStream(options: StreamOptions, analysisSchema?: ZodType<unknown, ZodTypeDef, unknown> | Schema<unknown>): Promise<StreamResult>;
32
+ /**
33
+ * Generate an embedding for a single text input
34
+ * Uses the LiteLLM proxy with OpenAI-compatible embedding API
35
+ */
36
+ embed(text: string, modelName?: string): Promise<number[]>;
37
+ /**
38
+ * Generate embeddings for multiple text inputs
39
+ * Uses the LiteLLM proxy with OpenAI-compatible embedding API
40
+ */
41
+ embedMany(texts: string[], modelName?: string): Promise<number[][]>;
32
42
  /**
33
43
  * Get available models from LiteLLM proxy server
34
44
  * Dynamically fetches from /v1/models endpoint with caching and fallback
@@ -1,4 +1,5 @@
1
1
  import { createOpenAI } from "@ai-sdk/openai";
2
+ import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
2
3
  import { Output, streamText, } from "ai";
3
4
  import { BaseProvider } from "../core/baseProvider.js";
4
5
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
@@ -7,8 +8,10 @@ import { createProxyFetch } from "../proxy/proxyFetch.js";
7
8
  import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
8
9
  import { isAbortError } from "../utils/errorHandling.js";
9
10
  import { logger } from "../utils/logger.js";
11
+ import { calculateCost } from "../utils/pricing.js";
10
12
  import { getProviderModel } from "../utils/providerConfig.js";
11
13
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
14
+ const streamTracer = trace.getTracer("neurolink.provider.litellm");
12
15
  // Configuration helpers
13
16
  const getLiteLLMConfig = () => {
14
17
  return {
@@ -55,7 +58,9 @@ export class LiteLLMProvider extends BaseProvider {
55
58
  apiKey: config.apiKey,
56
59
  fetch: createProxyFetch(),
57
60
  });
58
- this.model = customOpenAI(this.modelName || getDefaultLiteLLMModel());
61
+ this.model = customOpenAI(this.modelName || getDefaultLiteLLMModel(), {
62
+ structuredOutputs: false,
63
+ });
59
64
  logger.debug("LiteLLM Provider initialized", {
60
65
  modelName: this.modelName,
61
66
  provider: this.providerName,
@@ -206,7 +211,64 @@ export class LiteLLMProvider extends BaseProvider {
206
211
  });
207
212
  }
208
213
  }
209
- const result = await streamText(streamOptions);
214
+ // Wrap streamText in an OTel span to capture provider-level latency, token usage, and cost
215
+ const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
216
+ kind: SpanKind.CLIENT,
217
+ attributes: {
218
+ "gen_ai.system": "litellm",
219
+ "gen_ai.request.model": model.modelId || this.modelName || "unknown",
220
+ },
221
+ });
222
+ let result;
223
+ try {
224
+ result = streamText(streamOptions);
225
+ }
226
+ catch (streamError) {
227
+ streamSpan.setStatus({
228
+ code: SpanStatusCode.ERROR,
229
+ message: streamError instanceof Error
230
+ ? streamError.message
231
+ : String(streamError),
232
+ });
233
+ streamSpan.end();
234
+ throw streamError;
235
+ }
236
+ // Collect token usage, cost, and finish reason asynchronously when the stream completes,
237
+ // then end the span. This avoids blocking the stream consumer.
238
+ result.usage
239
+ .then((usage) => {
240
+ streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens || 0);
241
+ streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens || 0);
242
+ const cost = calculateCost(this.providerName, this.modelName, {
243
+ input: usage.promptTokens || 0,
244
+ output: usage.completionTokens || 0,
245
+ total: (usage.promptTokens || 0) + (usage.completionTokens || 0),
246
+ });
247
+ if (cost && cost > 0) {
248
+ streamSpan.setAttribute("neurolink.cost", cost);
249
+ }
250
+ })
251
+ .catch(() => {
252
+ // Usage may not be available if the stream is aborted
253
+ });
254
+ result.finishReason
255
+ .then((reason) => {
256
+ streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
257
+ })
258
+ .catch(() => {
259
+ // Finish reason may not be available if the stream is aborted
260
+ });
261
+ result.text
262
+ .then(() => {
263
+ streamSpan.end();
264
+ })
265
+ .catch((err) => {
266
+ streamSpan.setStatus({
267
+ code: SpanStatusCode.ERROR,
268
+ message: err instanceof Error ? err.message : String(err),
269
+ });
270
+ streamSpan.end();
271
+ });
210
272
  timeoutController?.cleanup();
211
273
  // Transform stream to content object stream using fullStream (handles both text and tool calls)
212
274
  // Note: fullStream includes tool results, textStream only has text
@@ -269,6 +331,46 @@ export class LiteLLMProvider extends BaseProvider {
269
331
  throw this.handleProviderError(error);
270
332
  }
271
333
  }
334
+ /**
335
+ * Generate an embedding for a single text input
336
+ * Uses the LiteLLM proxy with OpenAI-compatible embedding API
337
+ */
338
+ async embed(text, modelName) {
339
+ const { embed: aiEmbed } = await import("ai");
340
+ const { createOpenAI } = await import("@ai-sdk/openai");
341
+ const config = getLiteLLMConfig();
342
+ const embeddingModelName = modelName ||
343
+ process.env.LITELLM_EMBEDDING_MODEL ||
344
+ "gemini-embedding-001";
345
+ const customOpenAI = createOpenAI({
346
+ baseURL: config.baseURL,
347
+ apiKey: config.apiKey,
348
+ fetch: createProxyFetch(),
349
+ });
350
+ const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
351
+ const result = await aiEmbed({ model: embeddingModel, value: text });
352
+ return result.embedding;
353
+ }
354
+ /**
355
+ * Generate embeddings for multiple text inputs
356
+ * Uses the LiteLLM proxy with OpenAI-compatible embedding API
357
+ */
358
+ async embedMany(texts, modelName) {
359
+ const { embedMany: aiEmbedMany } = await import("ai");
360
+ const { createOpenAI } = await import("@ai-sdk/openai");
361
+ const config = getLiteLLMConfig();
362
+ const embeddingModelName = modelName ||
363
+ process.env.LITELLM_EMBEDDING_MODEL ||
364
+ "gemini-embedding-001";
365
+ const customOpenAI = createOpenAI({
366
+ baseURL: config.baseURL,
367
+ apiKey: config.apiKey,
368
+ fetch: createProxyFetch(),
369
+ });
370
+ const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
371
+ const result = await aiEmbedMany({ model: embeddingModel, values: texts });
372
+ return result.embeddings;
373
+ }
272
374
  /**
273
375
  * Get available models from LiteLLM proxy server
274
376
  * Dynamically fetches from /v1/models endpoint with caching and fallback
@@ -6,6 +6,9 @@ import { MCPToolRegistry } from "../mcp/toolRegistry.js";
6
6
  import type { HITLConfig } from "../types/hitlTypes.js";
7
7
  import type { ConversationMemoryConfig } from "./conversation.js";
8
8
  import type { ObservabilityConfig } from "./observability.js";
9
+ import type { RoutingStrategy } from "../mcp/routing/index.js";
10
+ import type { CacheStrategy } from "../mcp/caching/index.js";
11
+ import type { ToolMiddleware } from "../mcp/toolIntegration.js";
9
12
  /**
10
13
  * Main NeuroLink configuration type
11
14
  */
@@ -28,6 +31,59 @@ export type NeurolinkConstructorConfig = {
28
31
  toolRegistry?: MCPToolRegistry;
29
32
  observability?: ObservabilityConfig;
30
33
  modelAliasConfig?: import("./generateTypes.js").ModelAliasConfig;
34
+ /** MCP enhancement modules configuration (cache, router, batcher, annotations, middleware) */
35
+ mcp?: MCPEnhancementsConfig;
36
+ };
37
+ /**
38
+ * Configuration for MCP enhancement modules wired into generate()/stream() paths.
39
+ *
40
+ * These modules are automatically applied during tool execution when configured:
41
+ * - cache: Tool result caching (disabled by default)
42
+ * - annotations: Auto-infer tool safety metadata (enabled by default)
43
+ * - router: Multi-server tool routing (auto-activates with 2+ servers)
44
+ * - batcher: Batch programmatic tool calls (disabled by default)
45
+ * - discovery: Enhanced tool search and filtering (enabled by default)
46
+ * - middleware: Global tool execution middleware chain (empty by default)
47
+ */
48
+ export type MCPEnhancementsConfig = {
49
+ /** Tool result caching. Default: disabled. Enable to cache read-only tool results. */
50
+ cache?: {
51
+ enabled?: boolean;
52
+ /** Cache TTL in milliseconds. Default: 300000 (5 min) */
53
+ ttl?: number;
54
+ /** Maximum cache entries. Default: 500 */
55
+ maxSize?: number;
56
+ /** Eviction strategy. Default: 'lru' */
57
+ strategy?: CacheStrategy;
58
+ };
59
+ /** Tool annotation auto-inference. Default: enabled. */
60
+ annotations?: {
61
+ enabled?: boolean;
62
+ /** Auto-infer annotations from tool name/description. Default: true */
63
+ autoInfer?: boolean;
64
+ };
65
+ /** Tool routing for multi-server environments. Auto-activates when 2+ external servers exist. */
66
+ router?: {
67
+ enabled?: boolean;
68
+ /** Routing strategy. Default: 'least-loaded' */
69
+ strategy?: RoutingStrategy;
70
+ /** Enable session affinity. Default: false */
71
+ enableAffinity?: boolean;
72
+ };
73
+ /** Request batching for programmatic executeTool() calls. Default: disabled. */
74
+ batcher?: {
75
+ enabled?: boolean;
76
+ /** Max requests per batch. Default: 10 */
77
+ maxBatchSize?: number;
78
+ /** Max wait before flushing batch in ms. Default: 100 */
79
+ maxWaitMs?: number;
80
+ };
81
+ /** Enhanced tool discovery. Default: enabled. */
82
+ discovery?: {
83
+ enabled?: boolean;
84
+ };
85
+ /** Global tool middleware applied to every tool execution. Default: empty. */
86
+ middleware?: ToolMiddleware[];
31
87
  };
32
88
  /**
33
89
  * Provider-specific configuration
@@ -33,8 +33,8 @@
33
33
  * - Current time (ISO): `new Date().toISOString()`
34
34
  */
35
35
  import type { Mem0Config } from "../memory/mem0Initializer.js";
36
- import type { Memory } from "../memory/hippocampusInitializer.js";
37
- export type { Memory };
36
+ import type { Memory, CustomStorageConfig } from "../memory/hippocampusInitializer.js";
37
+ export type { Memory, CustomStorageConfig };
38
38
  /**
39
39
  * Configuration for conversation memory feature
40
40
  */
@@ -259,6 +259,8 @@ export type GenerateOptions = {
259
259
  * Default: false (backward compatible — tool schemas are injected into system prompt).
260
260
  */
261
261
  skipToolPromptInjection?: boolean;
262
+ /** Disable tool result caching for this request (overrides global mcp.cache.enabled) */
263
+ disableToolCache?: boolean;
262
264
  /** Maximum number of tool execution steps (default: 200) */
263
265
  maxSteps?: number;
264
266
  /**
@@ -662,6 +664,8 @@ export type TextGenerationOptions = {
662
664
  toolFilter?: string[];
663
665
  /** Exclude these tools by name (blacklist). Applied after toolFilter. */
664
666
  excludeTools?: string[];
667
+ /** Disable tool result caching for this request (overrides global mcp.cache.enabled) */
668
+ disableToolCache?: boolean;
665
669
  /**
666
670
  * Tool choice configuration for the generation.
667
671
  * Controls whether and which tools the model must call.
@@ -4,7 +4,7 @@
4
4
  export { AIProviderName } from "../constants/enums.js";
5
5
  export * from "./cli.js";
6
6
  export * from "./common.js";
7
- export type { AnalyticsConfig, BackupInfo, BackupMetadata, CacheConfig, ConfigUpdateOptions, ConfigValidationResult, FallbackConfig, NeuroLinkConfig, PerformanceConfig, RetryConfig, ToolConfig, } from "./configTypes.js";
7
+ export type { AnalyticsConfig, BackupInfo, BackupMetadata, CacheConfig, ConfigUpdateOptions, ConfigValidationResult, FallbackConfig, MCPEnhancementsConfig, NeuroLinkConfig, PerformanceConfig, RetryConfig, ToolConfig, } from "./configTypes.js";
8
8
  export type { ExternalMCPConfigValidation, ExternalMCPManagerConfig, ExternalMCPOperationResult, ExternalMCPServerEvents, ExternalMCPServerHealth, ExternalMCPServerInstance, ExternalMCPServerStatus, ExternalMCPToolContext, ExternalMCPToolInfo, ExternalMCPToolResult, } from "./externalMcp.js";
9
9
  export type { AuthorizationUrlResult, CircuitBreakerConfig, CircuitBreakerEvents, CircuitBreakerState, CircuitBreakerStats, DiscoveredMcp, ExternalToolExecutionOptions, FlexibleValidationResult, HTTPRetryConfig, MCPClientResult, MCPConnectedServer, MCPDiscoveredServer, MCPExecutableTool, MCPOAuthConfig, MCPServerCategory, MCPServerConfig, MCPServerConnectionStatus, MCPServerMetadata, MCPServerRegistryEntry, MCPServerStatus, MCPToolInfo, MCPToolMetadata, MCPTransportType, McpMetadata, McpRegistry, NeuroLinkExecutionContext, NeuroLinkMCPServer, NeuroLinkMCPTool, OAuthClientInformation, OAuthTokens as McpOAuthTokens, RateLimitConfig, TokenBucketRateLimitConfig, TokenExchangeRequest, TokenStorage, ToolDiscoveryResult, ToolRegistryEvents, ToolValidationResult, } from "./mcpTypes.js";
10
10
  export type { ModelCapability, ModelFilter, ModelPricing, ModelResolutionContext, ModelStats, ModelUseCase, } from "./providers.js";
@@ -38,4 +38,5 @@ export * from "./contextTypes.js";
38
38
  export * from "./fileReferenceTypes.js";
39
39
  export * from "./ragTypes.js";
40
40
  export * from "./conversationMemoryInterface.js";
41
+ export type { CustomStorageConfig } from "./conversation.js";
41
42
  export * from "./subscriptionTypes.js";
@@ -329,6 +329,8 @@ export type StreamOptions = {
329
329
  toolFilter?: string[];
330
330
  /** Exclude these tools by name (blacklist). Applied after toolFilter. */
331
331
  excludeTools?: string[];
332
+ /** Disable tool result caching for this request (overrides global mcp.cache.enabled) */
333
+ disableToolCache?: boolean;
332
334
  /**
333
335
  * Skip injecting tool schemas into the system prompt.
334
336
  * When true, tools are ONLY passed natively via the provider's `tools` parameter,
@@ -75,6 +75,8 @@ export type ToolInfo = {
75
75
  serverId?: string;
76
76
  inputSchema?: StandardRecord;
77
77
  outputSchema?: StandardRecord;
78
+ /** MCP tool annotations (safety hints, metadata). Auto-inferred when mcp.annotations.autoInfer is enabled. */
79
+ annotations?: import("../mcp/toolAnnotations.js").MCPToolAnnotations;
78
80
  [key: string]: unknown;
79
81
  };
80
82
  /**
@@ -9,69 +9,198 @@
9
9
  * (Bedrock, Azure, Mistral, etc.) will return 0 from calculateCost().
10
10
  */
11
11
  const PRICING = {
12
- // Anthropic (direct API)
12
+ // Anthropic (direct API) — updated March 2026
13
13
  anthropic: {
14
+ // Claude 4.6 family
15
+ "claude-opus-4-6": {
16
+ input: 5.0 / 1_000_000,
17
+ output: 25.0 / 1_000_000,
18
+ cacheRead: 0.5 / 1_000_000,
19
+ cacheCreation: 6.25 / 1_000_000,
20
+ },
21
+ "claude-sonnet-4-6": {
22
+ input: 3.0 / 1_000_000,
23
+ output: 15.0 / 1_000_000,
24
+ cacheRead: 0.3 / 1_000_000,
25
+ cacheCreation: 3.75 / 1_000_000,
26
+ },
27
+ // Claude 4.5 family
14
28
  "claude-sonnet-4-5-20250929": {
15
29
  input: 3.0 / 1_000_000,
16
30
  output: 15.0 / 1_000_000,
17
31
  cacheRead: 0.3 / 1_000_000,
18
32
  cacheCreation: 3.75 / 1_000_000,
19
33
  },
20
- "claude-opus-4-6": {
34
+ "claude-opus-4-5": {
35
+ input: 5.0 / 1_000_000,
36
+ output: 25.0 / 1_000_000,
37
+ cacheRead: 0.5 / 1_000_000,
38
+ cacheCreation: 6.25 / 1_000_000,
39
+ },
40
+ "claude-haiku-4-5-20251001": {
41
+ input: 1.0 / 1_000_000,
42
+ output: 5.0 / 1_000_000,
43
+ cacheRead: 0.1 / 1_000_000,
44
+ cacheCreation: 1.25 / 1_000_000,
45
+ },
46
+ // Claude 4.0/4.1 family
47
+ "claude-opus-4-1": {
21
48
  input: 15.0 / 1_000_000,
22
49
  output: 75.0 / 1_000_000,
23
50
  cacheRead: 1.5 / 1_000_000,
24
51
  cacheCreation: 18.75 / 1_000_000,
25
52
  },
26
- "claude-haiku-4-5-20251001": {
53
+ "claude-opus-4": {
54
+ input: 15.0 / 1_000_000,
55
+ output: 75.0 / 1_000_000,
56
+ cacheRead: 1.5 / 1_000_000,
57
+ cacheCreation: 18.75 / 1_000_000,
58
+ },
59
+ "claude-sonnet-4": {
60
+ input: 3.0 / 1_000_000,
61
+ output: 15.0 / 1_000_000,
62
+ cacheRead: 0.3 / 1_000_000,
63
+ cacheCreation: 3.75 / 1_000_000,
64
+ },
65
+ // Claude 3.x family
66
+ "claude-3-7-sonnet": {
67
+ input: 3.0 / 1_000_000,
68
+ output: 15.0 / 1_000_000,
69
+ cacheRead: 0.3 / 1_000_000,
70
+ cacheCreation: 3.75 / 1_000_000,
71
+ },
72
+ "claude-3-5-sonnet": {
73
+ input: 3.0 / 1_000_000,
74
+ output: 15.0 / 1_000_000,
75
+ cacheRead: 0.3 / 1_000_000,
76
+ cacheCreation: 3.75 / 1_000_000,
77
+ },
78
+ "claude-3-5-haiku": {
27
79
  input: 0.8 / 1_000_000,
28
80
  output: 4.0 / 1_000_000,
29
81
  cacheRead: 0.08 / 1_000_000,
30
82
  cacheCreation: 1.0 / 1_000_000,
31
83
  },
84
+ "claude-3-opus": {
85
+ input: 15.0 / 1_000_000,
86
+ output: 75.0 / 1_000_000,
87
+ cacheRead: 1.5 / 1_000_000,
88
+ cacheCreation: 18.75 / 1_000_000,
89
+ },
90
+ "claude-3-sonnet": { input: 3.0 / 1_000_000, output: 15.0 / 1_000_000 },
91
+ "claude-3-haiku": { input: 0.25 / 1_000_000, output: 1.25 / 1_000_000 },
32
92
  },
33
- // Google Vertex AI (same models, same pricing)
93
+ // Google Vertex AI — Claude models on Vertex (same pricing, @ date suffix)
34
94
  vertex: {
35
- "claude-sonnet-4-5@20250929": {
95
+ "claude-sonnet-4-6": {
36
96
  input: 3.0 / 1_000_000,
37
97
  output: 15.0 / 1_000_000,
38
98
  cacheRead: 0.3 / 1_000_000,
39
99
  cacheCreation: 3.75 / 1_000_000,
40
100
  },
41
101
  "claude-opus-4-6": {
42
- input: 15.0 / 1_000_000,
43
- output: 75.0 / 1_000_000,
44
- cacheRead: 1.5 / 1_000_000,
45
- cacheCreation: 18.75 / 1_000_000,
102
+ input: 5.0 / 1_000_000,
103
+ output: 25.0 / 1_000_000,
104
+ cacheRead: 0.5 / 1_000_000,
105
+ cacheCreation: 6.25 / 1_000_000,
46
106
  },
47
- "claude-haiku-4-5@20251001": {
107
+ "claude-sonnet-4-5": {
108
+ input: 3.0 / 1_000_000,
109
+ output: 15.0 / 1_000_000,
110
+ cacheRead: 0.3 / 1_000_000,
111
+ cacheCreation: 3.75 / 1_000_000,
112
+ },
113
+ "claude-opus-4-5": {
114
+ input: 5.0 / 1_000_000,
115
+ output: 25.0 / 1_000_000,
116
+ cacheRead: 0.5 / 1_000_000,
117
+ cacheCreation: 6.25 / 1_000_000,
118
+ },
119
+ "claude-haiku-4-5": {
120
+ input: 1.0 / 1_000_000,
121
+ output: 5.0 / 1_000_000,
122
+ cacheRead: 0.1 / 1_000_000,
123
+ cacheCreation: 1.25 / 1_000_000,
124
+ },
125
+ "claude-3-5-haiku": {
48
126
  input: 0.8 / 1_000_000,
49
127
  output: 4.0 / 1_000_000,
50
128
  cacheRead: 0.08 / 1_000_000,
51
129
  cacheCreation: 1.0 / 1_000_000,
52
130
  },
131
+ "claude-3-5-sonnet": {
132
+ input: 3.0 / 1_000_000,
133
+ output: 15.0 / 1_000_000,
134
+ cacheRead: 0.3 / 1_000_000,
135
+ cacheCreation: 3.75 / 1_000_000,
136
+ },
53
137
  },
54
- // OpenAI
138
+ // OpenAI — updated March 2026
55
139
  openai: {
140
+ // GPT-5.x family
141
+ "gpt-5.4": { input: 2.5 / 1_000_000, output: 15.0 / 1_000_000 },
142
+ "gpt-5.2": { input: 1.75 / 1_000_000, output: 14.0 / 1_000_000 },
143
+ "gpt-5.1": { input: 0.625 / 1_000_000, output: 5.0 / 1_000_000 },
144
+ "gpt-5.1-codex": { input: 1.25 / 1_000_000, output: 10.0 / 1_000_000 },
145
+ "gpt-5": { input: 1.25 / 1_000_000, output: 10.0 / 1_000_000 },
146
+ "gpt-5-mini": { input: 0.25 / 1_000_000, output: 2.0 / 1_000_000 },
147
+ "gpt-5-nano": { input: 0.05 / 1_000_000, output: 0.4 / 1_000_000 },
148
+ // GPT-4.1 family
149
+ "gpt-4.1": { input: 2.0 / 1_000_000, output: 8.0 / 1_000_000 },
150
+ "gpt-4.1-mini": { input: 0.4 / 1_000_000, output: 1.6 / 1_000_000 },
151
+ "gpt-4.1-nano": { input: 0.1 / 1_000_000, output: 0.4 / 1_000_000 },
152
+ // GPT-4o family
56
153
  "gpt-4o": { input: 2.5 / 1_000_000, output: 10.0 / 1_000_000 },
57
154
  "gpt-4o-mini": { input: 0.15 / 1_000_000, output: 0.6 / 1_000_000 },
58
- "gpt-4-turbo": { input: 10.0 / 1_000_000, output: 30.0 / 1_000_000 },
155
+ // o-series reasoning
156
+ o3: { input: 2.0 / 1_000_000, output: 8.0 / 1_000_000 },
157
+ "o3-mini": { input: 1.1 / 1_000_000, output: 4.4 / 1_000_000 },
158
+ "o4-mini": { input: 1.1 / 1_000_000, output: 4.4 / 1_000_000 },
59
159
  o1: { input: 15.0 / 1_000_000, output: 60.0 / 1_000_000 },
60
- "o1-mini": { input: 1.1 / 1_000_000, output: 4.4 / 1_000_000 },
160
+ "o1-mini": { input: 0.55 / 1_000_000, output: 2.2 / 1_000_000 },
161
+ // Legacy
162
+ "gpt-4-turbo": { input: 10.0 / 1_000_000, output: 30.0 / 1_000_000 },
163
+ "gpt-4": { input: 30.0 / 1_000_000, output: 60.0 / 1_000_000 },
164
+ "gpt-3.5-turbo": { input: 0.5 / 1_000_000, output: 1.0 / 1_000_000 },
61
165
  },
62
- // Google (Gemini)
166
+ // Google (Gemini) — updated March 2026
63
167
  google: {
64
- "gemini-2.5-flash": { input: 0.15 / 1_000_000, output: 0.6 / 1_000_000 },
168
+ // Gemini 3.x family
169
+ "gemini-3.1-pro": { input: 2.0 / 1_000_000, output: 12.0 / 1_000_000 },
170
+ "gemini-3-pro": { input: 2.0 / 1_000_000, output: 12.0 / 1_000_000 },
171
+ "gemini-3-flash": { input: 0.5 / 1_000_000, output: 3.0 / 1_000_000 },
172
+ "gemini-3.1-flash-lite": {
173
+ input: 0.25 / 1_000_000,
174
+ output: 1.5 / 1_000_000,
175
+ },
176
+ // Gemini 2.5 family
177
+ "gemini-2.5-flash": { input: 0.3 / 1_000_000, output: 2.5 / 1_000_000 },
65
178
  "gemini-2.5-pro": { input: 1.25 / 1_000_000, output: 10.0 / 1_000_000 },
66
179
  "gemini-2.5-flash-lite": {
180
+ input: 0.1 / 1_000_000,
181
+ output: 0.4 / 1_000_000,
182
+ },
183
+ // Gemini 2.0 family (deprecated June 2026)
184
+ "gemini-2.0-flash": { input: 0.15 / 1_000_000, output: 0.6 / 1_000_000 },
185
+ "gemini-2.0-flash-lite": {
67
186
  input: 0.075 / 1_000_000,
68
187
  output: 0.3 / 1_000_000,
69
188
  },
70
- "gemini-2.0-flash": { input: 0.1 / 1_000_000, output: 0.4 / 1_000_000 },
71
- "gemini-2.0-pro": { input: 1.25 / 1_000_000, output: 10.0 / 1_000_000 },
189
+ // Gemini 1.5 family
72
190
  "gemini-1.5-pro": { input: 1.25 / 1_000_000, output: 5.0 / 1_000_000 },
73
191
  "gemini-1.5-flash": { input: 0.075 / 1_000_000, output: 0.3 / 1_000_000 },
74
192
  },
193
+ // Mistral AI
194
+ mistral: {
195
+ "mistral-large": { input: 2.0 / 1_000_000, output: 6.0 / 1_000_000 },
196
+ "mistral-medium": { input: 2.7 / 1_000_000, output: 8.1 / 1_000_000 },
197
+ "mistral-small": { input: 0.2 / 1_000_000, output: 0.6 / 1_000_000 },
198
+ codestral: { input: 0.3 / 1_000_000, output: 0.9 / 1_000_000 },
199
+ "open-mistral-nemo": {
200
+ input: 0.15 / 1_000_000,
201
+ output: 0.15 / 1_000_000,
202
+ },
203
+ },
75
204
  };
76
205
  /**
77
206
  * Map of normalized provider aliases to canonical PRICING keys.
@@ -80,10 +209,20 @@ const PRICING = {
80
209
  const PROVIDER_ALIASES = {
81
210
  googleai: "google",
82
211
  googleaistudio: "google",
212
+ googlevertex: "vertex",
83
213
  anthropic: "anthropic",
84
214
  openai: "openai",
85
215
  vertex: "vertex",
86
216
  google: "google",
217
+ mistral: "mistral",
218
+ mistralai: "mistral",
219
+ azure: "openai",
220
+ azureopenai: "openai",
221
+ bedrock: "anthropic",
222
+ amazonbedrock: "anthropic",
223
+ litellm: "__cross_provider__",
224
+ openrouter: "__cross_provider__",
225
+ openaicompatible: "__cross_provider__",
87
226
  };
88
227
  /**
89
228
  * Look up per-token rates for a provider/model combination.
@@ -96,6 +235,27 @@ const PROVIDER_ALIASES = {
96
235
  function findRates(provider, model) {
97
236
  const stripped = provider.toLowerCase().replace(/[^a-z]/g, "");
98
237
  const normalizedProvider = PROVIDER_ALIASES[stripped] ?? stripped;
238
+ // Proxy providers (LiteLLM, OpenRouter): search all known providers for a model match
239
+ if (normalizedProvider === "__cross_provider__") {
240
+ for (const providerPricing of Object.values(PRICING)) {
241
+ // Exact match
242
+ if (providerPricing[model]) {
243
+ return providerPricing[model];
244
+ }
245
+ const sortedKeys = Object.keys(providerPricing).sort((a, b) => b.length - a.length);
246
+ // model is a prefix of a known key (e.g. "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929")
247
+ const reverseKey = sortedKeys.find((k) => k.startsWith(model));
248
+ if (reverseKey) {
249
+ return providerPricing[reverseKey];
250
+ }
251
+ // Known key is a prefix of model (e.g. "gpt-4o" matches "gpt-4o-2024-08-06")
252
+ const forwardKey = sortedKeys.find((k) => model.startsWith(k));
253
+ if (forwardKey) {
254
+ return providerPricing[forwardKey];
255
+ }
256
+ }
257
+ return undefined;
258
+ }
99
259
  const providerPricing = PRICING[normalizedProvider] || PRICING[provider];
100
260
  if (!providerPricing) {
101
261
  return undefined;
@@ -12,7 +12,12 @@ import type { ZodUnknownSchema } from "../types/tools.js";
12
12
  */
13
13
  export declare function inlineJsonSchema(schema: Record<string, unknown>, definitions?: Record<string, Record<string, unknown>>, visited?: Set<string>): Record<string, unknown>;
14
14
  /**
15
- * Convert Zod schema to JSON Schema format for Claude AI
15
+ * Convert Zod schema to JSON Schema format for provider APIs.
16
+ *
17
+ * Handles three input types:
18
+ * 1. Zod schemas (have `_def.typeName`) — converted via zod-to-json-schema
19
+ * 2. AI SDK `jsonSchema()` wrappers (have `.jsonSchema` property) — extracted directly
20
+ * 3. Plain JSON Schema objects (have `type`/`properties` but no `_def`) — returned as-is
16
21
  */
17
22
  export declare function convertZodToJsonSchema(zodSchema: ZodUnknownSchema): object;
18
23
  /**