@juspay/neurolink 9.40.0 → 9.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +7 -1
  3. package/dist/auth/anthropicOAuth.d.ts +18 -3
  4. package/dist/auth/anthropicOAuth.js +137 -4
  5. package/dist/auth/providers/firebase.js +5 -1
  6. package/dist/auth/providers/jwt.js +5 -1
  7. package/dist/auth/providers/workos.js +5 -1
  8. package/dist/auth/sessionManager.d.ts +1 -1
  9. package/dist/auth/sessionManager.js +58 -27
  10. package/dist/browser/neurolink.min.js +471 -445
  11. package/dist/cli/commands/mcp.js +3 -0
  12. package/dist/cli/commands/proxy.d.ts +2 -1
  13. package/dist/cli/commands/proxy.js +279 -16
  14. package/dist/cli/commands/task.d.ts +56 -0
  15. package/dist/cli/commands/task.js +838 -0
  16. package/dist/cli/factories/commandFactory.d.ts +2 -0
  17. package/dist/cli/factories/commandFactory.js +38 -0
  18. package/dist/cli/parser.js +8 -4
  19. package/dist/client/aiSdkAdapter.js +3 -0
  20. package/dist/client/streamingClient.js +30 -10
  21. package/dist/core/modules/GenerationHandler.js +3 -2
  22. package/dist/core/redisConversationMemoryManager.js +7 -3
  23. package/dist/evaluation/BatchEvaluator.js +4 -1
  24. package/dist/evaluation/hooks/observabilityHooks.js +5 -3
  25. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  26. package/dist/evaluation/pipeline/evaluationPipeline.js +20 -8
  27. package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  28. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  29. package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
  30. package/dist/lib/auth/anthropicOAuth.js +137 -4
  31. package/dist/lib/auth/providers/firebase.js +5 -1
  32. package/dist/lib/auth/providers/jwt.js +5 -1
  33. package/dist/lib/auth/providers/workos.js +5 -1
  34. package/dist/lib/auth/sessionManager.d.ts +1 -1
  35. package/dist/lib/auth/sessionManager.js +58 -27
  36. package/dist/lib/client/aiSdkAdapter.js +3 -0
  37. package/dist/lib/client/streamingClient.js +30 -10
  38. package/dist/lib/core/modules/GenerationHandler.js +3 -2
  39. package/dist/lib/core/redisConversationMemoryManager.js +7 -3
  40. package/dist/lib/evaluation/BatchEvaluator.js +4 -1
  41. package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
  42. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  43. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +20 -8
  44. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  45. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  46. package/dist/lib/neurolink.d.ts +18 -1
  47. package/dist/lib/neurolink.js +367 -484
  48. package/dist/lib/observability/otelBridge.d.ts +2 -2
  49. package/dist/lib/observability/otelBridge.js +12 -3
  50. package/dist/lib/providers/amazonBedrock.js +2 -4
  51. package/dist/lib/providers/anthropic.d.ts +9 -5
  52. package/dist/lib/providers/anthropic.js +19 -14
  53. package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
  54. package/dist/lib/providers/anthropicBaseProvider.js +5 -4
  55. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  56. package/dist/lib/providers/azureOpenai.js +5 -4
  57. package/dist/lib/providers/googleAiStudio.js +30 -1
  58. package/dist/lib/providers/googleVertex.js +28 -6
  59. package/dist/lib/providers/huggingFace.d.ts +3 -3
  60. package/dist/lib/providers/huggingFace.js +6 -8
  61. package/dist/lib/providers/litellm.js +41 -29
  62. package/dist/lib/providers/mistral.js +2 -1
  63. package/dist/lib/providers/ollama.js +80 -23
  64. package/dist/lib/providers/openAI.js +3 -2
  65. package/dist/lib/providers/openRouter.js +2 -1
  66. package/dist/lib/providers/openaiCompatible.d.ts +4 -4
  67. package/dist/lib/providers/openaiCompatible.js +4 -4
  68. package/dist/lib/proxy/claudeFormat.d.ts +3 -2
  69. package/dist/lib/proxy/claudeFormat.js +25 -20
  70. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  71. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  72. package/dist/lib/proxy/modelRouter.js +3 -0
  73. package/dist/lib/proxy/oauthFetch.d.ts +1 -1
  74. package/dist/lib/proxy/oauthFetch.js +65 -72
  75. package/dist/lib/proxy/proxyConfig.js +44 -24
  76. package/dist/lib/proxy/proxyEnv.d.ts +19 -0
  77. package/dist/lib/proxy/proxyEnv.js +73 -0
  78. package/dist/lib/proxy/proxyFetch.js +50 -4
  79. package/dist/lib/proxy/proxyTracer.d.ts +133 -0
  80. package/dist/lib/proxy/proxyTracer.js +645 -0
  81. package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
  82. package/dist/lib/proxy/rawStreamCapture.js +83 -0
  83. package/dist/lib/proxy/requestLogger.d.ts +32 -5
  84. package/dist/lib/proxy/requestLogger.js +406 -37
  85. package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
  86. package/dist/lib/proxy/sseInterceptor.js +402 -0
  87. package/dist/lib/proxy/usageStats.d.ts +4 -3
  88. package/dist/lib/proxy/usageStats.js +25 -12
  89. package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
  90. package/dist/lib/rag/chunking/markdownChunker.js +15 -6
  91. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +7 -2
  92. package/dist/lib/server/routes/claudeProxyRoutes.js +1737 -508
  93. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
  94. package/dist/lib/services/server/ai/observability/instrumentation.js +240 -40
  95. package/dist/lib/tasks/backends/bullmqBackend.d.ts +33 -0
  96. package/dist/lib/tasks/backends/bullmqBackend.js +196 -0
  97. package/dist/lib/tasks/backends/nodeTimeoutBackend.d.ts +27 -0
  98. package/dist/lib/tasks/backends/nodeTimeoutBackend.js +141 -0
  99. package/dist/lib/tasks/backends/taskBackendRegistry.d.ts +31 -0
  100. package/dist/lib/tasks/backends/taskBackendRegistry.js +66 -0
  101. package/dist/lib/tasks/errors.d.ts +31 -0
  102. package/dist/lib/tasks/errors.js +18 -0
  103. package/dist/lib/tasks/store/fileTaskStore.d.ts +43 -0
  104. package/dist/lib/tasks/store/fileTaskStore.js +179 -0
  105. package/dist/lib/tasks/store/redisTaskStore.d.ts +43 -0
  106. package/dist/lib/tasks/store/redisTaskStore.js +197 -0
  107. package/dist/lib/tasks/taskExecutor.d.ts +21 -0
  108. package/dist/lib/tasks/taskExecutor.js +166 -0
  109. package/dist/lib/tasks/taskManager.d.ts +63 -0
  110. package/dist/lib/tasks/taskManager.js +426 -0
  111. package/dist/lib/tasks/tools/taskTools.d.ts +135 -0
  112. package/dist/lib/tasks/tools/taskTools.js +274 -0
  113. package/dist/lib/telemetry/index.d.ts +2 -1
  114. package/dist/lib/telemetry/index.js +2 -1
  115. package/dist/lib/telemetry/telemetryService.d.ts +3 -0
  116. package/dist/lib/telemetry/telemetryService.js +65 -5
  117. package/dist/lib/types/cli.d.ts +10 -0
  118. package/dist/lib/types/configTypes.d.ts +3 -0
  119. package/dist/lib/types/generateTypes.d.ts +13 -0
  120. package/dist/lib/types/index.d.ts +1 -0
  121. package/dist/lib/types/proxyTypes.d.ts +37 -5
  122. package/dist/lib/types/streamTypes.d.ts +25 -3
  123. package/dist/lib/types/taskTypes.d.ts +275 -0
  124. package/dist/lib/types/taskTypes.js +37 -0
  125. package/dist/lib/utils/messageBuilder.js +3 -2
  126. package/dist/lib/utils/providerHealth.d.ts +18 -0
  127. package/dist/lib/utils/providerHealth.js +240 -9
  128. package/dist/lib/utils/providerUtils.js +14 -8
  129. package/dist/lib/utils/toolChoice.d.ts +4 -0
  130. package/dist/lib/utils/toolChoice.js +7 -0
  131. package/dist/neurolink.d.ts +18 -1
  132. package/dist/neurolink.js +367 -484
  133. package/dist/observability/otelBridge.d.ts +2 -2
  134. package/dist/observability/otelBridge.js +12 -3
  135. package/dist/providers/amazonBedrock.js +2 -4
  136. package/dist/providers/anthropic.d.ts +9 -5
  137. package/dist/providers/anthropic.js +19 -14
  138. package/dist/providers/anthropicBaseProvider.d.ts +3 -3
  139. package/dist/providers/anthropicBaseProvider.js +5 -4
  140. package/dist/providers/azureOpenai.d.ts +1 -1
  141. package/dist/providers/azureOpenai.js +5 -4
  142. package/dist/providers/googleAiStudio.js +30 -1
  143. package/dist/providers/googleVertex.js +28 -6
  144. package/dist/providers/huggingFace.d.ts +3 -3
  145. package/dist/providers/huggingFace.js +6 -7
  146. package/dist/providers/litellm.js +41 -29
  147. package/dist/providers/mistral.js +2 -1
  148. package/dist/providers/ollama.js +80 -23
  149. package/dist/providers/openAI.js +3 -2
  150. package/dist/providers/openRouter.js +2 -1
  151. package/dist/providers/openaiCompatible.d.ts +4 -4
  152. package/dist/providers/openaiCompatible.js +4 -3
  153. package/dist/proxy/claudeFormat.d.ts +3 -2
  154. package/dist/proxy/claudeFormat.js +25 -20
  155. package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  156. package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  157. package/dist/proxy/modelRouter.js +3 -0
  158. package/dist/proxy/oauthFetch.d.ts +1 -1
  159. package/dist/proxy/oauthFetch.js +65 -72
  160. package/dist/proxy/proxyConfig.js +44 -24
  161. package/dist/proxy/proxyEnv.d.ts +19 -0
  162. package/dist/proxy/proxyEnv.js +72 -0
  163. package/dist/proxy/proxyFetch.js +50 -4
  164. package/dist/proxy/proxyTracer.d.ts +133 -0
  165. package/dist/proxy/proxyTracer.js +644 -0
  166. package/dist/proxy/rawStreamCapture.d.ts +10 -0
  167. package/dist/proxy/rawStreamCapture.js +82 -0
  168. package/dist/proxy/requestLogger.d.ts +32 -5
  169. package/dist/proxy/requestLogger.js +406 -37
  170. package/dist/proxy/sseInterceptor.d.ts +97 -0
  171. package/dist/proxy/sseInterceptor.js +401 -0
  172. package/dist/proxy/usageStats.d.ts +4 -3
  173. package/dist/proxy/usageStats.js +25 -12
  174. package/dist/rag/chunkers/MarkdownChunker.js +13 -5
  175. package/dist/rag/chunking/markdownChunker.js +15 -6
  176. package/dist/server/routes/claudeProxyRoutes.d.ts +7 -2
  177. package/dist/server/routes/claudeProxyRoutes.js +1737 -508
  178. package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
  179. package/dist/services/server/ai/observability/instrumentation.js +240 -40
  180. package/dist/tasks/backends/bullmqBackend.d.ts +33 -0
  181. package/dist/tasks/backends/bullmqBackend.js +195 -0
  182. package/dist/tasks/backends/nodeTimeoutBackend.d.ts +27 -0
  183. package/dist/tasks/backends/nodeTimeoutBackend.js +140 -0
  184. package/dist/tasks/backends/taskBackendRegistry.d.ts +31 -0
  185. package/dist/tasks/backends/taskBackendRegistry.js +65 -0
  186. package/dist/tasks/errors.d.ts +31 -0
  187. package/dist/tasks/errors.js +17 -0
  188. package/dist/tasks/store/fileTaskStore.d.ts +43 -0
  189. package/dist/tasks/store/fileTaskStore.js +178 -0
  190. package/dist/tasks/store/redisTaskStore.d.ts +43 -0
  191. package/dist/tasks/store/redisTaskStore.js +196 -0
  192. package/dist/tasks/taskExecutor.d.ts +21 -0
  193. package/dist/tasks/taskExecutor.js +165 -0
  194. package/dist/tasks/taskManager.d.ts +63 -0
  195. package/dist/tasks/taskManager.js +425 -0
  196. package/dist/tasks/tools/taskTools.d.ts +135 -0
  197. package/dist/tasks/tools/taskTools.js +273 -0
  198. package/dist/telemetry/index.d.ts +2 -1
  199. package/dist/telemetry/index.js +2 -1
  200. package/dist/telemetry/telemetryService.d.ts +3 -0
  201. package/dist/telemetry/telemetryService.js +65 -5
  202. package/dist/types/cli.d.ts +10 -0
  203. package/dist/types/configTypes.d.ts +3 -0
  204. package/dist/types/generateTypes.d.ts +13 -0
  205. package/dist/types/index.d.ts +1 -0
  206. package/dist/types/proxyTypes.d.ts +37 -5
  207. package/dist/types/streamTypes.d.ts +25 -3
  208. package/dist/types/taskTypes.d.ts +275 -0
  209. package/dist/types/taskTypes.js +36 -0
  210. package/dist/utils/messageBuilder.js +3 -2
  211. package/dist/utils/providerHealth.d.ts +18 -0
  212. package/dist/utils/providerHealth.js +240 -9
  213. package/dist/utils/providerUtils.js +14 -8
  214. package/dist/utils/toolChoice.d.ts +4 -0
  215. package/dist/utils/toolChoice.js +6 -0
  216. package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
  217. package/docs/changelog.md +252 -0
  218. package/package.json +19 -1
  219. package/scripts/observability/check-proxy-telemetry.mjs +235 -0
  220. package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
  221. package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
  222. package/scripts/observability/manage-local-openobserve.sh +184 -0
  223. package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
  224. package/scripts/observability/proxy-observability.env.example +23 -0
@@ -11,7 +11,7 @@ import { InvalidModelError, NetworkError, ProviderError, } from "../types/errors
11
11
  import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
12
12
  import { TimeoutError } from "../utils/timeout.js";
13
13
  // Model version constants (configurable via environment)
14
- const DEFAULT_OLLAMA_MODEL = "llama3.1:8b";
14
+ const DEFAULT_OLLAMA_MODEL = process.env.OLLAMA_MODEL || "llama3.1:8b";
15
15
  const FALLBACK_OLLAMA_MODEL = "llama3.2:latest"; // Used when primary model fails
16
16
  // Configuration helpers
17
17
  const getOllamaBaseUrl = () => {
@@ -40,6 +40,17 @@ const getOllamaTimeout = () => {
40
40
  // especially for larger models like aliafshar/gemma3-it-qat-tools:latest (12.2B parameters)
41
41
  return parseInt(process.env.OLLAMA_TIMEOUT || "240000", 10);
42
42
  };
43
+ async function createOllamaHttpError(response) {
44
+ let responseBody = "";
45
+ try {
46
+ responseBody = (await response.text()).trim();
47
+ }
48
+ catch {
49
+ // Ignore unreadable bodies
50
+ }
51
+ const suffix = responseBody ? ` - ${responseBody.slice(0, 500)}` : "";
52
+ return new Error(`Ollama API error: ${response.status} ${response.statusText}${suffix}`);
53
+ }
43
54
  // Create proxy-aware fetch instance
44
55
  const proxyFetch = createProxyFetch();
45
56
  // Custom LanguageModel implementation for Ollama
@@ -110,21 +121,37 @@ class OllamaLanguageModel {
110
121
  signal: createAbortSignalWithTimeout(this.timeout),
111
122
  });
112
123
  if (!response.ok) {
113
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
124
+ throw await createOllamaHttpError(response);
114
125
  }
115
126
  const data = await response.json();
116
127
  logger.debug("[OllamaLanguageModel] OpenAI API Response:", JSON.stringify(data, null, 2));
117
128
  const text = data.choices?.[0]?.message?.content || "";
118
129
  const usage = data.usage || {};
130
+ const promptTokens = usage.prompt_tokens ??
131
+ this.estimateTokenCount(JSON.stringify(messages));
132
+ const completionTokens = usage.completion_tokens ?? this.estimateTokenCount(text);
119
133
  return {
134
+ content: text ? [{ type: "text", text }] : [],
120
135
  text,
121
136
  usage: {
122
- promptTokens: usage.prompt_tokens ??
123
- this.estimateTokenCount(JSON.stringify(messages)),
124
- completionTokens: usage.completion_tokens ?? this.estimateTokenCount(text),
125
- totalTokens: usage.total_tokens,
137
+ inputTokens: promptTokens,
138
+ outputTokens: completionTokens,
139
+ promptTokens,
140
+ completionTokens,
141
+ totalTokens: usage.total_tokens ?? promptTokens + completionTokens,
142
+ },
143
+ finishReason: data.choices?.[0]?.finish_reason ?? "stop",
144
+ warnings: [],
145
+ request: {
146
+ body: JSON.stringify(requestBody),
147
+ },
148
+ response: {
149
+ id: data.id,
150
+ modelId: data.model,
151
+ timestamp: new Date(),
152
+ headers: {},
153
+ body: data,
126
154
  },
127
- finishReason: "stop",
128
155
  rawCall: {
129
156
  rawPrompt: messages,
130
157
  rawSettings: {
@@ -158,21 +185,45 @@ class OllamaLanguageModel {
158
185
  signal: createAbortSignalWithTimeout(this.timeout),
159
186
  });
160
187
  if (!response.ok) {
161
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
188
+ throw await createOllamaHttpError(response);
162
189
  }
163
190
  const data = await response.json();
164
191
  logger.debug("[OllamaLanguageModel] Native API Response:", JSON.stringify(data, null, 2));
192
+ const text = String(data.response ?? "");
193
+ const promptTokens = data.prompt_eval_count ?? this.estimateTokenCount(prompt);
194
+ const completionTokens = data.eval_count ?? this.estimateTokenCount(text);
195
+ const requestBody = {
196
+ model: this.modelId,
197
+ prompt,
198
+ stream: false,
199
+ system: messages.find((m) => m.role === "system")?.content,
200
+ options: {
201
+ temperature: options.temperature,
202
+ num_predict: options.maxTokens,
203
+ },
204
+ };
165
205
  return {
166
- text: data.response,
206
+ content: text ? [{ type: "text", text }] : [],
207
+ text,
167
208
  usage: {
168
- promptTokens: data.prompt_eval_count ?? this.estimateTokenCount(prompt),
169
- completionTokens: data.eval_count ??
170
- this.estimateTokenCount(String(data.response ?? "")),
171
- totalTokens: (data.prompt_eval_count ?? this.estimateTokenCount(prompt)) +
172
- (data.eval_count ??
173
- this.estimateTokenCount(String(data.response ?? ""))),
209
+ inputTokens: promptTokens,
210
+ outputTokens: completionTokens,
211
+ promptTokens,
212
+ completionTokens,
213
+ totalTokens: promptTokens + completionTokens,
214
+ },
215
+ finishReason: data.done_reason ?? "stop",
216
+ warnings: [],
217
+ request: {
218
+ body: JSON.stringify(requestBody),
219
+ },
220
+ response: {
221
+ id: data.created_at,
222
+ modelId: this.modelId,
223
+ timestamp: data.created_at ? new Date(data.created_at) : new Date(),
224
+ headers: {},
225
+ body: data,
174
226
  },
175
- finishReason: "stop",
176
227
  rawCall: {
177
228
  rawPrompt: prompt,
178
229
  rawSettings: {
@@ -220,7 +271,7 @@ class OllamaLanguageModel {
220
271
  ok: response.ok,
221
272
  });
222
273
  if (!response.ok) {
223
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
274
+ throw await createOllamaHttpError(response);
224
275
  }
225
276
  const self = this;
226
277
  return {
@@ -282,7 +333,7 @@ class OllamaLanguageModel {
282
333
  ok: response.ok,
283
334
  });
284
335
  if (!response.ok) {
285
- throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
336
+ throw await createOllamaHttpError(response);
286
337
  }
287
338
  const self = this;
288
339
  return {
@@ -705,7 +756,7 @@ export class OllamaProvider extends BaseProvider {
705
756
  signal: createAbortSignalWithTimeout(this.timeout),
706
757
  });
707
758
  if (!response.ok) {
708
- throw this.handleProviderError(new Error(`Ollama API error: ${response.status} ${response.statusText}`));
759
+ throw this.handleProviderError(await createOllamaHttpError(response));
709
760
  }
710
761
  // Process response stream
711
762
  const { content, toolCalls, finishReason } = await this.processOllamaResponse(response, controller);
@@ -870,7 +921,7 @@ export class OllamaProvider extends BaseProvider {
870
921
  ok: response.ok,
871
922
  });
872
923
  if (!response.ok) {
873
- throw this.handleProviderError(new Error(`Ollama API error: ${response.status} ${response.statusText}`));
924
+ throw this.handleProviderError(await createOllamaHttpError(response));
874
925
  }
875
926
  // Transform to async generator for OpenAI-compatible format
876
927
  const self = this;
@@ -936,7 +987,7 @@ export class OllamaProvider extends BaseProvider {
936
987
  ok: response.ok,
937
988
  });
938
989
  if (!response.ok) {
939
- throw this.handleProviderError(new Error(`Ollama API error: ${response.status} ${response.statusText}`));
990
+ throw this.handleProviderError(await createOllamaHttpError(response));
940
991
  }
941
992
  // Transform to async generator to match other providers
942
993
  const self = this;
@@ -1486,8 +1537,14 @@ export class OllamaProvider extends BaseProvider {
1486
1537
  error.message?.includes("not found")) {
1487
1538
  return new InvalidModelError(`❌ Ollama Model Not Found\n\nModel '${this.modelName}' is not available locally.\n\n🔧 Install Model:\n1. Run: ollama pull ${this.modelName}\n2. Or try a different model:\n - ollama pull ${FALLBACK_OLLAMA_MODEL}\n - ollama pull mistral:latest\n - ollama pull codellama:latest\n\n🔧 List Available Models:\nollama list`, this.providerName);
1488
1539
  }
1489
- if (error.message?.includes("404")) {
1490
- return new NetworkError(`❌ Ollama API Endpoint Not Found\n\nThe API endpoint might have changed or Ollama version is incompatible.\n\n🔧 Check:\n1. Ollama version: 'ollama --version'\n2. Update Ollama to latest version\n3. Verify API is available: 'curl ${this.baseUrl}/api/version'`, this.providerName);
1540
+ const errMsg = error.message ?? "";
1541
+ if (errMsg.includes("404") &&
1542
+ (errMsg.toLowerCase().includes("model") ||
1543
+ errMsg.toLowerCase().includes("not found"))) {
1544
+ return new InvalidModelError(`❌ Ollama Returned HTTP 404\n\nThis usually means the configured model '${this.modelName}' is not installed locally, although a bad base URL or incompatible API mode can also cause it.\n\n🔧 Check:\n1. Verify the model exists: 'ollama list'\n2. Pull it if missing: 'ollama pull ${this.modelName}'\n3. Verify the service is healthy: 'curl ${this.baseUrl}/api/version'\n4. If you use OpenAI-compatible mode, confirm the base URL serves /v1/chat/completions`, this.providerName);
1545
+ }
1546
+ if (errMsg.includes("404")) {
1547
+ return new ProviderError(`❌ Ollama Endpoint Returned HTTP 404\n\nThe configured base URL (${this.baseUrl}) did not serve the expected Ollama endpoint for model '${this.modelName}'. This is usually a configuration or API-mode mismatch rather than a missing model.\n\n🔧 Check:\n1. Verify the base URL: ${this.baseUrl}\n2. For native Ollama mode, confirm /api/generate exists\n3. For OpenAI-compatible mode, confirm /v1/chat/completions exists\n4. If the model is missing, the response body should explicitly say so`, this.providerName);
1491
1548
  }
1492
1549
  return new ProviderError(`❌ Ollama Provider Error\n\n${error.message || "Unknown error occurred"}\n\n🔧 Troubleshooting:\n1. Check if Ollama service is running\n2. Verify model is installed: 'ollama list'\n3. Check network connectivity to ${this.baseUrl}\n4. Review Ollama logs for details`, this.providerName);
1493
1550
  }
@@ -1,6 +1,6 @@
1
1
  import { createOpenAI } from "@ai-sdk/openai";
2
+ import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
2
3
  import { embed, embedMany, NoOutputGeneratedError, stepCountIs, streamText, } from "ai";
3
- import { trace, SpanKind, SpanStatusCode } from "@opentelemetry/api";
4
4
  import { AIProviderName } from "../constants/enums.js";
5
5
  import { BaseProvider } from "../core/baseProvider.js";
6
6
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
@@ -12,6 +12,7 @@ import { calculateCost } from "../utils/pricing.js";
12
12
  import { createOpenAIConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
13
13
  import { isZodSchema } from "../utils/schemaConversion.js";
14
14
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
15
+ import { resolveToolChoice } from "../utils/toolChoice.js";
15
16
  import { getModelId } from "./providerTypeUtils.js";
16
17
  /**
17
18
  * Retrieve a tool's schema, handling both AI SDK v6 (`inputSchema`) and
@@ -314,7 +315,7 @@ export class OpenAIProvider extends BaseProvider {
314
315
  maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
315
316
  tools,
316
317
  stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
317
- toolChoice: shouldUseTools && Object.keys(tools).length > 0 ? "auto" : "none",
318
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
318
319
  abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
319
320
  experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
320
321
  onStepFinish: ({ toolCalls, toolResults }) => {
@@ -9,6 +9,7 @@ import { isAbortError } from "../utils/errorHandling.js";
9
9
  import { logger } from "../utils/logger.js";
10
10
  import { getProviderModel } from "../utils/providerConfig.js";
11
11
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
12
+ import { resolveToolChoice } from "../utils/toolChoice.js";
12
13
  // Constants
13
14
  const MODELS_DISCOVERY_TIMEOUT_MS = 5000; // 5 seconds for model discovery
14
15
  // Configuration helpers
@@ -234,7 +235,7 @@ export class OpenRouterProvider extends BaseProvider {
234
235
  ...(shouldUseTools &&
235
236
  Object.keys(tools).length > 0 && {
236
237
  tools,
237
- toolChoice: "auto",
238
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
238
239
  maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
239
240
  }),
240
241
  abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
@@ -1,8 +1,8 @@
1
- import { type Schema, type LanguageModel } from "ai";
2
- import type { ZodUnknownSchema } from "../types/typeAliases.js";
3
- import { AIProviderName } from "../constants/enums.js";
4
- import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
1
+ import { type LanguageModel, type Schema } from "ai";
2
+ import type { AIProviderName } from "../constants/enums.js";
5
3
  import { BaseProvider } from "../core/baseProvider.js";
4
+ import type { StreamOptions, StreamResult } from "../types/streamTypes.js";
5
+ import type { ZodUnknownSchema } from "../types/typeAliases.js";
6
6
  /**
7
7
  * OpenAI Compatible Provider - BaseProvider Implementation
8
8
  * Provides access to one of the OpenAI-compatible endpoint (OpenRouter, vLLM, LiteLLM, etc.)
@@ -1,11 +1,11 @@
1
1
  import { createOpenAI } from "@ai-sdk/openai";
2
2
  import { NoOutputGeneratedError, streamText, } from "ai";
3
- import { AIProviderName } from "../constants/enums.js";
4
3
  import { BaseProvider } from "../core/baseProvider.js";
5
- import { logger } from "../utils/logger.js";
6
- import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
7
4
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
8
5
  import { createProxyFetch } from "../proxy/proxyFetch.js";
6
+ import { logger } from "../utils/logger.js";
7
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
8
+ import { resolveToolChoice } from "../utils/toolChoice.js";
9
9
  import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
10
10
  // Constants
11
11
  const FALLBACK_OPENAI_COMPATIBLE_MODEL = "gpt-3.5-turbo";
@@ -178,7 +178,7 @@ export class OpenAICompatibleProvider extends BaseProvider {
178
178
  ? { temperature: options.temperature }
179
179
  : {}),
180
180
  tools,
181
- toolChoice: shouldUseTools ? "auto" : "none",
181
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
182
182
  abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
183
183
  experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
184
184
  onStepFinish: (event) => {
@@ -74,6 +74,7 @@ export declare function formatSSE(eventType: string, data: unknown): string;
74
74
  export declare class ClaudeStreamSerializer {
75
75
  private state;
76
76
  private currentBlockType;
77
+ private sawToolUseBlock;
77
78
  private blockIndex;
78
79
  private hasOpenedBlock;
79
80
  private outputTokens;
@@ -107,8 +108,8 @@ export declare class ClaudeStreamSerializer {
107
108
  */
108
109
  private openBlock;
109
110
  /**
110
- * Emit the opening frames: message_start, ping, content_block_start (text).
111
- * Automatically called on the first pushDelta if not called manually.
111
+ * Emit the opening frames: message_start and ping.
112
+ * The first actual content decides which content block opens next.
112
113
  */
113
114
  start(): Generator<string>;
114
115
  /**
@@ -8,8 +8,8 @@
8
8
  *
9
9
  * Reference: https://docs.anthropic.com/en/api/messages
10
10
  */
11
+ import { jsonSchema, tool } from "ai";
11
12
  import { randomBytes } from "crypto";
12
- import { jsonSchema } from "ai";
13
13
  // ---------------------------------------------------------------------------
14
14
  // Helpers
15
15
  // ---------------------------------------------------------------------------
@@ -122,9 +122,7 @@ export function parseClaudeRequest(body) {
122
122
  const resultContent = typeof block.content === "string"
123
123
  ? block.content
124
124
  : Array.isArray(block.content)
125
- ? block.content
126
- .map((b) => (b.type === "text" ? b.text : `[${b.type}]`))
127
- .join("\n")
125
+ ? block.content.map((b) => (b.type === "text" ? b.text : `[${b.type}]`)).join("\n")
128
126
  : "";
129
127
  textParts.push(`[tool_result:${block.tool_use_id}] ${resultContent}`);
130
128
  }
@@ -148,13 +146,13 @@ export function parseClaudeRequest(body) {
148
146
  const tools = {};
149
147
  if (body.tools) {
150
148
  for (const t of body.tools) {
151
- tools[t.name] = {
149
+ tools[t.name] = tool({
152
150
  description: t.description ?? "",
153
- // Wrap raw JSON schema with AI SDK's jsonSchema() so the SDK
154
- // recognizes it (it checks for Symbol.for("vercel.ai.schema")).
155
- // Without this, the SDK tries zodSchema() on raw JSON and crashes.
156
- parameters: jsonSchema(t.input_schema ?? { type: "object" }),
157
- };
151
+ // Fallback providers consume AI SDK-style tools, not Claude wire-format
152
+ // tool descriptors. Wrap the raw JSON schema once here so every
153
+ // downstream provider sees a canonical `inputSchema` shape.
154
+ inputSchema: jsonSchema(t.input_schema ?? { type: "object" }),
155
+ });
158
156
  }
159
157
  }
160
158
  // --- tool_choice ---
@@ -186,9 +184,7 @@ export function parseClaudeRequest(body) {
186
184
  enabled: isEnabled,
187
185
  budgetTokens: body.thinking.budget_tokens,
188
186
  // Pass the raw type so providers can map "adaptive" appropriately
189
- ...(body.thinking.type === "adaptive"
190
- ? { thinkingLevel: "medium" }
191
- : {}),
187
+ ...(body.thinking.type === "adaptive" ? { thinkingLevel: "medium" } : {}),
192
188
  };
193
189
  }
194
190
  return {
@@ -239,6 +235,9 @@ function mapStopReason(finishReason) {
239
235
  */
240
236
  export function serializeClaudeResponse(result, requestModel) {
241
237
  const content = [];
238
+ const inferredFinishReason = result.toolCalls && result.toolCalls.length > 0 && (!result.finishReason || result.finishReason === "stop")
239
+ ? "tool_use"
240
+ : result.finishReason;
242
241
  // Thinking/reasoning content block (if present)
243
242
  if (result.reasoning) {
244
243
  content.push({ type: "thinking", thinking: result.reasoning });
@@ -250,11 +249,15 @@ export function serializeClaudeResponse(result, requestModel) {
250
249
  // Tool use blocks — normalize IDs to Claude `toolu_` format
251
250
  if (result.toolCalls && result.toolCalls.length > 0) {
252
251
  for (const tc of result.toolCalls) {
252
+ const toolInput = tc.args ??
253
+ tc.parameters ??
254
+ tc.input ??
255
+ {};
253
256
  content.push({
254
257
  type: "tool_use",
255
258
  id: generateToolUseId(),
256
259
  name: tc.toolName,
257
- input: tc.args,
260
+ input: toolInput,
258
261
  });
259
262
  }
260
263
  }
@@ -268,7 +271,7 @@ export function serializeClaudeResponse(result, requestModel) {
268
271
  role: "assistant",
269
272
  content,
270
273
  model: result.model ?? requestModel,
271
- stop_reason: mapStopReason(result.finishReason),
274
+ stop_reason: mapStopReason(inferredFinishReason),
272
275
  stop_sequence: null,
273
276
  usage: {
274
277
  input_tokens: result.usage?.input ?? 0,
@@ -363,6 +366,7 @@ export function formatSSE(eventType, data) {
363
366
  export class ClaudeStreamSerializer {
364
367
  state = "idle";
365
368
  currentBlockType = null;
369
+ sawToolUseBlock = false;
366
370
  blockIndex = 0;
367
371
  hasOpenedBlock = false;
368
372
  outputTokens = 0;
@@ -465,15 +469,14 @@ export class ClaudeStreamSerializer {
465
469
  // Public API
466
470
  // -----------------------------------------------------------------------
467
471
  /**
468
- * Emit the opening frames: message_start, ping, content_block_start (text).
469
- * Automatically called on the first pushDelta if not called manually.
472
+ * Emit the opening frames: message_start and ping.
473
+ * The first actual content decides which content block opens next.
470
474
  */
471
475
  *start() {
472
476
  if (this.state !== "idle") {
473
477
  return;
474
478
  }
475
479
  yield* this.ensureMessageStarted();
476
- yield* this.openBlock({ type: "text", text: "" });
477
480
  }
478
481
  /**
479
482
  * Push a text delta. Returns zero or more SSE frames.
@@ -529,6 +532,7 @@ export class ClaudeStreamSerializer {
529
532
  if (this.state === "done" || this.state === "error") {
530
533
  return;
531
534
  }
535
+ this.sawToolUseBlock = true;
532
536
  yield* this.ensureMessageStarted();
533
537
  // Open a tool_use block (closes any current block)
534
538
  yield* this.openBlock({ type: "tool_use", id, name, input: "" });
@@ -562,19 +566,20 @@ export class ClaudeStreamSerializer {
562
566
  *finish(outputTokens, finishReason) {
563
567
  // If we never started (empty response), start first
564
568
  if (this.state === "idle") {
565
- yield* this.start();
569
+ yield* this.ensureMessageStarted();
566
570
  }
567
571
  if (this.state === "done" || this.state === "error") {
568
572
  return;
569
573
  }
570
574
  this.outputTokens = outputTokens ?? this.outputTokens;
575
+ const resolvedFinishReason = this.sawToolUseBlock && (!finishReason || finishReason === "stop") ? "tool_use" : finishReason;
571
576
  // Close any open content block
572
577
  yield* this.closeCurrentBlock();
573
578
  // message_delta
574
579
  const messageDelta = {
575
580
  type: "message_delta",
576
581
  delta: {
577
- stop_reason: mapStopReason(finishReason),
582
+ stop_reason: mapStopReason(resolvedFinishReason),
578
583
  stop_sequence: null,
579
584
  },
580
585
  usage: { output_tokens: this.outputTokens },
@@ -3,13 +3,9 @@
3
3
  * so that Anthropic sees consistent "user" fingerprints even when requests are
4
4
  * spread across multiple accounts.
5
5
  *
6
- * Session IDs follow the format:
7
- * user_[32 hex chars]_account_[UUIDv4]_session_[UUIDv4]
8
- *
9
- * IDs are cached with a 1-hour TTL and reused for subsequent requests from
10
- * the same account within that window.
6
+ * The generated metadata matches Claude Code's shape:
7
+ * {"device_id":"<64 hex>","account_uuid":"<uuid>","session_id":"<uuid>"}
11
8
  */
12
9
  import type { CloakingPlugin } from "../../../types/index.js";
13
- /** Purge all expired sessions from the cache. Exported for external timer use. */
14
10
  export declare function purgeExpiredSessions(): void;
15
11
  export declare function createSessionIdentity(): CloakingPlugin;
@@ -3,29 +3,12 @@
3
3
  * so that Anthropic sees consistent "user" fingerprints even when requests are
4
4
  * spread across multiple accounts.
5
5
  *
6
- * Session IDs follow the format:
7
- * user_[32 hex chars]_account_[UUIDv4]_session_[UUIDv4]
8
- *
9
- * IDs are cached with a 1-hour TTL and reused for subsequent requests from
10
- * the same account within that window.
6
+ * The generated metadata matches Claude Code's shape:
7
+ * {"device_id":"<64 hex>","account_uuid":"<uuid>","session_id":"<uuid>"}
11
8
  */
12
- import { randomBytes, randomUUID } from "crypto";
13
- // ── Session cache with TTL ───────────────────────────────────────────────────
14
- const TTL_MS = 3_600_000; // 1 hour
15
- const sessionCache = new Map();
16
- /** Generate a new session user ID in the required format. */
17
- function generateUserId() {
18
- const hex = randomBytes(32).toString("hex"); // 64 hex chars, take first 32
19
- return `user_${hex.slice(0, 32)}_account_${randomUUID()}_session_${randomUUID()}`;
20
- }
21
- /** Purge all expired sessions from the cache. Exported for external timer use. */
9
+ import { getOrCreateClaudeCodeIdentity, purgeExpiredClaudeCodeIdentities, } from "../../../auth/anthropicOAuth.js";
22
10
  export function purgeExpiredSessions() {
23
- const now = Date.now();
24
- for (const [key, entry] of sessionCache) {
25
- if (entry.expiresAt <= now) {
26
- sessionCache.delete(key);
27
- }
28
- }
11
+ purgeExpiredClaudeCodeIdentities();
29
12
  }
30
13
  export function createSessionIdentity() {
31
14
  return {
@@ -34,23 +17,16 @@ export function createSessionIdentity() {
34
17
  enabled: true,
35
18
  async transformRequest(ctx) {
36
19
  const accountId = ctx.account.id;
37
- const now = Date.now();
38
- // Check cache first — reuse if still valid
39
- let cached = sessionCache.get(accountId);
40
- if (!cached || cached.expiresAt <= now) {
41
- cached = {
42
- userId: generateUserId(),
43
- expiresAt: now + TTL_MS,
44
- };
45
- sessionCache.set(accountId, cached);
46
- }
20
+ const identity = getOrCreateClaudeCodeIdentity(accountId, {
21
+ existingUserId: ctx.request.body.metadata?.user_id,
22
+ });
47
23
  const body = { ...ctx.request.body };
48
24
  // Only set user_id if not already present — in passthrough mode,
49
- // oauthFetch.ts owns this field and sets it from its own session cache.
25
+ // oauthFetch.ts owns this field and sets it from the shared helper.
50
26
  if (!body.metadata?.user_id) {
51
27
  body.metadata = {
52
28
  ...body.metadata,
53
- user_id: cached.userId,
29
+ user_id: identity.metadataUserId,
54
30
  };
55
31
  }
56
32
  return {
@@ -15,6 +15,9 @@ export class ModelRouter {
15
15
  if (this.passthrough.has(requestedModel)) {
16
16
  return { provider: "anthropic", model: requestedModel };
17
17
  }
18
+ if (requestedModel.startsWith("gemini-")) {
19
+ return { provider: "vertex", model: requestedModel };
20
+ }
18
21
  if (requestedModel.startsWith("claude-")) {
19
22
  return { provider: "anthropic", model: requestedModel };
20
23
  }
@@ -20,7 +20,7 @@ export { CLAUDE_CLI_USER_AGENT, MCP_TOOL_PREFIX };
20
20
  * - Sets User-Agent to Claude CLI
21
21
  * - Adds ?beta=true query parameter to /v1/messages
22
22
  * - Injects billing header & agent block into system prompt
23
- * - Injects fake user ID into metadata
23
+ * - Injects Claude-Code-shaped user ID into metadata
24
24
  * - Adds Stainless SDK headers for fingerprint matching
25
25
  * - Disables thinking when tool_choice is forced
26
26
  *