@juspay/neurolink 9.67.1 → 9.67.3

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -366,12 +366,13 @@ const createVertexSettings = async (region) => {
366
366
  return baseSettings;
367
367
  };
368
368
  // Create Anthropic-specific Vertex settings for native @anthropic-ai/vertex-sdk
369
- const createVertexAnthropicSettings = async (region) => {
369
+ const createVertexAnthropicSettings = async (region, timeoutMs) => {
370
370
  const location = region || getVertexLocation();
371
371
  const project = getVertexProjectId();
372
372
  return {
373
373
  projectId: project,
374
374
  region: location,
375
+ ...(timeoutMs !== undefined && { timeout: timeoutMs }),
375
376
  };
376
377
  };
377
378
  // Helper function to determine if a model is an Anthropic model
@@ -2032,9 +2033,9 @@ export class GoogleVertexProvider extends BaseProvider {
2032
2033
  /**
2033
2034
  * Create native AnthropicVertex client for Claude models
2034
2035
  */
2035
- async createAnthropicVertexClient() {
2036
+ async createAnthropicVertexClient(timeoutMs) {
2036
2037
  const mod = await getAnthropicVertexModule();
2037
- const settings = await createVertexAnthropicSettings(this.location);
2038
+ const settings = await createVertexAnthropicSettings(this.location, timeoutMs);
2038
2039
  return new mod.AnthropicVertex(settings);
2039
2040
  }
2040
2041
  /**
@@ -2042,9 +2043,10 @@ export class GoogleVertexProvider extends BaseProvider {
2042
2043
  * This bypasses @ai-sdk/google-vertex completely and uses Anthropic's native SDK
2043
2044
  */
2044
2045
  async executeNativeAnthropicStream(options) {
2045
- const client = await this.createAnthropicVertexClient();
2046
2046
  const modelName = options.model || this.modelName || "claude-sonnet-4-5@20250929";
2047
2047
  const startTime = Date.now();
2048
+ const streamTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2049
+ const client = await this.createAnthropicVertexClient(streamTimeoutMs);
2048
2050
  logger.debug("[GoogleVertex] Using native @anthropic-ai/vertex-sdk for Claude stream", {
2049
2051
  model: modelName,
2050
2052
  project: this.projectId,
@@ -2336,7 +2338,6 @@ export class GoogleVertexProvider extends BaseProvider {
2336
2338
  // abort the stream after the configured timeout so a stalled
2337
2339
  // Vertex/Anthropic endpoint can't hang forever. options.timeout wins
2338
2340
  // if set; otherwise 5 min — generous for tool-heavy turns.
2339
- const streamTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2340
2341
  const streamTimeoutHandle = setTimeout(() => {
2341
2342
  logger.warn(`[GoogleVertex] Anthropic stream exceeded ${streamTimeoutMs}ms — aborting`);
2342
2343
  abortHandler();
@@ -2560,9 +2561,10 @@ export class GoogleVertexProvider extends BaseProvider {
2560
2561
  * Execute generate using native @anthropic-ai/vertex-sdk for Claude models on Vertex AI
2561
2562
  */
2562
2563
  async executeNativeAnthropicGenerate(options) {
2563
- const client = await this.createAnthropicVertexClient();
2564
2564
  const modelName = options.model || this.modelName || "claude-sonnet-4-5@20250929";
2565
2565
  const startTime = Date.now();
2566
+ const generateTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2567
+ const client = await this.createAnthropicVertexClient(generateTimeoutMs);
2566
2568
  logger.debug("[GoogleVertex] Using native @anthropic-ai/vertex-sdk for Claude generate", {
2567
2569
  model: modelName,
2568
2570
  project: this.projectId,
@@ -2826,7 +2828,6 @@ export class GoogleVertexProvider extends BaseProvider {
2826
2828
  // Bound the SDK wait so a stalled Vertex/Anthropic call can't hang
2827
2829
  // generate forever. options.timeout wins if set, otherwise default
2828
2830
  // to 5 min — generous for tool-heavy turns.
2829
- const generateTimeoutMs = parseTimeout(options.timeout) ?? 300_000;
2830
2831
  const response = await withTimeout(client.messages.create({
2831
2832
  ...requestParams,
2832
2833
  messages: currentMessages,
@@ -1,15 +1,19 @@
1
- import type { ZodType } from "zod";
2
1
  import type { AIProviderName } from "../constants/enums.js";
3
- import { BaseProvider } from "../core/baseProvider.js";
4
- import type { StreamOptions, StreamResult } from "../types/index.js";
5
- import type { LanguageModel, Schema } from "../types/index.js";
2
+ import type { OpenAICompatBuildBodyArgs, OpenAICompatStreamLifecycleListeners } from "../types/index.js";
3
+ import { OpenAIChatCompletionsProvider } from "./openaiChatCompletionsBase.js";
6
4
  /**
7
- * LiteLLM Provider - BaseProvider Implementation
8
- * Provides access to 100+ models via LiteLLM proxy server
5
+ * LiteLLM Provider direct HTTP, no AI SDK. Talks to a LiteLLM proxy
6
+ * server (or any deployment that speaks OpenAI chat-completions + the
7
+ * `/v1/models` and `/v1/embeddings` endpoints).
8
+ *
9
+ * All request/stream/tool-loop orchestration lives in
10
+ * `OpenAIChatCompletionsProvider`. This class adds LiteLLM-specific
11
+ * behaviour: OTel span wrap with cost (`onStreamStart`), Gemini 2.5
12
+ * maxTokens skip (`adjustBuildBodyOptions`), ModelAccessDeniedError on
13
+ * 403, 10-minute model cache (`getAvailableModels`), `LITELLM_FALLBACK_MODELS`
14
+ * env-driven fallback list, and native `/v1/embeddings`.
9
15
  */
10
- export declare class LiteLLMProvider extends BaseProvider {
11
- private model;
12
- private credentials?;
16
+ export declare class LiteLLMProvider extends OpenAIChatCompletionsProvider {
13
17
  private static modelsCache;
14
18
  private static modelsCacheTime;
15
19
  private static readonly MODELS_CACHE_DURATION;
@@ -19,39 +23,35 @@ export declare class LiteLLMProvider extends BaseProvider {
19
23
  });
20
24
  protected getProviderName(): AIProviderName;
21
25
  protected getDefaultModel(): string;
26
+ protected getFallbackModelName(): string;
27
+ protected getFallbackModels(): string[];
22
28
  /**
23
- * Returns the Vercel AI SDK model instance for LiteLLM
29
+ * Gemini 2.5 models on LiteLLM have a known compatibility issue with
30
+ * `max_tokens` — strip it before the wire body is built. Applies to
31
+ * both streaming and non-streaming paths.
24
32
  */
25
- protected getAISDKModel(): LanguageModel;
26
- formatProviderError(error: unknown): Error;
33
+ protected adjustBuildBodyOptions(modelId: string, opts: OpenAICompatBuildBodyArgs["options"]): OpenAICompatBuildBodyArgs["options"];
27
34
  /**
28
- * LiteLLM supports tools for compatible models
35
+ * Wrap the stream in an OTel span to capture provider-level latency,
36
+ * token usage, finish reason, and cost. Matches the pre-migration
37
+ * behaviour where streamText was wrapped in `neurolink.provider.streamText`.
29
38
  */
30
- supportsTools(): boolean;
39
+ protected onStreamStart(modelId: string): OpenAICompatStreamLifecycleListeners | undefined;
40
+ formatProviderError(error: unknown): Error;
31
41
  /**
32
- * Provider-specific streaming implementation
33
- * Note: This is only used when tools are disabled
42
+ * Get available models from LiteLLM proxy `/v1/models` endpoint.
43
+ * Caches results for 10 minutes; falls back to env-driven list or a
44
+ * minimal safe default if the API fetch fails.
34
45
  */
35
- protected executeStream(options: StreamOptions, analysisSchema?: ZodType | Schema<unknown>): Promise<StreamResult>;
36
- private createLiteLLMTransformedStream;
46
+ getAvailableModels(): Promise<string[]>;
47
+ private fetchModelsFromAPI;
37
48
  /**
38
- * Generate an embedding for a single text input
39
- * Uses the LiteLLM proxy with OpenAI-compatible embedding API
49
+ * Generate an embedding for a single text input via native /v1/embeddings.
40
50
  */
41
51
  embed(text: string, modelName?: string): Promise<number[]>;
42
52
  /**
43
- * Generate embeddings for multiple text inputs
44
- * Uses the LiteLLM proxy with OpenAI-compatible embedding API
53
+ * Generate embeddings for multiple text inputs via native /v1/embeddings.
45
54
  */
46
55
  embedMany(texts: string[], modelName?: string): Promise<number[][]>;
47
- /**
48
- * Get available models from LiteLLM proxy server
49
- * Dynamically fetches from /v1/models endpoint with caching and fallback
50
- */
51
- getAvailableModels(): Promise<string[]>;
52
- /**
53
- * Fetch available models from LiteLLM proxy /v1/models endpoint
54
- * @private
55
- */
56
- private fetchModelsFromAPI;
56
+ private callEmbeddings;
57
57
  }