@lobehub/chat 1.34.6 → 1.35.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/README.md +8 -8
- package/README.zh-CN.md +8 -8
- package/changelog/v1.json +18 -0
- package/docs/changelog/2024-07-19-gpt-4o-mini.mdx +32 -0
- package/docs/changelog/2024-07-19-gpt-4o-mini.zh-CN.mdx +5 -4
- package/docs/changelog/2024-08-02-lobe-chat-database-docker.mdx +36 -0
- package/docs/changelog/2024-08-02-lobe-chat-database-docker.zh-CN.mdx +0 -1
- package/docs/changelog/2024-08-21-file-upload-and-knowledge-base.mdx +30 -0
- package/docs/changelog/2024-08-21-file-upload-and-knowledge-base.zh-CN.mdx +0 -1
- package/docs/changelog/2024-09-13-openai-o1-models.mdx +31 -0
- package/docs/changelog/2024-09-20-artifacts.mdx +55 -0
- package/docs/changelog/2024-09-20-artifacts.zh-CN.mdx +3 -2
- package/docs/changelog/2024-10-27-pin-assistant.mdx +33 -0
- package/docs/changelog/2024-10-27-pin-assistant.zh-CN.mdx +0 -1
- package/docs/changelog/2024-11-06-share-text-json.mdx +24 -0
- package/docs/changelog/2024-11-06-share-text-json.zh-CN.mdx +3 -1
- package/docs/changelog/2024-11-25-november-providers.mdx +5 -5
- package/docs/changelog/2024-11-25-november-providers.zh-CN.mdx +5 -5
- package/docs/changelog/2024-11-27-forkable-chat.mdx +26 -0
- package/docs/changelog/2024-11-27-forkable-chat.zh-CN.mdx +16 -9
- package/docs/changelog/index.json +1 -1
- package/docs/self-hosting/environment-variables/analytics.mdx +31 -2
- package/locales/ar/models.json +94 -7
- package/locales/bg-BG/models.json +94 -7
- package/locales/de-DE/models.json +94 -7
- package/locales/en-US/models.json +94 -7
- package/locales/es-ES/models.json +94 -7
- package/locales/fa-IR/models.json +94 -7
- package/locales/fr-FR/models.json +94 -7
- package/locales/it-IT/models.json +94 -7
- package/locales/ja-JP/models.json +94 -7
- package/locales/ko-KR/models.json +94 -7
- package/locales/nl-NL/models.json +94 -7
- package/locales/pl-PL/models.json +94 -7
- package/locales/pt-BR/models.json +94 -7
- package/locales/ru-RU/models.json +94 -7
- package/locales/tr-TR/models.json +94 -7
- package/locales/vi-VN/models.json +94 -7
- package/locales/zh-CN/models.json +121 -34
- package/locales/zh-TW/models.json +94 -7
- package/package.json +2 -2
- package/src/config/modelProviders/ollama.ts +85 -35
- package/src/libs/agent-runtime/ollama/index.ts +25 -9
- package/src/libs/agent-runtime/utils/streams/ollama.test.ts +130 -46
- package/src/libs/agent-runtime/utils/streams/ollama.ts +19 -4
- package/src/server/modules/AgentRuntime/index.test.ts +2 -1
- package/src/server/modules/AgentRuntime/index.ts +7 -1
package/src/config/modelProviders/ollama.ts

@@ -1,6 +1,5 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref: https://ollama.com/library
 const Ollama: ModelProviderCard = {
   chatModels: [
     {
@@ -8,6 +7,7 @@ const Ollama: ModelProviderCard = {
         'Llama 3.1 是 Meta 推出的领先模型,支持高达 405B 参数,可应用于复杂对话、多语言翻译和数据分析领域。',
       displayName: 'Llama 3.1 8B',
       enabled: true,
+      functionCall: true,
       id: 'llama3.1',
       tokens: 128_000,
     },
@@ -54,6 +54,84 @@ const Ollama: ModelProviderCard = {
       id: 'codellama:70b',
       tokens: 16_384,
     },
+    {
+      description: 'QwQ 是一个实验研究模型,专注于提高 AI 推理能力。',
+      displayName: 'QwQ 32B',
+      enabled: true,
+      functionCall: true,
+      id: 'qwq',
+      releasedAt: '2024-11-28',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2.5 0.5B',
+      id: 'qwen2.5:0.5b',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2.5 1.5B',
+      id: 'qwen2.5:1.5b',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2.5 7B',
+      enabled: true,
+      functionCall: true,
+      id: 'qwen2.5',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2.5 72B',
+      id: 'qwen2.5:72b',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2.5 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2.5 7B',
+      enabled: true,
+      functionCall: true,
+      id: 'qwen2.5',
+      tokens: 128_000,
+    },
+    {
+      description: 'CodeQwen1.5 是基于大量代码数据训练的大型语言模型,专为解决复杂编程任务。',
+      displayName: 'CodeQwen1.5 7B',
+      functionCall: true,
+      id: 'codeqwen',
+      tokens: 65_536,
+    },
+    {
+      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2 0.5B',
+      functionCall: true,
+      id: 'qwen2:0.5b',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2 1.5B',
+      functionCall: true,
+      id: 'qwen2:1.5b',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2 7B',
+      functionCall: true,
+      id: 'qwen2',
+      tokens: 128_000,
+    },
+    {
+      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
+      displayName: 'Qwen2 72B',
+      functionCall: true,
+      id: 'qwen2:72b',
+      tokens: 128_000,
+    },
     {
       description: 'Gemma 2 是 Google 推出的高效模型,涵盖从小型应用到复杂数据处理的多种应用场景。',
       displayName: 'Gemma 2 2B',
@@ -63,7 +141,6 @@ const Ollama: ModelProviderCard = {
     {
       description: 'Gemma 2 是 Google 推出的高效模型,涵盖从小型应用到复杂数据处理的多种应用场景。',
       displayName: 'Gemma 2 9B',
-      enabled: true,
       id: 'gemma2',
       tokens: 8192,
     },
@@ -82,7 +159,6 @@ const Ollama: ModelProviderCard = {
     {
       description: 'CodeGemma 专用于不同编程任务的轻量级语言模型,支持快速迭代和集成。',
       displayName: 'CodeGemma 7B',
-      enabled: true,
       id: 'codegemma',
       tokens: 8192,
     },
@@ -125,6 +201,7 @@ const Ollama: ModelProviderCard = {
       description: 'Mistral 是 Mistral AI 发布的 7B 模型,适合多变的语言处理需求。',
       displayName: 'Mistral 7B',
       enabled: true,
+      functionCall: true,
       id: 'mistral',
       tokens: 32_768,
     },
@@ -133,6 +210,7 @@ const Ollama: ModelProviderCard = {
         'Mixtral 是 Mistral AI 的专家模型,具有开源权重,并在代码生成和语言理解方面提供支持。',
       displayName: 'Mixtral 8x7B',
       enabled: true,
+      functionCall: true,
       id: 'mixtral',
       tokens: 32_768,
     },
@@ -140,6 +218,7 @@ const Ollama: ModelProviderCard = {
       description:
         'Mixtral 是 Mistral AI 的专家模型,具有开源权重,并在代码生成和语言理解方面提供支持。',
       displayName: 'Mixtral 8x22B',
+      functionCall: true,
       id: 'mixtral:8x22b',
       tokens: 65_536,
     },
@@ -155,6 +234,7 @@ const Ollama: ModelProviderCard = {
       description: 'Mistral Nemo 由 Mistral AI 和 NVIDIA 合作推出,是高效性能的 12B 模型。',
       displayName: 'Mixtral Nemo 12B',
       enabled: true,
+      functionCall: true,
       id: 'mistral-nemo',
       tokens: 128_000,
     },
@@ -182,6 +262,7 @@ const Ollama: ModelProviderCard = {
       description: 'Command R 是优化用于对话和长上下文任务的LLM,特别适合动态交互与知识管理。',
       displayName: 'Command R 35B',
       enabled: true,
+      functionCall: true,
       id: 'command-r',
       tokens: 131_072,
     },
@@ -189,6 +270,7 @@ const Ollama: ModelProviderCard = {
       description: 'Command R+ 是一款高性能的大型语言模型,专为真实企业场景和复杂应用而设计。',
       displayName: 'Command R+ 104B',
       enabled: true,
+      functionCall: true,
       id: 'command-r-plus',
       tokens: 131_072,
     },
@@ -220,38 +302,6 @@ const Ollama: ModelProviderCard = {
       id: 'deepseek-coder-v2:236b',
       tokens: 128_000,
     },
-    {
-      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
-      displayName: 'Qwen2 0.5B',
-      id: 'qwen2:0.5b',
-      tokens: 128_000,
-    },
-    {
-      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
-      displayName: 'Qwen2 1.5B',
-      id: 'qwen2:1.5b',
-      tokens: 128_000,
-    },
-    {
-      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
-      displayName: 'Qwen2 7B',
-      enabled: true,
-      id: 'qwen2',
-      tokens: 128_000,
-    },
-    {
-      description: 'Qwen2 是阿里巴巴的新一代大规模语言模型,以优异的性能支持多元化的应用需求。',
-      displayName: 'Qwen2 72B',
-      id: 'qwen2:72b',
-      tokens: 128_000,
-    },
-    {
-      description: 'CodeQwen1.5 是基于大量代码数据训练的大型语言模型,专为解决复杂编程任务。',
-      displayName: 'CodeQwen1.5 7B',
-      enabled: true,
-      id: 'codeqwen',
-      tokens: 65_536,
-    },
     {
       description: 'LLaVA 是结合视觉编码器和 Vicuna 的多模态模型,用于强大的视觉和语言理解。',
       displayName: 'LLaVA 7B',
package/src/libs/agent-runtime/ollama/index.ts

@@ -1,4 +1,4 @@
-import { Ollama } from 'ollama/browser';
+import { Ollama, Tool } from 'ollama/browser';
 import { ClientOptions } from 'openai';
 
 import { OpenAIChatMessage } from '@/libs/agent-runtime';
@@ -8,8 +8,9 @@ import { LobeRuntimeAI } from '../BaseAI';
 import { AgentRuntimeErrorType } from '../error';
 import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types';
 import { AgentRuntimeError } from '../utils/createError';
+import { debugStream } from '../utils/debugStream';
 import { StreamingResponse } from '../utils/response';
-import { OllamaStream } from '../utils/streams';
+import { OllamaStream, convertIterableToStream } from '../utils/streams';
 import { parseDataUri } from '../utils/uriParser';
 import { OllamaMessage } from './type';
 
@@ -45,23 +46,38 @@ export class LobeOllamaAI implements LobeRuntimeAI {
         options: {
           frequency_penalty: payload.frequency_penalty,
           presence_penalty: payload.presence_penalty,
-          temperature:
-            payload.temperature !== undefined
-              ? payload.temperature / 2
-              : undefined,
+          temperature: payload.temperature !== undefined ? payload.temperature / 2 : undefined,
           top_p: payload.top_p,
         },
         stream: true,
+        tools: payload.tools as Tool[],
       });
 
-
+      const stream = convertIterableToStream(response);
+      const [prod, debug] = stream.tee();
+
+      if (process.env.DEBUG_OLLAMA_CHAT_COMPLETION === '1') {
+        debugStream(debug).catch(console.error);
+      }
+
+      return StreamingResponse(OllamaStream(prod, options?.callback), {
        headers: options?.headers,
      });
    } catch (error) {
-      const e = error as {
+      const e = error as {
+        error: any;
+        message: string;
+        name: string;
+        status_code: number;
+      };
 
      throw AgentRuntimeError.chat({
-        error: {
+        error: {
+          ...e.error,
+          message: String(e.error?.message || e.message),
+          name: e.name,
+          status_code: e.status_code,
+        },
        errorType: AgentRuntimeErrorType.OllamaBizError,
        provider: ModelProvider.Ollama,
      });
package/src/libs/agent-runtime/utils/streams/ollama.test.ts

@@ -6,61 +6,145 @@ import * as uuidModule from '@/utils/uuid';
 import { OllamaStream } from './ollama';
 
 describe('OllamaStream', () => {
-
-
+  describe('should transform Ollama stream to protocol stream', () => {
+    it('text', async () => {
+      vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+      const mockOllamaStream = new ReadableStream<ChatResponse>({
+        start(controller) {
+          controller.enqueue({ message: { content: 'Hello' }, done: false } as ChatResponse);
+          controller.enqueue({ message: { content: ' world!' }, done: false } as ChatResponse);
+          controller.enqueue({ message: { content: '' }, done: true } as ChatResponse);
+
+          controller.close();
+        },
+      });
+
+      const onStartMock = vi.fn();
+      const onTextMock = vi.fn();
+      const onTokenMock = vi.fn();
+      const onCompletionMock = vi.fn();
+
+      const protocolStream = OllamaStream(mockOllamaStream, {
+        onStart: onStartMock,
+        onText: onTextMock,
+        onToken: onTokenMock,
+        onCompletion: onCompletionMock,
+      });
+
+      const decoder = new TextDecoder();
+      const chunks = [];
 
-    const mockOllamaStream: AsyncIterable<ChatResponse> = {
       // @ts-ignore
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      for await (const chunk of protocolStream) {
+        chunks.push(decoder.decode(chunk, { stream: true }));
+      }
+
+      expect(chunks).toEqual([
+        'id: chat_1\n',
+        'event: text\n',
+        `data: "Hello"\n\n`,
+        'id: chat_1\n',
+        'event: text\n',
+        `data: " world!"\n\n`,
+        'id: chat_1\n',
+        'event: stop\n',
+        `data: "finished"\n\n`,
+      ]);
+
+      expect(onStartMock).toHaveBeenCalledTimes(1);
+      expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
+      expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
+      expect(onTokenMock).toHaveBeenCalledTimes(2);
+      expect(onCompletionMock).toHaveBeenCalledTimes(1);
    });
 
-
-
+    it('tools use', async () => {
+      vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
 
-
-
-
-
+      const mockOllamaStream = new ReadableStream<ChatResponse>({
+        start(controller) {
+          controller.enqueue({
+            model: 'qwen2.5',
+            created_at: new Date('2024-12-01T03:34:55.166692Z'),
+            message: {
+              role: 'assistant',
+              content: '',
+              tool_calls: [
+                {
+                  function: {
+                    name: 'realtime-weather____fetchCurrentWeather',
+                    arguments: { city: '杭州' },
+                  },
+                },
+              ],
+            },
+            done: false,
+          } as unknown as ChatResponse);
+          controller.enqueue({
+            model: 'qwen2.5',
+            created_at: '2024-12-01T03:34:55.2133Z',
+            message: { role: 'assistant', content: '' },
+            done_reason: 'stop',
+            done: true,
+            total_duration: 1122415333,
+            load_duration: 26178333,
+            prompt_eval_count: 221,
+            prompt_eval_duration: 507000000,
+            eval_count: 26,
+            eval_duration: 583000000,
+          } as unknown as ChatResponse);
+
+          controller.close();
+        },
+      });
+      const onStartMock = vi.fn();
+      const onTextMock = vi.fn();
+      const onTokenMock = vi.fn();
+      const onToolCall = vi.fn();
+      const onCompletionMock = vi.fn();
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      const protocolStream = OllamaStream(mockOllamaStream, {
+        onStart: onStartMock,
+        onText: onTextMock,
+        onToken: onTokenMock,
+        onCompletion: onCompletionMock,
+        onToolCall,
+      });
+
+      const decoder = new TextDecoder();
+      const chunks = [];
+
+      // @ts-ignore
+      for await (const chunk of protocolStream) {
+        chunks.push(decoder.decode(chunk, { stream: true }));
+      }
+
+      expect(chunks).toEqual(
+        [
+          'id: chat_1',
+          'event: tool_calls',
+          `data: [{"function":{"arguments":"{\\"city\\":\\"杭州\\"}","name":"realtime-weather____fetchCurrentWeather"},"id":"realtime-weather____fetchCurrentWeather_0","index":0,"type":"function"}]\n`,
+          'id: chat_1',
+          'event: stop',
+          `data: "finished"\n`,
+        ].map((i) => `${i}\n`),
+      );
+
+      expect(onTextMock).toHaveBeenCalledTimes(0);
+      expect(onStartMock).toHaveBeenCalledTimes(1);
+      expect(onToolCall).toHaveBeenCalledTimes(1);
+      expect(onTokenMock).toHaveBeenCalledTimes(0);
+      expect(onCompletionMock).toHaveBeenCalledTimes(1);
+    });
  });
 
  it('should handle empty stream', async () => {
-    const mockOllamaStream = {
-
-
+    const mockOllamaStream = new ReadableStream<ChatResponse>({
+      start(controller) {
+        controller.close();
+      },
+    });
 
    const protocolStream = OllamaStream(mockOllamaStream);
 
package/src/libs/agent-runtime/utils/streams/ollama.ts

@@ -6,27 +6,42 @@ import { nanoid } from '@/utils/uuid';
 import {
   StreamProtocolChunk,
   StreamStack,
-  convertIterableToStream,
   createCallbacksTransformer,
   createSSEProtocolTransformer,
+  generateToolCallId,
 } from './protocol';
 
 const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => {
   // maybe need another structure to add support for multiple choices
-  if (chunk.done) {
+  if (chunk.done && !chunk.message.content) {
     return { data: 'finished', id: stack.id, type: 'stop' };
   }
 
+  if (chunk.message.tool_calls && chunk.message.tool_calls.length > 0) {
+    return {
+      data: chunk.message.tool_calls.map((value, index) => ({
+        function: {
+          arguments: JSON.stringify(value.function?.arguments) ?? '{}',
+          name: value.function?.name ?? null,
+        },
+        id: generateToolCallId(index, value.function?.name),
+        index: index,
+        type: 'function',
+      })),
+      id: stack.id,
+      type: 'tool_calls',
+    };
+  }
   return { data: chunk.message.content, id: stack.id, type: 'text' };
 };
 
 export const OllamaStream = (
-  res:
+  res: ReadableStream<ChatResponse>,
   cb?: ChatStreamCallbacks,
 ): ReadableStream<string> => {
   const streamStack: StreamStack = { id: 'chat_' + nanoid() };
 
-  return
+  return res
    .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack))
    .pipeThrough(createCallbacksTransformer(cb));
 };
package/src/server/modules/AgentRuntime/index.test.ts

@@ -134,6 +134,7 @@ describe('initAgentRuntimeWithUserPayload method', () => {
     const runtime = await initAgentRuntimeWithUserPayload(ModelProvider.Ollama, jwtPayload);
     expect(runtime).toBeInstanceOf(AgentRuntime);
     expect(runtime['_runtime']).toBeInstanceOf(LobeOllamaAI);
+    expect(runtime['_runtime']['baseURL']).toEqual(jwtPayload.endpoint);
   });
 
   it('Perplexity AI provider: with apikey', async () => {
@@ -391,7 +392,7 @@ describe('initAgentRuntimeWithUserPayload method', () => {
     // endpoint 不存在,应返回 DEFAULT_BASE_URL
     expect(runtime['_runtime'].baseURL).toBe('https://dashscope.aliyuncs.com/compatible-mode/v1');
   });
-
+
   it('Unknown Provider', async () => {
     const jwtPayload = {};
     const runtime = await initAgentRuntimeWithUserPayload('unknown', jwtPayload);
package/src/server/modules/AgentRuntime/index.ts

@@ -33,7 +33,7 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {
     default: {
       let upperProvider = provider.toUpperCase();
 
-      if (!(
+      if (!(`${upperProvider}_API_KEY` in llmConfig)) {
         upperProvider = ModelProvider.OpenAI.toUpperCase(); // Use OpenAI options as default
       }
 
@@ -43,6 +43,12 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {
       return baseURL ? { apiKey, baseURL } : { apiKey };
     }
 
+    case ModelProvider.Ollama: {
+      const baseURL = payload?.endpoint || process.env.OLLAMA_PROXY_URL;
+
+      return { baseURL };
+    }
+
     case ModelProvider.Azure: {
       const { AZURE_API_KEY, AZURE_API_VERSION, AZURE_ENDPOINT } = llmConfig;
       const apikey = apiKeyManager.pick(payload?.apiKey || AZURE_API_KEY);