@node-llm/core 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/chat/ChatStream.d.ts.map +1 -1
  2. package/dist/chat/ChatStream.js +85 -34
  3. package/dist/config.d.ts +1 -1
  4. package/dist/errors/index.d.ts +1 -1
  5. package/dist/errors/index.js +2 -2
  6. package/dist/models/models.js +15 -15
  7. package/dist/providers/BaseProvider.d.ts +1 -1
  8. package/dist/providers/BaseProvider.js +1 -1
  9. package/dist/providers/Provider.d.ts +1 -0
  10. package/dist/providers/Provider.d.ts.map +1 -1
  11. package/dist/providers/anthropic/Chat.d.ts.map +1 -1
  12. package/dist/providers/anthropic/Chat.js +5 -1
  13. package/dist/providers/anthropic/Streaming.d.ts.map +1 -1
  14. package/dist/providers/anthropic/Streaming.js +49 -2
  15. package/dist/providers/deepseek/Chat.d.ts.map +1 -1
  16. package/dist/providers/deepseek/Chat.js +5 -4
  17. package/dist/providers/deepseek/Streaming.d.ts.map +1 -1
  18. package/dist/providers/deepseek/Streaming.js +49 -3
  19. package/dist/providers/gemini/Chat.d.ts.map +1 -1
  20. package/dist/providers/gemini/Chat.js +3 -0
  21. package/dist/providers/gemini/Embeddings.d.ts.map +1 -1
  22. package/dist/providers/gemini/Embeddings.js +3 -0
  23. package/dist/providers/gemini/Image.d.ts.map +1 -1
  24. package/dist/providers/gemini/Image.js +3 -0
  25. package/dist/providers/gemini/Streaming.d.ts.map +1 -1
  26. package/dist/providers/gemini/Streaming.js +32 -1
  27. package/dist/providers/gemini/Transcription.d.ts.map +1 -1
  28. package/dist/providers/gemini/Transcription.js +3 -0
  29. package/dist/providers/openai/Chat.d.ts.map +1 -1
  30. package/dist/providers/openai/Chat.js +5 -4
  31. package/dist/providers/openai/Embedding.d.ts.map +1 -1
  32. package/dist/providers/openai/Embedding.js +5 -1
  33. package/dist/providers/openai/Image.d.ts.map +1 -1
  34. package/dist/providers/openai/Image.js +5 -1
  35. package/dist/providers/openai/Moderation.d.ts.map +1 -1
  36. package/dist/providers/openai/Moderation.js +12 -6
  37. package/dist/providers/openai/Streaming.d.ts.map +1 -1
  38. package/dist/providers/openai/Streaming.js +53 -4
  39. package/dist/providers/openai/Transcription.d.ts.map +1 -1
  40. package/dist/providers/openai/Transcription.js +9 -2
  41. package/dist/providers/registry.js +1 -1
  42. package/dist/utils/logger.d.ts +8 -0
  43. package/dist/utils/logger.d.ts.map +1 -1
  44. package/dist/utils/logger.js +22 -0
  45. package/package.json +2 -2
package/dist/chat/ChatStream.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"ChatStream.d.ts","sourceRoot":"","sources":["../../src/chat/ChatStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAE/D,OAAO,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAEhD;;;GAGG;AACH,qBAAa,UAAU;IAInB,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAL1B,OAAO,CAAC,QAAQ,CAAY;gBAGT,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,WAAgB,EAC1C,QAAQ,CAAC,EAAE,OAAO,EAAE;IAmBtB;;OAEG;IACH,IAAI,OAAO,IAAI,SAAS,OAAO,EAAE,CAEhC;IAED;;;OAGG;IACH,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC;CAyE3C"}
+ {"version":3,"file":"ChatStream.d.ts","sourceRoot":"","sources":["../../src/chat/ChatStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAE/D,OAAO,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAEhD;;;GAGG;AACH,qBAAa,UAAU;IAInB,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAL1B,OAAO,CAAC,QAAQ,CAAY;gBAGT,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,WAAgB,EAC1C,QAAQ,CAAC,EAAE,OAAO,EAAE;IAmBtB;;OAEG;IACH,IAAI,OAAO,IAAI,SAAS,OAAO,EAAE,CAEhC;IAED;;;OAGG;IACH,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC;CAkI3C"}
package/dist/chat/ChatStream.js CHANGED
@@ -45,47 +45,98 @@ export class ChatStream {
  if (!provider.stream) {
  throw new Error("Streaming not supported by provider");
  }
- let full = "";
+ let fullContent = "";
  let fullReasoning = "";
+ let toolCalls;
  let isFirst = true;
- try {
- for await (const chunk of provider.stream({
- model,
- messages,
- temperature: options.temperature,
- max_tokens: options.maxTokens,
- signal: abortController.signal,
- })) {
- if (isFirst) {
- if (options.onNewMessage)
- options.onNewMessage();
- isFirst = false;
+ // Main streaming loop - may iterate multiple times for tool calls
+ while (true) {
+ fullContent = "";
+ fullReasoning = "";
+ toolCalls = undefined;
+ try {
+ for await (const chunk of provider.stream({
+ model,
+ messages,
+ tools: options.tools,
+ temperature: options.temperature,
+ max_tokens: options.maxTokens,
+ signal: abortController.signal,
+ })) {
+ if (isFirst) {
+ if (options.onNewMessage)
+ options.onNewMessage();
+ isFirst = false;
+ }
+ if (chunk.content) {
+ fullContent += chunk.content;
+ yield chunk;
+ }
+ if (chunk.reasoning) {
+ fullReasoning += chunk.reasoning;
+ yield { content: "", reasoning: chunk.reasoning };
+ }
+ // Accumulate tool calls from the final chunk
+ if (chunk.tool_calls) {
+ toolCalls = chunk.tool_calls;
+ }
  }
- if (chunk.content) {
- full += chunk.content;
+ // Add assistant message to history
+ messages.push({
+ role: "assistant",
+ content: fullContent || null,
+ tool_calls: toolCalls,
+ // @ts-ignore
+ reasoning: fullReasoning || undefined
+ });
+ // If no tool calls, we're done
+ if (!toolCalls || toolCalls.length === 0) {
+ if (options.onEndMessage) {
+ options.onEndMessage(new ChatResponseString(fullContent, { input_tokens: 0, output_tokens: 0, total_tokens: 0 }, model, fullReasoning || undefined));
+ }
+ break;
  }
- if (chunk.reasoning) {
- fullReasoning += chunk.reasoning;
+ // Execute tool calls
+ for (const toolCall of toolCalls) {
+ if (options.onToolCall)
+ options.onToolCall(toolCall);
+ const tool = options.tools?.find((t) => t.function.name === toolCall.function.name);
+ if (tool?.handler) {
+ try {
+ const args = JSON.parse(toolCall.function.arguments);
+ const result = await tool.handler(args);
+ if (options.onToolResult)
+ options.onToolResult(result);
+ messages.push({
+ role: "tool",
+ tool_call_id: toolCall.id,
+ content: result,
+ });
+ }
+ catch (error) {
+ messages.push({
+ role: "tool",
+ tool_call_id: toolCall.id,
+ content: `Error executing tool: ${error.message}`,
+ });
+ }
+ }
+ else {
+ messages.push({
+ role: "tool",
+ tool_call_id: toolCall.id,
+ content: "Error: Tool not found or no handler provided",
+ });
+ }
  }
- yield chunk;
+ // Continue loop to stream the next response after tool execution
  }
- // Finalize history
- messages.push({
- role: "assistant",
- content: full,
- // @ts-ignore
- reasoning: fullReasoning || undefined
- });
- if (options.onEndMessage) {
- options.onEndMessage(new ChatResponseString(full, { input_tokens: 0, output_tokens: 0, total_tokens: 0 }, model, fullReasoning || undefined));
- }
- }
- catch (error) {
- if (error instanceof Error && error.name === 'AbortError') {
- // Stream was aborted, we might still want to save what we got?
- // For now just rethrow or handle as needed
+ catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ // Stream was aborted
+ }
+ throw error;
  }
- throw error;
  }
  };
  return new Stream(() => sideEffectGenerator(this.provider, this.model, this.messages, this.options, controller), controller);
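
The net effect of this change is that ChatStream can now round-trip tool calls: when a streamed response ends with tool_calls, the matching handlers run, their results are appended to the history as role:"tool" messages, and the loop streams the follow-up response. A minimal consumption sketch in TypeScript; the tools/onToolCall/onToolResult option names and the function.name/arguments shape come from the diff above, while the chat.stream entry point and tool definition details are illustrative assumptions:

    // Hypothetical tool definition; handlers receive the JSON-parsed
    // arguments and return a string that is fed back to the model.
    const tools = [{
      type: "function",
      function: {
        name: "get_time",
        description: "Return the current time for a timezone",
        parameters: { type: "object", properties: { tz: { type: "string" } } },
      },
      handler: async (args: { tz: string }) =>
        new Date().toLocaleTimeString("en-US", { timeZone: args.tz }),
    }];

    // Assumed entry point for illustration; the loop transparently spans
    // the tool-call round trips handled inside ChatStream.
    for await (const chunk of chat.stream({
      tools,
      onToolCall: (call) => console.log("calling", call.function.name),
      onToolResult: (result) => console.log("tool returned", result),
    })) {
      process.stdout.write(chunk.content);
    }
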
package/dist/config.d.ts CHANGED
@@ -1,5 +1,5 @@
  /**
- * Global configuration for Node-NodeLLM providers.
+ * Global configuration for LLM providers.
  * Values are initialized from environment variables but can be overridden programmatically.
  */
  export interface NodeLLMConfig {
package/dist/errors/index.d.ts CHANGED
@@ -64,7 +64,7 @@ export declare class CapabilityError extends LLMError {
  constructor(message: string);
  }
  /**
- * Thrown when NodeLLM provider is not configured
+ * Thrown when LLM provider is not configured
  */
  export declare class ProviderNotConfiguredError extends LLMError {
  constructor();
package/dist/errors/index.js CHANGED
@@ -96,11 +96,11 @@ export class CapabilityError extends LLMError {
  }
  }
  /**
- * Thrown when NodeLLM provider is not configured
+ * Thrown when LLM provider is not configured
  */
  export class ProviderNotConfiguredError extends LLMError {
  constructor() {
- super("NodeLLM provider not configured", "PROVIDER_NOT_CONFIGURED");
+ super("LLM provider not configured", "PROVIDER_NOT_CONFIGURED");
  }
  }
  /**
package/dist/models/models.js CHANGED
@@ -9419,7 +9419,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose NodeLLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.",
+ "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -9771,7 +9771,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose NodeLLM applications with high reasoning and throughput demands.",
+ "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -11794,7 +11794,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "A large NodeLLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
+ "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -13893,7 +13893,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both NodeLLM inputs (prompt classification) and in NodeLLM responses (response classification). It acts as an NodeLLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n",
+ "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -14926,7 +14926,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "The Meta Llama 3.3 multilingual large language model (NodeLLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -14989,7 +14989,7 @@ export const modelsData = [
  ],
  "pricing": {},
  "metadata": {
- "description": "The Meta Llama 3.3 multilingual large language model (NodeLLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -15197,7 +15197,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both NodeLLM inputs (prompt classification) and in NodeLLM responses (response classification). It acts as an NodeLLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.",
+ "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an NodeLLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.",
  "architecture": {
  "modality": "text+image->text",
  "input_modalities": [
@@ -15262,7 +15262,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal NodeLLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -16583,7 +16583,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Devstral-Small-2505 is a 24B parameter agentic NodeLLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
+ "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -19601,7 +19601,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (NodeLLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
+ "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -19856,7 +19856,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (NodeLLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -19918,7 +19918,7 @@ export const modelsData = [
  ],
  "pricing": {},
  "metadata": {
- "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (NodeLLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -22688,7 +22688,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, NodeLLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).",
+ "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -24601,7 +24601,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Qwen2.5 VL 7B is a multimodal NodeLLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
  "architecture": {
  "modality": "text+image->text",
  "input_modalities": [
@@ -26850,7 +26850,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Relace Apply 3 is a specialized code-patching NodeLLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n<instruction>{instruction}</instruction>\n<code>{initial_code}</code>\n<update>{edit_snippet}</update>\n\nZero Data Retention is enabled for Relace. Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply)",
+ "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n<instruction>{instruction}</instruction>\n<code>{initial_code}</code>\n<update>{edit_snippet}</update>\n\nZero Data Retention is enabled for Relace. Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply)",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
package/dist/providers/BaseProvider.d.ts CHANGED
@@ -1,6 +1,6 @@
  import { Provider, ChatRequest, ChatResponse, ChatChunk, ModelInfo, ImageRequest, ImageResponse, TranscriptionRequest, TranscriptionResponse, ModerationRequest, ModerationResponse, EmbeddingRequest, EmbeddingResponse } from "./Provider.js";
  /**
- * Abstract base class for all NodeLLM providers.
+ * Abstract base class for all LLM providers.
  * Provides common functionality and default implementations for unsupported features.
  * Each provider must implement the abstract methods and can override default implementations.
  */
package/dist/providers/BaseProvider.js CHANGED
@@ -1,5 +1,5 @@
  /**
- * Abstract base class for all NodeLLM providers.
+ * Abstract base class for all LLM providers.
  * Provides common functionality and default implementations for unsupported features.
  * Each provider must implement the abstract methods and can override default implementations.
  */
package/dist/providers/Provider.d.ts CHANGED
@@ -13,6 +13,7 @@ export interface ChatRequest {
  export interface ChatChunk {
  content: string;
  reasoning?: string;
+ tool_calls?: ToolCall[];
  done?: boolean;
  }
  export interface Usage {
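
For reference, the widened chunk type now carries tool calls alongside text and reasoning deltas. A TypeScript sketch of the resulting shapes, with ToolCall inferred from how the providers populate it elsewhere in this diff (id / type / function { name, arguments }); the exact exported names may differ:

    // Inferred from this diff; illustrative, not the package's literal export.
    interface ToolCall {
      id: string;
      type: "function";
      function: { name: string; arguments: string }; // arguments is a JSON string
    }

    interface ChatChunk {
      content: string;
      reasoning?: string;
      tool_calls?: ToolCall[]; // set on the final chunk when the model calls tools
      done?: boolean;
    }
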
package/dist/providers/Provider.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Provider.d.ts","sourceRoot":"","sources":["../../src/providers/Provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,GAAG,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,MAAM,WAAW,oBAAoB;IACnC,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACzC,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACxC,wBAAwB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACnD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAClD,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAChD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC5C,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC;CAClD;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,UAAU,EAAE;QAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAClD,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,EAAE,GAAG,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,aAAa;IAC5B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,oBAAoB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,M
AAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,CAAC,CAAC,OAAO,EAAE,WAAW,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IACxD,UAAU,CAAC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IACpC,KAAK,CAAC,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IACtD,UAAU,CAAC,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC3E,QAAQ,CAAC,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACnE,KAAK,CAAC,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC9D,YAAY,CAAC,EAAE,oBAAoB,CAAC;CACrC"}
+ {"version":3,"file":"Provider.d.ts","sourceRoot":"","sources":["../../src/providers/Provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,GAAG,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,MAAM,WAAW,oBAAoB;IACnC,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACzC,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACxC,wBAAwB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACnD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAClD,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAChD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC5C,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC;CAClD;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,UAAU,EAAE;QAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAClD,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,EAAE,GAAG,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,aAAa;IAC5B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,oBAAoB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,Y
AAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,CAAC,CAAC,OAAO,EAAE,WAAW,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IACxD,UAAU,CAAC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IACpC,KAAK,CAAC,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IACtD,UAAU,CAAC,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC3E,QAAQ,CAAC,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACnE,KAAK,CAAC,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC9D,YAAY,CAAC,EAAE,oBAAoB,CAAC;CACrC"}
package/dist/providers/anthropic/Chat.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAUlE,qBAAa,aAAa;IACZ,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CAwG3D"}
+ {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAWlE,qBAAa,aAAa;IACZ,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CA4G3D"}
package/dist/providers/anthropic/Chat.js CHANGED
@@ -1,6 +1,7 @@
  import { Capabilities } from "./Capabilities.js";
  import { handleAnthropicError } from "./Errors.js";
  import { ModelRegistry } from "../../models/ModelRegistry.js";
+ import { logger } from "../../utils/logger.js";
  import { formatSystemPrompt, formatMessages } from "./Utils.js";
  export class AnthropicChat {
  baseUrl;
@@ -54,7 +55,9 @@ export class AnthropicChat {
  if (hasPdf) {
  headers["anthropic-beta"] = "pdfs-2024-09-25";
  }
- const response = await fetch(`${this.baseUrl}/messages`, {
+ const url = `${this.baseUrl}/messages`;
+ logger.logRequest("Anthropic", "POST", url, body);
+ const response = await fetch(url, {
  method: "POST",
  headers: headers,
  body: JSON.stringify(body),
@@ -63,6 +66,7 @@ export class AnthropicChat {
  await handleAnthropicError(response, model);
  }
  const json = (await response.json());
+ logger.logResponse("Anthropic", response.status, response.statusText, json);
  const contentBlocks = json.content;
  // Extract text content and tool calls
  let content = null;
package/dist/providers/anthropic/Streaming.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAMxD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CAqJ7B"}
+ {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAOxD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CAqM7B"}
package/dist/providers/anthropic/Streaming.js CHANGED
@@ -1,6 +1,7 @@
  import { Capabilities } from "./Capabilities.js";
  import { handleAnthropicError } from "./Errors.js";
  import { formatSystemPrompt, formatMessages } from "./Utils.js";
+ import { logger } from "../../utils/logger.js";
  export class AnthropicStreaming {
  baseUrl;
  apiKey;
@@ -52,8 +53,13 @@ export class AnthropicStreaming {
  headers["anthropic-beta"] = "pdfs-2024-09-25";
  }
  let done = false;
+ // Track tool calls being built across chunks
+ const toolCallsMap = new Map();
+ let currentBlockIndex = -1;
  try {
- const response = await fetch(`${this.baseUrl}/messages`, {
+ const url = `${this.baseUrl}/messages`;
+ logger.logRequest("Anthropic", "POST", url, body);
+ const response = await fetch(url, {
  method: "POST",
  headers: headers,
  body: JSON.stringify(body),
@@ -62,6 +68,7 @@ export class AnthropicStreaming {
  if (!response.ok) {
  await handleAnthropicError(response, model);
  }
+ logger.debug("Anthropic streaming started", { status: response.status, statusText: response.statusText });
  if (!response.body) {
  throw new Error("No response body for streaming");
  }
@@ -97,16 +104,56 @@ export class AnthropicStreaming {
  try {
  const data = JSON.parse(dataStr);
  // Handle different event types from Anthropic
- if (eventType === "content_block_delta") {
+ if (eventType === "content_block_start") {
+ // Track the block index for tool use
+ if (data.content_block?.type === "tool_use") {
+ currentBlockIndex = data.index;
+ toolCallsMap.set(currentBlockIndex, {
+ id: data.content_block.id,
+ type: "function",
+ function: {
+ name: data.content_block.name,
+ arguments: ""
+ }
+ });
+ }
+ }
+ else if (eventType === "content_block_delta") {
  if (data.delta && data.delta.type === "text_delta") {
  yield { content: data.delta.text };
  }
+ else if (data.delta && data.delta.type === "input_json_delta") {
+ // Accumulate tool arguments
+ const index = data.index;
+ if (toolCallsMap.has(index)) {
+ const toolCall = toolCallsMap.get(index);
+ toolCall.function.arguments += data.delta.partial_json;
+ }
+ }
+ }
+ else if (eventType === "content_block_stop") {
+ // Block finished
  }
  else if (eventType === "message_start") {
  // Could extract initial usage here
  }
  else if (eventType === "message_delta") {
  // Update usage or stop reason
+ if (data.delta?.stop_reason === "end_turn" && toolCallsMap.size > 0) {
+ // Yield accumulated tool calls
+ const toolCalls = Array.from(toolCallsMap.values()).map(tc => ({
+ id: tc.id,
+ type: "function",
+ function: {
+ name: tc.function.name,
+ arguments: tc.function.arguments
+ }
+ }));
+ yield { content: "", tool_calls: toolCalls, done: true };
+ }
+ }
+ else if (eventType === "message_stop") {
+ done = true;
  }
  else if (eventType === "error") {
  throw new Error(`Stream error: ${data.error?.message}`);
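
Anthropic splits a tool call across three SSE events: content_block_start announces the block (id and name), content_block_delta with input_json_delta streams the arguments as partial JSON fragments, and message_delta carries the stop reason. A condensed TypeScript sketch of the accumulation strategy the code above implements; the onEvent wrapper and PartialToolCall type are illustrative, not part of the package:

    type PartialToolCall = {
      id: string;
      type: "function";
      function: { name: string; arguments: string };
    };

    const toolCallsMap = new Map<number, PartialToolCall>();

    function onEvent(eventType: string, data: any): PartialToolCall[] | undefined {
      if (eventType === "content_block_start" && data.content_block?.type === "tool_use") {
        // A tool_use block opens: remember its id/name, keyed by block index.
        toolCallsMap.set(data.index, {
          id: data.content_block.id,
          type: "function",
          function: { name: data.content_block.name, arguments: "" },
        });
      } else if (eventType === "content_block_delta" && data.delta?.type === "input_json_delta") {
        // Arguments arrive as partial JSON strings for that block index.
        const tc = toolCallsMap.get(data.index);
        if (tc) tc.function.arguments += data.delta.partial_json;
      } else if (eventType === "message_delta" && data.delta?.stop_reason === "end_turn" && toolCallsMap.size > 0) {
        // Message finished with pending tool calls: flush them to the caller.
        return Array.from(toolCallsMap.values());
      }
      return undefined;
    }
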
package/dist/providers/deepseek/Chat.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAqBlE,qBAAa,YAAY;IACX,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CAwF3D"}
+ {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAsBlE,qBAAa,YAAY;IACX,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CAyF3D"}
package/dist/providers/deepseek/Chat.js CHANGED
@@ -1,4 +1,5 @@
  import { ModelRegistry } from "../../models/ModelRegistry.js";
+ import { logger } from "../../utils/logger.js";
  export class DeepSeekChat {
  baseUrl;
  apiKey;
@@ -46,10 +47,9 @@ export class DeepSeekChat {
  body.response_format = response_format;
  }
  }
- if (process.env.NODELLM_DEBUG === "true") {
- console.log(`[DeepSeek Request] ${JSON.stringify(body, null, 2)}`);
- }
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
+ const url = `${this.baseUrl}/chat/completions`;
+ logger.logRequest("DeepSeek", "POST", url, body);
+ const response = await fetch(url, {
  method: "POST",
  headers: {
  "Authorization": `Bearer ${this.apiKey}`,
@@ -63,6 +63,7 @@ export class DeepSeekChat {
  throw new Error(`DeepSeek API error: ${response.status} - ${errorText}`);
  }
  const json = (await response.json());
+ logger.logResponse("DeepSeek", response.status, response.statusText, json);
  const message = json.choices[0]?.message;
  const content = message?.content ?? null;
  const reasoning = message?.reasoning_content ?? null;
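
Across providers, the ad-hoc NODELLM_DEBUG console logging is replaced by a shared logger (dist/utils/logger.js, +22 lines; logger.d.ts, +8) with logRequest, logResponse, and debug methods. The diff does not show the logger body, so the following is only a plausible TypeScript sketch consistent with the call sites, assuming it keeps the same environment-variable gate:

    // Sketch only: the method signatures match the call sites in this diff;
    // the gating and output formatting are assumptions.
    const enabled = process.env.NODELLM_DEBUG === "true";

    export const logger = {
      logRequest(provider: string, method: string, url: string, body: unknown): void {
        if (enabled)
          console.log(`[${provider} Request] ${method} ${url} ${JSON.stringify(body, null, 2)}`);
      },
      logResponse(provider: string, status: number, statusText: string, json: unknown): void {
        if (enabled)
          console.log(`[${provider} Response] ${status} ${statusText} ${JSON.stringify(json, null, 2)}`);
      },
      debug(message: string, meta?: Record<string, unknown>): void {
        if (enabled)
          console.log(`[debug] ${message}`, meta ?? "");
      },
    };
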
package/dist/providers/deepseek/Streaming.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAGxD,qBAAa,iBAAiB;IAChB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CA8G7B"}
+ {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAIxD,qBAAa,iBAAiB;IAChB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CA+J7B"}