@node-llm/core 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/chat/ChatStream.d.ts.map +1 -1
  2. package/dist/chat/ChatStream.js +85 -34
  3. package/dist/config.d.ts +1 -1
  4. package/dist/errors/index.d.ts +1 -1
  5. package/dist/errors/index.js +2 -2
  6. package/dist/models/models.js +15 -15
  7. package/dist/providers/BaseProvider.d.ts +1 -1
  8. package/dist/providers/BaseProvider.js +1 -1
  9. package/dist/providers/Provider.d.ts +1 -0
  10. package/dist/providers/Provider.d.ts.map +1 -1
  11. package/dist/providers/anthropic/Chat.d.ts.map +1 -1
  12. package/dist/providers/anthropic/Chat.js +5 -1
  13. package/dist/providers/anthropic/Streaming.d.ts.map +1 -1
  14. package/dist/providers/anthropic/Streaming.js +49 -2
  15. package/dist/providers/deepseek/Chat.d.ts.map +1 -1
  16. package/dist/providers/deepseek/Chat.js +5 -4
  17. package/dist/providers/deepseek/Streaming.d.ts.map +1 -1
  18. package/dist/providers/deepseek/Streaming.js +49 -3
  19. package/dist/providers/gemini/Chat.d.ts.map +1 -1
  20. package/dist/providers/gemini/Chat.js +3 -0
  21. package/dist/providers/gemini/Embeddings.d.ts.map +1 -1
  22. package/dist/providers/gemini/Embeddings.js +3 -0
  23. package/dist/providers/gemini/Image.d.ts.map +1 -1
  24. package/dist/providers/gemini/Image.js +3 -0
  25. package/dist/providers/gemini/Streaming.d.ts.map +1 -1
  26. package/dist/providers/gemini/Streaming.js +32 -1
  27. package/dist/providers/gemini/Transcription.d.ts.map +1 -1
  28. package/dist/providers/gemini/Transcription.js +3 -0
  29. package/dist/providers/openai/Chat.d.ts.map +1 -1
  30. package/dist/providers/openai/Chat.js +5 -4
  31. package/dist/providers/openai/Embedding.d.ts.map +1 -1
  32. package/dist/providers/openai/Embedding.js +5 -1
  33. package/dist/providers/openai/Image.d.ts.map +1 -1
  34. package/dist/providers/openai/Image.js +5 -1
  35. package/dist/providers/openai/Moderation.d.ts.map +1 -1
  36. package/dist/providers/openai/Moderation.js +12 -6
  37. package/dist/providers/openai/Streaming.d.ts.map +1 -1
  38. package/dist/providers/openai/Streaming.js +53 -4
  39. package/dist/providers/openai/Transcription.d.ts.map +1 -1
  40. package/dist/providers/openai/Transcription.js +9 -2
  41. package/dist/providers/registry.js +1 -1
  42. package/dist/utils/logger.d.ts +8 -0
  43. package/dist/utils/logger.d.ts.map +1 -1
  44. package/dist/utils/logger.js +22 -0
  45. package/package.json +2 -2
package/dist/chat/ChatStream.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"ChatStream.d.ts","sourceRoot":"","sources":["../../src/chat/ChatStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAE/D,OAAO,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAEhD;;;GAGG;AACH,qBAAa,UAAU;IAInB,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAL1B,OAAO,CAAC,QAAQ,CAAY;gBAGT,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,WAAgB,EAC1C,QAAQ,CAAC,EAAE,OAAO,EAAE;IAmBtB;;OAEG;IACH,IAAI,OAAO,IAAI,SAAS,OAAO,EAAE,CAEhC;IAED;;;OAGG;IACH,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC;CAyE3C"}
+ {"version":3,"file":"ChatStream.d.ts","sourceRoot":"","sources":["../../src/chat/ChatStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAE/D,OAAO,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAEhD;;;GAGG;AACH,qBAAa,UAAU;IAInB,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAL1B,OAAO,CAAC,QAAQ,CAAY;gBAGT,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,WAAgB,EAC1C,QAAQ,CAAC,EAAE,OAAO,EAAE;IAmBtB;;OAEG;IACH,IAAI,OAAO,IAAI,SAAS,OAAO,EAAE,CAEhC;IAED;;;OAGG;IACH,MAAM,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC;CAkI3C"}
package/dist/chat/ChatStream.js CHANGED
@@ -45,47 +45,98 @@ export class ChatStream {
  if (!provider.stream) {
  throw new Error("Streaming not supported by provider");
  }
- let full = "";
+ let fullContent = "";
  let fullReasoning = "";
+ let toolCalls;
  let isFirst = true;
- try {
- for await (const chunk of provider.stream({
- model,
- messages,
- temperature: options.temperature,
- max_tokens: options.maxTokens,
- signal: abortController.signal,
- })) {
- if (isFirst) {
- if (options.onNewMessage)
- options.onNewMessage();
- isFirst = false;
+ // Main streaming loop - may iterate multiple times for tool calls
+ while (true) {
+ fullContent = "";
+ fullReasoning = "";
+ toolCalls = undefined;
+ try {
+ for await (const chunk of provider.stream({
+ model,
+ messages,
+ tools: options.tools,
+ temperature: options.temperature,
+ max_tokens: options.maxTokens,
+ signal: abortController.signal,
+ })) {
+ if (isFirst) {
+ if (options.onNewMessage)
+ options.onNewMessage();
+ isFirst = false;
+ }
+ if (chunk.content) {
+ fullContent += chunk.content;
+ yield chunk;
+ }
+ if (chunk.reasoning) {
+ fullReasoning += chunk.reasoning;
+ yield { content: "", reasoning: chunk.reasoning };
+ }
+ // Accumulate tool calls from the final chunk
+ if (chunk.tool_calls) {
+ toolCalls = chunk.tool_calls;
+ }
  }
- if (chunk.content) {
- full += chunk.content;
+ // Add assistant message to history
+ messages.push({
+ role: "assistant",
+ content: fullContent || null,
+ tool_calls: toolCalls,
+ // @ts-ignore
+ reasoning: fullReasoning || undefined
+ });
+ // If no tool calls, we're done
+ if (!toolCalls || toolCalls.length === 0) {
+ if (options.onEndMessage) {
+ options.onEndMessage(new ChatResponseString(fullContent, { input_tokens: 0, output_tokens: 0, total_tokens: 0 }, model, fullReasoning || undefined));
+ }
+ break;
  }
- if (chunk.reasoning) {
- fullReasoning += chunk.reasoning;
+ // Execute tool calls
+ for (const toolCall of toolCalls) {
+ if (options.onToolCall)
+ options.onToolCall(toolCall);
+ const tool = options.tools?.find((t) => t.function.name === toolCall.function.name);
+ if (tool?.handler) {
+ try {
+ const args = JSON.parse(toolCall.function.arguments);
+ const result = await tool.handler(args);
+ if (options.onToolResult)
+ options.onToolResult(result);
+ messages.push({
+ role: "tool",
+ tool_call_id: toolCall.id,
+ content: result,
+ });
+ }
+ catch (error) {
+ messages.push({
+ role: "tool",
+ tool_call_id: toolCall.id,
+ content: `Error executing tool: ${error.message}`,
+ });
+ }
+ }
+ else {
+ messages.push({
+ role: "tool",
+ tool_call_id: toolCall.id,
+ content: "Error: Tool not found or no handler provided",
+ });
+ }
  }
- yield chunk;
+ // Continue loop to stream the next response after tool execution
  }
- // Finalize history
- messages.push({
- role: "assistant",
- content: full,
- // @ts-ignore
- reasoning: fullReasoning || undefined
- });
- if (options.onEndMessage) {
- options.onEndMessage(new ChatResponseString(full, { input_tokens: 0, output_tokens: 0, total_tokens: 0 }, model, fullReasoning || undefined));
- }
- }
- catch (error) {
- if (error instanceof Error && error.name === 'AbortError') {
- // Stream was aborted, we might still want to save what we got?
- // For now just rethrow or handle as needed
+ catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ // Stream was aborted
+ }
+ throw error;
  }
- throw error;
  }
  };
  return new Stream(() => sideEffectGenerator(this.provider, this.model, this.messages, this.options, controller), controller);
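
The net effect of this change is that ChatStream can now round-trip tool calls: when a streamed response ends with tool_calls, the matching handlers run, their results are appended to the history as role:"tool" messages, and the loop streams the follow-up response. A minimal consumption sketch in TypeScript; the tools/onToolCall/onToolResult option names and the function.name/arguments shape come from the diff above, while the chat.stream entry point and tool definition details are illustrative assumptions:

    // Hypothetical tool definition; handlers receive the JSON-parsed
    // arguments and return a string that is fed back to the model.
    const tools = [{
      type: "function",
      function: {
        name: "get_time",
        description: "Return the current time for a timezone",
        parameters: { type: "object", properties: { tz: { type: "string" } } },
      },
      handler: async (args: { tz: string }) =>
        new Date().toLocaleTimeString("en-US", { timeZone: args.tz }),
    }];

    // Assumed entry point for illustration; the loop transparently spans
    // the tool-call round trips handled inside ChatStream.
    for await (const chunk of chat.stream({
      tools,
      onToolCall: (call) => console.log("calling", call.function.name),
      onToolResult: (result) => console.log("tool returned", result),
    })) {
      process.stdout.write(chunk.content);
    }
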
package/dist/config.d.ts CHANGED
@@ -1,5 +1,5 @@
  /**
- * Global configuration for Node-NodeLLM providers.
+ * Global configuration for LLM providers.
  * Values are initialized from environment variables but can be overridden programmatically.
  */
  export interface NodeLLMConfig {
package/dist/errors/index.d.ts CHANGED
@@ -64,7 +64,7 @@ export declare class CapabilityError extends LLMError {
  constructor(message: string);
  }
  /**
- * Thrown when NodeLLM provider is not configured
+ * Thrown when LLM provider is not configured
  */
  export declare class ProviderNotConfiguredError extends LLMError {
  constructor();
package/dist/errors/index.js CHANGED
@@ -96,11 +96,11 @@ export class CapabilityError extends LLMError {
  }
  }
  /**
- * Thrown when NodeLLM provider is not configured
+ * Thrown when LLM provider is not configured
  */
  export class ProviderNotConfiguredError extends LLMError {
  constructor() {
- super("NodeLLM provider not configured", "PROVIDER_NOT_CONFIGURED");
+ super("LLM provider not configured", "PROVIDER_NOT_CONFIGURED");
  }
  }
  /**
package/dist/models/models.js CHANGED
@@ -9419,7 +9419,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose NodeLLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.",
+ "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -9771,7 +9771,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose NodeLLM applications with high reasoning and throughput demands.",
+ "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -11794,7 +11794,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "A large NodeLLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
+ "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -13893,7 +13893,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both NodeLLM inputs (prompt classification) and in NodeLLM responses (response classification). It acts as an NodeLLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n",
+ "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -14926,7 +14926,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "The Meta Llama 3.3 multilingual large language model (NodeLLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -14989,7 +14989,7 @@ export const modelsData = [
  ],
  "pricing": {},
  "metadata": {
- "description": "The Meta Llama 3.3 multilingual large language model (NodeLLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
+ "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -15197,7 +15197,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both NodeLLM inputs (prompt classification) and in NodeLLM responses (response classification). It acts as an NodeLLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.",
+ "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an NodeLLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.",
  "architecture": {
  "modality": "text+image->text",
  "input_modalities": [
@@ -15262,7 +15262,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal NodeLLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
+ "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -16583,7 +16583,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Devstral-Small-2505 is a 24B parameter agentic NodeLLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
+ "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -19601,7 +19601,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (NodeLLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
+ "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -19856,7 +19856,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (NodeLLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -19918,7 +19918,7 @@ export const modelsData = [
  ],
  "pricing": {},
  "metadata": {
- "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (NodeLLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
+ "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -22688,7 +22688,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, NodeLLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).",
+ "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
@@ -24601,7 +24601,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Qwen2.5 VL 7B is a multimodal NodeLLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
+ "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).",
  "architecture": {
  "modality": "text+image->text",
  "input_modalities": [
@@ -26850,7 +26850,7 @@ export const modelsData = [
  }
  },
  "metadata": {
- "description": "Relace Apply 3 is a specialized code-patching NodeLLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n<instruction>{instruction}</instruction>\n<code>{initial_code}</code>\n<update>{edit_snippet}</update>\n\nZero Data Retention is enabled for Relace. Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply)",
+ "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n<instruction>{instruction}</instruction>\n<code>{initial_code}</code>\n<update>{edit_snippet}</update>\n\nZero Data Retention is enabled for Relace. Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply)",
  "architecture": {
  "modality": "text->text",
  "input_modalities": [
package/dist/providers/BaseProvider.d.ts CHANGED
@@ -1,6 +1,6 @@
  import { Provider, ChatRequest, ChatResponse, ChatChunk, ModelInfo, ImageRequest, ImageResponse, TranscriptionRequest, TranscriptionResponse, ModerationRequest, ModerationResponse, EmbeddingRequest, EmbeddingResponse } from "./Provider.js";
  /**
- * Abstract base class for all NodeLLM providers.
+ * Abstract base class for all LLM providers.
  * Provides common functionality and default implementations for unsupported features.
  * Each provider must implement the abstract methods and can override default implementations.
  */
package/dist/providers/BaseProvider.js CHANGED
@@ -1,5 +1,5 @@
  /**
- * Abstract base class for all NodeLLM providers.
+ * Abstract base class for all LLM providers.
  * Provides common functionality and default implementations for unsupported features.
  * Each provider must implement the abstract methods and can override default implementations.
  */
package/dist/providers/Provider.d.ts CHANGED
@@ -13,6 +13,7 @@ export interface ChatRequest {
  export interface ChatChunk {
  content: string;
  reasoning?: string;
+ tool_calls?: ToolCall[];
  done?: boolean;
  }
  export interface Usage {
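
For reference, the widened chunk type now carries tool calls alongside text and reasoning deltas. A TypeScript sketch of the resulting shapes, with ToolCall inferred from how the providers populate it elsewhere in this diff (id / type / function { name, arguments }); the exact exported names may differ:

    // Inferred from this diff; illustrative, not the package's literal export.
    interface ToolCall {
      id: string;
      type: "function";
      function: { name: string; arguments: string }; // arguments is a JSON string
    }

    interface ChatChunk {
      content: string;
      reasoning?: string;
      tool_calls?: ToolCall[]; // set on the final chunk when the model calls tools
      done?: boolean;
    }
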
package/dist/providers/Provider.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Provider.d.ts","sourceRoot":"","sources":["../../src/providers/Provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,GAAG,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,MAAM,WAAW,oBAAoB;IACnC,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACzC,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACxC,wBAAwB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACnD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAClD,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAChD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC5C,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC;CAClD;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,UAAU,EAAE;QAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAClD,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,EAAE,GAAG,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,aAAa;IAC5B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,oBAAoB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,M
AAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,CAAC,CAAC,OAAO,EAAE,WAAW,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IACxD,UAAU,CAAC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IACpC,KAAK,CAAC,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IACtD,UAAU,CAAC,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC3E,QAAQ,CAAC,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACnE,KAAK,CAAC,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC9D,YAAY,CAAC,EAAE,oBAAoB,CAAC;CACrC"}
+ {"version":3,"file":"Provider.d.ts","sourceRoot":"","sources":["../../src/providers/Provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,GAAG,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,KAAK;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IACxB,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,MAAM,WAAW,oBAAoB;IACnC,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACzC,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACxC,wBAAwB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IACnD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAClD,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAChD,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC7C,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAC5C,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC;CAClD;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,UAAU,EAAE;QAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAClD,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,EAAE,GAAG,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,aAAa;IAC5B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,oBAAoB;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,oBAAoB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,OAAO,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,Y
AAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,CAAC,CAAC,OAAO,EAAE,WAAW,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IACxD,UAAU,CAAC,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IACpC,KAAK,CAAC,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IACtD,UAAU,CAAC,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAC3E,QAAQ,CAAC,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACnE,KAAK,CAAC,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC9D,YAAY,CAAC,EAAE,oBAAoB,CAAC;CACrC"}
package/dist/providers/anthropic/Chat.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAUlE,qBAAa,aAAa;IACZ,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CAwG3D"}
+ {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAWlE,qBAAa,aAAa;IACZ,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CA4G3D"}
package/dist/providers/anthropic/Chat.js CHANGED
@@ -1,6 +1,7 @@
  import { Capabilities } from "./Capabilities.js";
  import { handleAnthropicError } from "./Errors.js";
  import { ModelRegistry } from "../../models/ModelRegistry.js";
+ import { logger } from "../../utils/logger.js";
  import { formatSystemPrompt, formatMessages } from "./Utils.js";
  export class AnthropicChat {
  baseUrl;
@@ -54,7 +55,9 @@ export class AnthropicChat {
  if (hasPdf) {
  headers["anthropic-beta"] = "pdfs-2024-09-25";
  }
- const response = await fetch(`${this.baseUrl}/messages`, {
+ const url = `${this.baseUrl}/messages`;
+ logger.logRequest("Anthropic", "POST", url, body);
+ const response = await fetch(url, {
  method: "POST",
  headers: headers,
  body: JSON.stringify(body),
@@ -63,6 +66,7 @@ export class AnthropicChat {
  await handleAnthropicError(response, model);
  }
  const json = (await response.json());
+ logger.logResponse("Anthropic", response.status, response.statusText, json);
  const contentBlocks = json.content;
  // Extract text content and tool calls
  let content = null;
package/dist/providers/anthropic/Streaming.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAMxD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CAqJ7B"}
+ {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/anthropic/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAOxD,qBAAa,kBAAkB;IACjB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CAqM7B"}
package/dist/providers/anthropic/Streaming.js CHANGED
@@ -1,6 +1,7 @@
  import { Capabilities } from "./Capabilities.js";
  import { handleAnthropicError } from "./Errors.js";
  import { formatSystemPrompt, formatMessages } from "./Utils.js";
+ import { logger } from "../../utils/logger.js";
  export class AnthropicStreaming {
  baseUrl;
  apiKey;
@@ -52,8 +53,13 @@ export class AnthropicStreaming {
  headers["anthropic-beta"] = "pdfs-2024-09-25";
  }
  let done = false;
+ // Track tool calls being built across chunks
+ const toolCallsMap = new Map();
+ let currentBlockIndex = -1;
  try {
- const response = await fetch(`${this.baseUrl}/messages`, {
+ const url = `${this.baseUrl}/messages`;
+ logger.logRequest("Anthropic", "POST", url, body);
+ const response = await fetch(url, {
  method: "POST",
  headers: headers,
  body: JSON.stringify(body),
@@ -62,6 +68,7 @@ export class AnthropicStreaming {
  if (!response.ok) {
  await handleAnthropicError(response, model);
  }
+ logger.debug("Anthropic streaming started", { status: response.status, statusText: response.statusText });
  if (!response.body) {
  throw new Error("No response body for streaming");
  }
@@ -97,16 +104,56 @@ export class AnthropicStreaming {
  try {
  const data = JSON.parse(dataStr);
  // Handle different event types from Anthropic
- if (eventType === "content_block_delta") {
+ if (eventType === "content_block_start") {
+ // Track the block index for tool use
+ if (data.content_block?.type === "tool_use") {
+ currentBlockIndex = data.index;
+ toolCallsMap.set(currentBlockIndex, {
+ id: data.content_block.id,
+ type: "function",
+ function: {
+ name: data.content_block.name,
+ arguments: ""
+ }
+ });
+ }
+ }
+ else if (eventType === "content_block_delta") {
  if (data.delta && data.delta.type === "text_delta") {
  yield { content: data.delta.text };
  }
+ else if (data.delta && data.delta.type === "input_json_delta") {
+ // Accumulate tool arguments
+ const index = data.index;
+ if (toolCallsMap.has(index)) {
+ const toolCall = toolCallsMap.get(index);
+ toolCall.function.arguments += data.delta.partial_json;
+ }
+ }
+ }
+ else if (eventType === "content_block_stop") {
+ // Block finished
  }
  else if (eventType === "message_start") {
  // Could extract initial usage here
  }
  else if (eventType === "message_delta") {
  // Update usage or stop reason
+ if (data.delta?.stop_reason === "end_turn" && toolCallsMap.size > 0) {
+ // Yield accumulated tool calls
+ const toolCalls = Array.from(toolCallsMap.values()).map(tc => ({
+ id: tc.id,
+ type: "function",
+ function: {
+ name: tc.function.name,
+ arguments: tc.function.arguments
+ }
+ }));
+ yield { content: "", tool_calls: toolCalls, done: true };
+ }
+ }
+ else if (eventType === "message_stop") {
+ done = true;
  }
  else if (eventType === "error") {
  throw new Error(`Stream error: ${data.error?.message}`);
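
Anthropic splits a tool call across three SSE events: content_block_start announces the block (id and name), content_block_delta with input_json_delta streams the arguments as partial JSON fragments, and message_delta carries the stop reason. A condensed TypeScript sketch of the accumulation strategy the code above implements; the onEvent wrapper and PartialToolCall type are illustrative, not part of the package:

    type PartialToolCall = {
      id: string;
      type: "function";
      function: { name: string; arguments: string };
    };

    const toolCallsMap = new Map<number, PartialToolCall>();

    function onEvent(eventType: string, data: any): PartialToolCall[] | undefined {
      if (eventType === "content_block_start" && data.content_block?.type === "tool_use") {
        // A tool_use block opens: remember its id/name, keyed by block index.
        toolCallsMap.set(data.index, {
          id: data.content_block.id,
          type: "function",
          function: { name: data.content_block.name, arguments: "" },
        });
      } else if (eventType === "content_block_delta" && data.delta?.type === "input_json_delta") {
        // Arguments arrive as partial JSON strings for that block index.
        const tc = toolCallsMap.get(data.index);
        if (tc) tc.function.arguments += data.delta.partial_json;
      } else if (eventType === "message_delta" && data.delta?.stop_reason === "end_turn" && toolCallsMap.size > 0) {
        // Message finished with pending tool calls: flush them to the caller.
        return Array.from(toolCallsMap.values());
      }
      return undefined;
    }
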
package/dist/providers/deepseek/Chat.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAqBlE,qBAAa,YAAY;IACX,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CAwF3D"}
+ {"version":3,"file":"Chat.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Chat.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,YAAY,EAAS,MAAM,gBAAgB,CAAC;AAsBlE,qBAAa,YAAY;IACX,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEvE,OAAO,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;CAyF3D"}
package/dist/providers/deepseek/Chat.js CHANGED
@@ -1,4 +1,5 @@
  import { ModelRegistry } from "../../models/ModelRegistry.js";
+ import { logger } from "../../utils/logger.js";
  export class DeepSeekChat {
  baseUrl;
  apiKey;
@@ -46,10 +47,9 @@ export class DeepSeekChat {
  body.response_format = response_format;
  }
  }
- if (process.env.NODELLM_DEBUG === "true") {
- console.log(`[DeepSeek Request] ${JSON.stringify(body, null, 2)}`);
- }
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
+ const url = `${this.baseUrl}/chat/completions`;
+ logger.logRequest("DeepSeek", "POST", url, body);
+ const response = await fetch(url, {
  method: "POST",
  headers: {
  "Authorization": `Bearer ${this.apiKey}`,
@@ -63,6 +63,7 @@ export class DeepSeekChat {
  throw new Error(`DeepSeek API error: ${response.status} - ${errorText}`);
  }
  const json = (await response.json());
+ logger.logResponse("DeepSeek", response.status, response.statusText, json);
  const message = json.choices[0]?.message;
  const content = message?.content ?? null;
  const reasoning = message?.reasoning_content ?? null;
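
Across providers, the ad-hoc NODELLM_DEBUG console logging is replaced by a shared logger (dist/utils/logger.js, +22 lines; logger.d.ts, +8) with logRequest, logResponse, and debug methods. The diff does not show the logger body, so the following is only a plausible TypeScript sketch consistent with the call sites, assuming it keeps the same environment-variable gate:

    // Sketch only: the method signatures match the call sites in this diff;
    // the gating and output formatting are assumptions.
    const enabled = process.env.NODELLM_DEBUG === "true";

    export const logger = {
      logRequest(provider: string, method: string, url: string, body: unknown): void {
        if (enabled)
          console.log(`[${provider} Request] ${method} ${url} ${JSON.stringify(body, null, 2)}`);
      },
      logResponse(provider: string, status: number, statusText: string, json: unknown): void {
        if (enabled)
          console.log(`[${provider} Response] ${status} ${statusText} ${JSON.stringify(json, null, 2)}`);
      },
      debug(message: string, meta?: Record<string, unknown>): void {
        if (enabled)
          console.log(`[debug] ${message}`, meta ?? "");
      },
    };
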
package/dist/providers/deepseek/Streaming.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAGxD,qBAAa,iBAAiB;IAChB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CA8G7B"}
+ {"version":3,"file":"Streaming.d.ts","sourceRoot":"","sources":["../../../src/providers/deepseek/Streaming.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAIxD,qBAAa,iBAAiB;IAChB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAAU,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAxC,OAAO,EAAE,MAAM,EAAmB,MAAM,EAAE,MAAM;IAEtE,OAAO,CACZ,OAAO,EAAE,WAAW,EACpB,UAAU,CAAC,EAAE,eAAe,GAC3B,cAAc,CAAC,SAAS,CAAC;CA+J7B"}