npm - groq-rag - Versions diffs - 0.2.0 → 0.2.2 - Mend

groq-rag 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED

@@ -513,6 +513,46 @@ const response = await client.chat.withUrl({
 });
 ```
+#### Vision Chat with Tools
+Analyze images with vision models and automatically use tools (web search, calculator, MCP) to provide enhanced responses.
+```typescript
+const response = await client.chat.withVision({
+  messages: [
+    {
+      role: 'user',
+      content: [
+        { type: 'text', text: 'What is this and find more info about it' },
+        { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,...' } }
+      ]
+    }
+  ],
+  visionModel?: string,      // Default: 'meta-llama/llama-4-scout-17b-16e-instruct'
+  agentModel?: string,       // Default: 'llama-3.3-70b-versatile'
+  useTools?: boolean,        // Enable agent tools (default: true)
+  includeMCP?: boolean,      // Include MCP tools (default: false)
+  maxIterations?: number,    // Agent iterations (default: 5)
+});
+// Returns:
+// {
+//   content: string,          // Final response with tool-enhanced info
+//   imageAnalysis: string,    // Raw vision model description
+//   toolCalls: Array<{        // Tools that were used
+//     name: string,
+//     args: unknown,
+//     result: unknown,
+//   }>,
+// }
+```
+**How it works:**
+1. Vision model analyzes the image(s)
+2. Agent takes the analysis + user question
+3. Agent uses tools (web search, calculator, MCP) if needed
+4. Returns comprehensive answer with sources
 ---
 ### Agent System
@@ -1136,6 +1176,25 @@ npm run benchmark
 ## Changelog
+### v0.2.2
+- **New Feature: Vision + Tools** - Analyze images with automatic tool enhancement
+  - `client.chat.withVision()` - Vision analysis with agent tools (web search, calculator, MCP)
+  - Two-step processing: vision model analyzes images, then agent enhances with tools
+  - Supports all vision models (Llama 4 Scout, Llama 4 Maverick)
+  - Returns image analysis, final content, and tool calls used
+- **ToolResult Enhancement** - Added `args` property to track tool input parameters
+- **Demo Website Updates** - All Groq models, vision-only image upload button, MCP integration fixes
+### v0.2.1
+- Bug fixes and improvements
+### v0.2.0
+- MCP (Model Context Protocol) support improvements
+- Browser environment support with `dangerouslyAllowBrowser` option
 ### v0.1.6
 - **New Feature: MCP Integration** - Connect to Model Context Protocol servers

package/dist/index.cjs CHANGED

@@ -1778,6 +1778,7 @@ var ToolExecutor = class {
     if (!tool) {
       return {
         name,
+        args: params,
         result: null,
         error: `Tool "${name}" not found`,
         executionTime: Date.now() - startTime
@@ -1787,12 +1788,14 @@ var ToolExecutor = class {
       const result = await tool.execute(params);
       return {
         name,
+        args: params,
         result,
         executionTime: Date.now() - startTime
       };
     } catch (error) {
       return {
         name,
+        args: params,
         result: null,
         error: error instanceof Error ? error.message : String(error),
         executionTime: Date.now() - startTime
@@ -2557,6 +2560,70 @@ ${fetchResult.markdown || fetchResult.content}`;
       source: fetchResult
     };
   }
+  /**
+   * Chat with vision (images) + tools support
+   * Analyzes images with vision model, then uses agent with tools to provide enhanced response
+   */
+  async withVision(options) {
+    const {
+      messages,
+      visionModel = "meta-llama/llama-4-scout-17b-16e-instruct",
+      agentModel = "llama-3.3-70b-versatile",
+      useTools = true,
+      includeMCP = false,
+      maxIterations = 5
+    } = options;
+    const visionResponse = await this.parent.client.chat.completions.create({
+      model: visionModel,
+      messages: [
+        {
+          role: "system",
+          content: "Analyze the image(s) provided and describe what you see in detail. If the user asks a question, answer it based on the image. Be specific and thorough."
+        },
+        ...messages
+      ]
+    });
+    const imageAnalysis = visionResponse.choices[0]?.message?.content || "";
+    if (!useTools) {
+      return {
+        content: imageAnalysis,
+        imageAnalysis,
+        toolCalls: []
+      };
+    }
+    const userMessage = messages.find((m) => m.role === "user");
+    let userText = "";
+    if (userMessage && Array.isArray(userMessage.content)) {
+      const textPart = userMessage.content.find(
+        (c) => c.type === "text"
+      );
+      userText = textPart?.text || "";
+    } else if (userMessage && typeof userMessage.content === "string") {
+      userText = userMessage.content;
+    }
+    const agentTask = userText ? `Based on this image analysis: "${imageAnalysis}"
+User question: ${userText}
+Use available tools (web search, calculator, etc.) if needed to provide a complete answer.` : imageAnalysis;
+    const agent = await this.parent.createAgentWithBuiltins(
+      {
+        model: agentModel,
+        maxIterations
+      },
+      { includeMCP }
+    );
+    const result = await agent.run(agentTask);
+    return {
+      content: result.output,
+      imageAnalysis,
+      toolCalls: result.toolCalls.map((t) => ({
+        name: t.name,
+        args: t.args,
+        result: t.result
+      }))
+    };
+  }
 };
 var WebModule = class {
   constructor(parent) {

package/dist/index.d.cts CHANGED

@@ -251,6 +251,7 @@ interface ToolDefinition {
 }
 interface ToolResult {
     name: string;
+    args?: Record<string, unknown>;
     result: unknown;
     error?: string;
     executionTime?: number;
@@ -840,6 +841,26 @@ declare class ChatWithRAG {
         content: string;
         source: FetchResult;
     }>;
+    /**
+     * Chat with vision (images) + tools support
+     * Analyzes images with vision model, then uses agent with tools to provide enhanced response
+     */
+    withVision(options: {
+        messages: Groq.Chat.ChatCompletionMessageParam[];
+        visionModel?: string;
+        agentModel?: string;
+        useTools?: boolean;
+        includeMCP?: boolean;
+        maxIterations?: number;
+    }): Promise<{
+        content: string;
+        imageAnalysis: string;
+        toolCalls: Array<{
+            name: string;
+            args: unknown;
+            result: unknown;
+        }>;
+    }>;
 }
 /**
  * Web module for fetching and searching

package/dist/index.d.ts CHANGED

@@ -251,6 +251,7 @@ interface ToolDefinition {
 }
 interface ToolResult {
     name: string;
+    args?: Record<string, unknown>;
     result: unknown;
     error?: string;
     executionTime?: number;
@@ -840,6 +841,26 @@ declare class ChatWithRAG {
         content: string;
         source: FetchResult;
     }>;
+    /**
+     * Chat with vision (images) + tools support
+     * Analyzes images with vision model, then uses agent with tools to provide enhanced response
+     */
+    withVision(options: {
+        messages: Groq.Chat.ChatCompletionMessageParam[];
+        visionModel?: string;
+        agentModel?: string;
+        useTools?: boolean;
+        includeMCP?: boolean;
+        maxIterations?: number;
+    }): Promise<{
+        content: string;
+        imageAnalysis: string;
+        toolCalls: Array<{
+            name: string;
+            args: unknown;
+            result: unknown;
+        }>;
+    }>;
 }
 /**
  * Web module for fetching and searching

package/dist/index.js CHANGED

@@ -1696,6 +1696,7 @@ var ToolExecutor = class {
     if (!tool) {
       return {
         name,
+        args: params,
         result: null,
         error: `Tool "${name}" not found`,
         executionTime: Date.now() - startTime
@@ -1705,12 +1706,14 @@ var ToolExecutor = class {
       const result = await tool.execute(params);
       return {
         name,
+        args: params,
         result,
         executionTime: Date.now() - startTime
       };
     } catch (error) {
       return {
         name,
+        args: params,
         result: null,
         error: error instanceof Error ? error.message : String(error),
         executionTime: Date.now() - startTime
@@ -2475,6 +2478,70 @@ ${fetchResult.markdown || fetchResult.content}`;
       source: fetchResult
     };
   }
+  /**
+   * Chat with vision (images) + tools support
+   * Analyzes images with vision model, then uses agent with tools to provide enhanced response
+   */
+  async withVision(options) {
+    const {
+      messages,
+      visionModel = "meta-llama/llama-4-scout-17b-16e-instruct",
+      agentModel = "llama-3.3-70b-versatile",
+      useTools = true,
+      includeMCP = false,
+      maxIterations = 5
+    } = options;
+    const visionResponse = await this.parent.client.chat.completions.create({
+      model: visionModel,
+      messages: [
+        {
+          role: "system",
+          content: "Analyze the image(s) provided and describe what you see in detail. If the user asks a question, answer it based on the image. Be specific and thorough."
+        },
+        ...messages
+      ]
+    });
+    const imageAnalysis = visionResponse.choices[0]?.message?.content || "";
+    if (!useTools) {
+      return {
+        content: imageAnalysis,
+        imageAnalysis,
+        toolCalls: []
+      };
+    }
+    const userMessage = messages.find((m) => m.role === "user");
+    let userText = "";
+    if (userMessage && Array.isArray(userMessage.content)) {
+      const textPart = userMessage.content.find(
+        (c) => c.type === "text"
+      );
+      userText = textPart?.text || "";
+    } else if (userMessage && typeof userMessage.content === "string") {
+      userText = userMessage.content;
+    }
+    const agentTask = userText ? `Based on this image analysis: "${imageAnalysis}"
+User question: ${userText}
+Use available tools (web search, calculator, etc.) if needed to provide a complete answer.` : imageAnalysis;
+    const agent = await this.parent.createAgentWithBuiltins(
+      {
+        model: agentModel,
+        maxIterations
+      },
+      { includeMCP }
+    );
+    const result = await agent.run(agentTask);
+    return {
+      content: result.output,
+      imageAnalysis,
+      toolCalls: result.toolCalls.map((t) => ({
+        name: t.name,
+        args: t.args,
+        result: t.result
+      }))
+    };
+  }
 };
 var WebModule = class {
   constructor(parent) {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "groq-rag",
-  "version": "0.2.0",
+  "version": "0.2.2",
   "description": "Extended Groq TypeScript SDK with RAG, web browsing, and agent capabilities - 100% groq-sdk API compatible",
   "type": "module",
   "main": "dist/index.cjs",