groq-rag 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -513,6 +513,46 @@ const response = await client.chat.withUrl({
513
513
  });
514
514
  ```
515
515
 
516
+ #### Vision Chat with Tools
517
+
518
+ Analyze images with vision models and automatically use tools (web search, calculator, MCP) to provide enhanced responses.
519
+
520
+ ```typescript
521
+ const response = await client.chat.withVision({
522
+ messages: [
523
+ {
524
+ role: 'user',
525
+ content: [
526
+ { type: 'text', text: 'What is this and find more info about it' },
527
+ { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,...' } }
528
+ ]
529
+ }
530
+ ],
531
+ visionModel: 'meta-llama/llama-4-scout-17b-16e-instruct', // Optional (this is the default)
532
+ agentModel: 'llama-3.3-70b-versatile', // Optional (this is the default)
533
+ useTools: true, // Optional: enable agent tools (default: true)
534
+ includeMCP: false, // Optional: include MCP tools (default: false)
535
+ maxIterations: 5, // Optional: maximum agent iterations (default: 5)
536
+ });
537
+
538
+ // Returns:
539
+ // {
540
+ // content: string, // Final response with tool-enhanced info
541
+ // imageAnalysis: string, // Raw vision model description
542
+ // toolCalls: Array<{ // Tools that were used
543
+ // name: string,
544
+ // args: unknown,
545
+ // result: unknown,
546
+ // }>,
547
+ // }
548
+ ```
549
+
550
+ **How it works:**
551
+ 1. Vision model analyzes the image(s)
552
+ 2. Agent takes the analysis + user question
553
+ 3. Agent uses tools (web search, calculator, MCP) if needed
554
+ 4. Returns a comprehensive answer with sources, together with the raw image analysis and the list of tool calls that were made
555
+
516
556
  ---
517
557
 
518
558
  ### Agent System
@@ -1136,6 +1176,25 @@ npm run benchmark
1136
1176
 
1137
1177
  ## Changelog
1138
1178
 
1179
+ ### v0.2.2
1180
+
1181
+ - **New Feature: Vision + Tools** - Analyze images with automatic tool enhancement
1182
+ - `client.chat.withVision()` - Vision analysis with agent tools (web search, calculator, MCP)
1183
+ - Two-step processing: vision model analyzes images, then agent enhances with tools
1184
+ - Supports all vision models (Llama 4 Scout, Llama 4 Maverick)
1185
+ - Returns image analysis, final content, and tool calls used
1186
+ - **ToolResult Enhancement** - Added `args` property to track tool input parameters
1187
+ - **Demo Website Updates** - All Groq models, vision-only image upload button, MCP integration fixes
1188
+
1189
+ ### v0.2.1
1190
+
1191
+ - Bug fixes and improvements
1192
+
1193
+ ### v0.2.0
1194
+
1195
+ - MCP (Model Context Protocol) support improvements
1196
+ - Browser environment support with `dangerouslyAllowBrowser` option
1197
+
1139
1198
  ### v0.1.6
1140
1199
 
1141
1200
  - **New Feature: MCP Integration** - Connect to Model Context Protocol servers
package/dist/index.cjs CHANGED
@@ -1778,6 +1778,7 @@ var ToolExecutor = class {
1778
1778
  if (!tool) {
1779
1779
  return {
1780
1780
  name,
1781
+ args: params,
1781
1782
  result: null,
1782
1783
  error: `Tool "${name}" not found`,
1783
1784
  executionTime: Date.now() - startTime
@@ -1787,12 +1788,14 @@ var ToolExecutor = class {
1787
1788
  const result = await tool.execute(params);
1788
1789
  return {
1789
1790
  name,
1791
+ args: params,
1790
1792
  result,
1791
1793
  executionTime: Date.now() - startTime
1792
1794
  };
1793
1795
  } catch (error) {
1794
1796
  return {
1795
1797
  name,
1798
+ args: params,
1796
1799
  result: null,
1797
1800
  error: error instanceof Error ? error.message : String(error),
1798
1801
  executionTime: Date.now() - startTime
@@ -2557,6 +2560,70 @@ ${fetchResult.markdown || fetchResult.content}`;
2557
2560
  source: fetchResult
2558
2561
  };
2559
2562
  }
2563
+ /**
2564
+ * Chat with vision (images) + tools support
2565
+ * Analyzes images with vision model, then uses agent with tools to provide enhanced response
2566
+ */
2567
+ async withVision(options) {
2568
+ const {
2569
+ messages,
2570
+ visionModel = "meta-llama/llama-4-scout-17b-16e-instruct",
2571
+ agentModel = "llama-3.3-70b-versatile",
2572
+ useTools = true,
2573
+ includeMCP = false,
2574
+ maxIterations = 5
2575
+ } = options;
2576
+ const visionResponse = await this.parent.client.chat.completions.create({
2577
+ model: visionModel,
2578
+ messages: [
2579
+ {
2580
+ role: "system",
2581
+ content: "Analyze the image(s) provided and describe what you see in detail. If the user asks a question, answer it based on the image. Be specific and thorough."
2582
+ },
2583
+ ...messages
2584
+ ]
2585
+ });
2586
+ const imageAnalysis = visionResponse.choices[0]?.message?.content || "";
2587
+ if (!useTools) {
2588
+ return {
2589
+ content: imageAnalysis,
2590
+ imageAnalysis,
2591
+ toolCalls: []
2592
+ };
2593
+ }
2594
+ const userMessage = messages.find((m) => m.role === "user");
2595
+ let userText = "";
2596
+ if (userMessage && Array.isArray(userMessage.content)) {
2597
+ const textPart = userMessage.content.find(
2598
+ (c) => c.type === "text"
2599
+ );
2600
+ userText = textPart?.text || "";
2601
+ } else if (userMessage && typeof userMessage.content === "string") {
2602
+ userText = userMessage.content;
2603
+ }
2604
+ const agentTask = userText ? `Based on this image analysis: "${imageAnalysis}"
2605
+
2606
+ User question: ${userText}
2607
+
2608
+ Use available tools (web search, calculator, etc.) if needed to provide a complete answer.` : imageAnalysis;
2609
+ const agent = await this.parent.createAgentWithBuiltins(
2610
+ {
2611
+ model: agentModel,
2612
+ maxIterations
2613
+ },
2614
+ { includeMCP }
2615
+ );
2616
+ const result = await agent.run(agentTask);
2617
+ return {
2618
+ content: result.output,
2619
+ imageAnalysis,
2620
+ toolCalls: result.toolCalls.map((t) => ({
2621
+ name: t.name,
2622
+ args: t.args,
2623
+ result: t.result
2624
+ }))
2625
+ };
2626
+ }
2560
2627
  };
2561
2628
  var WebModule = class {
2562
2629
  constructor(parent) {
package/dist/index.d.cts CHANGED
@@ -251,6 +251,7 @@ interface ToolDefinition {
251
251
  }
252
252
  interface ToolResult {
253
253
  name: string;
254
+ args?: Record<string, unknown>;
254
255
  result: unknown;
255
256
  error?: string;
256
257
  executionTime?: number;
@@ -840,6 +841,26 @@ declare class ChatWithRAG {
840
841
  content: string;
841
842
  source: FetchResult;
842
843
  }>;
844
+ /**
845
+ * Chat with vision (images) + tools support
846
+ * Analyzes images with vision model, then uses agent with tools to provide enhanced response
847
+ */
848
+ withVision(options: {
849
+ messages: Groq.Chat.ChatCompletionMessageParam[];
850
+ visionModel?: string;
851
+ agentModel?: string;
852
+ useTools?: boolean;
853
+ includeMCP?: boolean;
854
+ maxIterations?: number;
855
+ }): Promise<{
856
+ content: string;
857
+ imageAnalysis: string;
858
+ toolCalls: Array<{
859
+ name: string;
860
+ args: unknown;
861
+ result: unknown;
862
+ }>;
863
+ }>;
843
864
  }
844
865
  /**
845
866
  * Web module for fetching and searching
package/dist/index.d.ts CHANGED
@@ -251,6 +251,7 @@ interface ToolDefinition {
251
251
  }
252
252
  interface ToolResult {
253
253
  name: string;
254
+ args?: Record<string, unknown>;
254
255
  result: unknown;
255
256
  error?: string;
256
257
  executionTime?: number;
@@ -840,6 +841,26 @@ declare class ChatWithRAG {
840
841
  content: string;
841
842
  source: FetchResult;
842
843
  }>;
844
+ /**
845
+ * Chat with vision (images) + tools support
846
+ * Analyzes images with vision model, then uses agent with tools to provide enhanced response
847
+ */
848
+ withVision(options: {
849
+ messages: Groq.Chat.ChatCompletionMessageParam[];
850
+ visionModel?: string;
851
+ agentModel?: string;
852
+ useTools?: boolean;
853
+ includeMCP?: boolean;
854
+ maxIterations?: number;
855
+ }): Promise<{
856
+ content: string;
857
+ imageAnalysis: string;
858
+ toolCalls: Array<{
859
+ name: string;
860
+ args: unknown;
861
+ result: unknown;
862
+ }>;
863
+ }>;
843
864
  }
844
865
  /**
845
866
  * Web module for fetching and searching
package/dist/index.js CHANGED
@@ -1696,6 +1696,7 @@ var ToolExecutor = class {
1696
1696
  if (!tool) {
1697
1697
  return {
1698
1698
  name,
1699
+ args: params,
1699
1700
  result: null,
1700
1701
  error: `Tool "${name}" not found`,
1701
1702
  executionTime: Date.now() - startTime
@@ -1705,12 +1706,14 @@ var ToolExecutor = class {
1705
1706
  const result = await tool.execute(params);
1706
1707
  return {
1707
1708
  name,
1709
+ args: params,
1708
1710
  result,
1709
1711
  executionTime: Date.now() - startTime
1710
1712
  };
1711
1713
  } catch (error) {
1712
1714
  return {
1713
1715
  name,
1716
+ args: params,
1714
1717
  result: null,
1715
1718
  error: error instanceof Error ? error.message : String(error),
1716
1719
  executionTime: Date.now() - startTime
@@ -2475,6 +2478,70 @@ ${fetchResult.markdown || fetchResult.content}`;
2475
2478
  source: fetchResult
2476
2479
  };
2477
2480
  }
2481
+ /**
2482
+ * Chat with vision (images) + tools support
2483
+ * Analyzes images with vision model, then uses agent with tools to provide enhanced response
2484
+ */
2485
+ async withVision(options) {
2486
+ const {
2487
+ messages,
2488
+ visionModel = "meta-llama/llama-4-scout-17b-16e-instruct",
2489
+ agentModel = "llama-3.3-70b-versatile",
2490
+ useTools = true,
2491
+ includeMCP = false,
2492
+ maxIterations = 5
2493
+ } = options;
2494
+ const visionResponse = await this.parent.client.chat.completions.create({
2495
+ model: visionModel,
2496
+ messages: [
2497
+ {
2498
+ role: "system",
2499
+ content: "Analyze the image(s) provided and describe what you see in detail. If the user asks a question, answer it based on the image. Be specific and thorough."
2500
+ },
2501
+ ...messages
2502
+ ]
2503
+ });
2504
+ const imageAnalysis = visionResponse.choices[0]?.message?.content || "";
2505
+ if (!useTools) {
2506
+ return {
2507
+ content: imageAnalysis,
2508
+ imageAnalysis,
2509
+ toolCalls: []
2510
+ };
2511
+ }
2512
+ const userMessage = messages.find((m) => m.role === "user");
2513
+ let userText = "";
2514
+ if (userMessage && Array.isArray(userMessage.content)) {
2515
+ const textPart = userMessage.content.find(
2516
+ (c) => c.type === "text"
2517
+ );
2518
+ userText = textPart?.text || "";
2519
+ } else if (userMessage && typeof userMessage.content === "string") {
2520
+ userText = userMessage.content;
2521
+ }
2522
+ const agentTask = userText ? `Based on this image analysis: "${imageAnalysis}"
2523
+
2524
+ User question: ${userText}
2525
+
2526
+ Use available tools (web search, calculator, etc.) if needed to provide a complete answer.` : imageAnalysis;
2527
+ const agent = await this.parent.createAgentWithBuiltins(
2528
+ {
2529
+ model: agentModel,
2530
+ maxIterations
2531
+ },
2532
+ { includeMCP }
2533
+ );
2534
+ const result = await agent.run(agentTask);
2535
+ return {
2536
+ content: result.output,
2537
+ imageAnalysis,
2538
+ toolCalls: result.toolCalls.map((t) => ({
2539
+ name: t.name,
2540
+ args: t.args,
2541
+ result: t.result
2542
+ }))
2543
+ };
2544
+ }
2478
2545
  };
2479
2546
  var WebModule = class {
2480
2547
  constructor(parent) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "groq-rag",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "description": "Extended Groq TypeScript SDK with RAG, web browsing, and agent capabilities - 100% groq-sdk API compatible",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",