groq-rag 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/dist/index.cjs +67 -0
- package/dist/index.d.cts +21 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.js +67 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -513,6 +513,46 @@ const response = await client.chat.withUrl({
|
|
|
513
513
|
});
|
|
514
514
|
```
|
|
515
515
|
|
|
516
|
+
#### Vision Chat with Tools
|
|
517
|
+
|
|
518
|
+
Analyze images with vision models and automatically use tools (web search, calculator, MCP) to provide enhanced responses.
|
|
519
|
+
|
|
520
|
+
```typescript
|
|
521
|
+
const response = await client.chat.withVision({
|
|
522
|
+
messages: [
|
|
523
|
+
{
|
|
524
|
+
role: 'user',
|
|
525
|
+
content: [
|
|
526
|
+
{ type: 'text', text: 'What is this and find more info about it' },
|
|
527
|
+
{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,...' } }
|
|
528
|
+
]
|
|
529
|
+
}
|
|
530
|
+
],
|
|
531
|
+
visionModel: 'meta-llama/llama-4-scout-17b-16e-instruct', // Optional; the value shown is the default
|
|
532
|
+
agentModel: 'llama-3.3-70b-versatile', // Optional; the value shown is the default
|
|
533
|
+
useTools: true, // Optional; enable agent tools (default: true)
|
|
534
|
+
includeMCP: false, // Optional; include MCP tools (default: false)
|
|
535
|
+
maxIterations: 5, // Optional; maximum agent iterations (default: 5)
|
|
536
|
+
});
|
|
537
|
+
|
|
538
|
+
// Returns:
|
|
539
|
+
// {
|
|
540
|
+
// content: string, // Final response with tool-enhanced info
|
|
541
|
+
// imageAnalysis: string, // Raw vision model description
|
|
542
|
+
// toolCalls: Array<{ // Tools that were used
|
|
543
|
+
// name: string,
|
|
544
|
+
// args: unknown,
|
|
545
|
+
// result: unknown,
|
|
546
|
+
// }>,
|
|
547
|
+
// }
|
|
548
|
+
```
|
|
549
|
+
|
|
550
|
+
**How it works:**
|
|
551
|
+
1. Vision model analyzes the image(s)
|
|
552
|
+
2. Agent takes the analysis + user question
|
|
553
|
+
3. Agent uses tools (web search, calculator, MCP) if needed
|
|
554
|
+
4. Returns the agent's final answer together with the raw image analysis and the list of tool calls that were made
|
|
555
|
+
|
|
516
556
|
---
|
|
517
557
|
|
|
518
558
|
### Agent System
|
|
@@ -1136,6 +1176,25 @@ npm run benchmark
|
|
|
1136
1176
|
|
|
1137
1177
|
## Changelog
|
|
1138
1178
|
|
|
1179
|
+
### v0.2.2
|
|
1180
|
+
|
|
1181
|
+
- **New Feature: Vision + Tools** - Analyze images with automatic tool enhancement
|
|
1182
|
+
- `client.chat.withVision()` - Vision analysis with agent tools (web search, calculator, MCP)
|
|
1183
|
+
- Two-step processing: vision model analyzes images, then agent enhances with tools
|
|
1184
|
+
- Supports all vision models (Llama 4 Scout, Llama 4 Maverick)
|
|
1185
|
+
- Returns image analysis, final content, and tool calls used
|
|
1186
|
+
- **ToolResult Enhancement** - Added `args` property to track tool input parameters
|
|
1187
|
+
- **Demo Website Updates** - All Groq models, vision-only image upload button, MCP integration fixes
|
|
1188
|
+
|
|
1189
|
+
### v0.2.1
|
|
1190
|
+
|
|
1191
|
+
- Bug fixes and improvements
|
|
1192
|
+
|
|
1193
|
+
### v0.2.0
|
|
1194
|
+
|
|
1195
|
+
- MCP (Model Context Protocol) support improvements
|
|
1196
|
+
- Browser environment support with `dangerouslyAllowBrowser` option
|
|
1197
|
+
|
|
1139
1198
|
### v0.1.6
|
|
1140
1199
|
|
|
1141
1200
|
- **New Feature: MCP Integration** - Connect to Model Context Protocol servers
|
package/dist/index.cjs
CHANGED
|
@@ -1778,6 +1778,7 @@ var ToolExecutor = class {
|
|
|
1778
1778
|
if (!tool) {
|
|
1779
1779
|
return {
|
|
1780
1780
|
name,
|
|
1781
|
+
args: params,
|
|
1781
1782
|
result: null,
|
|
1782
1783
|
error: `Tool "${name}" not found`,
|
|
1783
1784
|
executionTime: Date.now() - startTime
|
|
@@ -1787,12 +1788,14 @@ var ToolExecutor = class {
|
|
|
1787
1788
|
const result = await tool.execute(params);
|
|
1788
1789
|
return {
|
|
1789
1790
|
name,
|
|
1791
|
+
args: params,
|
|
1790
1792
|
result,
|
|
1791
1793
|
executionTime: Date.now() - startTime
|
|
1792
1794
|
};
|
|
1793
1795
|
} catch (error) {
|
|
1794
1796
|
return {
|
|
1795
1797
|
name,
|
|
1798
|
+
args: params,
|
|
1796
1799
|
result: null,
|
|
1797
1800
|
error: error instanceof Error ? error.message : String(error),
|
|
1798
1801
|
executionTime: Date.now() - startTime
|
|
@@ -2557,6 +2560,70 @@ ${fetchResult.markdown || fetchResult.content}`;
|
|
|
2557
2560
|
source: fetchResult
|
|
2558
2561
|
};
|
|
2559
2562
|
}
|
|
2563
|
+
/**
|
|
2564
|
+
* Chat with vision (images) + tools support
|
|
2565
|
+
* Analyzes images with vision model, then uses agent with tools to provide enhanced response
|
|
2566
|
+
*/
|
|
2567
|
+
async withVision(options) {
|
|
2568
|
+
const {
|
|
2569
|
+
messages,
|
|
2570
|
+
visionModel = "meta-llama/llama-4-scout-17b-16e-instruct",
|
|
2571
|
+
agentModel = "llama-3.3-70b-versatile",
|
|
2572
|
+
useTools = true,
|
|
2573
|
+
includeMCP = false,
|
|
2574
|
+
maxIterations = 5
|
|
2575
|
+
} = options;
|
|
2576
|
+
const visionResponse = await this.parent.client.chat.completions.create({
|
|
2577
|
+
model: visionModel,
|
|
2578
|
+
messages: [
|
|
2579
|
+
{
|
|
2580
|
+
role: "system",
|
|
2581
|
+
content: "Analyze the image(s) provided and describe what you see in detail. If the user asks a question, answer it based on the image. Be specific and thorough."
|
|
2582
|
+
},
|
|
2583
|
+
...messages
|
|
2584
|
+
]
|
|
2585
|
+
});
|
|
2586
|
+
const imageAnalysis = visionResponse.choices[0]?.message?.content || "";
|
|
2587
|
+
if (!useTools) {
|
|
2588
|
+
return {
|
|
2589
|
+
content: imageAnalysis,
|
|
2590
|
+
imageAnalysis,
|
|
2591
|
+
toolCalls: []
|
|
2592
|
+
};
|
|
2593
|
+
}
|
|
2594
|
+
const userMessage = messages.find((m) => m.role === "user");
|
|
2595
|
+
let userText = "";
|
|
2596
|
+
if (userMessage && Array.isArray(userMessage.content)) {
|
|
2597
|
+
const textPart = userMessage.content.find(
|
|
2598
|
+
(c) => c.type === "text"
|
|
2599
|
+
);
|
|
2600
|
+
userText = textPart?.text || "";
|
|
2601
|
+
} else if (userMessage && typeof userMessage.content === "string") {
|
|
2602
|
+
userText = userMessage.content;
|
|
2603
|
+
}
|
|
2604
|
+
const agentTask = userText ? `Based on this image analysis: "${imageAnalysis}"
|
|
2605
|
+
|
|
2606
|
+
User question: ${userText}
|
|
2607
|
+
|
|
2608
|
+
Use available tools (web search, calculator, etc.) if needed to provide a complete answer.` : imageAnalysis;
|
|
2609
|
+
const agent = await this.parent.createAgentWithBuiltins(
|
|
2610
|
+
{
|
|
2611
|
+
model: agentModel,
|
|
2612
|
+
maxIterations
|
|
2613
|
+
},
|
|
2614
|
+
{ includeMCP }
|
|
2615
|
+
);
|
|
2616
|
+
const result = await agent.run(agentTask);
|
|
2617
|
+
return {
|
|
2618
|
+
content: result.output,
|
|
2619
|
+
imageAnalysis,
|
|
2620
|
+
toolCalls: result.toolCalls.map((t) => ({
|
|
2621
|
+
name: t.name,
|
|
2622
|
+
args: t.args,
|
|
2623
|
+
result: t.result
|
|
2624
|
+
}))
|
|
2625
|
+
};
|
|
2626
|
+
}
|
|
2560
2627
|
};
|
|
2561
2628
|
var WebModule = class {
|
|
2562
2629
|
constructor(parent) {
|
package/dist/index.d.cts
CHANGED
|
@@ -251,6 +251,7 @@ interface ToolDefinition {
|
|
|
251
251
|
}
|
|
252
252
|
interface ToolResult {
|
|
253
253
|
name: string;
|
|
254
|
+
args?: Record<string, unknown>;
|
|
254
255
|
result: unknown;
|
|
255
256
|
error?: string;
|
|
256
257
|
executionTime?: number;
|
|
@@ -840,6 +841,26 @@ declare class ChatWithRAG {
|
|
|
840
841
|
content: string;
|
|
841
842
|
source: FetchResult;
|
|
842
843
|
}>;
|
|
844
|
+
/**
 * Chat with vision (images) + tools support.
 *
 * Analyzes the image(s) in `messages` with a vision model, then (unless
 * `useTools` is false) passes that analysis and the user's text question to a
 * tool-enabled agent to provide an enhanced response.
 *
 * @param options.messages - Conversation to analyze; user content may include image parts.
 * @param options.visionModel - Model for the image-analysis step (default: 'meta-llama/llama-4-scout-17b-16e-instruct').
 * @param options.agentModel - Model for the agent step (default: 'llama-3.3-70b-versatile').
 * @param options.useTools - Run the agent step (default: true). When false, `content` equals `imageAnalysis` and `toolCalls` is empty.
 * @param options.includeMCP - Include MCP tools in the agent (default: false).
 * @param options.maxIterations - Maximum agent iterations (default: 5).
 * @returns `content` (final answer), `imageAnalysis` (raw vision-model output) and
 *   `toolCalls` (name, args and result of each tool the agent used).
 */
|
|
848
|
+
withVision(options: {
|
|
849
|
+
messages: Groq.Chat.ChatCompletionMessageParam[];
|
|
850
|
+
visionModel?: string;
|
|
851
|
+
agentModel?: string;
|
|
852
|
+
useTools?: boolean;
|
|
853
|
+
includeMCP?: boolean;
|
|
854
|
+
maxIterations?: number;
|
|
855
|
+
}): Promise<{
|
|
856
|
+
content: string;
|
|
857
|
+
imageAnalysis: string;
|
|
858
|
+
toolCalls: Array<{
|
|
859
|
+
name: string;
|
|
860
|
+
args: unknown;
|
|
861
|
+
result: unknown;
|
|
862
|
+
}>;
|
|
863
|
+
}>;
|
|
843
864
|
}
|
|
844
865
|
/**
|
|
845
866
|
* Web module for fetching and searching
|
package/dist/index.d.ts
CHANGED
|
@@ -251,6 +251,7 @@ interface ToolDefinition {
|
|
|
251
251
|
}
|
|
252
252
|
interface ToolResult {
|
|
253
253
|
name: string;
|
|
254
|
+
args?: Record<string, unknown>;
|
|
254
255
|
result: unknown;
|
|
255
256
|
error?: string;
|
|
256
257
|
executionTime?: number;
|
|
@@ -840,6 +841,26 @@ declare class ChatWithRAG {
|
|
|
840
841
|
content: string;
|
|
841
842
|
source: FetchResult;
|
|
842
843
|
}>;
|
|
844
|
+
/**
 * Chat with vision (images) + tools support.
 *
 * Analyzes the image(s) in `messages` with a vision model, then (unless
 * `useTools` is false) passes that analysis and the user's text question to a
 * tool-enabled agent to provide an enhanced response.
 *
 * @param options.messages - Conversation to analyze; user content may include image parts.
 * @param options.visionModel - Model for the image-analysis step (default: 'meta-llama/llama-4-scout-17b-16e-instruct').
 * @param options.agentModel - Model for the agent step (default: 'llama-3.3-70b-versatile').
 * @param options.useTools - Run the agent step (default: true). When false, `content` equals `imageAnalysis` and `toolCalls` is empty.
 * @param options.includeMCP - Include MCP tools in the agent (default: false).
 * @param options.maxIterations - Maximum agent iterations (default: 5).
 * @returns `content` (final answer), `imageAnalysis` (raw vision-model output) and
 *   `toolCalls` (name, args and result of each tool the agent used).
 */
|
|
848
|
+
withVision(options: {
|
|
849
|
+
messages: Groq.Chat.ChatCompletionMessageParam[];
|
|
850
|
+
visionModel?: string;
|
|
851
|
+
agentModel?: string;
|
|
852
|
+
useTools?: boolean;
|
|
853
|
+
includeMCP?: boolean;
|
|
854
|
+
maxIterations?: number;
|
|
855
|
+
}): Promise<{
|
|
856
|
+
content: string;
|
|
857
|
+
imageAnalysis: string;
|
|
858
|
+
toolCalls: Array<{
|
|
859
|
+
name: string;
|
|
860
|
+
args: unknown;
|
|
861
|
+
result: unknown;
|
|
862
|
+
}>;
|
|
863
|
+
}>;
|
|
843
864
|
}
|
|
844
865
|
/**
|
|
845
866
|
* Web module for fetching and searching
|
package/dist/index.js
CHANGED
|
@@ -1696,6 +1696,7 @@ var ToolExecutor = class {
|
|
|
1696
1696
|
if (!tool) {
|
|
1697
1697
|
return {
|
|
1698
1698
|
name,
|
|
1699
|
+
args: params,
|
|
1699
1700
|
result: null,
|
|
1700
1701
|
error: `Tool "${name}" not found`,
|
|
1701
1702
|
executionTime: Date.now() - startTime
|
|
@@ -1705,12 +1706,14 @@ var ToolExecutor = class {
|
|
|
1705
1706
|
const result = await tool.execute(params);
|
|
1706
1707
|
return {
|
|
1707
1708
|
name,
|
|
1709
|
+
args: params,
|
|
1708
1710
|
result,
|
|
1709
1711
|
executionTime: Date.now() - startTime
|
|
1710
1712
|
};
|
|
1711
1713
|
} catch (error) {
|
|
1712
1714
|
return {
|
|
1713
1715
|
name,
|
|
1716
|
+
args: params,
|
|
1714
1717
|
result: null,
|
|
1715
1718
|
error: error instanceof Error ? error.message : String(error),
|
|
1716
1719
|
executionTime: Date.now() - startTime
|
|
@@ -2475,6 +2478,70 @@ ${fetchResult.markdown || fetchResult.content}`;
|
|
|
2475
2478
|
source: fetchResult
|
|
2476
2479
|
};
|
|
2477
2480
|
}
|
|
2481
|
+
/**
|
|
2482
|
+
* Chat with vision (images) + tools support
|
|
2483
|
+
* Analyzes images with vision model, then uses agent with tools to provide enhanced response
|
|
2484
|
+
*/
|
|
2485
|
+
async withVision(options) {
|
|
2486
|
+
const {
|
|
2487
|
+
messages,
|
|
2488
|
+
visionModel = "meta-llama/llama-4-scout-17b-16e-instruct",
|
|
2489
|
+
agentModel = "llama-3.3-70b-versatile",
|
|
2490
|
+
useTools = true,
|
|
2491
|
+
includeMCP = false,
|
|
2492
|
+
maxIterations = 5
|
|
2493
|
+
} = options;
|
|
2494
|
+
const visionResponse = await this.parent.client.chat.completions.create({
|
|
2495
|
+
model: visionModel,
|
|
2496
|
+
messages: [
|
|
2497
|
+
{
|
|
2498
|
+
role: "system",
|
|
2499
|
+
content: "Analyze the image(s) provided and describe what you see in detail. If the user asks a question, answer it based on the image. Be specific and thorough."
|
|
2500
|
+
},
|
|
2501
|
+
...messages
|
|
2502
|
+
]
|
|
2503
|
+
});
|
|
2504
|
+
const imageAnalysis = visionResponse.choices[0]?.message?.content || "";
|
|
2505
|
+
if (!useTools) {
|
|
2506
|
+
return {
|
|
2507
|
+
content: imageAnalysis,
|
|
2508
|
+
imageAnalysis,
|
|
2509
|
+
toolCalls: []
|
|
2510
|
+
};
|
|
2511
|
+
}
|
|
2512
|
+
const userMessage = messages.find((m) => m.role === "user");
|
|
2513
|
+
let userText = "";
|
|
2514
|
+
if (userMessage && Array.isArray(userMessage.content)) {
|
|
2515
|
+
const textPart = userMessage.content.find(
|
|
2516
|
+
(c) => c.type === "text"
|
|
2517
|
+
);
|
|
2518
|
+
userText = textPart?.text || "";
|
|
2519
|
+
} else if (userMessage && typeof userMessage.content === "string") {
|
|
2520
|
+
userText = userMessage.content;
|
|
2521
|
+
}
|
|
2522
|
+
const agentTask = userText ? `Based on this image analysis: "${imageAnalysis}"
|
|
2523
|
+
|
|
2524
|
+
User question: ${userText}
|
|
2525
|
+
|
|
2526
|
+
Use available tools (web search, calculator, etc.) if needed to provide a complete answer.` : imageAnalysis;
|
|
2527
|
+
const agent = await this.parent.createAgentWithBuiltins(
|
|
2528
|
+
{
|
|
2529
|
+
model: agentModel,
|
|
2530
|
+
maxIterations
|
|
2531
|
+
},
|
|
2532
|
+
{ includeMCP }
|
|
2533
|
+
);
|
|
2534
|
+
const result = await agent.run(agentTask);
|
|
2535
|
+
return {
|
|
2536
|
+
content: result.output,
|
|
2537
|
+
imageAnalysis,
|
|
2538
|
+
toolCalls: result.toolCalls.map((t) => ({
|
|
2539
|
+
name: t.name,
|
|
2540
|
+
args: t.args,
|
|
2541
|
+
result: t.result
|
|
2542
|
+
}))
|
|
2543
|
+
};
|
|
2544
|
+
}
|
|
2478
2545
|
};
|
|
2479
2546
|
var WebModule = class {
|
|
2480
2547
|
constructor(parent) {
|
package/package.json
CHANGED