modular-agent-examples 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/chunking-demo.ts +339 -0
- package/cleanup-duplicates.ts +142 -0
- package/data/flower.jpg +0 -0
- package/generative.ts +128 -0
- package/graph/context-example.ts +209 -0
- package/graph/data-pipeline/agents.ts +60 -0
- package/graph/data-pipeline/fetchers.ts +166 -0
- package/graph/data-pipeline/index.ts +282 -0
- package/graph/index.ts +154 -0
- package/graph/map-example.ts +227 -0
- package/graph/metrics-example.ts +238 -0
- package/graph/parallel-example.ts +167 -0
- package/graph/pipeline-example.ts +225 -0
- package/graph/planning-example.ts +406 -0
- package/graph/router-example.ts +226 -0
- package/graph/sequential-example.ts +141 -0
- package/graph/voting-example.ts +159 -0
- package/graph-rag/docker-compose.yaml +14 -0
- package/graph-rag/index.js +99 -0
- package/graph-rag/init-db.sh +7 -0
- package/graph-rag/package.json +15 -0
- package/history-compression-example.ts +163 -0
- package/history-persistence.ts +347 -0
- package/index.ts +175 -0
- package/ingestion-pipeline.ts +353 -0
- package/mcp-airbnb-example.ts +69 -0
- package/mcp-http-example.ts +70 -0
- package/mcp-stdio-example.ts +63 -0
- package/multimodal.ts +144 -0
- package/ollama.ts +148 -0
- package/openai-compatible.ts +141 -0
- package/opensearch-vector-store.ts +342 -0
- package/package.json +24 -0
- package/pubmed.ts +289 -0
- package/reasoning-with-sub-agent.ts +311 -0
- package/synchronous/index.ts +48 -0
- package/tsconfig.json +8 -0
- package/vector-store-filtering.ts +303 -0
- package/vector-store.ts +210 -0
- package/vectorstore/index.ts +0 -0
- package/vectorstore/store/dbService.ts +80 -0
- package/voyage-embeddings.ts +99 -0
- package/weather-with-sub-agent.ts +276 -0
- package/weather.ts +389 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
import "dotenv/config";
|
|
2
|
+
import { rmSync } from "fs";
|
|
3
|
+
import { ClaudeAgent } from "../lib/agents/anthropic/ClaudeAgent";
|
|
4
|
+
import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
|
|
5
|
+
import { OpenAIEmbeddings } from "../lib/embeddings/OpenAIEmbeddings";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Example demonstrating multi-tenancy and filtering in vector stores.
|
|
9
|
+
*
|
|
10
|
+
* Shows how to:
|
|
11
|
+
* 1. Store documents with tenant/project metadata
|
|
12
|
+
* 2. Create tenant-specific retrieval tools
|
|
13
|
+
* 3. Ensure agents only access documents for their tenant
|
|
14
|
+
*
|
|
15
|
+
* NOTE: LanceDB uses DataFusion for SQL filtering, which normalizes unquoted
|
|
16
|
+
* identifiers to lowercase. Use snake_case for metadata field names to avoid
|
|
17
|
+
* case-sensitivity issues.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
// Sample multi-tenant data
|
|
21
|
+
// Each row is written flat for readability; everything except `id` and
// `content` is folded into the document's `metadata` object below.
const DOCUMENTS = [
  // Tenant: Acme Corp
  {
    id: "acme-1",
    tenant_id: "acme",
    project_id: "proj-123",
    category: "billing",
    content:
      "Acme Corp uses a monthly billing cycle. Invoices are sent on the 1st of each month.",
  },
  {
    id: "acme-2",
    tenant_id: "acme",
    project_id: "proj-123",
    category: "support",
    content: "Acme Corp's support hours are Monday-Friday, 9am-5pm EST.",
  },
  {
    id: "acme-3",
    tenant_id: "acme",
    project_id: "proj-456",
    category: "policy",
    content: "Acme Corp has a 30-day refund policy for all services.",
  },

  // Tenant: TechStart Inc
  {
    id: "techstart-1",
    tenant_id: "techstart",
    project_id: "proj-789",
    category: "billing",
    content:
      "TechStart Inc uses annual billing. Payment is due within 15 days of invoice.",
  },
  {
    id: "techstart-2",
    tenant_id: "techstart",
    project_id: "proj-789",
    category: "support",
    content:
      "TechStart Inc offers 24/7 premium support for enterprise customers.",
  },
  {
    id: "techstart-3",
    tenant_id: "techstart",
    project_id: "proj-789",
    category: "policy",
    content: "TechStart Inc has a 60-day money-back guarantee.",
  },

  // Tenant: Global Services
  {
    id: "global-1",
    tenant_id: "global",
    project_id: "proj-101",
    category: "billing",
    content:
      "Global Services bills quarterly. Invoices are sent 15 days before the period ends.",
  },
  {
    id: "global-2",
    tenant_id: "global",
    project_id: "proj-202",
    category: "support",
    content:
      "Global Services provides support in 12 languages, available 24/7.",
  },
].map(({ id, content, ...metadata }) => ({ id, content, metadata }));
|
|
93
|
+
|
|
94
|
+
/**
 * Runs the multi-tenancy demo end to end.
 *
 * Steps: wipe and recreate the on-disk store, index DOCUMENTS, run two direct
 * filtered searches, ask the same question through two tenant-locked agents,
 * exercise an agent whose filter may be overridden, and finally add a document
 * through a tool that stamps default tenant metadata.
 *
 * Exits the process with code 0 on success and 1 on a missing API key or any
 * error raised along the way.
 */
async function vectorStoreFilteringExample() {
  console.log("Multi-Tenancy Vector Store Example\n");
  console.log("===================================\n");

  // Check for required API keys
  if (!process.env.OPENAI_API_KEY || !process.env.ANTHROPIC_API_KEY) {
    console.error("Error: OPENAI_API_KEY and ANTHROPIC_API_KEY are required");
    process.exit(1);
  }

  const STORE_URI = "./examples/data/vectors-filtered";
  const MODEL = "claude-sonnet-4-20250514";
  const apiKey = process.env.ANTHROPIC_API_KEY as string;

  try {
    // Step 1: Create embeddings and vector store (clean slate for demo)
    console.log("1. Creating vector store...");
    rmSync(STORE_URI, { recursive: true, force: true });
    const embeddings = new OpenAIEmbeddings({
      model: "text-embedding-3-small",
    });

    const store = await LanceDBVectorStore.create({
      name: "multi_tenant_kb",
      uri: STORE_URI,
      tableName: "tenant_docs",
      embeddings,
      metadataFields: [
        { name: "tenant_id", type: "string" as const },
        { name: "project_id", type: "string" as const },
        { name: "category", type: "string" as const },
        { name: "added_by", type: "string" as const },
      ],
    });
    console.log(" Store created\n");

    // Prints one line per hit (score, owning tenant, content) plus a blank line.
    const printResults = (
      results: Awaited<ReturnType<typeof store.search>>
    ) => {
      for (const { score, document } of results) {
        console.log(
          ` - [${score.toFixed(3)}] ${document.metadata?.tenant_id}: ${
            document.content
          }`
        );
      }
      console.log();
    };

    // Step 2: Add documents with tenant metadata
    console.log("2. Adding documents for multiple tenants...");
    await store.addDocuments(DOCUMENTS);
    console.log(` Added ${DOCUMENTS.length} documents\n`);

    // Step 3: Test filtering directly
    console.log("3. Testing direct search with filters...\n");

    // This search is restricted by category only, so hits can come from any
    // tenant. The label used to say "no filter", which did not match the query.
    console.log(" Searching for billing info (all tenants):");
    printResults(
      await store.search("billing cycle", {
        limit: 3,
        filter: { category: "billing" },
      })
    );

    console.log(" Searching for billing info (Acme only):");
    printResults(
      await store.search("billing cycle", {
        limit: 3,
        filter: { tenant_id: "acme", category: "billing" },
      })
    );

    // Step 4: Create tenant-specific agents
    console.log("4. Creating tenant-specific agents...\n");

    // Acme Corp agent - can only access Acme documents
    const acmeSearchTool = store.toRetrievalTool(
      "Search the Acme Corp knowledge base for billing, support, and policy information",
      {
        defaultLimit: 3,
        defaultFilter: { tenant_id: "acme" }, // Always filter to Acme
        allowFilterOverride: false, // Agent cannot override this filter
      }
    );

    const acmeAgent = new ClaudeAgent({
      id: "acme-agent",
      name: "Acme Support Agent",
      description:
        "You are a customer support agent for Acme Corp. Use the search tool to find accurate information.",
      apiKey,
      tools: [acmeSearchTool],
      model: MODEL,
    });

    // TechStart agent - can only access TechStart documents
    const techstartSearchTool = store.toRetrievalTool(
      "Search the TechStart Inc knowledge base for billing, support, and policy information",
      {
        defaultLimit: 3,
        defaultFilter: { tenant_id: "techstart" },
        allowFilterOverride: false,
      }
    );

    const techstartAgent = new ClaudeAgent({
      id: "techstart-agent",
      name: "TechStart Support Agent",
      description:
        "You are a customer support agent for TechStart Inc. Use the search tool to find accurate information.",
      apiKey,
      tools: [techstartSearchTool],
      model: MODEL,
    });

    // Step 5: Test tenant isolation
    console.log("5. Testing tenant isolation...\n");

    const question = "What is the billing cycle?";

    console.log(` Question: "${question}"\n`);

    console.log(" Acme Agent Response:");
    const acmeResponse = await acmeAgent.execute(question);
    console.log(` ${acmeResponse}\n`);

    console.log(" TechStart Agent Response:");
    const techstartResponse = await techstartAgent.execute(question);
    console.log(` ${techstartResponse}\n`);

    // Step 6: Demonstrate project-level filtering with override
    console.log("6. Creating agent with filter override capability...\n");

    const flexibleSearchTool = store.toRetrievalTool(
      "Search the knowledge base. You can filter by tenant_id, project_id, or category.",
      {
        defaultLimit: 3,
        defaultFilter: { tenant_id: "acme" }, // Default to Acme
        allowFilterOverride: true, // But allow overriding
      }
    );

    const flexibleAgent = new ClaudeAgent({
      id: "flexible-agent",
      name: "Flexible Search Agent",
      description: [
        "You are a knowledge base assistant. Use the search tool with appropriate filters.",
        "When asked about a specific project, use the project_id filter.",
        "When asked about a category, use the category filter.",
      ].join("\n"),
      apiKey,
      tools: [flexibleSearchTool],
      model: MODEL,
    });

    const projectQuestion = "What is the support policy for project proj-789?";
    console.log(` Question: "${projectQuestion}"\n`);
    const projectResponse = await flexibleAgent.execute(projectQuestion);
    console.log(` Response: ${projectResponse}\n`);

    // Step 7: Demonstrate adding documents with default metadata
    console.log("7. Adding new document with automatic tenant tagging...\n");

    const acmeAddTool = store.toAddDocumentsTool(
      "Add new documents to the Acme Corp knowledge base",
      {
        defaultMetadata: {
          tenant_id: "acme",
          project_id: "proj-123",
          added_by: "system",
        },
      }
    );

    const addAgent = new ClaudeAgent({
      id: "add-agent",
      name: "Document Manager",
      description: "You help add new documents to the knowledge base.",
      apiKey,
      tools: [acmeAddTool],
      model: MODEL,
    });

    console.log(" Adding: 'Acme Corp offers a new premium support tier.'\n");
    await addAgent.execute(
      "Add a document with id 'acme-4' and content: 'Acme Corp offers a new premium support tier with 24/7 assistance.'"
    );

    // Verify it was added with correct metadata. Whether the add tool was
    // invoked is up to the agent, so a missing document is reported as a
    // failure instead of printing "undefined" and then claiming success.
    const newDoc = await store.getById("acme-4");
    if (!newDoc) {
      throw new Error(
        "Document 'acme-4' was not found after the add request; the agent may not have called the add tool"
      );
    }
    console.log(" Document added with metadata:");
    console.log(` ${JSON.stringify(newDoc.metadata, null, 2)}\n`);

    console.log("✅ Multi-tenancy example completed successfully!\n");
    console.log("Key takeaways:");
    console.log("- Use defaultFilter to enforce tenant isolation");
    console.log("- Set allowFilterOverride: false for strict security");
    console.log("- Use defaultMetadata to auto-tag documents");
    console.log(
      "- Combine filters (tenant + project + category) for fine-grained control"
    );
    console.log(
      "- Use snake_case for metadata field names (LanceDB normalizes SQL identifiers to lowercase)"
    );

    process.exit(0);
  } catch (error) {
    console.error("Error:", error);
    process.exit(1);
  }
}

vectorStoreFilteringExample();
|
package/vector-store.ts
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import "dotenv/config";
|
|
2
|
+
import { ClaudeAgent } from "../lib/agents/anthropic/ClaudeAgent";
|
|
3
|
+
import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
|
|
4
|
+
import { OpenAIEmbeddings } from "../lib/embeddings/OpenAIEmbeddings";
|
|
5
|
+
import { RecursiveChunker, IngestionPipeline, OpenAiAgent } from "../lib";
|
|
6
|
+
import { readFileSync, readdirSync, statSync } from "fs";
|
|
7
|
+
import { join, relative } from "path";
|
|
8
|
+
|
|
9
|
+
import { createInterface } from "node:readline/promises";
|
|
10
|
+
|
|
11
|
+
// Promise-based readline interface over stdin/stdout; vectorStoreExample uses
// it for the interactive prompt.
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
15
|
+
|
|
16
|
+
/**
 * Recursively load all markdown files from a directory.
 *
 * @param dir - Directory to scan; subdirectories are walked recursively.
 * @param baseDir - Root that returned paths are made relative to. Falls back
 *   to `dir` when omitted (recursive calls pass the original root through).
 * @returns One entry per file whose name ends in `.md`, holding its path
 *   relative to the base directory and its UTF-8 content.
 */
function loadMarkdownFiles(
  dir: string,
  baseDir?: string
): Array<{ path: string; content: string }> {
  const base = baseDir || dir;
  const files: Array<{ path: string; content: string }> = [];

  for (const item of readdirSync(dir)) {
    const fullPath = join(dir, item);

    if (statSync(fullPath).isDirectory()) {
      // Recursively load from subdirectories
      files.push(...loadMarkdownFiles(fullPath, base));
    } else if (item.endsWith(".md")) {
      files.push({
        // path.relative copes with the platform separator and with a base
        // that is not normalized (trailing slash, "./" prefix); stripping a
        // literal `base + "/"` prefix did neither.
        path: relative(base, fullPath),
        content: readFileSync(fullPath, "utf-8"),
      });
    }
  }

  return files;
}
|
|
45
|
+
|
|
46
|
+
/**
 * End-to-end RAG demo.
 *
 * Builds a LanceDB store with OpenAI embeddings, chunks and ingests every
 * markdown file under ../docs/guide, runs one direct search, then starts an
 * interactive prompt backed by an OpenAI agent that has a search tool and a
 * get-chunk-by-id tool.
 *
 * Exits the process with code 0 when the user types "exit" and with code 1 on
 * a missing API key or any error.
 */
async function vectorStoreExample() {
  console.log("Vector Store Example with RAG Agent\n");
  console.log("====================================\n");

  // Only the OpenAI key is checked: the agent created below is an OpenAiAgent
  // fed from OPENAI_API_KEY, and no Anthropic client is constructed in this
  // example, so requiring ANTHROPIC_API_KEY only blocked otherwise valid runs.
  if (!process.env.OPENAI_API_KEY) {
    console.error(
      "Error: OPENAI_API_KEY is required for embeddings and the agent"
    );
    process.exit(1);
  }

  try {
    // Step 1: Create embeddings provider
    console.log("1. Creating OpenAI embeddings provider...");
    const embeddings = new OpenAIEmbeddings({
      model: "text-embedding-3-small",
    });
    console.log(
      ` Model: ${embeddings.model}, Dimensions: ${embeddings.dimensions}\n`
    );

    // Step 2: Create vector store
    console.log("2. Creating LanceDB vector store...");
    const store = await LanceDBVectorStore.create({
      name: "knowledge_base",
      uri: "./examples/data/vectors",
      tableName: "agention_docs",
      embeddings,
      metadataFields: [
        { name: "source", type: "string" as const },
        { name: "type", type: "string" as const },
      ],
    });
    console.log(" Store created successfully\n");

    // Step 3: Create chunker
    console.log("3. Creating RecursiveChunker for semantic splitting...");
    const chunker = new RecursiveChunker({
      chunkSize: 1000,
      chunkOverlap: 100,
      separators: ["\n\n", "\n", ". ", " "],
    });
    console.log(" Chunker created\n");

    // Step 4: Create ingestion pipeline
    console.log("4. Creating ingestion pipeline...");
    const pipeline = new IngestionPipeline(chunker, embeddings, store);
    console.log(" Pipeline ready\n");

    // Step 5: Load and ingest documentation files
    console.log("5. Loading documentation files from docs/guide/...");
    const docsPath = join(__dirname, "../docs/guide");
    const markdownFiles = loadMarkdownFiles(docsPath);
    console.log(` Found ${markdownFiles.length} markdown files\n`);

    // Step 6: Ingest documents using pipeline
    console.log("6. Ingesting documents (this may take a moment)...");

    const documents = markdownFiles.map(({ path, content }) => ({
      text: content,
      options: {
        sourceId: path,
        sourcePath: path,
        metadata: {
          source: path,
          type: "documentation",
        },
      },
    }));

    const result = await pipeline.ingestMany(documents, {
      batchSize: 10,
      skipDuplicates: true, // Skip chunks that already exist (by content hash)
      onProgress: ({ phase, processed, total }) => {
        console.log(` ${phase}: ${processed}/${total}`);
      },
    });

    console.log("\n Ingestion complete:");
    console.log(` - Chunks processed: ${result.chunksProcessed}`);
    console.log(` - Chunks skipped (duplicates): ${result.chunksSkipped}`);
    console.log(` - Chunks stored: ${result.chunksStored}`);
    console.log(` - Duration: ${result.duration}ms`);
    console.log(` - Errors: ${result.errors.length}\n`);

    // Step 7: Test direct search. The label prints the query that is actually
    // sent, so the two cannot drift apart.
    const searchQuery = "How do I chain agents together?";
    console.log(`7. Testing direct search for "${searchQuery}"...`);
    const searchResults = await store.search(searchQuery, {
      limit: 3,
    });
    console.log(" Top results:");
    for (const { score, document } of searchResults) {
      console.log(` - [${score.toFixed(3)}] ${document.metadata?.source}`);
      console.log(` ${document.content.substring(0, 100)}...\n`);
    }

    // Step 8: Create retrieval and navigation tools
    console.log("8. Creating tools for the agent...");
    const searchTool = store.toRetrievalTool(
      "Search the Agention documentation for information about agents, tools, pipelines, vector stores, and other features",
      { defaultLimit: 3 }
    );
    const getChunkTool = store.toGetChunkByIdTool(
      "Retrieve a specific chunk by ID. Use this to get more context by reading previous or next chunks. Check the metadata.prev_id and metadata.next_id fields from search results."
    );
    console.log(` Search tool: ${searchTool.name}`);
    console.log(` Get chunk tool: ${getChunkTool.name}\n`);

    // Step 9: Create agent with the tools
    console.log("9. Creating OpenAI agent with search and navigation tools...");
    const agent = new OpenAiAgent({
      id: "rag-agent",
      name: "Documentation Assistant",
      description:
        "You are a helpful assistant that answers questions about Agention. Always use the search tool to find relevant documentation before answering. If you need more context, use the get_chunk tool with previousChunkId or nextChunkId to read surrounding chunks. Base your answers on the search results and cite the source files.",
      apiKey: process.env.OPENAI_API_KEY as string,
      tools: [searchTool, getChunkTool],
      model: "gpt-4.1-nano",
    });
    console.log(" Agent created\n");

    // Step 10: Interactive Q&A
    console.log("10. Interactive Q&A (type 'exit' to quit)\n");
    console.log(" Try asking:\n");
    console.log(" - What are the different types of executors?");
    console.log(" - How do I create a pipeline?");
    console.log(" - What embedding models are supported?");
    console.log(" - How do vector stores work?\n");

    while (true) {
      // Trim once up front so "exit " or " exit" also ends the session.
      const question = (await rl.question("You: ")).trim();

      if (question.toLowerCase() === "exit") {
        console.log("\nGoodbye!");
        break;
      }

      if (!question) {
        continue;
      }

      console.log("\nAssistant: Searching documentation...\n");

      const response = await agent.execute(question);
      console.log(`Assistant: ${response}\n`);
    }

    rl.close();
    process.exit(0);
  } catch (error) {
    console.error("Error:", error);
    rl.close();
    process.exit(1);
  }
}

vectorStoreExample();
|
|
File without changes
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import "dotenv/config";
|
|
2
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
3
|
+
import * as arrow from "apache-arrow";
|
|
4
|
+
|
|
5
|
+
import { Utf8 } from "apache-arrow";
|
|
6
|
+
import { embedding } from "@lancedb/lancedb";
|
|
7
|
+
import "@lancedb/lancedb/embedding/openai";
|
|
8
|
+
// import { EmbeddingFunction, getRegistry } from "@lancedb/lancedb/embedding";
|
|
9
|
+
|
|
10
|
+
/**
 * Scratch demo of LanceDB's registry-provided OpenAI embedding function.
 *
 * Opens (or creates and seeds) a "words" table in the current directory,
 * appends two more rows, then runs a filtered vector search and a filtered
 * plain query and prints the results.
 *
 * Note: rows 3 and 4 are appended on every run, so the count printed for
 * `id = 4` grows each time the script is executed against an existing table.
 *
 * @throws Error when the "openai" embedding function is not in the registry.
 */
const setup = async () => {
  const db = await lancedb.connect("./");
  const tableNames = await db.tableNames();

  let tbl: lancedb.Table;
  if (tableNames.includes("words")) {
    tbl = await db.openTable("words");
  } else {
    const func = embedding.getRegistry().get("openai")?.create({
      model: "text-embedding-ada-002",
    });
    // The registry lookup is optional-chained and can yield undefined; fail
    // with a clear message instead of a TypeError on the first field access.
    if (!func) {
      throw new Error('LanceDB embedding function "openai" is not registered');
    }

    const wordsSchema = embedding.LanceSchema({
      text: func.sourceField(new Utf8()),
      vector: func.vectorField(),
      id: new arrow.Int32(),
    });
    tbl = await db.createEmptyTable("words", wordsSchema, {
      mode: "overwrite",
    });

    await tbl.add([
      { text: "hello world", id: 1 },
      { text: "goodbye world", id: 2 },
    ]);
  }

  // Logs the table list as it was before any table was created above.
  console.log(tableNames);
  await tbl.add([
    {
      text: "The world says hello and goodbye to the person who is coming and going",
      id: 3,
    },
    {
      text: `LanceDB registers the Sentence Transformers embeddings function in the registry as sentence-transformers. You can pass any supported model name to the create. By default it uses "sentence-transformers/paraphrase-MiniLM-L6-v2".`,
      id: 4,
    },
  ]);

  const query = "Sentence Transformers";
  const actual = await tbl.search(query).where("id > 2").limit(2).toArray();

  actual.forEach(({ text, id }) => console.log(text, id));
  console.log((await tbl.query().where("id = 4").limit(10).toArray()).length);
};

// Surface failures explicitly instead of leaving the promise floating.
setup().catch((error) => {
  console.error("Error:", error);
  process.exitCode = 1;
});
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VoyageAI Embeddings Example
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates how to use VoyageAI embeddings with the vector store.
|
|
5
|
+
*
|
|
6
|
+
* Prerequisites:
|
|
7
|
+
* - Set VOYAGE_API_KEY environment variable
|
|
8
|
+
* - Run with: npm run example -- examples/voyage-embeddings.ts
|
|
9
|
+
*/
|
|
10
|
+
import "dotenv/config";
|
|
11
|
+
import { VoyageAIEmbeddings } from "../lib/embeddings/VoyageAIEmbeddings";
|
|
12
|
+
import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
|
|
13
|
+
|
|
14
|
+
/**
 * VoyageAI embeddings walkthrough.
 *
 * Embeds a few sample strings directly, then creates a LanceDB store backed by
 * the same embeddings instance, indexes three documents and prints the top two
 * matches for a sample query.
 *
 * Exits with code 1 when VOYAGE_API_KEY is not set.
 */
async function main() {
  console.log("=== VoyageAI Embeddings Example ===\n");

  // Check for required API key
  if (!process.env.VOYAGE_API_KEY) {
    console.error("Error: VOYAGE_API_KEY environment variable is not set");
    console.log("\nPlease set your VoyageAI API key:");
    console.log(" export VOYAGE_API_KEY=your-api-key-here");
    process.exit(1);
  }

  // Create VoyageAI embeddings instance
  console.log("Creating VoyageAI embeddings with voyage-4 model...");
  // NOTE(review): this instance is configured with inputType "document" and is
  // also the one the store uses for the search below; confirm whether queries
  // should be embedded with inputType "query" instead.
  const embeddings = new VoyageAIEmbeddings({
    model: "voyage-4",
    inputType: "document", // Use "document" for indexing, "query" for searching
  });

  console.log(`Model: ${embeddings.model}`);
  console.log(`Dimensions: ${embeddings.dimensions}\n`);

  // Example: Generate embeddings directly
  console.log("Generating embeddings for sample texts...");
  const texts = [
    "VoyageAI provides state-of-the-art embedding models",
    "Vector databases enable semantic search capabilities",
    "The voyage-4 model offers excellent performance",
  ];

  const vectors = await embeddings.embed(texts);
  console.log(`Generated ${vectors.length} embeddings`);
  console.log(`First embedding dimensions: ${vectors[0].length}\n`);

  // Example: Using with LanceDB Vector Store
  console.log("Creating LanceDB vector store with VoyageAI embeddings...");
  const store = await LanceDBVectorStore.create({
    name: "voyage_demo",
    uri: "./data/voyage-example",
    tableName: "documents",
    embeddings,
    metadataFields: [
      { name: "category", type: "string" as const },
      { name: "source", type: "string" as const },
    ],
  });

  // Add documents (embeddings generated automatically)
  console.log("Adding documents to vector store...");
  await store.addDocuments([
    {
      id: "doc1",
      content: "VoyageAI specializes in embedding models for semantic search",
      metadata: { category: "ai", source: "docs" },
    },
    {
      id: "doc2",
      content: "voyage-4 is their latest general-purpose embedding model",
      metadata: { category: "models", source: "docs" },
    },
    {
      id: "doc3",
      content: "voyage-code-3 is optimized for code search and understanding",
      metadata: { category: "models", source: "docs" },
    },
  ]);

  console.log("Documents added successfully!\n");

  // Perform semantic search
  console.log("Performing semantic search...");
  const query = "What is the latest VoyageAI model?";
  const results = await store.search(query, { limit: 2 });

  console.log(`\nQuery: "${query}"`);
  console.log("Results:");
  // Content and metadata are read from `document`, as in the other examples
  // that call LanceDBVectorStore.search; reading them off the result itself
  // printed undefined.
  results.forEach(({ score, document }, i) => {
    console.log(`${i + 1}. [Score: ${score.toFixed(4)}] ${document.content}`);
    console.log(` Metadata:`, document.metadata);
  });

  console.log("\n=== Example Complete ===");
}

// Log the failure and make sure the process reports a non-zero exit status.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|