modular-agent-examples 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/chunking-demo.ts +339 -0
  2. package/cleanup-duplicates.ts +142 -0
  3. package/data/flower.jpg +0 -0
  4. package/generative.ts +128 -0
  5. package/graph/context-example.ts +209 -0
  6. package/graph/data-pipeline/agents.ts +60 -0
  7. package/graph/data-pipeline/fetchers.ts +166 -0
  8. package/graph/data-pipeline/index.ts +282 -0
  9. package/graph/index.ts +154 -0
  10. package/graph/map-example.ts +227 -0
  11. package/graph/metrics-example.ts +238 -0
  12. package/graph/parallel-example.ts +167 -0
  13. package/graph/pipeline-example.ts +225 -0
  14. package/graph/planning-example.ts +406 -0
  15. package/graph/router-example.ts +226 -0
  16. package/graph/sequential-example.ts +141 -0
  17. package/graph/voting-example.ts +159 -0
  18. package/graph-rag/docker-compose.yaml +14 -0
  19. package/graph-rag/index.js +99 -0
  20. package/graph-rag/init-db.sh +7 -0
  21. package/graph-rag/package.json +15 -0
  22. package/history-compression-example.ts +163 -0
  23. package/history-persistence.ts +347 -0
  24. package/index.ts +175 -0
  25. package/ingestion-pipeline.ts +353 -0
  26. package/mcp-airbnb-example.ts +69 -0
  27. package/mcp-http-example.ts +70 -0
  28. package/mcp-stdio-example.ts +63 -0
  29. package/multimodal.ts +144 -0
  30. package/ollama.ts +148 -0
  31. package/openai-compatible.ts +141 -0
  32. package/opensearch-vector-store.ts +342 -0
  33. package/package.json +24 -0
  34. package/pubmed.ts +289 -0
  35. package/reasoning-with-sub-agent.ts +311 -0
  36. package/synchronous/index.ts +48 -0
  37. package/tsconfig.json +8 -0
  38. package/vector-store-filtering.ts +303 -0
  39. package/vector-store.ts +210 -0
  40. package/vectorstore/index.ts +0 -0
  41. package/vectorstore/store/dbService.ts +80 -0
  42. package/voyage-embeddings.ts +99 -0
  43. package/weather-with-sub-agent.ts +276 -0
  44. package/weather.ts +389 -0
@@ -0,0 +1,303 @@
1
+ import "dotenv/config";
2
+ import { rmSync } from "fs";
3
+ import { ClaudeAgent } from "../lib/agents/anthropic/ClaudeAgent";
4
+ import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
5
+ import { OpenAIEmbeddings } from "../lib/embeddings/OpenAIEmbeddings";
6
+
7
+ /**
8
+ * Example demonstrating multi-tenancy and filtering in vector stores.
9
+ *
10
+ * Shows how to:
11
+ * 1. Store documents with tenant/project metadata
12
+ * 2. Create tenant-specific retrieval tools
13
+ * 3. Ensure agents only access documents for their tenant
14
+ *
15
+ * NOTE: LanceDB uses DataFusion for SQL filtering, which normalizes unquoted
16
+ * identifiers to lowercase. Use snake_case for metadata field names to avoid
17
+ * case-sensitivity issues.
18
+ */
19
+
20
// Sample multi-tenant data.
//
// Each entry is produced by the `doc` factory so the document shape
// ({ id, content, metadata: { tenant_id, project_id, category } }) is spelled
// out once instead of being repeated for every row.
const DOCUMENTS = (() => {
  const doc = (
    id: string,
    tenant_id: string,
    project_id: string,
    category: string,
    content: string
  ) => ({
    id,
    content,
    metadata: { tenant_id, project_id, category },
  });

  return [
    // Tenant: Acme Corp
    doc(
      "acme-1",
      "acme",
      "proj-123",
      "billing",
      "Acme Corp uses a monthly billing cycle. Invoices are sent on the 1st of each month."
    ),
    doc(
      "acme-2",
      "acme",
      "proj-123",
      "support",
      "Acme Corp's support hours are Monday-Friday, 9am-5pm EST."
    ),
    doc(
      "acme-3",
      "acme",
      "proj-456",
      "policy",
      "Acme Corp has a 30-day refund policy for all services."
    ),

    // Tenant: TechStart Inc
    doc(
      "techstart-1",
      "techstart",
      "proj-789",
      "billing",
      "TechStart Inc uses annual billing. Payment is due within 15 days of invoice."
    ),
    doc(
      "techstart-2",
      "techstart",
      "proj-789",
      "support",
      "TechStart Inc offers 24/7 premium support for enterprise customers."
    ),
    doc(
      "techstart-3",
      "techstart",
      "proj-789",
      "policy",
      "TechStart Inc has a 60-day money-back guarantee."
    ),

    // Tenant: Global Services
    doc(
      "global-1",
      "global",
      "proj-101",
      "billing",
      "Global Services bills quarterly. Invoices are sent 15 days before the period ends."
    ),
    doc(
      "global-2",
      "global",
      "proj-202",
      "support",
      "Global Services provides support in 12 languages, available 24/7."
    ),
  ];
})();
93
+
94
/**
 * Runs the multi-tenancy demo end to end.
 *
 * Steps, in order:
 *  1. Deletes any previous store directory and creates a fresh LanceDB store.
 *  2. Indexes `DOCUMENTS` (three tenants).
 *  3. Runs two direct searches: category-only, then tenant + category.
 *  4-5. Builds one agent per tenant whose retrieval tool is pinned to that
 *       tenant via `defaultFilter` + `allowFilterOverride: false`, and asks
 *       both the same question.
 *  6. Builds an agent whose retrieval tool allows filter overrides.
 *  7. Adds a document through a tool that injects default metadata and reads
 *     it back by id.
 *
 * The process is terminated explicitly: exit code 0 on success, 1 when an API
 * key is missing or any step throws.
 */
async function vectorStoreFilteringExample() {
  console.log("Multi-Tenancy Vector Store Example\n");
  console.log("===================================\n");

  // Capture the key in a const so the guard below narrows it to `string`
  // and no `as string` assertions are needed further down.
  const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
  if (!process.env.OPENAI_API_KEY || !anthropicApiKey) {
    console.error("Error: OPENAI_API_KEY and ANTHROPIC_API_KEY are required");
    process.exit(1);
  }

  // Single source of truth for values that were previously repeated inline.
  const storeUri = "./examples/data/vectors-filtered";
  const agentModel = "claude-sonnet-4-20250514";

  try {
    // Step 1: Create embeddings and vector store (clean slate for demo)
    console.log("1. Creating vector store...");
    rmSync(storeUri, { recursive: true, force: true });
    const embeddings = new OpenAIEmbeddings({
      model: "text-embedding-3-small",
    });

    const store = await LanceDBVectorStore.create({
      name: "multi_tenant_kb",
      uri: storeUri,
      tableName: "tenant_docs",
      embeddings,
      metadataFields: [
        { name: "tenant_id", type: "string" as const },
        { name: "project_id", type: "string" as const },
        { name: "category", type: "string" as const },
        { name: "added_by", type: "string" as const },
      ],
    });
    console.log(" Store created\n");

    // Prints one "[score] tenant: content" line per hit, then a blank line.
    const printSearchResults = (
      results: Awaited<ReturnType<typeof store.search>>
    ): void => {
      for (const hit of results) {
        console.log(
          ` - [${hit.score.toFixed(3)}] ${hit.document.metadata?.tenant_id}: ${
            hit.document.content
          }`
        );
      }
      console.log();
    };

    // Step 2: Add documents with tenant metadata
    console.log("2. Adding documents for multiple tenants...");
    await store.addDocuments(DOCUMENTS);
    console.log(` Added ${DOCUMENTS.length} documents\n`);

    // Step 3: Test filtering directly
    console.log("3. Testing direct search with filters...\n");

    // This search IS filtered (by category); it just is not restricted to a
    // tenant, so the label says "all tenants" rather than "no filter".
    console.log(" Searching for billing info (all tenants):");
    printSearchResults(
      await store.search("billing cycle", {
        limit: 3,
        filter: { category: "billing" },
      })
    );

    console.log(" Searching for billing info (Acme only):");
    printSearchResults(
      await store.search("billing cycle", {
        limit: 3,
        filter: { tenant_id: "acme", category: "billing" },
      })
    );

    // Step 4: Create tenant-specific agents
    console.log("4. Creating tenant-specific agents...\n");

    // Acme Corp agent - can only access Acme documents
    const acmeSearchTool = store.toRetrievalTool(
      "Search the Acme Corp knowledge base for billing, support, and policy information",
      {
        defaultLimit: 3,
        defaultFilter: { tenant_id: "acme" }, // Always filter to Acme
        allowFilterOverride: false, // Agent cannot override this filter
      }
    );

    const acmeAgent = new ClaudeAgent({
      id: "acme-agent",
      name: "Acme Support Agent",
      description:
        "You are a customer support agent for Acme Corp. Use the search tool to find accurate information.",
      apiKey: anthropicApiKey,
      tools: [acmeSearchTool],
      model: agentModel,
    });

    // TechStart agent - can only access TechStart documents
    const techstartSearchTool = store.toRetrievalTool(
      "Search the TechStart Inc knowledge base for billing, support, and policy information",
      {
        defaultLimit: 3,
        defaultFilter: { tenant_id: "techstart" },
        allowFilterOverride: false,
      }
    );

    const techstartAgent = new ClaudeAgent({
      id: "techstart-agent",
      name: "TechStart Support Agent",
      description:
        "You are a customer support agent for TechStart Inc. Use the search tool to find accurate information.",
      apiKey: anthropicApiKey,
      tools: [techstartSearchTool],
      model: agentModel,
    });

    // Step 5: Test tenant isolation (same question, two differently scoped agents)
    console.log("5. Testing tenant isolation...\n");

    const question = "What is the billing cycle?";

    console.log(` Question: "${question}"\n`);

    console.log(" Acme Agent Response:");
    const acmeResponse = await acmeAgent.execute(question);
    console.log(` ${acmeResponse}\n`);

    console.log(" TechStart Agent Response:");
    const techstartResponse = await techstartAgent.execute(question);
    console.log(` ${techstartResponse}\n`);

    // Step 6: Demonstrate project-level filtering with override
    console.log("6. Creating agent with filter override capability...\n");

    const flexibleSearchTool = store.toRetrievalTool(
      "Search the knowledge base. You can filter by tenant_id, project_id, or category.",
      {
        defaultLimit: 3,
        defaultFilter: { tenant_id: "acme" }, // Default to Acme
        allowFilterOverride: true, // But allow overriding
      }
    );

    const flexibleAgent = new ClaudeAgent({
      id: "flexible-agent",
      name: "Flexible Search Agent",
      description: [
        "You are a knowledge base assistant. Use the search tool with appropriate filters.",
        "When asked about a specific project, use the project_id filter.",
        "When asked about a category, use the category filter.",
      ].join("\n"),
      apiKey: anthropicApiKey,
      tools: [flexibleSearchTool],
      model: agentModel,
    });

    // NOTE(review): in DOCUMENTS, proj-789 belongs to tenant "techstart" while
    // this tool defaults to tenant "acme". Whether any documents are returned
    // depends on how an override is combined with defaultFilter - confirm
    // against the toRetrievalTool implementation.
    const projectQuestion = "What is the support policy for project proj-789?";
    console.log(` Question: "${projectQuestion}"\n`);
    const projectResponse = await flexibleAgent.execute(projectQuestion);
    console.log(` Response: ${projectResponse}\n`);

    // Step 7: Demonstrate adding documents with default metadata
    console.log("7. Adding new document with automatic tenant tagging...\n");

    const acmeAddTool = store.toAddDocumentsTool(
      "Add new documents to the Acme Corp knowledge base",
      {
        defaultMetadata: {
          tenant_id: "acme",
          project_id: "proj-123",
          added_by: "system",
        },
      }
    );

    const addAgent = new ClaudeAgent({
      id: "add-agent",
      name: "Document Manager",
      description: "You help add new documents to the knowledge base.",
      apiKey: anthropicApiKey,
      tools: [acmeAddTool],
      model: agentModel,
    });

    console.log(" Adding: 'Acme Corp offers a new premium support tier.'\n");
    await addAgent.execute(
      "Add a document with id 'acme-4' and content: 'Acme Corp offers a new premium support tier with 24/7 assistance.'"
    );

    // Verify it was added with correct metadata. The add is performed by the
    // agent, so report a missing document explicitly instead of printing
    // "undefined" as if it were metadata.
    const newDoc = await store.getById("acme-4");
    if (newDoc) {
      console.log(" Document added with metadata:");
      console.log(` ${JSON.stringify(newDoc.metadata, null, 2)}\n`);
    } else {
      console.warn(
        " Document 'acme-4' was not found in the store after the add request\n"
      );
    }

    console.log("✅ Multi-tenancy example completed successfully!\n");
    console.log("Key takeaways:");
    console.log("- Use defaultFilter to enforce tenant isolation");
    console.log("- Set allowFilterOverride: false for strict security");
    console.log("- Use defaultMetadata to auto-tag documents");
    console.log(
      "- Combine filters (tenant + project + category) for fine-grained control"
    );
    console.log(
      "- Use snake_case for metadata field names (LanceDB normalizes SQL identifiers to lowercase)"
    );

    process.exit(0);
  } catch (error: unknown) {
    console.error("Error:", error);
    process.exit(1);
  }
}

vectorStoreFilteringExample();
@@ -0,0 +1,210 @@
1
import "dotenv/config";

import { readFileSync, readdirSync, statSync } from "fs";
import { createInterface } from "node:readline/promises";
import { join, relative } from "path";

import { RecursiveChunker, IngestionPipeline, OpenAiAgent } from "../lib";
import { ClaudeAgent } from "../lib/agents/anthropic/ClaudeAgent";
import { OpenAIEmbeddings } from "../lib/embeddings/OpenAIEmbeddings";
import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
10
+
11
// Module-level readline interface wired to the process's stdin/stdout.
const rl = createInterface({ input: process.stdin, output: process.stdout });
15
+
16
/**
 * Recursively loads every Markdown (`.md`) file found under a directory.
 *
 * @param dir - Directory to scan. Subdirectories are descended into.
 * @param baseDir - Directory the returned `path` values are relative to.
 *   Defaults to `dir` (also when an empty string is passed). The recursive
 *   calls pass the original root here so nested files keep their subdirectory
 *   prefix.
 * @returns One `{ path, content }` entry per Markdown file, where `path` is
 *   relative to the base directory and `content` is the file read as UTF-8.
 */
function loadMarkdownFiles(
  dir: string,
  baseDir?: string
): Array<{ path: string; content: string }> {
  const base = baseDir || dir;
  const files: Array<{ path: string; content: string }> = [];

  for (const item of readdirSync(dir)) {
    const fullPath = join(dir, item);

    if (statSync(fullPath).isDirectory()) {
      // Recursively load from subdirectories, keeping the original base
      files.push(...loadMarkdownFiles(fullPath, base));
    } else if (item.endsWith(".md")) {
      files.push({
        // `join` normalises its result ("./docs" -> "docs", "docs//" -> "docs/",
        // "\" separators on Windows), so stripping the literal `base + "/"`
        // prefix silently fails for such bases and leaks the full path.
        // `relative` resolves both sides before comparing.
        path: relative(base, fullPath),
        content: readFileSync(fullPath, "utf-8"),
      });
    }
  }

  return files;
}
45
+
46
/**
 * RAG walkthrough: index the Markdown guide into a LanceDB store and answer
 * questions about it interactively.
 *
 * Steps, in order: create the embeddings provider, the vector store, the
 * chunker and the ingestion pipeline; load every Markdown file under
 * `../docs/guide` (relative to this file); ingest them; run one direct
 * search; expose the store to an agent through a retrieval tool and a
 * get-chunk-by-id tool; then loop on stdin until the user types `exit`.
 *
 * The process is terminated explicitly: exit code 0 after `exit`, 1 when the
 * API key is missing or any step throws.
 */
async function vectorStoreExample() {
  console.log("Vector Store Example with RAG Agent\n");
  console.log("====================================\n");

  // Both the embeddings provider and the agent created below use OpenAI, so
  // OPENAI_API_KEY is the only credential this example needs. Capturing it in
  // a const lets the guard narrow it to `string`.
  const openAiApiKey = process.env.OPENAI_API_KEY;
  if (!openAiApiKey) {
    console.error(
      "Error: OPENAI_API_KEY is required for embeddings and the agent"
    );
    process.exit(1);
  }

  try {
    // Step 1: Create embeddings provider
    console.log("1. Creating OpenAI embeddings provider...");
    const embeddings = new OpenAIEmbeddings({
      model: "text-embedding-3-small",
    });
    console.log(
      ` Model: ${embeddings.model}, Dimensions: ${embeddings.dimensions}\n`
    );

    // Step 2: Create vector store
    console.log("2. Creating LanceDB vector store...");
    const store = await LanceDBVectorStore.create({
      name: "knowledge_base",
      uri: "./examples/data/vectors",
      tableName: "agention_docs",
      embeddings,
      metadataFields: [
        { name: "source", type: "string" as const },
        { name: "type", type: "string" as const },
      ],
    });
    console.log(" Store created successfully\n");

    // Step 3: Create chunker
    console.log("3. Creating RecursiveChunker for semantic splitting...");
    const chunker = new RecursiveChunker({
      chunkSize: 1000,
      chunkOverlap: 100,
      separators: ["\n\n", "\n", ". ", " "],
    });
    console.log(" Chunker created\n");

    // Step 4: Create ingestion pipeline
    console.log("4. Creating ingestion pipeline...");
    const pipeline = new IngestionPipeline(chunker, embeddings, store);
    console.log(" Pipeline ready\n");

    // Step 5: Load documentation files
    console.log("5. Loading documentation files from docs/guide/...");
    const docsPath = join(__dirname, "../docs/guide");
    const markdownFiles = loadMarkdownFiles(docsPath);
    console.log(` Found ${markdownFiles.length} markdown files\n`);

    // Step 6: Ingest documents using pipeline
    console.log("6. Ingesting documents (this may take a moment)...");

    // The relative file path doubles as source id, source path and the
    // "source" metadata field that the search output prints.
    const documents = markdownFiles.map((file) => ({
      text: file.content,
      options: {
        sourceId: file.path,
        sourcePath: file.path,
        metadata: {
          source: file.path,
          type: "documentation",
        },
      },
    }));

    const ingestion = await pipeline.ingestMany(documents, {
      batchSize: 10,
      skipDuplicates: true, // Skip chunks that already exist (by content hash)
      onProgress: ({ phase, processed, total }) => {
        console.log(` ${phase}: ${processed}/${total}`);
      },
    });

    console.log("\n Ingestion complete:");
    console.log(` - Chunks processed: ${ingestion.chunksProcessed}`);
    console.log(` - Chunks skipped (duplicates): ${ingestion.chunksSkipped}`);
    console.log(` - Chunks stored: ${ingestion.chunksStored}`);
    console.log(` - Duration: ${ingestion.duration}ms`);
    console.log(` - Errors: ${ingestion.errors.length}\n`);

    // Step 7: Test direct search. The log line is derived from the query so
    // the two cannot drift apart.
    const searchQuery = "How do I chain agents together?";
    console.log(`7. Testing direct search for "${searchQuery}"...`);
    const searchResults = await store.search(searchQuery, {
      limit: 3,
    });
    console.log(" Top results:");
    for (const hit of searchResults) {
      console.log(
        ` - [${hit.score.toFixed(3)}] ${hit.document.metadata?.source}`
      );
      console.log(` ${hit.document.content.substring(0, 100)}...\n`);
    }

    // Step 8: Create retrieval and navigation tools
    console.log("8. Creating tools for the agent...");
    const searchTool = store.toRetrievalTool(
      "Search the Agention documentation for information about agents, tools, pipelines, vector stores, and other features",
      { defaultLimit: 3 }
    );
    const getChunkTool = store.toGetChunkByIdTool(
      "Retrieve a specific chunk by ID. Use this to get more context by reading previous or next chunks. Check the metadata.prev_id and metadata.next_id fields from search results."
    );
    console.log(` Search tool: ${searchTool.name}`);
    console.log(` Get chunk tool: ${getChunkTool.name}\n`);

    // Step 9: Create agent with the tools
    console.log("9. Creating OpenAI agent with search and navigation tools...");
    const agent = new OpenAiAgent({
      id: "rag-agent",
      name: "Documentation Assistant",
      description:
        "You are a helpful assistant that answers questions about Agention. Always use the search tool to find relevant documentation before answering. If you need more context, use the get_chunk tool with previousChunkId or nextChunkId to read surrounding chunks. Base your answers on the search results and cite the source files.",
      apiKey: openAiApiKey,
      tools: [searchTool, getChunkTool],
      model: "gpt-4.1-nano",
    });
    console.log(" Agent created\n");

    // Step 10: Interactive Q&A
    console.log("10. Interactive Q&A (type 'exit' to quit)\n");
    console.log(" Try asking:\n");
    console.log(" - What are the different types of executors?");
    console.log(" - How do I create a pipeline?");
    console.log(" - What embedding models are supported?");
    console.log(" - How do vector stores work?\n");

    while (true) {
      // Trim once up front so "exit " / " exit" quit as expected and
      // whitespace-only input is ignored.
      const question = (await rl.question("You: ")).trim();

      if (!question) {
        continue;
      }

      if (question.toLowerCase() === "exit") {
        console.log("\nGoodbye!");
        break;
      }

      console.log("\nAssistant: Searching documentation...\n");

      const response = await agent.execute(question);
      console.log(`Assistant: ${response}\n`);
    }

    rl.close();
    process.exit(0);
  } catch (error: unknown) {
    console.error("Error:", error);
    rl.close();
    process.exit(1);
  }
}

vectorStoreExample();
File without changes
@@ -0,0 +1,80 @@
1
+ import "dotenv/config";
2
+ import * as lancedb from "@lancedb/lancedb";
3
+ import * as arrow from "apache-arrow";
4
+
5
+ import { Utf8 } from "apache-arrow";
6
+ import { embedding } from "@lancedb/lancedb";
7
+ import "@lancedb/lancedb/embedding/openai";
8
+ // import { EmbeddingFunction, getRegistry } from "@lancedb/lancedb/embedding";
9
+
10
/**
 * Scratch script that exercises LanceDB with its registered "openai"
 * embedding function.
 *
 * Connects to a database at `./`, opens the `words` table or creates it
 * (schema: `text` source field, `vector` field, `id` Int32) and seeds it with
 * ids 1 and 2, then appends ids 3 and 4, runs a text search restricted to
 * `id > 2`, prints the hits, and finally prints how many rows have `id = 4`.
 *
 * NOTE(review): rows 3 and 4 are appended unconditionally, so repeated runs
 * against an existing table will presumably accumulate duplicates - the
 * trailing `id = 4` count makes that visible.
 *
 * @throws Error when no "openai" embedding function is registered.
 */
const setup = async (): Promise<void> => {
  const db = await lancedb.connect("./");
  const tableNames = await db.tableNames();

  let tbl: lancedb.Table;
  if (tableNames.includes("words")) {
    tbl = await db.openTable("words");
  } else {
    const func = embedding.getRegistry().get("openai")?.create({
      model: "text-embedding-ada-002",
    });
    // Fail with a clear message instead of relying on `func!`, which would
    // surface as an opaque "cannot read properties of undefined".
    if (!func) {
      throw new Error(
        'LanceDB embedding function "openai" is not registered'
      );
    }

    const wordsSchema = embedding.LanceSchema({
      text: func.sourceField(new Utf8()),
      vector: func.vectorField(),
      id: new arrow.Int32(),
    });
    tbl = await db.createEmptyTable("words", wordsSchema, {
      mode: "overwrite",
    });

    await tbl.add([
      { text: "hello world", id: 1 },
      { text: "goodbye world", id: 2 },
    ]);
  }

  // Table names as they were BEFORE this run possibly created "words".
  console.log(tableNames);
  await tbl.add([
    {
      text: "The world says hello and goodbye to the person who is coming and going",
      id: 3,
    },
    {
      text: `LanceDB registers the Sentence Transformers embeddings function in the registry as sentence-transformers. You can pass any supported model name to the create. By default it uses "sentence-transformers/paraphrase-MiniLM-L6-v2".`,
      id: 4,
    },
  ]);

  const query = "Sentence Transformers";
  const actual = await tbl.search(query).where("id > 2").limit(2).toArray();

  actual.forEach(({ text, id }) => console.log(text, id));
  console.log((await tbl.query().where("id = 4").limit(10).toArray()).length);
};

// Report failures and signal them through the exit code instead of leaving
// the promise rejection unhandled.
setup().catch((error: unknown) => {
  console.error("Error:", error);
  process.exitCode = 1;
});
@@ -0,0 +1,99 @@
1
+ /**
2
+ * VoyageAI Embeddings Example
3
+ *
4
+ * Demonstrates how to use VoyageAI embeddings with the vector store.
5
+ *
6
+ * Prerequisites:
7
+ * - Set VOYAGE_API_KEY environment variable
8
+ * - Run with: npm run example -- examples/voyage-embeddings.ts
9
+ */
10
+ import "dotenv/config";
11
+ import { VoyageAIEmbeddings } from "../lib/embeddings/VoyageAIEmbeddings";
12
+ import { LanceDBVectorStore } from "../lib/vectorstore/LanceDBVectorStore";
13
+
14
/**
 * VoyageAI embeddings demo.
 *
 * Verifies that VOYAGE_API_KEY is set (exits with code 1 otherwise), embeds
 * three sample texts directly, then creates a LanceDB vector store backed by
 * the same embeddings instance, adds three documents and prints the top two
 * hits for one query.
 */
async function main() {
  console.log("=== VoyageAI Embeddings Example ===\n");

  // Check for required API key
  if (!process.env.VOYAGE_API_KEY) {
    console.error("Error: VOYAGE_API_KEY environment variable is not set");
    console.log("\nPlease set your VoyageAI API key:");
    console.log(" export VOYAGE_API_KEY=your-api-key-here");
    process.exit(1);
  }

  // Create VoyageAI embeddings instance
  console.log("Creating VoyageAI embeddings with voyage-4 model...");
  // NOTE(review): this instance is configured with inputType "document" and is
  // also the one the store uses for the search below - confirm whether the
  // store switches to "query" for search-time embeddings.
  const embeddings = new VoyageAIEmbeddings({
    model: "voyage-4",
    inputType: "document", // Use "document" for indexing, "query" for searching
  });

  console.log(`Model: ${embeddings.model}`);
  console.log(`Dimensions: ${embeddings.dimensions}\n`);

  // Example: Generate embeddings directly
  console.log("Generating embeddings for sample texts...");
  const texts = [
    "VoyageAI provides state-of-the-art embedding models",
    "Vector databases enable semantic search capabilities",
    "The voyage-4 model offers excellent performance",
  ];

  const vectors = await embeddings.embed(texts);
  console.log(`Generated ${vectors.length} embeddings`);
  // Guard the index access so an unexpectedly empty response prints 0 instead
  // of throwing.
  console.log(`First embedding dimensions: ${vectors[0]?.length ?? 0}\n`);

  // Example: Using with LanceDB Vector Store
  console.log("Creating LanceDB vector store with VoyageAI embeddings...");
  const store = await LanceDBVectorStore.create({
    name: "voyage_demo",
    uri: "./data/voyage-example",
    tableName: "documents",
    embeddings,
    metadataFields: [
      { name: "category", type: "string" as const },
      { name: "source", type: "string" as const },
    ],
  });

  // Add documents (embeddings generated automatically)
  console.log("Adding documents to vector store...");
  await store.addDocuments([
    {
      id: "doc1",
      content: "VoyageAI specializes in embedding models for semantic search",
      metadata: { category: "ai", source: "docs" },
    },
    {
      id: "doc2",
      content: "voyage-4 is their latest general-purpose embedding model",
      metadata: { category: "models", source: "docs" },
    },
    {
      id: "doc3",
      content: "voyage-code-3 is optimized for code search and understanding",
      metadata: { category: "models", source: "docs" },
    },
  ]);

  console.log("Documents added successfully!\n");

  // Perform semantic search
  console.log("Performing semantic search...");
  const query = "What is the latest VoyageAI model?";
  const results = await store.search(query, { limit: 2 });

  console.log(`\nQuery: "${query}"`);
  console.log("Results:");
  // Each hit exposes its text and metadata under `document`, alongside
  // `score` - the same shape the other LanceDBVectorStore examples read.
  for (const [index, hit] of results.entries()) {
    console.log(
      `${index + 1}. [Score: ${hit.score.toFixed(4)}] ${hit.document.content}`
    );
    console.log(" Metadata:", hit.document.metadata);
  }

  console.log("\n=== Example Complete ===");
}

// Log the failure and make it visible to the caller through the exit code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});