@agentionai/agents 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/BaseAgent.d.ts +0 -39
- package/dist/agents/BaseAgent.js +1 -100
- package/dist/agents/anthropic/ClaudeAgent.d.ts +2 -0
- package/dist/agents/anthropic/ClaudeAgent.js +4 -8
- package/dist/agents/google/GeminiAgent.d.ts +2 -0
- package/dist/agents/google/GeminiAgent.js +4 -4
- package/dist/agents/mistral/MistralAgent.d.ts +2 -0
- package/dist/agents/mistral/MistralAgent.js +4 -4
- package/dist/agents/openai/OpenAiAgent.d.ts +2 -0
- package/dist/agents/openai/OpenAiAgent.js +4 -4
- package/dist/chunkers/Chunker.d.ts +1 -1
- package/dist/chunkers/Chunker.js +19 -20
- package/dist/chunkers/TokenChunker.d.ts +1 -1
- package/dist/chunkers/TokenChunker.js +2 -3
- package/dist/chunkers/types.d.ts +17 -11
- package/dist/core.d.ts +1 -0
- package/dist/core.js +2 -0
- package/dist/graph/planning/PlanExecutor.d.ts +0 -12
- package/dist/graph/planning/PlanExecutor.js +19 -74
- package/dist/graph/planning/PlanStore.js +2 -6
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2 -0
- package/dist/mcp/MCPClient.d.ts +106 -0
- package/dist/mcp/MCPClient.js +264 -0
- package/dist/mcp/index.d.ts +64 -0
- package/dist/mcp/index.js +67 -0
- package/dist/mcp/types.d.ts +51 -0
- package/dist/mcp/types.js +3 -0
- package/dist/vectorstore/LanceDBVectorStore.d.ts +102 -54
- package/dist/vectorstore/LanceDBVectorStore.js +231 -135
- package/dist/vectorstore/VectorStore.d.ts +2 -2
- package/dist/vectorstore/VectorStore.js +3 -3
- package/package.json +7 -3
|
@@ -44,67 +44,71 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
44
44
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
45
|
exports.LanceDBVectorStore = void 0;
|
|
46
46
|
const VectorStore_1 = require("./VectorStore");
|
|
47
|
+
/**
|
|
48
|
+
* All known ChunkMetadata field names.
|
|
49
|
+
* Used to separate chunk metadata from user metadata when packing/unpacking.
|
|
50
|
+
*/
|
|
51
|
+
const CHUNK_METADATA_KEYS = [
|
|
52
|
+
"index", "total", "prev_id", "next_id",
|
|
53
|
+
"start", "end", "source_id", "source_path",
|
|
54
|
+
"char_count", "token_count", "hash", "section", "page",
|
|
55
|
+
];
|
|
56
|
+
const CHUNK_METADATA_KEY_SET = new Set(CHUNK_METADATA_KEYS);
|
|
47
57
|
/**
|
|
48
58
|
* LanceDB implementation of the VectorStore interface.
|
|
49
59
|
*
|
|
50
|
-
*
|
|
51
|
-
* ```typescript
|
|
52
|
-
* import { LanceDBVectorStore, OpenAIEmbeddings } from "@agentionai/agents";
|
|
60
|
+
* Supports two modes of operation:
|
|
53
61
|
*
|
|
54
|
-
*
|
|
55
|
-
*
|
|
56
|
-
*
|
|
57
|
-
*
|
|
62
|
+
* **Managed mode** (`metadataFields` provided): The store creates the LanceDB
|
|
63
|
+
* table on first insert using an explicit Arrow schema derived from
|
|
64
|
+
* `metadataFields`. User-defined fields are stored as typed top-level columns.
|
|
65
|
+
* Chunk metadata (from chunkers) is automatically packed into a `chunk_metadata`
|
|
66
|
+
* struct column.
|
|
58
67
|
*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
*
|
|
62
|
-
*
|
|
63
|
-
* embeddings,
|
|
64
|
-
* });
|
|
68
|
+
* **Pre-existing table mode** (`metadataFields` omitted): The store connects
|
|
69
|
+
* to a table that was created independently (e.g. via LanceDB CLI or another
|
|
70
|
+
* tool). No schema management is performed; all non-system columns are returned
|
|
71
|
+
* as metadata on read.
|
|
65
72
|
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
70
|
-
* ]);
|
|
71
|
-
*
|
|
72
|
-
* // Search
|
|
73
|
-
* const results = await store.search("What is LanceDB?", { limit: 5 });
|
|
73
|
+
* @example Managed mode — user-defined metadata fields
|
|
74
|
+
* ```typescript
|
|
75
|
+
* import { LanceDBVectorStore } from "@agentionai/agents";
|
|
76
|
+
* import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
|
|
74
77
|
*
|
|
75
|
-
*
|
|
76
|
-
* const searchTool = store.toRetrievalTool("Search the knowledge base");
|
|
77
|
-
* ```
|
|
78
|
+
* const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
|
|
78
79
|
*
|
|
79
|
-
* @example With filterable metadata fields
|
|
80
|
-
* ```typescript
|
|
81
80
|
* const store = await LanceDBVectorStore.create({
|
|
82
81
|
* name: "knowledge_base",
|
|
83
82
|
* uri: "./my-database",
|
|
84
|
-
* tableName: "
|
|
83
|
+
* tableName: "chunks",
|
|
85
84
|
* embeddings,
|
|
86
85
|
* metadataFields: [
|
|
87
|
-
* { name: "
|
|
88
|
-
* { name: "
|
|
89
|
-
* { name: "year", type: "number" },
|
|
90
|
-
* { name: "verified", type: "boolean" },
|
|
91
|
-
* { name: "hash", type: "string" }, // Enables efficient deduplication
|
|
86
|
+
* { name: "author", type: "string", nullable: true },
|
|
87
|
+
* { name: "category", type: "string", nullable: true },
|
|
92
88
|
* ],
|
|
93
89
|
* });
|
|
94
90
|
*
|
|
95
|
-
* //
|
|
91
|
+
* // Chunk metadata (index, hash, prev_id, etc.) is stored automatically
|
|
92
|
+
* // in a chunk_metadata struct column — no need to declare it.
|
|
96
93
|
* await store.addDocuments([
|
|
97
|
-
* {
|
|
98
|
-
* id: "1",
|
|
99
|
-
* content: "LanceDB is a vector database",
|
|
100
|
-
* metadata: { category: "database", source: "docs", year: 2024, verified: true },
|
|
101
|
-
* },
|
|
94
|
+
* { id: "1", content: "LanceDB is a vector database", metadata: { category: "db" } },
|
|
102
95
|
* ]);
|
|
103
96
|
*
|
|
104
|
-
* // Search with filters on metadata columns
|
|
97
|
+
* // Search with filters on user metadata columns
|
|
105
98
|
* const results = await store.search("vector database", {
|
|
106
99
|
* limit: 5,
|
|
107
|
-
* filter: { category: "
|
|
100
|
+
* filter: { category: "db" },
|
|
101
|
+
* });
|
|
102
|
+
* ```
|
|
103
|
+
*
|
|
104
|
+
* @example Pre-existing table mode — connect to externally managed table
|
|
105
|
+
* ```typescript
|
|
106
|
+
* const store = await LanceDBVectorStore.create({
|
|
107
|
+
* name: "my_store",
|
|
108
|
+
* uri: "./my-database",
|
|
109
|
+
* tableName: "existing_table", // table already exists with its own schema
|
|
110
|
+
* embeddings,
|
|
111
|
+
* // metadataFields omitted — schema is not managed by this class
|
|
108
112
|
* });
|
|
109
113
|
* ```
|
|
110
114
|
*/
|
|
@@ -123,15 +127,19 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
123
127
|
/**
|
|
124
128
|
* Create a new LanceDBVectorStore instance.
|
|
125
129
|
*
|
|
126
|
-
*
|
|
130
|
+
* - If the table already exists it is opened immediately.
|
|
131
|
+
* - If `metadataFields` is provided and the table does not exist yet, it
|
|
132
|
+
* will be created on the first insert with an explicit Arrow schema.
|
|
133
|
+
* - If `metadataFields` is **not** provided and the table does not exist,
|
|
134
|
+
* an error is thrown — the store cannot manage an unknown schema.
|
|
127
135
|
*
|
|
128
136
|
* @param config - Configuration for the store
|
|
129
137
|
* @returns A configured LanceDBVectorStore instance
|
|
130
138
|
*
|
|
131
139
|
* @throws Error if @lancedb/lancedb is not installed
|
|
140
|
+
* @throws Error if the table does not exist and no metadataFields are provided
|
|
132
141
|
*/
|
|
133
142
|
static async create(config) {
|
|
134
|
-
// Dynamic import to make lancedb an optional dependency
|
|
135
143
|
let lancedb;
|
|
136
144
|
try {
|
|
137
145
|
lancedb = await Promise.resolve().then(() => __importStar(require("@lancedb/lancedb")));
|
|
@@ -141,55 +149,80 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
141
149
|
}
|
|
142
150
|
const connection = await lancedb.connect(config.uri, config.connectionOptions);
|
|
143
151
|
const tableNames = await connection.tableNames();
|
|
144
|
-
let table;
|
|
145
|
-
const dimensions = config.dimensions ?? config.embeddings?.dimensions ?? 1536;
|
|
152
|
+
let table = null;
|
|
146
153
|
if (tableNames.includes(config.tableName)) {
|
|
147
154
|
table = await connection.openTable(config.tableName);
|
|
148
155
|
}
|
|
149
|
-
else {
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
156
|
+
else if (!config.metadataFields) {
|
|
157
|
+
throw new Error(`Table "${config.tableName}" does not exist and no metadataFields were provided. ` +
|
|
158
|
+
`Either create the table independently or provide metadataFields so the store can create it on first insert.`);
|
|
159
|
+
}
|
|
160
|
+
// Table doesn't exist but metadataFields provided → will be created on first insert.
|
|
161
|
+
return new LanceDBVectorStore(config, connection, table);
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Create the table with an explicit Arrow schema derived from `metadataFields`
|
|
165
|
+
* plus a `chunk_metadata` struct column.
|
|
166
|
+
* Called on the first insert when operating in managed mode.
|
|
167
|
+
*/
|
|
168
|
+
async createManagedTable(records) {
|
|
169
|
+
let arrow;
|
|
170
|
+
try {
|
|
171
|
+
arrow = await Promise.resolve().then(() => __importStar(require("apache-arrow")));
|
|
172
|
+
}
|
|
173
|
+
catch {
|
|
174
|
+
throw new Error("apache-arrow is not installed. Install it with: npm install apache-arrow");
|
|
175
|
+
}
|
|
176
|
+
const schemaFields = [
|
|
177
|
+
new arrow.Field("id", new arrow.Utf8(), false),
|
|
178
|
+
new arrow.Field("text", new arrow.Utf8(), false),
|
|
179
|
+
new arrow.Field("vector", new arrow.FixedSizeList(this.dimensions, new arrow.Field("item", new arrow.Float32(), true)), false),
|
|
180
|
+
];
|
|
181
|
+
// Warn about non-snake_case field names (DataFusion normalizes SQL identifiers to lowercase)
|
|
182
|
+
for (const fieldDef of this.metadataFields) {
|
|
183
|
+
if (fieldDef.name !== fieldDef.name.toLowerCase()) {
|
|
184
|
+
console.warn(`[LanceDBVectorStore] Warning: metadata field "${fieldDef.name}" contains uppercase characters. ` +
|
|
185
|
+
`LanceDB uses DataFusion for SQL filtering, which normalizes unquoted identifiers to lowercase. ` +
|
|
186
|
+
`Use snake_case names (e.g. "${fieldDef.name.replace(/[A-Z]/g, (c) => "_" + c.toLowerCase()).replace(/^_/, "")}") to avoid filter issues.`);
|
|
154
187
|
}
|
|
155
|
-
|
|
156
|
-
|
|
188
|
+
}
|
|
189
|
+
// User-defined metadata columns
|
|
190
|
+
for (const fieldDef of this.metadataFields) {
|
|
191
|
+
const nullable = fieldDef.nullable !== false; // default true
|
|
192
|
+
let arrowType;
|
|
193
|
+
if (fieldDef.type === "number") {
|
|
194
|
+
arrowType = new arrow.Float64();
|
|
157
195
|
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
new arrow.Field("id", new arrow.Utf8(), false),
|
|
161
|
-
new arrow.Field("text", new arrow.Utf8(), false),
|
|
162
|
-
new arrow.Field("vector", new arrow.FixedSizeList(dimensions, new arrow.Field("item", new arrow.Float32(), true)), false),
|
|
163
|
-
];
|
|
164
|
-
// Add metadata fields - either as separate columns or as a JSON string
|
|
165
|
-
if (config.metadataFields && config.metadataFields.length > 0) {
|
|
166
|
-
for (const field of config.metadataFields) {
|
|
167
|
-
const nullable = field.nullable !== false;
|
|
168
|
-
let arrowType;
|
|
169
|
-
switch (field.type) {
|
|
170
|
-
case "string":
|
|
171
|
-
arrowType = new arrow.Utf8();
|
|
172
|
-
break;
|
|
173
|
-
case "number":
|
|
174
|
-
arrowType = new arrow.Float64();
|
|
175
|
-
break;
|
|
176
|
-
case "boolean":
|
|
177
|
-
arrowType = new arrow.Bool();
|
|
178
|
-
break;
|
|
179
|
-
default:
|
|
180
|
-
throw new Error(`Unsupported metadata field type: ${field.type}`);
|
|
181
|
-
}
|
|
182
|
-
schemaFields.push(new arrow.Field(field.name, arrowType, nullable));
|
|
183
|
-
}
|
|
196
|
+
else if (fieldDef.type === "boolean") {
|
|
197
|
+
arrowType = new arrow.Bool();
|
|
184
198
|
}
|
|
185
199
|
else {
|
|
186
|
-
|
|
187
|
-
schemaFields.push(new arrow.Field("metadata", new arrow.Utf8(), true));
|
|
200
|
+
arrowType = new arrow.Utf8();
|
|
188
201
|
}
|
|
189
|
-
|
|
190
|
-
table = await connection.createEmptyTable(config.tableName, schema);
|
|
202
|
+
schemaFields.push(new arrow.Field(fieldDef.name, arrowType, nullable));
|
|
191
203
|
}
|
|
192
|
-
|
|
204
|
+
// Chunk metadata struct column (always included, nullable for non-chunk docs)
|
|
205
|
+
schemaFields.push(new arrow.Field("chunk_metadata", new arrow.Struct([
|
|
206
|
+
new arrow.Field("index", new arrow.Float64(), true),
|
|
207
|
+
new arrow.Field("total", new arrow.Float64(), true),
|
|
208
|
+
new arrow.Field("prev_id", new arrow.Utf8(), true),
|
|
209
|
+
new arrow.Field("next_id", new arrow.Utf8(), true),
|
|
210
|
+
new arrow.Field("start", new arrow.Float64(), true),
|
|
211
|
+
new arrow.Field("end", new arrow.Float64(), true),
|
|
212
|
+
new arrow.Field("source_id", new arrow.Utf8(), true),
|
|
213
|
+
new arrow.Field("source_path", new arrow.Utf8(), true),
|
|
214
|
+
new arrow.Field("char_count", new arrow.Float64(), true),
|
|
215
|
+
new arrow.Field("token_count", new arrow.Float64(), true),
|
|
216
|
+
new arrow.Field("hash", new arrow.Utf8(), true),
|
|
217
|
+
new arrow.Field("section", new arrow.Utf8(), true),
|
|
218
|
+
new arrow.Field("page", new arrow.Float64(), true),
|
|
219
|
+
]), true // nullable — non-chunk documents get null
|
|
220
|
+
));
|
|
221
|
+
const schema = new arrow.Schema(schemaFields);
|
|
222
|
+
this.table = await this.connection.createTable(this.tableName, records, {
|
|
223
|
+
schema,
|
|
224
|
+
});
|
|
225
|
+
return this.table;
|
|
193
226
|
}
|
|
194
227
|
/**
|
|
195
228
|
* Add documents to the vector store.
|
|
@@ -199,10 +232,8 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
199
232
|
if (!this.embeddings) {
|
|
200
233
|
throw new Error("No embeddings provider configured. Use addEmbeddedDocuments() with pre-computed embeddings, or configure an embeddings provider.");
|
|
201
234
|
}
|
|
202
|
-
// Generate embeddings for all documents
|
|
203
235
|
const texts = documents.map((doc) => doc.content);
|
|
204
236
|
const vectors = await this.embeddings.embed(texts);
|
|
205
|
-
// Convert to embedded documents
|
|
206
237
|
const embeddedDocs = documents.map((doc, i) => ({
|
|
207
238
|
...doc,
|
|
208
239
|
embedding: vectors[i],
|
|
@@ -211,6 +242,12 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
211
242
|
}
|
|
212
243
|
/**
|
|
213
244
|
* Add documents with pre-computed embeddings.
|
|
245
|
+
*
|
|
246
|
+
* In managed mode, chunk metadata fields are packed into a `chunk_metadata`
|
|
247
|
+
* struct and user-defined fields are projected to their declared columns.
|
|
248
|
+
* The table is created on the first call; subsequent calls append directly.
|
|
249
|
+
*
|
|
250
|
+
* In pre-existing table mode all metadata is spread flat as-is.
|
|
214
251
|
*/
|
|
215
252
|
async addEmbeddedDocuments(documents, _options) {
|
|
216
253
|
const records = documents.map((doc) => {
|
|
@@ -218,23 +255,69 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
218
255
|
id: doc.id,
|
|
219
256
|
text: doc.content,
|
|
220
257
|
vector: doc.embedding,
|
|
258
|
+
...doc.metadata,
|
|
221
259
|
};
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
else {
|
|
230
|
-
// Legacy: store metadata as JSON string
|
|
231
|
-
record.metadata = doc.metadata ? JSON.stringify(doc.metadata) : undefined;
|
|
260
|
+
// In managed mode, pack chunk metadata into struct and project to schema
|
|
261
|
+
if (this.metadataFields) {
|
|
262
|
+
const packed = this.packChunkMetadata(doc.metadata ?? {});
|
|
263
|
+
record.chunk_metadata = packed;
|
|
264
|
+
return this.projectToSchema(record);
|
|
232
265
|
}
|
|
233
266
|
return record;
|
|
234
267
|
});
|
|
235
|
-
|
|
268
|
+
if (this.table) {
|
|
269
|
+
await this.table.add(records);
|
|
270
|
+
}
|
|
271
|
+
else {
|
|
272
|
+
// Managed mode: metadataFields must be present (enforced in create())
|
|
273
|
+
await this.createManagedTable(records);
|
|
274
|
+
}
|
|
236
275
|
return documents.map((d) => d.id);
|
|
237
276
|
}
|
|
277
|
+
/**
|
|
278
|
+
* Pack chunk metadata fields from flat metadata into a struct object.
|
|
279
|
+
* Returns a plain object for the `chunk_metadata` column, or null if
|
|
280
|
+
* no chunk metadata fields are present.
|
|
281
|
+
*/
|
|
282
|
+
packChunkMetadata(metadata) {
|
|
283
|
+
const struct = {};
|
|
284
|
+
let found = false;
|
|
285
|
+
for (const key of CHUNK_METADATA_KEYS) {
|
|
286
|
+
if (key in metadata) {
|
|
287
|
+
struct[key] = metadata[key] ?? null;
|
|
288
|
+
found = true;
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
return found ? struct : null;
|
|
292
|
+
}
|
|
293
|
+
/**
|
|
294
|
+
* Unpack a chunk_metadata struct value back to flat metadata keys.
|
|
295
|
+
*/
|
|
296
|
+
unpackChunkMetadata(struct, target) {
|
|
297
|
+
for (const key of CHUNK_METADATA_KEYS) {
|
|
298
|
+
const value = struct[key];
|
|
299
|
+
if (value !== null && value !== undefined) {
|
|
300
|
+
target[key] = value;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
/**
|
|
305
|
+
* Project a record to only the columns declared in the schema
|
|
306
|
+
* (id, text, vector, chunk_metadata, plus all metadataFields).
|
|
307
|
+
*/
|
|
308
|
+
projectToSchema(record) {
|
|
309
|
+
const projected = { id: record.id, text: record.text };
|
|
310
|
+
if (record.vector !== undefined) {
|
|
311
|
+
projected.vector = record.vector;
|
|
312
|
+
}
|
|
313
|
+
// User-defined metadata fields
|
|
314
|
+
for (const f of this.metadataFields) {
|
|
315
|
+
projected[f.name] = record[f.name] ?? null;
|
|
316
|
+
}
|
|
317
|
+
// Chunk metadata struct
|
|
318
|
+
projected.chunk_metadata = record.chunk_metadata ?? null;
|
|
319
|
+
return projected;
|
|
320
|
+
}
|
|
238
321
|
/**
|
|
239
322
|
* Search for documents similar to the query.
|
|
240
323
|
*/
|
|
@@ -249,6 +332,9 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
249
332
|
* Search using a pre-computed embedding vector.
|
|
250
333
|
*/
|
|
251
334
|
async searchByVector(embedding, options) {
|
|
335
|
+
if (!this.table) {
|
|
336
|
+
return [];
|
|
337
|
+
}
|
|
252
338
|
const limit = options?.limit ?? 10;
|
|
253
339
|
const scoreThreshold = options?.scoreThreshold;
|
|
254
340
|
let queryBuilder = this.table.vectorSearch(embedding).limit(limit);
|
|
@@ -265,6 +351,8 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
265
351
|
* Delete documents by their IDs.
|
|
266
352
|
*/
|
|
267
353
|
async delete(ids, _options) {
|
|
354
|
+
if (!this.table)
|
|
355
|
+
return 0;
|
|
268
356
|
const idList = ids.map((id) => `'${id}'`).join(", ");
|
|
269
357
|
const filter = `id IN (${idList})`;
|
|
270
358
|
const countBefore = await this.table.countRows();
|
|
@@ -276,12 +364,16 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
276
364
|
* Delete all documents.
|
|
277
365
|
*/
|
|
278
366
|
async clear(_options) {
|
|
367
|
+
if (!this.table)
|
|
368
|
+
return;
|
|
279
369
|
await this.table.delete("id IS NOT NULL");
|
|
280
370
|
}
|
|
281
371
|
/**
|
|
282
372
|
* Get a document by its ID.
|
|
283
373
|
*/
|
|
284
374
|
async getById(id, _options) {
|
|
375
|
+
if (!this.table)
|
|
376
|
+
return null;
|
|
285
377
|
const results = await this.table
|
|
286
378
|
.query()
|
|
287
379
|
.where(`id = '${id}'`)
|
|
@@ -301,34 +393,21 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
301
393
|
* Get existing documents by their content hashes.
|
|
302
394
|
* Used for deduplication during ingestion.
|
|
303
395
|
*
|
|
304
|
-
*
|
|
305
|
-
*
|
|
396
|
+
* Requires that documents were stored with chunk metadata containing
|
|
397
|
+
* a `hash` field (automatically present when using chunkers from this library).
|
|
398
|
+
* Queries the `chunk_metadata.hash` struct sub-field.
|
|
306
399
|
*/
|
|
307
400
|
async getByHashes(hashes, _options) {
|
|
308
401
|
const hashMap = new Map();
|
|
309
|
-
if (hashes.length === 0) {
|
|
402
|
+
if (hashes.length === 0 || !this.table) {
|
|
310
403
|
return hashMap;
|
|
311
404
|
}
|
|
312
|
-
// Check if hash is a defined metadata field for efficient queries
|
|
313
|
-
const hasHashField = this.metadataFields?.some((field) => field.name === "hash");
|
|
314
405
|
for (const hash of hashes) {
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
.where(`hash = '${hash}'`)
|
|
321
|
-
.limit(1)
|
|
322
|
-
.toArray();
|
|
323
|
-
}
|
|
324
|
-
else {
|
|
325
|
-
// Legacy: search for hash string in JSON metadata
|
|
326
|
-
results = await this.table
|
|
327
|
-
.query()
|
|
328
|
-
.where(`metadata LIKE '%${hash}%'`)
|
|
329
|
-
.limit(1)
|
|
330
|
-
.toArray();
|
|
331
|
-
}
|
|
406
|
+
const results = await this.table
|
|
407
|
+
.query()
|
|
408
|
+
.where(`chunk_metadata.hash = '${hash}'`)
|
|
409
|
+
.limit(1)
|
|
410
|
+
.toArray();
|
|
332
411
|
if (results.length > 0) {
|
|
333
412
|
const record = results[0];
|
|
334
413
|
hashMap.set(hash, record.id);
|
|
@@ -343,7 +422,7 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
343
422
|
return this.connection;
|
|
344
423
|
}
|
|
345
424
|
/**
|
|
346
|
-
* Get the underlying LanceDB table.
|
|
425
|
+
* Get the underlying LanceDB table, or null if no data has been inserted yet.
|
|
347
426
|
*/
|
|
348
427
|
getTable() {
|
|
349
428
|
return this.table;
|
|
@@ -365,14 +444,24 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
365
444
|
* Recommended for tables with more than 10,000 rows.
|
|
366
445
|
*/
|
|
367
446
|
async createIndex() {
|
|
447
|
+
if (!this.table)
|
|
448
|
+
throw new Error("Table not yet created — insert data first.");
|
|
368
449
|
await this.table.createIndex("vector");
|
|
369
450
|
}
|
|
370
451
|
/**
|
|
371
452
|
* Optimize the table for better performance.
|
|
372
453
|
*/
|
|
373
454
|
async optimize() {
|
|
455
|
+
if (!this.table)
|
|
456
|
+
return;
|
|
374
457
|
await this.table.optimize();
|
|
375
458
|
}
|
|
459
|
+
/**
|
|
460
|
+
* Get the configured metadata fields.
|
|
461
|
+
*/
|
|
462
|
+
getMetadataFields() {
|
|
463
|
+
return this.metadataFields;
|
|
464
|
+
}
|
|
376
465
|
/**
|
|
377
466
|
* Build a SQL filter string from a filter object.
|
|
378
467
|
*/
|
|
@@ -397,19 +486,16 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
397
486
|
processResults(results, scoreThreshold) {
|
|
398
487
|
const searchResults = [];
|
|
399
488
|
for (const row of results) {
|
|
400
|
-
// LanceDB returns _distance for vector search
|
|
401
489
|
const distance = row._distance ?? 0;
|
|
402
|
-
// Convert distance to similarity score (lower distance = higher similarity)
|
|
403
490
|
const score = 1 / (1 + distance);
|
|
404
491
|
if (scoreThreshold !== undefined && score < scoreThreshold) {
|
|
405
492
|
continue;
|
|
406
493
|
}
|
|
407
|
-
const metadata = this.extractMetadata(row);
|
|
408
494
|
searchResults.push({
|
|
409
495
|
document: {
|
|
410
496
|
id: row.id,
|
|
411
497
|
content: row.text,
|
|
412
|
-
metadata,
|
|
498
|
+
metadata: this.extractMetadata(row),
|
|
413
499
|
},
|
|
414
500
|
score,
|
|
415
501
|
});
|
|
@@ -417,13 +503,18 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
417
503
|
return searchResults;
|
|
418
504
|
}
|
|
419
505
|
/**
|
|
420
|
-
* Extract metadata from a row
|
|
506
|
+
* Extract metadata from a row.
|
|
507
|
+
*
|
|
508
|
+
* In managed mode: returns user-defined fields plus unpacked chunk_metadata.
|
|
509
|
+
* In pre-existing table mode: returns all non-system columns, with
|
|
510
|
+
* chunk_metadata unpacked if present.
|
|
421
511
|
*/
|
|
422
512
|
extractMetadata(row) {
|
|
513
|
+
const SYSTEM_COLS = new Set(["id", "text", "vector", "_distance", "chunk_metadata"]);
|
|
514
|
+
const metadata = {};
|
|
515
|
+
let hasValue = false;
|
|
423
516
|
if (this.metadataFields && this.metadataFields.length > 0) {
|
|
424
|
-
//
|
|
425
|
-
const metadata = {};
|
|
426
|
-
let hasValue = false;
|
|
517
|
+
// Managed mode: collect declared user fields
|
|
427
518
|
for (const field of this.metadataFields) {
|
|
428
519
|
const value = row[field.name];
|
|
429
520
|
if (value !== null && value !== undefined) {
|
|
@@ -431,18 +522,23 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
431
522
|
hasValue = true;
|
|
432
523
|
}
|
|
433
524
|
}
|
|
434
|
-
return hasValue ? metadata : undefined;
|
|
435
525
|
}
|
|
436
526
|
else {
|
|
437
|
-
//
|
|
438
|
-
|
|
527
|
+
// Pre-existing table mode: return all non-system columns
|
|
528
|
+
for (const [key, value] of Object.entries(row)) {
|
|
529
|
+
if (!SYSTEM_COLS.has(key) && value !== null && value !== undefined) {
|
|
530
|
+
metadata[key] = value;
|
|
531
|
+
hasValue = true;
|
|
532
|
+
}
|
|
533
|
+
}
|
|
439
534
|
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
535
|
+
// Unpack chunk_metadata struct if present
|
|
536
|
+
const chunkStruct = row.chunk_metadata;
|
|
537
|
+
if (chunkStruct && typeof chunkStruct === "object") {
|
|
538
|
+
this.unpackChunkMetadata(chunkStruct, metadata);
|
|
539
|
+
hasValue = true;
|
|
540
|
+
}
|
|
541
|
+
return hasValue ? metadata : undefined;
|
|
446
542
|
}
|
|
447
543
|
}
|
|
448
544
|
exports.LanceDBVectorStore = LanceDBVectorStore;
|
|
@@ -235,7 +235,7 @@ export declare abstract class VectorStore {
|
|
|
235
235
|
}>;
|
|
236
236
|
/**
|
|
237
237
|
* Create a tool that agents can use to retrieve a chunk by its ID.
|
|
238
|
-
* Useful for navigating chunk chains using
|
|
238
|
+
* Useful for navigating chunk chains using prev_id/next_id metadata.
|
|
239
239
|
*
|
|
240
240
|
* @param description - Description of what the tool does (e.g., "Get a specific chunk by ID to read adjacent context")
|
|
241
241
|
* @param options - Configuration options for the tool
|
|
@@ -245,7 +245,7 @@ export declare abstract class VectorStore {
|
|
|
245
245
|
* ```typescript
|
|
246
246
|
* const store = new LanceDBVectorStore({ ... });
|
|
247
247
|
* const tool = store.toGetChunkByIdTool(
|
|
248
|
-
* "Retrieve a specific chunk by ID. Use
|
|
248
|
+
* "Retrieve a specific chunk by ID. Use prev_id or next_id from search results to get surrounding context."
|
|
249
249
|
* );
|
|
250
250
|
* agent.addTools([tool]);
|
|
251
251
|
* ```
|
|
@@ -165,7 +165,7 @@ class VectorStore {
|
|
|
165
165
|
}
|
|
166
166
|
/**
|
|
167
167
|
* Create a tool that agents can use to retrieve a chunk by its ID.
|
|
168
|
-
* Useful for navigating chunk chains using
|
|
168
|
+
* Useful for navigating chunk chains using prev_id/next_id metadata.
|
|
169
169
|
*
|
|
170
170
|
* @param description - Description of what the tool does (e.g., "Get a specific chunk by ID to read adjacent context")
|
|
171
171
|
* @param options - Configuration options for the tool
|
|
@@ -175,7 +175,7 @@ class VectorStore {
|
|
|
175
175
|
* ```typescript
|
|
176
176
|
* const store = new LanceDBVectorStore({ ... });
|
|
177
177
|
* const tool = store.toGetChunkByIdTool(
|
|
178
|
-
* "Retrieve a specific chunk by ID. Use
|
|
178
|
+
* "Retrieve a specific chunk by ID. Use prev_id or next_id from search results to get surrounding context."
|
|
179
179
|
* );
|
|
180
180
|
* agent.addTools([tool]);
|
|
181
181
|
* ```
|
|
@@ -187,7 +187,7 @@ class VectorStore {
|
|
|
187
187
|
properties: {
|
|
188
188
|
id: {
|
|
189
189
|
type: "string",
|
|
190
|
-
description: "The chunk ID to retrieve (e.g., from
|
|
190
|
+
description: "The chunk ID to retrieve (e.g., from prev_id or next_id metadata)",
|
|
191
191
|
},
|
|
192
192
|
},
|
|
193
193
|
required: ["id"],
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentionai/agents",
|
|
3
3
|
"author": "Laurent Zuijdwijk",
|
|
4
|
-
"version": "0.6.1",
|
|
4
|
+
"version": "0.8.0",
|
|
5
5
|
"description": "Agent Library",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -54,8 +54,8 @@
|
|
|
54
54
|
"lint:fix": "eslint 'src/**/*.{js,ts}' --fix",
|
|
55
55
|
"format": "prettier --write 'src/**/*.{js,ts,json,md}'",
|
|
56
56
|
"prepare": "npm run build",
|
|
57
|
-
"example": "
|
|
58
|
-
"example:watch": "nodemon --watch examples --watch src --ext ts --exec 'ts-node' examples/index.ts",
|
|
57
|
+
"example": "tsx",
|
|
58
|
+
"example:watch": "nodemon --watch examples --watch src --ext ts --exec 'ts-node --project tsconfig.esm.json' examples/index.ts",
|
|
59
59
|
"docs": "npm run docs:api && npm run docs:site",
|
|
60
60
|
"docs:api": "typedoc",
|
|
61
61
|
"docs:site": "vitepress build docs",
|
|
@@ -110,6 +110,7 @@
|
|
|
110
110
|
"@google/generative-ai": "^0.24.1",
|
|
111
111
|
"@lancedb/lancedb": "^0.23.0",
|
|
112
112
|
"@mistralai/mistralai": "^1.13.0",
|
|
113
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
113
114
|
"apache-arrow": "^18.0.0",
|
|
114
115
|
"openai": "^6.16.0",
|
|
115
116
|
"voyageai": "^0.0.3"
|
|
@@ -130,6 +131,9 @@
|
|
|
130
131
|
"@anthropic-ai/sdk": {
|
|
131
132
|
"optional": true
|
|
132
133
|
},
|
|
134
|
+
"@modelcontextprotocol/sdk": {
|
|
135
|
+
"optional": true
|
|
136
|
+
},
|
|
133
137
|
"openai": {
|
|
134
138
|
"optional": true
|
|
135
139
|
},
|