@agentionai/agents 0.7.0 → 0.8.1-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunkers/Chunker.d.ts +1 -1
- package/dist/chunkers/Chunker.js +19 -20
- package/dist/chunkers/TokenChunker.d.ts +1 -1
- package/dist/chunkers/TokenChunker.js +2 -3
- package/dist/chunkers/types.d.ts +17 -11
- package/dist/claude.d.ts +1 -6
- package/dist/claude.js +1 -7
- package/dist/core.d.ts +1 -0
- package/dist/core.js +2 -0
- package/dist/gemini.d.ts +1 -6
- package/dist/gemini.js +1 -7
- package/dist/index.d.ts +3 -0
- package/dist/index.js +3 -0
- package/dist/mistral.d.ts +1 -6
- package/dist/mistral.js +1 -7
- package/dist/openai.d.ts +1 -6
- package/dist/openai.js +1 -7
- package/dist/vectorstore/LanceDBVectorStore.d.ts +102 -54
- package/dist/vectorstore/LanceDBVectorStore.js +231 -135
- package/dist/vectorstore/VectorStore.d.ts +2 -2
- package/dist/vectorstore/VectorStore.js +3 -3
- package/package.json +23 -3
|
@@ -29,7 +29,7 @@ export declare abstract class Chunker {
|
|
|
29
29
|
*/
|
|
30
30
|
protected computeHash(content: string): string;
|
|
31
31
|
/**
|
|
32
|
-
* Link chunks with
|
|
32
|
+
* Link chunks with prev_id and next_id.
|
|
33
33
|
*/
|
|
34
34
|
protected linkChunks(chunks: Chunk[]): void;
|
|
35
35
|
/**
|
package/dist/chunkers/Chunker.js
CHANGED
|
@@ -53,17 +53,17 @@ class Chunker {
|
|
|
53
53
|
}
|
|
54
54
|
const id = this.generateId(content, i, options?.sourceId);
|
|
55
55
|
const metadata = {
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
startOffset,
|
|
61
|
-
endOffset,
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
56
|
+
index: i,
|
|
57
|
+
total: splits.length,
|
|
58
|
+
prev_id: null, // Will be linked after
|
|
59
|
+
next_id: null, // Will be linked after
|
|
60
|
+
start: startOffset,
|
|
61
|
+
end: endOffset,
|
|
62
|
+
source_id: options?.sourceId,
|
|
63
|
+
source_path: options?.sourcePath,
|
|
64
|
+
char_count: content.length,
|
|
65
65
|
hash: this.computeHash(content),
|
|
66
|
-
|
|
66
|
+
section: currentSection,
|
|
67
67
|
...options?.metadata,
|
|
68
68
|
};
|
|
69
69
|
chunks.push({ id, content, metadata });
|
|
@@ -71,9 +71,9 @@ class Chunker {
|
|
|
71
71
|
}
|
|
72
72
|
// Link chunks together
|
|
73
73
|
this.linkChunks(chunks);
|
|
74
|
-
// Update
|
|
74
|
+
// Update total now that we know the final count
|
|
75
75
|
for (const chunk of chunks) {
|
|
76
|
-
chunk.metadata.
|
|
76
|
+
chunk.metadata.total = chunks.length;
|
|
77
77
|
}
|
|
78
78
|
// Apply processor if provided
|
|
79
79
|
if (this.config.chunkProcessor) {
|
|
@@ -100,15 +100,15 @@ class Chunker {
|
|
|
100
100
|
return (0, crypto_1.createHash)("sha256").update(content).digest("hex");
|
|
101
101
|
}
|
|
102
102
|
/**
|
|
103
|
-
* Link chunks with
|
|
103
|
+
* Link chunks with prev_id and next_id.
|
|
104
104
|
*/
|
|
105
105
|
linkChunks(chunks) {
|
|
106
106
|
for (let i = 0; i < chunks.length; i++) {
|
|
107
107
|
if (i > 0) {
|
|
108
|
-
chunks[i].metadata.
|
|
108
|
+
chunks[i].metadata.prev_id = chunks[i - 1].id;
|
|
109
109
|
}
|
|
110
110
|
if (i < chunks.length - 1) {
|
|
111
|
-
chunks[i].metadata.
|
|
111
|
+
chunks[i].metadata.next_id = chunks[i + 1].id;
|
|
112
112
|
}
|
|
113
113
|
}
|
|
114
114
|
}
|
|
@@ -128,11 +128,10 @@ class Chunker {
|
|
|
128
128
|
}
|
|
129
129
|
// Re-link after filtering and update indices
|
|
130
130
|
for (let i = 0; i < processed.length; i++) {
|
|
131
|
-
processed[i].metadata.
|
|
132
|
-
processed[i].metadata.
|
|
133
|
-
processed[i].metadata.
|
|
134
|
-
|
|
135
|
-
processed[i].metadata.nextChunkId =
|
|
131
|
+
processed[i].metadata.index = i;
|
|
132
|
+
processed[i].metadata.total = processed.length;
|
|
133
|
+
processed[i].metadata.prev_id = i > 0 ? processed[i - 1].id : null;
|
|
134
|
+
processed[i].metadata.next_id =
|
|
136
135
|
i < processed.length - 1 ? processed[i + 1].id : null;
|
|
137
136
|
}
|
|
138
137
|
return processed;
|
|
@@ -26,7 +26,7 @@ export declare function resetTokenxCache(): void;
|
|
|
26
26
|
* });
|
|
27
27
|
*
|
|
28
28
|
* const chunks = await chunker.chunk(longDocument);
|
|
29
|
-
* // Each chunk.metadata.
|
|
29
|
+
* // Each chunk.metadata.token_count contains estimated tokens
|
|
30
30
|
* ```
|
|
31
31
|
*/
|
|
32
32
|
export declare class TokenChunker extends Chunker {
|
|
@@ -46,7 +46,6 @@ let tokenxModule = null;
|
|
|
46
46
|
*/
|
|
47
47
|
async function loadTokenx() {
|
|
48
48
|
if (!tokenxModule) {
|
|
49
|
-
// Use dynamic import for ESM module
|
|
50
49
|
tokenxModule = await Promise.resolve().then(() => __importStar(require("tokenx")));
|
|
51
50
|
}
|
|
52
51
|
return tokenxModule;
|
|
@@ -73,7 +72,7 @@ function resetTokenxCache() {
|
|
|
73
72
|
* });
|
|
74
73
|
*
|
|
75
74
|
* const chunks = await chunker.chunk(longDocument);
|
|
76
|
-
* // Each chunk.metadata.
|
|
75
|
+
* // Each chunk.metadata.token_count contains estimated tokens
|
|
77
76
|
* ```
|
|
78
77
|
*/
|
|
79
78
|
class TokenChunker extends Chunker_1.Chunker {
|
|
@@ -160,7 +159,7 @@ class TokenChunker extends Chunker_1.Chunker {
|
|
|
160
159
|
const { estimateTokenCount } = tokenx;
|
|
161
160
|
// Add token count to each chunk's metadata
|
|
162
161
|
for (const chunk of chunks) {
|
|
163
|
-
chunk.metadata.
|
|
162
|
+
chunk.metadata.token_count = estimateTokenCount(chunk.content);
|
|
164
163
|
}
|
|
165
164
|
return chunks;
|
|
166
165
|
}
|
package/dist/chunkers/types.d.ts
CHANGED
|
@@ -11,32 +11,38 @@ export interface Chunk {
|
|
|
11
11
|
}
|
|
12
12
|
/**
|
|
13
13
|
* Metadata associated with each chunk.
|
|
14
|
+
*
|
|
15
|
+
* When stored in LanceDB via `LanceDBVectorStore`, these fields are
|
|
16
|
+
* automatically packed into a `chunk_metadata` struct column — they do
|
|
17
|
+
* not need to be declared in `metadataFields`.
|
|
14
18
|
*/
|
|
15
19
|
export interface ChunkMetadata {
|
|
16
20
|
/** Zero-based index of this chunk in the sequence */
|
|
17
|
-
|
|
21
|
+
index: number;
|
|
18
22
|
/** Total number of chunks in the sequence */
|
|
19
|
-
|
|
23
|
+
total: number;
|
|
20
24
|
/** ID of the previous chunk, or null if first */
|
|
21
|
-
|
|
25
|
+
prev_id: string | null;
|
|
22
26
|
/** ID of the next chunk, or null if last */
|
|
23
|
-
|
|
27
|
+
next_id: string | null;
|
|
24
28
|
/** Character offset where this chunk starts in the source text */
|
|
25
|
-
|
|
29
|
+
start: number;
|
|
26
30
|
/** Character offset where this chunk ends in the source text */
|
|
27
|
-
|
|
31
|
+
end: number;
|
|
28
32
|
/** Optional identifier for the source document */
|
|
29
|
-
|
|
33
|
+
source_id?: string;
|
|
30
34
|
/** Optional path to the source file */
|
|
31
|
-
|
|
35
|
+
source_path?: string;
|
|
32
36
|
/** Number of characters in the chunk content */
|
|
33
|
-
|
|
37
|
+
char_count: number;
|
|
34
38
|
/** Estimated number of tokens (when available) */
|
|
35
|
-
|
|
39
|
+
token_count?: number;
|
|
36
40
|
/** SHA-256 hash of the content for deduplication */
|
|
37
41
|
hash: string;
|
|
38
42
|
/** Section title if detected (e.g., markdown headers) */
|
|
39
|
-
|
|
43
|
+
section?: string;
|
|
44
|
+
/** Page number in the source document (e.g., PDF page) */
|
|
45
|
+
page?: number;
|
|
40
46
|
[key: string]: unknown;
|
|
41
47
|
}
|
|
42
48
|
/**
|
package/dist/claude.d.ts
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
export * from "./
|
|
1
|
+
export * from "./core";
|
|
2
2
|
export * from "./agents/anthropic/ClaudeAgent";
|
|
3
|
-
export * from "./agents/model-types";
|
|
4
3
|
export { anthropicTransformer } from "./history/transformers";
|
|
5
|
-
export * from "./history/History";
|
|
6
|
-
export * from "./history/types";
|
|
7
|
-
export * from "./tools/Tool";
|
|
8
|
-
export * from "./graph/AgentGraph";
|
|
9
4
|
//# sourceMappingURL=claude.d.ts.map
|
package/dist/claude.js
CHANGED
|
@@ -16,14 +16,8 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
exports.anthropicTransformer = void 0;
|
|
18
18
|
// Claude Agent Entry Point
|
|
19
|
-
__exportStar(require("./
|
|
19
|
+
__exportStar(require("./core"), exports);
|
|
20
20
|
__exportStar(require("./agents/anthropic/ClaudeAgent"), exports);
|
|
21
|
-
__exportStar(require("./agents/model-types"), exports);
|
|
22
21
|
var transformers_1 = require("./history/transformers");
|
|
23
22
|
Object.defineProperty(exports, "anthropicTransformer", { enumerable: true, get: function () { return transformers_1.anthropicTransformer; } });
|
|
24
|
-
// Re-export core functionality
|
|
25
|
-
__exportStar(require("./history/History"), exports);
|
|
26
|
-
__exportStar(require("./history/types"), exports);
|
|
27
|
-
__exportStar(require("./tools/Tool"), exports);
|
|
28
|
-
__exportStar(require("./graph/AgentGraph"), exports);
|
|
29
23
|
//# sourceMappingURL=claude.js.map
|
package/dist/core.d.ts
CHANGED
package/dist/core.js
CHANGED
|
@@ -33,6 +33,8 @@ __exportStar(require("./tools/Tool"), exports);
|
|
|
33
33
|
__exportStar(require("./mcp"), exports);
|
|
34
34
|
// Visualization
|
|
35
35
|
__exportStar(require("./viz"), exports);
|
|
36
|
+
// Embeddings
|
|
37
|
+
__exportStar(require("./embeddings"), exports);
|
|
36
38
|
// Vector Store
|
|
37
39
|
__exportStar(require("./vectorstore"), exports);
|
|
38
40
|
// Chunkers
|
package/dist/gemini.d.ts
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
export * from "./
|
|
1
|
+
export * from "./core";
|
|
2
2
|
export { GeminiAgent } from "./agents/google/GeminiAgent";
|
|
3
|
-
export * from "./agents/model-types";
|
|
4
3
|
export { geminiTransformer } from "./history/transformers";
|
|
5
|
-
export * from "./history/History";
|
|
6
|
-
export * from "./history/types";
|
|
7
|
-
export * from "./tools/Tool";
|
|
8
|
-
export * from "./graph/AgentGraph";
|
|
9
4
|
//# sourceMappingURL=gemini.d.ts.map
|
package/dist/gemini.js
CHANGED
|
@@ -16,15 +16,9 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
exports.geminiTransformer = exports.GeminiAgent = void 0;
|
|
18
18
|
// Gemini Agent Entry Point
|
|
19
|
-
__exportStar(require("./
|
|
19
|
+
__exportStar(require("./core"), exports);
|
|
20
20
|
var GeminiAgent_1 = require("./agents/google/GeminiAgent");
|
|
21
21
|
Object.defineProperty(exports, "GeminiAgent", { enumerable: true, get: function () { return GeminiAgent_1.GeminiAgent; } });
|
|
22
|
-
__exportStar(require("./agents/model-types"), exports);
|
|
23
22
|
var transformers_1 = require("./history/transformers");
|
|
24
23
|
Object.defineProperty(exports, "geminiTransformer", { enumerable: true, get: function () { return transformers_1.geminiTransformer; } });
|
|
25
|
-
// Re-export core functionality
|
|
26
|
-
__exportStar(require("./history/History"), exports);
|
|
27
|
-
__exportStar(require("./history/types"), exports);
|
|
28
|
-
__exportStar(require("./tools/Tool"), exports);
|
|
29
|
-
__exportStar(require("./graph/AgentGraph"), exports);
|
|
30
24
|
//# sourceMappingURL=gemini.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -4,6 +4,9 @@ export { OpenAiAgent } from "./agents/openai/OpenAiAgent";
|
|
|
4
4
|
export { MistralAgent } from "./agents/mistral/MistralAgent";
|
|
5
5
|
export { GeminiAgent } from "./agents/google/GeminiAgent";
|
|
6
6
|
export * from "./agents/model-types";
|
|
7
|
+
export * from "./agents/AgentConfig";
|
|
8
|
+
export * from "./agents/AgentEvent";
|
|
9
|
+
export * from "./agents/errors/AgentError";
|
|
7
10
|
export * from "./history/History";
|
|
8
11
|
export * from "./history/types";
|
|
9
12
|
export { anthropicTransformer, openAiTransformer, mistralTransformer, geminiTransformer, } from "./history/transformers";
|
package/dist/index.js
CHANGED
|
@@ -33,6 +33,9 @@ Object.defineProperty(exports, "MistralAgent", { enumerable: true, get: function
|
|
|
33
33
|
var GeminiAgent_1 = require("./agents/google/GeminiAgent");
|
|
34
34
|
Object.defineProperty(exports, "GeminiAgent", { enumerable: true, get: function () { return GeminiAgent_1.GeminiAgent; } });
|
|
35
35
|
__exportStar(require("./agents/model-types"), exports);
|
|
36
|
+
__exportStar(require("./agents/AgentConfig"), exports);
|
|
37
|
+
__exportStar(require("./agents/AgentEvent"), exports);
|
|
38
|
+
__exportStar(require("./agents/errors/AgentError"), exports);
|
|
36
39
|
// History
|
|
37
40
|
__exportStar(require("./history/History"), exports);
|
|
38
41
|
__exportStar(require("./history/types"), exports);
|
package/dist/mistral.d.ts
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
export * from "./
|
|
1
|
+
export * from "./core";
|
|
2
2
|
export { MistralAgent } from "./agents/mistral/MistralAgent";
|
|
3
|
-
export * from "./agents/model-types";
|
|
4
3
|
export { mistralTransformer } from "./history/transformers";
|
|
5
|
-
export * from "./history/History";
|
|
6
|
-
export * from "./history/types";
|
|
7
|
-
export * from "./tools/Tool";
|
|
8
|
-
export * from "./graph/AgentGraph";
|
|
9
4
|
//# sourceMappingURL=mistral.d.ts.map
|
package/dist/mistral.js
CHANGED
|
@@ -16,15 +16,9 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
exports.mistralTransformer = exports.MistralAgent = void 0;
|
|
18
18
|
// Mistral Agent Entry Point
|
|
19
|
-
__exportStar(require("./
|
|
19
|
+
__exportStar(require("./core"), exports);
|
|
20
20
|
var MistralAgent_1 = require("./agents/mistral/MistralAgent");
|
|
21
21
|
Object.defineProperty(exports, "MistralAgent", { enumerable: true, get: function () { return MistralAgent_1.MistralAgent; } });
|
|
22
|
-
__exportStar(require("./agents/model-types"), exports);
|
|
23
22
|
var transformers_1 = require("./history/transformers");
|
|
24
23
|
Object.defineProperty(exports, "mistralTransformer", { enumerable: true, get: function () { return transformers_1.mistralTransformer; } });
|
|
25
|
-
// Re-export core functionality
|
|
26
|
-
__exportStar(require("./history/History"), exports);
|
|
27
|
-
__exportStar(require("./history/types"), exports);
|
|
28
|
-
__exportStar(require("./tools/Tool"), exports);
|
|
29
|
-
__exportStar(require("./graph/AgentGraph"), exports);
|
|
30
24
|
//# sourceMappingURL=mistral.js.map
|
package/dist/openai.d.ts
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
|
-
export * from "./
|
|
1
|
+
export * from "./core";
|
|
2
2
|
export { OpenAiAgent } from "./agents/openai/OpenAiAgent";
|
|
3
|
-
export * from "./agents/model-types";
|
|
4
3
|
export { openAiTransformer } from "./history/transformers";
|
|
5
|
-
export * from "./history/History";
|
|
6
|
-
export * from "./history/types";
|
|
7
|
-
export * from "./tools/Tool";
|
|
8
|
-
export * from "./graph/AgentGraph";
|
|
9
4
|
//# sourceMappingURL=openai.d.ts.map
|
package/dist/openai.js
CHANGED
|
@@ -16,15 +16,9 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
exports.openAiTransformer = exports.OpenAiAgent = void 0;
|
|
18
18
|
// OpenAI Agent Entry Point
|
|
19
|
-
__exportStar(require("./
|
|
19
|
+
__exportStar(require("./core"), exports);
|
|
20
20
|
var OpenAiAgent_1 = require("./agents/openai/OpenAiAgent");
|
|
21
21
|
Object.defineProperty(exports, "OpenAiAgent", { enumerable: true, get: function () { return OpenAiAgent_1.OpenAiAgent; } });
|
|
22
|
-
__exportStar(require("./agents/model-types"), exports);
|
|
23
22
|
var transformers_1 = require("./history/transformers");
|
|
24
23
|
Object.defineProperty(exports, "openAiTransformer", { enumerable: true, get: function () { return transformers_1.openAiTransformer; } });
|
|
25
|
-
// Re-export core functionality
|
|
26
|
-
__exportStar(require("./history/History"), exports);
|
|
27
|
-
__exportStar(require("./history/types"), exports);
|
|
28
|
-
__exportStar(require("./tools/Tool"), exports);
|
|
29
|
-
__exportStar(require("./graph/AgentGraph"), exports);
|
|
30
24
|
//# sourceMappingURL=openai.js.map
|
|
@@ -18,7 +18,7 @@ export type MetadataFieldType = "string" | "number" | "boolean";
|
|
|
18
18
|
* Definition for a metadata field that will be stored as a separate column.
|
|
19
19
|
*/
|
|
20
20
|
export interface MetadataFieldDefinition {
|
|
21
|
-
/** Name of the metadata field */
|
|
21
|
+
/** Name of the metadata field. Use snake_case (e.g. `tenant_id`) to avoid SQL filter issues. */
|
|
22
22
|
name: string;
|
|
23
23
|
/** Data type for the field */
|
|
24
24
|
type: MetadataFieldType;
|
|
@@ -42,73 +42,84 @@ export interface LanceDBVectorStoreConfig {
|
|
|
42
42
|
/** Additional connection options */
|
|
43
43
|
connectionOptions?: Partial<ConnectionOptions>;
|
|
44
44
|
/**
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
*
|
|
45
|
+
* User-defined metadata field definitions.
|
|
46
|
+
*
|
|
47
|
+
* When provided, these fields are stored as typed Arrow columns and are
|
|
48
|
+
* filterable via SQL predicates in `search()`. The table is created on
|
|
49
|
+
* first insert using an explicit Arrow schema built from these definitions.
|
|
50
|
+
*
|
|
51
|
+
* **Important:** Use `snake_case` for field names (e.g. `tenant_id`, not
|
|
52
|
+
* `tenantId`). LanceDB uses DataFusion for SQL filtering, which normalizes
|
|
53
|
+
* unquoted identifiers to lowercase. Mixed-case names like `tenantId` will
|
|
54
|
+
* fail to match the column `tenantId` because the filter resolves to
|
|
55
|
+
* `tenantid`.
|
|
56
|
+
*
|
|
57
|
+
* Chunk metadata fields (index, hash, prev_id, etc.) are handled
|
|
58
|
+
* automatically via a `chunk_metadata` struct column — they do not need
|
|
59
|
+
* to be listed here.
|
|
60
|
+
*
|
|
61
|
+
* When omitted, the store connects to a **pre-existing** table (created
|
|
62
|
+
* independently, e.g. via the LanceDB CLI or another tool). In that case
|
|
63
|
+
* the schema is not managed by this class and all non-system columns are
|
|
64
|
+
* returned as metadata on read.
|
|
48
65
|
*/
|
|
49
66
|
metadataFields?: MetadataFieldDefinition[];
|
|
50
67
|
}
|
|
51
68
|
/**
|
|
52
69
|
* LanceDB implementation of the VectorStore interface.
|
|
53
70
|
*
|
|
54
|
-
*
|
|
55
|
-
* ```typescript
|
|
56
|
-
* import { LanceDBVectorStore, OpenAIEmbeddings } from "@agentionai/agents";
|
|
71
|
+
* Supports two modes of operation:
|
|
57
72
|
*
|
|
58
|
-
*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
*
|
|
62
|
-
*
|
|
63
|
-
* const store = await LanceDBVectorStore.create({
|
|
64
|
-
* name: "knowledge_base",
|
|
65
|
-
* uri: "./my-database",
|
|
66
|
-
* tableName: "documents",
|
|
67
|
-
* embeddings,
|
|
68
|
-
* });
|
|
73
|
+
* **Managed mode** (`metadataFields` provided): The store creates the LanceDB
|
|
74
|
+
* table on first insert using an explicit Arrow schema derived from
|
|
75
|
+
* `metadataFields`. User-defined fields are stored as typed top-level columns.
|
|
76
|
+
* Chunk metadata (from chunkers) is automatically packed into a `chunk_metadata`
|
|
77
|
+
* struct column.
|
|
69
78
|
*
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
73
|
-
*
|
|
74
|
-
* ]);
|
|
79
|
+
* **Pre-existing table mode** (`metadataFields` omitted): The store connects
|
|
80
|
+
* to a table that was created independently (e.g. via LanceDB CLI or another
|
|
81
|
+
* tool). No schema management is performed; all non-system columns are returned
|
|
82
|
+
* as metadata on read.
|
|
75
83
|
*
|
|
76
|
-
*
|
|
77
|
-
*
|
|
84
|
+
* @example Managed mode — user-defined metadata fields
|
|
85
|
+
* ```typescript
|
|
86
|
+
* import { LanceDBVectorStore } from "@agentionai/agents";
|
|
87
|
+
* import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
|
|
78
88
|
*
|
|
79
|
-
*
|
|
80
|
-
* const searchTool = store.toRetrievalTool("Search the knowledge base");
|
|
81
|
-
* ```
|
|
89
|
+
* const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
|
|
82
90
|
*
|
|
83
|
-
* @example With filterable metadata fields
|
|
84
|
-
* ```typescript
|
|
85
91
|
* const store = await LanceDBVectorStore.create({
|
|
86
92
|
* name: "knowledge_base",
|
|
87
93
|
* uri: "./my-database",
|
|
88
|
-
* tableName: "
|
|
94
|
+
* tableName: "chunks",
|
|
89
95
|
* embeddings,
|
|
90
96
|
* metadataFields: [
|
|
91
|
-
* { name: "
|
|
92
|
-
* { name: "
|
|
93
|
-
* { name: "year", type: "number" },
|
|
94
|
-
* { name: "verified", type: "boolean" },
|
|
95
|
-
* { name: "hash", type: "string" }, // Enables efficient deduplication
|
|
97
|
+
* { name: "author", type: "string", nullable: true },
|
|
98
|
+
* { name: "category", type: "string", nullable: true },
|
|
96
99
|
* ],
|
|
97
100
|
* });
|
|
98
101
|
*
|
|
99
|
-
* //
|
|
102
|
+
* // Chunk metadata (index, hash, prev_id, etc.) is stored automatically
|
|
103
|
+
* // in a chunk_metadata struct column — no need to declare it.
|
|
100
104
|
* await store.addDocuments([
|
|
101
|
-
* {
|
|
102
|
-
* id: "1",
|
|
103
|
-
* content: "LanceDB is a vector database",
|
|
104
|
-
* metadata: { category: "database", source: "docs", year: 2024, verified: true },
|
|
105
|
-
* },
|
|
105
|
+
* { id: "1", content: "LanceDB is a vector database", metadata: { category: "db" } },
|
|
106
106
|
* ]);
|
|
107
107
|
*
|
|
108
|
-
* // Search with filters on metadata columns
|
|
108
|
+
* // Search with filters on user metadata columns
|
|
109
109
|
* const results = await store.search("vector database", {
|
|
110
110
|
* limit: 5,
|
|
111
|
-
* filter: { category: "
|
|
111
|
+
* filter: { category: "db" },
|
|
112
|
+
* });
|
|
113
|
+
* ```
|
|
114
|
+
*
|
|
115
|
+
* @example Pre-existing table mode — connect to externally managed table
|
|
116
|
+
* ```typescript
|
|
117
|
+
* const store = await LanceDBVectorStore.create({
|
|
118
|
+
* name: "my_store",
|
|
119
|
+
* uri: "./my-database",
|
|
120
|
+
* tableName: "existing_table", // table already exists with its own schema
|
|
121
|
+
* embeddings,
|
|
122
|
+
* // metadataFields omitted — schema is not managed by this class
|
|
112
123
|
* });
|
|
113
124
|
* ```
|
|
114
125
|
*/
|
|
@@ -124,14 +135,25 @@ export declare class LanceDBVectorStore extends VectorStore {
|
|
|
124
135
|
/**
|
|
125
136
|
* Create a new LanceDBVectorStore instance.
|
|
126
137
|
*
|
|
127
|
-
*
|
|
138
|
+
* - If the table already exists it is opened immediately.
|
|
139
|
+
* - If `metadataFields` is provided and the table does not exist yet, it
|
|
140
|
+
* will be created on the first insert with an explicit Arrow schema.
|
|
141
|
+
* - If `metadataFields` is **not** provided and the table does not exist,
|
|
142
|
+
* an error is thrown — the store cannot manage an unknown schema.
|
|
128
143
|
*
|
|
129
144
|
* @param config - Configuration for the store
|
|
130
145
|
* @returns A configured LanceDBVectorStore instance
|
|
131
146
|
*
|
|
132
147
|
* @throws Error if @lancedb/lancedb is not installed
|
|
148
|
+
* @throws Error if the table does not exist and no metadataFields are provided
|
|
133
149
|
*/
|
|
134
150
|
static create(config: LanceDBVectorStoreConfig): Promise<LanceDBVectorStore>;
|
|
151
|
+
/**
|
|
152
|
+
* Create the table with an explicit Arrow schema derived from `metadataFields`
|
|
153
|
+
* plus a `chunk_metadata` struct column.
|
|
154
|
+
* Called on the first insert when operating in managed mode.
|
|
155
|
+
*/
|
|
156
|
+
private createManagedTable;
|
|
135
157
|
/**
|
|
136
158
|
* Add documents to the vector store.
|
|
137
159
|
* If an embeddings provider is configured, embeddings are generated automatically.
|
|
@@ -139,8 +161,29 @@ export declare class LanceDBVectorStore extends VectorStore {
|
|
|
139
161
|
addDocuments(documents: Document[], _options?: AddDocumentsOptions): Promise<string[]>;
|
|
140
162
|
/**
|
|
141
163
|
* Add documents with pre-computed embeddings.
|
|
164
|
+
*
|
|
165
|
+
* In managed mode, chunk metadata fields are packed into a `chunk_metadata`
|
|
166
|
+
* struct and user-defined fields are projected to their declared columns.
|
|
167
|
+
* The table is created on the first call; subsequent calls append directly.
|
|
168
|
+
*
|
|
169
|
+
* In pre-existing table mode all metadata is spread flat as-is.
|
|
142
170
|
*/
|
|
143
171
|
addEmbeddedDocuments(documents: EmbeddedDocument[], _options?: AddDocumentsOptions): Promise<string[]>;
|
|
172
|
+
/**
|
|
173
|
+
* Pack chunk metadata fields from flat metadata into a struct object.
|
|
174
|
+
* Returns a plain object for the `chunk_metadata` column, or null if
|
|
175
|
+
* no chunk metadata fields are present.
|
|
176
|
+
*/
|
|
177
|
+
private packChunkMetadata;
|
|
178
|
+
/**
|
|
179
|
+
* Unpack a chunk_metadata struct value back to flat metadata keys.
|
|
180
|
+
*/
|
|
181
|
+
private unpackChunkMetadata;
|
|
182
|
+
/**
|
|
183
|
+
* Project a record to only the columns declared in the schema
|
|
184
|
+
* (id, text, vector, chunk_metadata, plus all metadataFields).
|
|
185
|
+
*/
|
|
186
|
+
private projectToSchema;
|
|
144
187
|
/**
|
|
145
188
|
* Search for documents similar to the query.
|
|
146
189
|
*/
|
|
@@ -165,8 +208,9 @@ export declare class LanceDBVectorStore extends VectorStore {
|
|
|
165
208
|
* Get existing documents by their content hashes.
|
|
166
209
|
* Used for deduplication during ingestion.
|
|
167
210
|
*
|
|
168
|
-
*
|
|
169
|
-
*
|
|
211
|
+
* Requires that documents were stored with chunk metadata containing
|
|
212
|
+
* a `hash` field (automatically present when using chunkers from this library).
|
|
213
|
+
* Queries the `chunk_metadata.hash` struct sub-field.
|
|
170
214
|
*/
|
|
171
215
|
getByHashes(hashes: string[], _options?: DeleteOptions): Promise<Map<string, string>>;
|
|
172
216
|
/**
|
|
@@ -174,9 +218,9 @@ export declare class LanceDBVectorStore extends VectorStore {
|
|
|
174
218
|
*/
|
|
175
219
|
getConnection(): Connection;
|
|
176
220
|
/**
|
|
177
|
-
* Get the underlying LanceDB table.
|
|
221
|
+
* Get the underlying LanceDB table, or null if no data has been inserted yet.
|
|
178
222
|
*/
|
|
179
|
-
getTable(): Table;
|
|
223
|
+
getTable(): Table | null;
|
|
180
224
|
/**
|
|
181
225
|
* Get the configured embeddings provider.
|
|
182
226
|
*/
|
|
@@ -194,6 +238,10 @@ export declare class LanceDBVectorStore extends VectorStore {
|
|
|
194
238
|
* Optimize the table for better performance.
|
|
195
239
|
*/
|
|
196
240
|
optimize(): Promise<void>;
|
|
241
|
+
/**
|
|
242
|
+
* Get the configured metadata fields.
|
|
243
|
+
*/
|
|
244
|
+
getMetadataFields(): MetadataFieldDefinition[] | undefined;
|
|
197
245
|
/**
|
|
198
246
|
* Build a SQL filter string from a filter object.
|
|
199
247
|
*/
|
|
@@ -203,12 +251,12 @@ export declare class LanceDBVectorStore extends VectorStore {
|
|
|
203
251
|
*/
|
|
204
252
|
private processResults;
|
|
205
253
|
/**
|
|
206
|
-
* Extract metadata from a row
|
|
254
|
+
* Extract metadata from a row.
|
|
255
|
+
*
|
|
256
|
+
* In managed mode: returns user-defined fields plus unpacked chunk_metadata.
|
|
257
|
+
* In pre-existing table mode: returns all non-system columns, with
|
|
258
|
+
* chunk_metadata unpacked if present.
|
|
207
259
|
*/
|
|
208
260
|
private extractMetadata;
|
|
209
|
-
/**
|
|
210
|
-
* Get the configured metadata fields.
|
|
211
|
-
*/
|
|
212
|
-
getMetadataFields(): MetadataFieldDefinition[] | undefined;
|
|
213
261
|
}
|
|
214
262
|
//# sourceMappingURL=LanceDBVectorStore.d.ts.map
|
|
@@ -44,67 +44,71 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
44
44
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
45
|
exports.LanceDBVectorStore = void 0;
|
|
46
46
|
const VectorStore_1 = require("./VectorStore");
|
|
47
|
+
/**
|
|
48
|
+
* All known ChunkMetadata field names.
|
|
49
|
+
* Used to separate chunk metadata from user metadata when packing/unpacking.
|
|
50
|
+
*/
|
|
51
|
+
const CHUNK_METADATA_KEYS = [
|
|
52
|
+
"index", "total", "prev_id", "next_id",
|
|
53
|
+
"start", "end", "source_id", "source_path",
|
|
54
|
+
"char_count", "token_count", "hash", "section", "page",
|
|
55
|
+
];
|
|
56
|
+
const CHUNK_METADATA_KEY_SET = new Set(CHUNK_METADATA_KEYS);
|
|
47
57
|
/**
|
|
48
58
|
* LanceDB implementation of the VectorStore interface.
|
|
49
59
|
*
|
|
50
|
-
*
|
|
51
|
-
* ```typescript
|
|
52
|
-
* import { LanceDBVectorStore, OpenAIEmbeddings } from "@agentionai/agents";
|
|
60
|
+
* Supports two modes of operation:
|
|
53
61
|
*
|
|
54
|
-
*
|
|
55
|
-
*
|
|
56
|
-
*
|
|
57
|
-
*
|
|
62
|
+
* **Managed mode** (`metadataFields` provided): The store creates the LanceDB
|
|
63
|
+
* table on first insert using an explicit Arrow schema derived from
|
|
64
|
+
* `metadataFields`. User-defined fields are stored as typed top-level columns.
|
|
65
|
+
* Chunk metadata (from chunkers) is automatically packed into a `chunk_metadata`
|
|
66
|
+
* struct column.
|
|
58
67
|
*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
*
|
|
62
|
-
*
|
|
63
|
-
* embeddings,
|
|
64
|
-
* });
|
|
68
|
+
* **Pre-existing table mode** (`metadataFields` omitted): The store connects
|
|
69
|
+
* to a table that was created independently (e.g. via LanceDB CLI or another
|
|
70
|
+
* tool). No schema management is performed; all non-system columns are returned
|
|
71
|
+
* as metadata on read.
|
|
65
72
|
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
70
|
-
* ]);
|
|
71
|
-
*
|
|
72
|
-
* // Search
|
|
73
|
-
* const results = await store.search("What is LanceDB?", { limit: 5 });
|
|
73
|
+
* @example Managed mode — user-defined metadata fields
|
|
74
|
+
* ```typescript
|
|
75
|
+
* import { LanceDBVectorStore } from "@agentionai/agents";
|
|
76
|
+
* import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
|
|
74
77
|
*
|
|
75
|
-
*
|
|
76
|
-
* const searchTool = store.toRetrievalTool("Search the knowledge base");
|
|
77
|
-
* ```
|
|
78
|
+
* const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
|
|
78
79
|
*
|
|
79
|
-
* @example With filterable metadata fields
|
|
80
|
-
* ```typescript
|
|
81
80
|
* const store = await LanceDBVectorStore.create({
|
|
82
81
|
* name: "knowledge_base",
|
|
83
82
|
* uri: "./my-database",
|
|
84
|
-
* tableName: "
|
|
83
|
+
* tableName: "chunks",
|
|
85
84
|
* embeddings,
|
|
86
85
|
* metadataFields: [
|
|
87
|
-
* { name: "
|
|
88
|
-
* { name: "
|
|
89
|
-
* { name: "year", type: "number" },
|
|
90
|
-
* { name: "verified", type: "boolean" },
|
|
91
|
-
* { name: "hash", type: "string" }, // Enables efficient deduplication
|
|
86
|
+
* { name: "author", type: "string", nullable: true },
|
|
87
|
+
* { name: "category", type: "string", nullable: true },
|
|
92
88
|
* ],
|
|
93
89
|
* });
|
|
94
90
|
*
|
|
95
|
-
* //
|
|
91
|
+
* // Chunk metadata (index, hash, prev_id, etc.) is stored automatically
|
|
92
|
+
* // in a chunk_metadata struct column — no need to declare it.
|
|
96
93
|
* await store.addDocuments([
|
|
97
|
-
* {
|
|
98
|
-
* id: "1",
|
|
99
|
-
* content: "LanceDB is a vector database",
|
|
100
|
-
* metadata: { category: "database", source: "docs", year: 2024, verified: true },
|
|
101
|
-
* },
|
|
94
|
+
* { id: "1", content: "LanceDB is a vector database", metadata: { category: "db" } },
|
|
102
95
|
* ]);
|
|
103
96
|
*
|
|
104
|
-
* // Search with filters on metadata columns
|
|
97
|
+
* // Search with filters on user metadata columns
|
|
105
98
|
* const results = await store.search("vector database", {
|
|
106
99
|
* limit: 5,
|
|
107
|
-
* filter: { category: "
|
|
100
|
+
* filter: { category: "db" },
|
|
101
|
+
* });
|
|
102
|
+
* ```
|
|
103
|
+
*
|
|
104
|
+
* @example Pre-existing table mode — connect to externally managed table
|
|
105
|
+
* ```typescript
|
|
106
|
+
* const store = await LanceDBVectorStore.create({
|
|
107
|
+
* name: "my_store",
|
|
108
|
+
* uri: "./my-database",
|
|
109
|
+
* tableName: "existing_table", // table already exists with its own schema
|
|
110
|
+
* embeddings,
|
|
111
|
+
* // metadataFields omitted — schema is not managed by this class
|
|
108
112
|
* });
|
|
109
113
|
* ```
|
|
110
114
|
*/
|
|
@@ -123,15 +127,19 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
123
127
|
/**
|
|
124
128
|
* Create a new LanceDBVectorStore instance.
|
|
125
129
|
*
|
|
126
|
-
*
|
|
130
|
+
* - If the table already exists it is opened immediately.
|
|
131
|
+
* - If `metadataFields` is provided and the table does not exist yet, it
|
|
132
|
+
* will be created on the first insert with an explicit Arrow schema.
|
|
133
|
+
* - If `metadataFields` is **not** provided and the table does not exist,
|
|
134
|
+
* an error is thrown — the store cannot manage an unknown schema.
|
|
127
135
|
*
|
|
128
136
|
* @param config - Configuration for the store
|
|
129
137
|
* @returns A configured LanceDBVectorStore instance
|
|
130
138
|
*
|
|
131
139
|
* @throws Error if @lancedb/lancedb is not installed
|
|
140
|
+
* @throws Error if the table does not exist and no metadataFields are provided
|
|
132
141
|
*/
|
|
133
142
|
static async create(config) {
|
|
134
|
-
// Dynamic import to make lancedb an optional dependency
|
|
135
143
|
let lancedb;
|
|
136
144
|
try {
|
|
137
145
|
lancedb = await Promise.resolve().then(() => __importStar(require("@lancedb/lancedb")));
|
|
@@ -141,55 +149,80 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
141
149
|
}
|
|
142
150
|
const connection = await lancedb.connect(config.uri, config.connectionOptions);
|
|
143
151
|
const tableNames = await connection.tableNames();
|
|
144
|
-
let table;
|
|
145
|
-
const dimensions = config.dimensions ?? config.embeddings?.dimensions ?? 1536;
|
|
152
|
+
let table = null;
|
|
146
153
|
if (tableNames.includes(config.tableName)) {
|
|
147
154
|
table = await connection.openTable(config.tableName);
|
|
148
155
|
}
|
|
149
|
-
else {
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
156
|
+
else if (!config.metadataFields) {
|
|
157
|
+
throw new Error(`Table "${config.tableName}" does not exist and no metadataFields were provided. ` +
|
|
158
|
+
`Either create the table independently or provide metadataFields so the store can create it on first insert.`);
|
|
159
|
+
}
|
|
160
|
+
// Table doesn't exist but metadataFields provided → will be created on first insert.
|
|
161
|
+
return new LanceDBVectorStore(config, connection, table);
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Create the table with an explicit Arrow schema derived from `metadataFields`
|
|
165
|
+
* plus a `chunk_metadata` struct column.
|
|
166
|
+
* Called on the first insert when operating in managed mode.
|
|
167
|
+
*/
|
|
168
|
+
async createManagedTable(records) {
|
|
169
|
+
let arrow;
|
|
170
|
+
try {
|
|
171
|
+
arrow = await Promise.resolve().then(() => __importStar(require("apache-arrow")));
|
|
172
|
+
}
|
|
173
|
+
catch {
|
|
174
|
+
throw new Error("apache-arrow is not installed. Install it with: npm install apache-arrow");
|
|
175
|
+
}
|
|
176
|
+
const schemaFields = [
|
|
177
|
+
new arrow.Field("id", new arrow.Utf8(), false),
|
|
178
|
+
new arrow.Field("text", new arrow.Utf8(), false),
|
|
179
|
+
new arrow.Field("vector", new arrow.FixedSizeList(this.dimensions, new arrow.Field("item", new arrow.Float32(), true)), false),
|
|
180
|
+
];
|
|
181
|
+
// Warn about non-snake_case field names (DataFusion normalizes SQL identifiers to lowercase)
|
|
182
|
+
for (const fieldDef of this.metadataFields) {
|
|
183
|
+
if (fieldDef.name !== fieldDef.name.toLowerCase()) {
|
|
184
|
+
console.warn(`[LanceDBVectorStore] Warning: metadata field "${fieldDef.name}" contains uppercase characters. ` +
|
|
185
|
+
`LanceDB uses DataFusion for SQL filtering, which normalizes unquoted identifiers to lowercase. ` +
|
|
186
|
+
`Use snake_case names (e.g. "${fieldDef.name.replace(/[A-Z]/g, (c) => "_" + c.toLowerCase()).replace(/^_/, "")}") to avoid filter issues.`);
|
|
154
187
|
}
|
|
155
|
-
|
|
156
|
-
|
|
188
|
+
}
|
|
189
|
+
// User-defined metadata columns
|
|
190
|
+
for (const fieldDef of this.metadataFields) {
|
|
191
|
+
const nullable = fieldDef.nullable !== false; // default true
|
|
192
|
+
let arrowType;
|
|
193
|
+
if (fieldDef.type === "number") {
|
|
194
|
+
arrowType = new arrow.Float64();
|
|
157
195
|
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
new arrow.Field("id", new arrow.Utf8(), false),
|
|
161
|
-
new arrow.Field("text", new arrow.Utf8(), false),
|
|
162
|
-
new arrow.Field("vector", new arrow.FixedSizeList(dimensions, new arrow.Field("item", new arrow.Float32(), true)), false),
|
|
163
|
-
];
|
|
164
|
-
// Add metadata fields - either as separate columns or as a JSON string
|
|
165
|
-
if (config.metadataFields && config.metadataFields.length > 0) {
|
|
166
|
-
for (const field of config.metadataFields) {
|
|
167
|
-
const nullable = field.nullable !== false;
|
|
168
|
-
let arrowType;
|
|
169
|
-
switch (field.type) {
|
|
170
|
-
case "string":
|
|
171
|
-
arrowType = new arrow.Utf8();
|
|
172
|
-
break;
|
|
173
|
-
case "number":
|
|
174
|
-
arrowType = new arrow.Float64();
|
|
175
|
-
break;
|
|
176
|
-
case "boolean":
|
|
177
|
-
arrowType = new arrow.Bool();
|
|
178
|
-
break;
|
|
179
|
-
default:
|
|
180
|
-
throw new Error(`Unsupported metadata field type: ${field.type}`);
|
|
181
|
-
}
|
|
182
|
-
schemaFields.push(new arrow.Field(field.name, arrowType, nullable));
|
|
183
|
-
}
|
|
196
|
+
else if (fieldDef.type === "boolean") {
|
|
197
|
+
arrowType = new arrow.Bool();
|
|
184
198
|
}
|
|
185
199
|
else {
|
|
186
|
-
|
|
187
|
-
schemaFields.push(new arrow.Field("metadata", new arrow.Utf8(), true));
|
|
200
|
+
arrowType = new arrow.Utf8();
|
|
188
201
|
}
|
|
189
|
-
|
|
190
|
-
table = await connection.createEmptyTable(config.tableName, schema);
|
|
202
|
+
schemaFields.push(new arrow.Field(fieldDef.name, arrowType, nullable));
|
|
191
203
|
}
|
|
192
|
-
|
|
204
|
+
// Chunk metadata struct column (always included, nullable for non-chunk docs)
|
|
205
|
+
schemaFields.push(new arrow.Field("chunk_metadata", new arrow.Struct([
|
|
206
|
+
new arrow.Field("index", new arrow.Float64(), true),
|
|
207
|
+
new arrow.Field("total", new arrow.Float64(), true),
|
|
208
|
+
new arrow.Field("prev_id", new arrow.Utf8(), true),
|
|
209
|
+
new arrow.Field("next_id", new arrow.Utf8(), true),
|
|
210
|
+
new arrow.Field("start", new arrow.Float64(), true),
|
|
211
|
+
new arrow.Field("end", new arrow.Float64(), true),
|
|
212
|
+
new arrow.Field("source_id", new arrow.Utf8(), true),
|
|
213
|
+
new arrow.Field("source_path", new arrow.Utf8(), true),
|
|
214
|
+
new arrow.Field("char_count", new arrow.Float64(), true),
|
|
215
|
+
new arrow.Field("token_count", new arrow.Float64(), true),
|
|
216
|
+
new arrow.Field("hash", new arrow.Utf8(), true),
|
|
217
|
+
new arrow.Field("section", new arrow.Utf8(), true),
|
|
218
|
+
new arrow.Field("page", new arrow.Float64(), true),
|
|
219
|
+
]), true // nullable — non-chunk documents get null
|
|
220
|
+
));
|
|
221
|
+
const schema = new arrow.Schema(schemaFields);
|
|
222
|
+
this.table = await this.connection.createTable(this.tableName, records, {
|
|
223
|
+
schema,
|
|
224
|
+
});
|
|
225
|
+
return this.table;
|
|
193
226
|
}
|
|
194
227
|
/**
|
|
195
228
|
* Add documents to the vector store.
|
|
@@ -199,10 +232,8 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
199
232
|
if (!this.embeddings) {
|
|
200
233
|
throw new Error("No embeddings provider configured. Use addEmbeddedDocuments() with pre-computed embeddings, or configure an embeddings provider.");
|
|
201
234
|
}
|
|
202
|
-
// Generate embeddings for all documents
|
|
203
235
|
const texts = documents.map((doc) => doc.content);
|
|
204
236
|
const vectors = await this.embeddings.embed(texts);
|
|
205
|
-
// Convert to embedded documents
|
|
206
237
|
const embeddedDocs = documents.map((doc, i) => ({
|
|
207
238
|
...doc,
|
|
208
239
|
embedding: vectors[i],
|
|
@@ -211,6 +242,12 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
211
242
|
}
|
|
212
243
|
/**
|
|
213
244
|
* Add documents with pre-computed embeddings.
|
|
245
|
+
*
|
|
246
|
+
* In managed mode, chunk metadata fields are packed into a `chunk_metadata`
|
|
247
|
+
* struct and user-defined fields are projected to their declared columns.
|
|
248
|
+
* The table is created on the first call; subsequent calls append directly.
|
|
249
|
+
*
|
|
250
|
+
* In pre-existing table mode all metadata is spread flat as-is.
|
|
214
251
|
*/
|
|
215
252
|
async addEmbeddedDocuments(documents, _options) {
|
|
216
253
|
const records = documents.map((doc) => {
|
|
@@ -218,23 +255,69 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
218
255
|
id: doc.id,
|
|
219
256
|
text: doc.content,
|
|
220
257
|
vector: doc.embedding,
|
|
258
|
+
...doc.metadata,
|
|
221
259
|
};
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
else {
|
|
230
|
-
// Legacy: store metadata as JSON string
|
|
231
|
-
record.metadata = doc.metadata ? JSON.stringify(doc.metadata) : undefined;
|
|
260
|
+
// In managed mode, pack chunk metadata into struct and project to schema
|
|
261
|
+
if (this.metadataFields) {
|
|
262
|
+
const packed = this.packChunkMetadata(doc.metadata ?? {});
|
|
263
|
+
record.chunk_metadata = packed;
|
|
264
|
+
return this.projectToSchema(record);
|
|
232
265
|
}
|
|
233
266
|
return record;
|
|
234
267
|
});
|
|
235
|
-
|
|
268
|
+
if (this.table) {
|
|
269
|
+
await this.table.add(records);
|
|
270
|
+
}
|
|
271
|
+
else {
|
|
272
|
+
// Managed mode: metadataFields must be present (enforced in create())
|
|
273
|
+
await this.createManagedTable(records);
|
|
274
|
+
}
|
|
236
275
|
return documents.map((d) => d.id);
|
|
237
276
|
}
|
|
277
|
+
/**
|
|
278
|
+
* Pack chunk metadata fields from flat metadata into a struct object.
|
|
279
|
+
* Returns a plain object for the `chunk_metadata` column, or null if
|
|
280
|
+
* no chunk metadata fields are present.
|
|
281
|
+
*/
|
|
282
|
+
packChunkMetadata(metadata) {
|
|
283
|
+
const struct = {};
|
|
284
|
+
let found = false;
|
|
285
|
+
for (const key of CHUNK_METADATA_KEYS) {
|
|
286
|
+
if (key in metadata) {
|
|
287
|
+
struct[key] = metadata[key] ?? null;
|
|
288
|
+
found = true;
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
return found ? struct : null;
|
|
292
|
+
}
|
|
293
|
+
/**
|
|
294
|
+
* Unpack a chunk_metadata struct value back to flat metadata keys.
|
|
295
|
+
*/
|
|
296
|
+
unpackChunkMetadata(struct, target) {
|
|
297
|
+
for (const key of CHUNK_METADATA_KEYS) {
|
|
298
|
+
const value = struct[key];
|
|
299
|
+
if (value !== null && value !== undefined) {
|
|
300
|
+
target[key] = value;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
/**
|
|
305
|
+
* Project a record to only the columns declared in the schema
|
|
306
|
+
* (id, text, vector, chunk_metadata, plus all metadataFields).
|
|
307
|
+
*/
|
|
308
|
+
projectToSchema(record) {
|
|
309
|
+
const projected = { id: record.id, text: record.text };
|
|
310
|
+
if (record.vector !== undefined) {
|
|
311
|
+
projected.vector = record.vector;
|
|
312
|
+
}
|
|
313
|
+
// User-defined metadata fields
|
|
314
|
+
for (const f of this.metadataFields) {
|
|
315
|
+
projected[f.name] = record[f.name] ?? null;
|
|
316
|
+
}
|
|
317
|
+
// Chunk metadata struct
|
|
318
|
+
projected.chunk_metadata = record.chunk_metadata ?? null;
|
|
319
|
+
return projected;
|
|
320
|
+
}
|
|
238
321
|
/**
|
|
239
322
|
* Search for documents similar to the query.
|
|
240
323
|
*/
|
|
@@ -249,6 +332,9 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
249
332
|
* Search using a pre-computed embedding vector.
|
|
250
333
|
*/
|
|
251
334
|
async searchByVector(embedding, options) {
|
|
335
|
+
if (!this.table) {
|
|
336
|
+
return [];
|
|
337
|
+
}
|
|
252
338
|
const limit = options?.limit ?? 10;
|
|
253
339
|
const scoreThreshold = options?.scoreThreshold;
|
|
254
340
|
let queryBuilder = this.table.vectorSearch(embedding).limit(limit);
|
|
@@ -265,6 +351,8 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
265
351
|
* Delete documents by their IDs.
|
|
266
352
|
*/
|
|
267
353
|
async delete(ids, _options) {
|
|
354
|
+
if (!this.table)
|
|
355
|
+
return 0;
|
|
268
356
|
const idList = ids.map((id) => `'${id}'`).join(", ");
|
|
269
357
|
const filter = `id IN (${idList})`;
|
|
270
358
|
const countBefore = await this.table.countRows();
|
|
@@ -276,12 +364,16 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
276
364
|
* Delete all documents.
|
|
277
365
|
*/
|
|
278
366
|
async clear(_options) {
|
|
367
|
+
if (!this.table)
|
|
368
|
+
return;
|
|
279
369
|
await this.table.delete("id IS NOT NULL");
|
|
280
370
|
}
|
|
281
371
|
/**
|
|
282
372
|
* Get a document by its ID.
|
|
283
373
|
*/
|
|
284
374
|
async getById(id, _options) {
|
|
375
|
+
if (!this.table)
|
|
376
|
+
return null;
|
|
285
377
|
const results = await this.table
|
|
286
378
|
.query()
|
|
287
379
|
.where(`id = '${id}'`)
|
|
@@ -301,34 +393,21 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
301
393
|
* Get existing documents by their content hashes.
|
|
302
394
|
* Used for deduplication during ingestion.
|
|
303
395
|
*
|
|
304
|
-
*
|
|
305
|
-
*
|
|
396
|
+
* Requires that documents were stored with chunk metadata containing
|
|
397
|
+
* a `hash` field (automatically present when using chunkers from this library).
|
|
398
|
+
* Queries the `chunk_metadata.hash` struct sub-field.
|
|
306
399
|
*/
|
|
307
400
|
async getByHashes(hashes, _options) {
|
|
308
401
|
const hashMap = new Map();
|
|
309
|
-
if (hashes.length === 0) {
|
|
402
|
+
if (hashes.length === 0 || !this.table) {
|
|
310
403
|
return hashMap;
|
|
311
404
|
}
|
|
312
|
-
// Check if hash is a defined metadata field for efficient queries
|
|
313
|
-
const hasHashField = this.metadataFields?.some((field) => field.name === "hash");
|
|
314
405
|
for (const hash of hashes) {
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
.where(`hash = '${hash}'`)
|
|
321
|
-
.limit(1)
|
|
322
|
-
.toArray();
|
|
323
|
-
}
|
|
324
|
-
else {
|
|
325
|
-
// Legacy: search for hash string in JSON metadata
|
|
326
|
-
results = await this.table
|
|
327
|
-
.query()
|
|
328
|
-
.where(`metadata LIKE '%${hash}%'`)
|
|
329
|
-
.limit(1)
|
|
330
|
-
.toArray();
|
|
331
|
-
}
|
|
406
|
+
const results = await this.table
|
|
407
|
+
.query()
|
|
408
|
+
.where(`chunk_metadata.hash = '${hash}'`)
|
|
409
|
+
.limit(1)
|
|
410
|
+
.toArray();
|
|
332
411
|
if (results.length > 0) {
|
|
333
412
|
const record = results[0];
|
|
334
413
|
hashMap.set(hash, record.id);
|
|
@@ -343,7 +422,7 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
343
422
|
return this.connection;
|
|
344
423
|
}
|
|
345
424
|
/**
|
|
346
|
-
* Get the underlying LanceDB table.
|
|
425
|
+
* Get the underlying LanceDB table, or null if no data has been inserted yet.
|
|
347
426
|
*/
|
|
348
427
|
getTable() {
|
|
349
428
|
return this.table;
|
|
@@ -365,14 +444,24 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
365
444
|
* Recommended for tables with more than 10,000 rows.
|
|
366
445
|
*/
|
|
367
446
|
async createIndex() {
|
|
447
|
+
if (!this.table)
|
|
448
|
+
throw new Error("Table not yet created — insert data first.");
|
|
368
449
|
await this.table.createIndex("vector");
|
|
369
450
|
}
|
|
370
451
|
/**
|
|
371
452
|
* Optimize the table for better performance.
|
|
372
453
|
*/
|
|
373
454
|
async optimize() {
|
|
455
|
+
if (!this.table)
|
|
456
|
+
return;
|
|
374
457
|
await this.table.optimize();
|
|
375
458
|
}
|
|
459
|
+
/**
|
|
460
|
+
* Get the configured metadata fields.
|
|
461
|
+
*/
|
|
462
|
+
getMetadataFields() {
|
|
463
|
+
return this.metadataFields;
|
|
464
|
+
}
|
|
376
465
|
/**
|
|
377
466
|
* Build a SQL filter string from a filter object.
|
|
378
467
|
*/
|
|
@@ -397,19 +486,16 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
397
486
|
processResults(results, scoreThreshold) {
|
|
398
487
|
const searchResults = [];
|
|
399
488
|
for (const row of results) {
|
|
400
|
-
// LanceDB returns _distance for vector search
|
|
401
489
|
const distance = row._distance ?? 0;
|
|
402
|
-
// Convert distance to similarity score (lower distance = higher similarity)
|
|
403
490
|
const score = 1 / (1 + distance);
|
|
404
491
|
if (scoreThreshold !== undefined && score < scoreThreshold) {
|
|
405
492
|
continue;
|
|
406
493
|
}
|
|
407
|
-
const metadata = this.extractMetadata(row);
|
|
408
494
|
searchResults.push({
|
|
409
495
|
document: {
|
|
410
496
|
id: row.id,
|
|
411
497
|
content: row.text,
|
|
412
|
-
metadata,
|
|
498
|
+
metadata: this.extractMetadata(row),
|
|
413
499
|
},
|
|
414
500
|
score,
|
|
415
501
|
});
|
|
@@ -417,13 +503,18 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
417
503
|
return searchResults;
|
|
418
504
|
}
|
|
419
505
|
/**
|
|
420
|
-
* Extract metadata from a row
|
|
506
|
+
* Extract metadata from a row.
|
|
507
|
+
*
|
|
508
|
+
* In managed mode: returns user-defined fields plus unpacked chunk_metadata.
|
|
509
|
+
* In pre-existing table mode: returns all non-system columns, with
|
|
510
|
+
* chunk_metadata unpacked if present.
|
|
421
511
|
*/
|
|
422
512
|
extractMetadata(row) {
|
|
513
|
+
const SYSTEM_COLS = new Set(["id", "text", "vector", "_distance", "chunk_metadata"]);
|
|
514
|
+
const metadata = {};
|
|
515
|
+
let hasValue = false;
|
|
423
516
|
if (this.metadataFields && this.metadataFields.length > 0) {
|
|
424
|
-
//
|
|
425
|
-
const metadata = {};
|
|
426
|
-
let hasValue = false;
|
|
517
|
+
// Managed mode: collect declared user fields
|
|
427
518
|
for (const field of this.metadataFields) {
|
|
428
519
|
const value = row[field.name];
|
|
429
520
|
if (value !== null && value !== undefined) {
|
|
@@ -431,18 +522,23 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
|
|
|
431
522
|
hasValue = true;
|
|
432
523
|
}
|
|
433
524
|
}
|
|
434
|
-
return hasValue ? metadata : undefined;
|
|
435
525
|
}
|
|
436
526
|
else {
|
|
437
|
-
//
|
|
438
|
-
|
|
527
|
+
// Pre-existing table mode: return all non-system columns
|
|
528
|
+
for (const [key, value] of Object.entries(row)) {
|
|
529
|
+
if (!SYSTEM_COLS.has(key) && value !== null && value !== undefined) {
|
|
530
|
+
metadata[key] = value;
|
|
531
|
+
hasValue = true;
|
|
532
|
+
}
|
|
533
|
+
}
|
|
439
534
|
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
535
|
+
// Unpack chunk_metadata struct if present
|
|
536
|
+
const chunkStruct = row.chunk_metadata;
|
|
537
|
+
if (chunkStruct && typeof chunkStruct === "object") {
|
|
538
|
+
this.unpackChunkMetadata(chunkStruct, metadata);
|
|
539
|
+
hasValue = true;
|
|
540
|
+
}
|
|
541
|
+
return hasValue ? metadata : undefined;
|
|
446
542
|
}
|
|
447
543
|
}
|
|
448
544
|
exports.LanceDBVectorStore = LanceDBVectorStore;
|
|
@@ -235,7 +235,7 @@ export declare abstract class VectorStore {
|
|
|
235
235
|
}>;
|
|
236
236
|
/**
|
|
237
237
|
* Create a tool that agents can use to retrieve a chunk by its ID.
|
|
238
|
-
* Useful for navigating chunk chains using
|
|
238
|
+
* Useful for navigating chunk chains using prev_id/next_id metadata.
|
|
239
239
|
*
|
|
240
240
|
* @param description - Description of what the tool does (e.g., "Get a specific chunk by ID to read adjacent context")
|
|
241
241
|
* @param options - Configuration options for the tool
|
|
@@ -245,7 +245,7 @@ export declare abstract class VectorStore {
|
|
|
245
245
|
* ```typescript
|
|
246
246
|
* const store = new LanceDBVectorStore({ ... });
|
|
247
247
|
* const tool = store.toGetChunkByIdTool(
|
|
248
|
-
* "Retrieve a specific chunk by ID. Use
|
|
248
|
+
* "Retrieve a specific chunk by ID. Use prev_id or next_id from search results to get surrounding context."
|
|
249
249
|
* );
|
|
250
250
|
* agent.addTools([tool]);
|
|
251
251
|
* ```
|
|
@@ -165,7 +165,7 @@ class VectorStore {
|
|
|
165
165
|
}
|
|
166
166
|
/**
|
|
167
167
|
* Create a tool that agents can use to retrieve a chunk by its ID.
|
|
168
|
-
* Useful for navigating chunk chains using
|
|
168
|
+
* Useful for navigating chunk chains using prev_id/next_id metadata.
|
|
169
169
|
*
|
|
170
170
|
* @param description - Description of what the tool does (e.g., "Get a specific chunk by ID to read adjacent context")
|
|
171
171
|
* @param options - Configuration options for the tool
|
|
@@ -175,7 +175,7 @@ class VectorStore {
|
|
|
175
175
|
* ```typescript
|
|
176
176
|
* const store = new LanceDBVectorStore({ ... });
|
|
177
177
|
* const tool = store.toGetChunkByIdTool(
|
|
178
|
-
* "Retrieve a specific chunk by ID. Use
|
|
178
|
+
* "Retrieve a specific chunk by ID. Use prev_id or next_id from search results to get surrounding context."
|
|
179
179
|
* );
|
|
180
180
|
* agent.addTools([tool]);
|
|
181
181
|
* ```
|
|
@@ -187,7 +187,7 @@ class VectorStore {
|
|
|
187
187
|
properties: {
|
|
188
188
|
id: {
|
|
189
189
|
type: "string",
|
|
190
|
-
description: "The chunk ID to retrieve (e.g., from
|
|
190
|
+
description: "The chunk ID to retrieve (e.g., from prev_id or next_id metadata)",
|
|
191
191
|
},
|
|
192
192
|
},
|
|
193
193
|
required: ["id"],
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentionai/agents",
|
|
3
3
|
"author": "Laurent Zuijdwijk",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.8.1-beta",
|
|
5
5
|
"description": "Agent Library",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -33,6 +33,26 @@
|
|
|
33
33
|
"./embeddings": {
|
|
34
34
|
"types": "./dist/embeddings/index.d.ts",
|
|
35
35
|
"default": "./dist/embeddings/index.js"
|
|
36
|
+
},
|
|
37
|
+
"./vectorstore": {
|
|
38
|
+
"types": "./dist/vectorstore/index.d.ts",
|
|
39
|
+
"default": "./dist/vectorstore/index.js"
|
|
40
|
+
},
|
|
41
|
+
"./mcp": {
|
|
42
|
+
"types": "./dist/mcp/index.d.ts",
|
|
43
|
+
"default": "./dist/mcp/index.js"
|
|
44
|
+
},
|
|
45
|
+
"./viz": {
|
|
46
|
+
"types": "./dist/viz/index.d.ts",
|
|
47
|
+
"default": "./dist/viz/index.js"
|
|
48
|
+
},
|
|
49
|
+
"./chunkers": {
|
|
50
|
+
"types": "./dist/chunkers/index.d.ts",
|
|
51
|
+
"default": "./dist/chunkers/index.js"
|
|
52
|
+
},
|
|
53
|
+
"./ingestion": {
|
|
54
|
+
"types": "./dist/ingestion/index.d.ts",
|
|
55
|
+
"default": "./dist/ingestion/index.js"
|
|
36
56
|
}
|
|
37
57
|
},
|
|
38
58
|
"files": [
|
|
@@ -54,8 +74,8 @@
|
|
|
54
74
|
"lint:fix": "eslint 'src/**/*.{js,ts}' --fix",
|
|
55
75
|
"format": "prettier --write 'src/**/*.{js,ts,json,md}'",
|
|
56
76
|
"prepare": "npm run build",
|
|
57
|
-
"example": "
|
|
58
|
-
"example:watch": "nodemon --watch examples --watch src --ext ts --exec 'ts-node' examples/index.ts",
|
|
77
|
+
"example": "tsx",
|
|
78
|
+
"example:watch": "nodemon --watch examples --watch src --ext ts --exec 'ts-node --project tsconfig.esm.json' examples/index.ts",
|
|
59
79
|
"docs": "npm run docs:api && npm run docs:site",
|
|
60
80
|
"docs:api": "typedoc",
|
|
61
81
|
"docs:site": "vitepress build docs",
|