@agentionai/agents 0.7.0 → 0.8.1-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,7 @@ export declare abstract class Chunker {
29
29
  */
30
30
  protected computeHash(content: string): string;
31
31
  /**
32
- * Link chunks with previousChunkId and nextChunkId.
32
+ * Link chunks with prev_id and next_id.
33
33
  */
34
34
  protected linkChunks(chunks: Chunk[]): void;
35
35
  /**
@@ -53,17 +53,17 @@ class Chunker {
53
53
  }
54
54
  const id = this.generateId(content, i, options?.sourceId);
55
55
  const metadata = {
56
- chunkIndex: i,
57
- totalChunks: splits.length,
58
- previousChunkId: null, // Will be linked after
59
- nextChunkId: null, // Will be linked after
60
- startOffset,
61
- endOffset,
62
- sourceId: options?.sourceId,
63
- sourcePath: options?.sourcePath,
64
- charCount: content.length,
56
+ index: i,
57
+ total: splits.length,
58
+ prev_id: null, // Will be linked after
59
+ next_id: null, // Will be linked after
60
+ start: startOffset,
61
+ end: endOffset,
62
+ source_id: options?.sourceId,
63
+ source_path: options?.sourcePath,
64
+ char_count: content.length,
65
65
  hash: this.computeHash(content),
66
- sectionTitle: currentSection,
66
+ section: currentSection,
67
67
  ...options?.metadata,
68
68
  };
69
69
  chunks.push({ id, content, metadata });
@@ -71,9 +71,9 @@ class Chunker {
71
71
  }
72
72
  // Link chunks together
73
73
  this.linkChunks(chunks);
74
- // Update totalChunks now that we know the final count
74
+ // Update total now that we know the final count
75
75
  for (const chunk of chunks) {
76
- chunk.metadata.totalChunks = chunks.length;
76
+ chunk.metadata.total = chunks.length;
77
77
  }
78
78
  // Apply processor if provided
79
79
  if (this.config.chunkProcessor) {
@@ -100,15 +100,15 @@ class Chunker {
100
100
  return (0, crypto_1.createHash)("sha256").update(content).digest("hex");
101
101
  }
102
102
  /**
103
- * Link chunks with previousChunkId and nextChunkId.
103
+ * Link chunks with prev_id and next_id.
104
104
  */
105
105
  linkChunks(chunks) {
106
106
  for (let i = 0; i < chunks.length; i++) {
107
107
  if (i > 0) {
108
- chunks[i].metadata.previousChunkId = chunks[i - 1].id;
108
+ chunks[i].metadata.prev_id = chunks[i - 1].id;
109
109
  }
110
110
  if (i < chunks.length - 1) {
111
- chunks[i].metadata.nextChunkId = chunks[i + 1].id;
111
+ chunks[i].metadata.next_id = chunks[i + 1].id;
112
112
  }
113
113
  }
114
114
  }
@@ -128,11 +128,10 @@ class Chunker {
128
128
  }
129
129
  // Re-link after filtering and update indices
130
130
  for (let i = 0; i < processed.length; i++) {
131
- processed[i].metadata.chunkIndex = i;
132
- processed[i].metadata.totalChunks = processed.length;
133
- processed[i].metadata.previousChunkId =
134
- i > 0 ? processed[i - 1].id : null;
135
- processed[i].metadata.nextChunkId =
131
+ processed[i].metadata.index = i;
132
+ processed[i].metadata.total = processed.length;
133
+ processed[i].metadata.prev_id = i > 0 ? processed[i - 1].id : null;
134
+ processed[i].metadata.next_id =
136
135
  i < processed.length - 1 ? processed[i + 1].id : null;
137
136
  }
138
137
  return processed;
@@ -26,7 +26,7 @@ export declare function resetTokenxCache(): void;
26
26
  * });
27
27
  *
28
28
  * const chunks = await chunker.chunk(longDocument);
29
- * // Each chunk.metadata.tokenCount contains estimated tokens
29
+ * // Each chunk.metadata.token_count contains estimated tokens
30
30
  * ```
31
31
  */
32
32
  export declare class TokenChunker extends Chunker {
@@ -46,7 +46,6 @@ let tokenxModule = null;
46
46
  */
47
47
  async function loadTokenx() {
48
48
  if (!tokenxModule) {
49
- // Use dynamic import for ESM module
50
49
  tokenxModule = await Promise.resolve().then(() => __importStar(require("tokenx")));
51
50
  }
52
51
  return tokenxModule;
@@ -73,7 +72,7 @@ function resetTokenxCache() {
73
72
  * });
74
73
  *
75
74
  * const chunks = await chunker.chunk(longDocument);
76
- * // Each chunk.metadata.tokenCount contains estimated tokens
75
+ * // Each chunk.metadata.token_count contains estimated tokens
77
76
  * ```
78
77
  */
79
78
  class TokenChunker extends Chunker_1.Chunker {
@@ -160,7 +159,7 @@ class TokenChunker extends Chunker_1.Chunker {
160
159
  const { estimateTokenCount } = tokenx;
161
160
  // Add token count to each chunk's metadata
162
161
  for (const chunk of chunks) {
163
- chunk.metadata.tokenCount = estimateTokenCount(chunk.content);
162
+ chunk.metadata.token_count = estimateTokenCount(chunk.content);
164
163
  }
165
164
  return chunks;
166
165
  }
@@ -11,32 +11,38 @@ export interface Chunk {
11
11
  }
12
12
  /**
13
13
  * Metadata associated with each chunk.
14
+ *
15
+ * When stored in LanceDB via `LanceDBVectorStore`, these fields are
16
+ * automatically packed into a `chunk_metadata` struct column — they do
17
+ * not need to be declared in `metadataFields`.
14
18
  */
15
19
  export interface ChunkMetadata {
16
20
  /** Zero-based index of this chunk in the sequence */
17
- chunkIndex: number;
21
+ index: number;
18
22
  /** Total number of chunks in the sequence */
19
- totalChunks: number;
23
+ total: number;
20
24
  /** ID of the previous chunk, or null if first */
21
- previousChunkId: string | null;
25
+ prev_id: string | null;
22
26
  /** ID of the next chunk, or null if last */
23
- nextChunkId: string | null;
27
+ next_id: string | null;
24
28
  /** Character offset where this chunk starts in the source text */
25
- startOffset: number;
29
+ start: number;
26
30
  /** Character offset where this chunk ends in the source text */
27
- endOffset: number;
31
+ end: number;
28
32
  /** Optional identifier for the source document */
29
- sourceId?: string;
33
+ source_id?: string;
30
34
  /** Optional path to the source file */
31
- sourcePath?: string;
35
+ source_path?: string;
32
36
  /** Number of characters in the chunk content */
33
- charCount: number;
37
+ char_count: number;
34
38
  /** Estimated number of tokens (when available) */
35
- tokenCount?: number;
39
+ token_count?: number;
36
40
  /** SHA-256 hash of the content for deduplication */
37
41
  hash: string;
38
42
  /** Section title if detected (e.g., markdown headers) */
39
- sectionTitle?: string;
43
+ section?: string;
44
+ /** Page number in the source document (e.g., PDF page) */
45
+ page?: number;
40
46
  [key: string]: unknown;
41
47
  }
42
48
  /**
package/dist/claude.d.ts CHANGED
@@ -1,9 +1,4 @@
1
- export * from "./agents/BaseAgent";
1
+ export * from "./core";
2
2
  export * from "./agents/anthropic/ClaudeAgent";
3
- export * from "./agents/model-types";
4
3
  export { anthropicTransformer } from "./history/transformers";
5
- export * from "./history/History";
6
- export * from "./history/types";
7
- export * from "./tools/Tool";
8
- export * from "./graph/AgentGraph";
9
4
  //# sourceMappingURL=claude.d.ts.map
package/dist/claude.js CHANGED
@@ -16,14 +16,8 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
17
  exports.anthropicTransformer = void 0;
18
18
  // Claude Agent Entry Point
19
- __exportStar(require("./agents/BaseAgent"), exports);
19
+ __exportStar(require("./core"), exports);
20
20
  __exportStar(require("./agents/anthropic/ClaudeAgent"), exports);
21
- __exportStar(require("./agents/model-types"), exports);
22
21
  var transformers_1 = require("./history/transformers");
23
22
  Object.defineProperty(exports, "anthropicTransformer", { enumerable: true, get: function () { return transformers_1.anthropicTransformer; } });
24
- // Re-export core functionality
25
- __exportStar(require("./history/History"), exports);
26
- __exportStar(require("./history/types"), exports);
27
- __exportStar(require("./tools/Tool"), exports);
28
- __exportStar(require("./graph/AgentGraph"), exports);
29
23
  //# sourceMappingURL=claude.js.map
package/dist/core.d.ts CHANGED
@@ -9,6 +9,7 @@ export * from "./graph/AgentGraph";
9
9
  export * from "./tools/Tool";
10
10
  export * from "./mcp";
11
11
  export * from "./viz";
12
+ export * from "./embeddings";
12
13
  export * from "./vectorstore";
13
14
  export * from "./chunkers";
14
15
  export * from "./ingestion";
package/dist/core.js CHANGED
@@ -33,6 +33,8 @@ __exportStar(require("./tools/Tool"), exports);
33
33
  __exportStar(require("./mcp"), exports);
34
34
  // Visualization
35
35
  __exportStar(require("./viz"), exports);
36
+ // Embeddings
37
+ __exportStar(require("./embeddings"), exports);
36
38
  // Vector Store
37
39
  __exportStar(require("./vectorstore"), exports);
38
40
  // Chunkers
package/dist/gemini.d.ts CHANGED
@@ -1,9 +1,4 @@
1
- export * from "./agents/BaseAgent";
1
+ export * from "./core";
2
2
  export { GeminiAgent } from "./agents/google/GeminiAgent";
3
- export * from "./agents/model-types";
4
3
  export { geminiTransformer } from "./history/transformers";
5
- export * from "./history/History";
6
- export * from "./history/types";
7
- export * from "./tools/Tool";
8
- export * from "./graph/AgentGraph";
9
4
  //# sourceMappingURL=gemini.d.ts.map
package/dist/gemini.js CHANGED
@@ -16,15 +16,9 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
17
  exports.geminiTransformer = exports.GeminiAgent = void 0;
18
18
  // Gemini Agent Entry Point
19
- __exportStar(require("./agents/BaseAgent"), exports);
19
+ __exportStar(require("./core"), exports);
20
20
  var GeminiAgent_1 = require("./agents/google/GeminiAgent");
21
21
  Object.defineProperty(exports, "GeminiAgent", { enumerable: true, get: function () { return GeminiAgent_1.GeminiAgent; } });
22
- __exportStar(require("./agents/model-types"), exports);
23
22
  var transformers_1 = require("./history/transformers");
24
23
  Object.defineProperty(exports, "geminiTransformer", { enumerable: true, get: function () { return transformers_1.geminiTransformer; } });
25
- // Re-export core functionality
26
- __exportStar(require("./history/History"), exports);
27
- __exportStar(require("./history/types"), exports);
28
- __exportStar(require("./tools/Tool"), exports);
29
- __exportStar(require("./graph/AgentGraph"), exports);
30
24
  //# sourceMappingURL=gemini.js.map
package/dist/index.d.ts CHANGED
@@ -4,6 +4,9 @@ export { OpenAiAgent } from "./agents/openai/OpenAiAgent";
4
4
  export { MistralAgent } from "./agents/mistral/MistralAgent";
5
5
  export { GeminiAgent } from "./agents/google/GeminiAgent";
6
6
  export * from "./agents/model-types";
7
+ export * from "./agents/AgentConfig";
8
+ export * from "./agents/AgentEvent";
9
+ export * from "./agents/errors/AgentError";
7
10
  export * from "./history/History";
8
11
  export * from "./history/types";
9
12
  export { anthropicTransformer, openAiTransformer, mistralTransformer, geminiTransformer, } from "./history/transformers";
package/dist/index.js CHANGED
@@ -33,6 +33,9 @@ Object.defineProperty(exports, "MistralAgent", { enumerable: true, get: function
33
33
  var GeminiAgent_1 = require("./agents/google/GeminiAgent");
34
34
  Object.defineProperty(exports, "GeminiAgent", { enumerable: true, get: function () { return GeminiAgent_1.GeminiAgent; } });
35
35
  __exportStar(require("./agents/model-types"), exports);
36
+ __exportStar(require("./agents/AgentConfig"), exports);
37
+ __exportStar(require("./agents/AgentEvent"), exports);
38
+ __exportStar(require("./agents/errors/AgentError"), exports);
36
39
  // History
37
40
  __exportStar(require("./history/History"), exports);
38
41
  __exportStar(require("./history/types"), exports);
package/dist/mistral.d.ts CHANGED
@@ -1,9 +1,4 @@
1
- export * from "./agents/BaseAgent";
1
+ export * from "./core";
2
2
  export { MistralAgent } from "./agents/mistral/MistralAgent";
3
- export * from "./agents/model-types";
4
3
  export { mistralTransformer } from "./history/transformers";
5
- export * from "./history/History";
6
- export * from "./history/types";
7
- export * from "./tools/Tool";
8
- export * from "./graph/AgentGraph";
9
4
  //# sourceMappingURL=mistral.d.ts.map
package/dist/mistral.js CHANGED
@@ -16,15 +16,9 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
17
  exports.mistralTransformer = exports.MistralAgent = void 0;
18
18
  // Mistral Agent Entry Point
19
- __exportStar(require("./agents/BaseAgent"), exports);
19
+ __exportStar(require("./core"), exports);
20
20
  var MistralAgent_1 = require("./agents/mistral/MistralAgent");
21
21
  Object.defineProperty(exports, "MistralAgent", { enumerable: true, get: function () { return MistralAgent_1.MistralAgent; } });
22
- __exportStar(require("./agents/model-types"), exports);
23
22
  var transformers_1 = require("./history/transformers");
24
23
  Object.defineProperty(exports, "mistralTransformer", { enumerable: true, get: function () { return transformers_1.mistralTransformer; } });
25
- // Re-export core functionality
26
- __exportStar(require("./history/History"), exports);
27
- __exportStar(require("./history/types"), exports);
28
- __exportStar(require("./tools/Tool"), exports);
29
- __exportStar(require("./graph/AgentGraph"), exports);
30
24
  //# sourceMappingURL=mistral.js.map
package/dist/openai.d.ts CHANGED
@@ -1,9 +1,4 @@
1
- export * from "./agents/BaseAgent";
1
+ export * from "./core";
2
2
  export { OpenAiAgent } from "./agents/openai/OpenAiAgent";
3
- export * from "./agents/model-types";
4
3
  export { openAiTransformer } from "./history/transformers";
5
- export * from "./history/History";
6
- export * from "./history/types";
7
- export * from "./tools/Tool";
8
- export * from "./graph/AgentGraph";
9
4
  //# sourceMappingURL=openai.d.ts.map
package/dist/openai.js CHANGED
@@ -16,15 +16,9 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
17
  exports.openAiTransformer = exports.OpenAiAgent = void 0;
18
18
  // OpenAI Agent Entry Point
19
- __exportStar(require("./agents/BaseAgent"), exports);
19
+ __exportStar(require("./core"), exports);
20
20
  var OpenAiAgent_1 = require("./agents/openai/OpenAiAgent");
21
21
  Object.defineProperty(exports, "OpenAiAgent", { enumerable: true, get: function () { return OpenAiAgent_1.OpenAiAgent; } });
22
- __exportStar(require("./agents/model-types"), exports);
23
22
  var transformers_1 = require("./history/transformers");
24
23
  Object.defineProperty(exports, "openAiTransformer", { enumerable: true, get: function () { return transformers_1.openAiTransformer; } });
25
- // Re-export core functionality
26
- __exportStar(require("./history/History"), exports);
27
- __exportStar(require("./history/types"), exports);
28
- __exportStar(require("./tools/Tool"), exports);
29
- __exportStar(require("./graph/AgentGraph"), exports);
30
24
  //# sourceMappingURL=openai.js.map
@@ -18,7 +18,7 @@ export type MetadataFieldType = "string" | "number" | "boolean";
18
18
  * Definition for a metadata field that will be stored as a separate column.
19
19
  */
20
20
  export interface MetadataFieldDefinition {
21
- /** Name of the metadata field */
21
+ /** Name of the metadata field. Use snake_case (e.g. `tenant_id`) to avoid SQL filter issues. */
22
22
  name: string;
23
23
  /** Data type for the field */
24
24
  type: MetadataFieldType;
@@ -42,73 +42,84 @@ export interface LanceDBVectorStoreConfig {
42
42
  /** Additional connection options */
43
43
  connectionOptions?: Partial<ConnectionOptions>;
44
44
  /**
45
- * Metadata field definitions for filterable columns.
46
- * When specified, metadata fields are stored as separate columns enabling efficient filtering.
47
- * If not specified, metadata is stored as a JSON string (legacy behavior).
45
+ * User-defined metadata field definitions.
46
+ *
47
+ * When provided, these fields are stored as typed Arrow columns and are
48
+ * filterable via SQL predicates in `search()`. The table is created on
49
+ * first insert using an explicit Arrow schema built from these definitions.
50
+ *
51
+ * **Important:** Use `snake_case` for field names (e.g. `tenant_id`, not
52
+ * `tenantId`). LanceDB uses DataFusion for SQL filtering, which normalizes
53
+ * unquoted identifiers to lowercase. Mixed-case names like `tenantId` will
54
+ * fail to match the column `tenantId` because the filter resolves to
55
+ * `tenantid`.
56
+ *
57
+ * Chunk metadata fields (index, hash, prev_id, etc.) are handled
58
+ * automatically via a `chunk_metadata` struct column — they do not need
59
+ * to be listed here.
60
+ *
61
+ * When omitted, the store connects to a **pre-existing** table (created
62
+ * independently, e.g. via the LanceDB CLI or another tool). In that case
63
+ * the schema is not managed by this class and all non-system columns are
64
+ * returned as metadata on read.
48
65
  */
49
66
  metadataFields?: MetadataFieldDefinition[];
50
67
  }
51
68
  /**
52
69
  * LanceDB implementation of the VectorStore interface.
53
70
  *
54
- * @example Basic usage with JSON metadata (legacy)
55
- * ```typescript
56
- * import { LanceDBVectorStore, OpenAIEmbeddings } from "@agentionai/agents";
71
+ * Supports two modes of operation:
57
72
  *
58
- * // Create with OpenAI embeddings
59
- * const embeddings = new OpenAIEmbeddings({
60
- * model: "text-embedding-3-small",
61
- * });
62
- *
63
- * const store = await LanceDBVectorStore.create({
64
- * name: "knowledge_base",
65
- * uri: "./my-database",
66
- * tableName: "documents",
67
- * embeddings,
68
- * });
73
+ * **Managed mode** (`metadataFields` provided): The store creates the LanceDB
74
+ * table on first insert using an explicit Arrow schema derived from
75
+ * `metadataFields`. User-defined fields are stored as typed top-level columns.
76
+ * Chunk metadata (from chunkers) is automatically packed into a `chunk_metadata`
77
+ * struct column.
69
78
  *
70
- * // Add documents (embeddings generated automatically)
71
- * await store.addDocuments([
72
- * { id: "1", content: "LanceDB is a vector database" },
73
- * { id: "2", content: "Vector search enables semantic queries" },
74
- * ]);
79
+ * **Pre-existing table mode** (`metadataFields` omitted): The store connects
80
+ * to a table that was created independently (e.g. via LanceDB CLI or another
81
+ * tool). No schema management is performed; all non-system columns are returned
82
+ * as metadata on read.
75
83
  *
76
- * // Search
77
- * const results = await store.search("What is LanceDB?", { limit: 5 });
84
+ * @example Managed mode — user-defined metadata fields
85
+ * ```typescript
86
+ * import { LanceDBVectorStore } from "@agentionai/agents";
87
+ * import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
78
88
  *
79
- * // Create a tool for agents
80
- * const searchTool = store.toRetrievalTool("Search the knowledge base");
81
- * ```
89
+ * const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
82
90
  *
83
- * @example With filterable metadata fields
84
- * ```typescript
85
91
  * const store = await LanceDBVectorStore.create({
86
92
  * name: "knowledge_base",
87
93
  * uri: "./my-database",
88
- * tableName: "documents",
94
+ * tableName: "chunks",
89
95
  * embeddings,
90
96
  * metadataFields: [
91
- * { name: "category", type: "string" },
92
- * { name: "source", type: "string" },
93
- * { name: "year", type: "number" },
94
- * { name: "verified", type: "boolean" },
95
- * { name: "hash", type: "string" }, // Enables efficient deduplication
97
+ * { name: "author", type: "string", nullable: true },
98
+ * { name: "category", type: "string", nullable: true },
96
99
  * ],
97
100
  * });
98
101
  *
99
- * // Add documents with metadata
102
+ * // Chunk metadata (index, hash, prev_id, etc.) is stored automatically
103
+ * // in a chunk_metadata struct column — no need to declare it.
100
104
  * await store.addDocuments([
101
- * {
102
- * id: "1",
103
- * content: "LanceDB is a vector database",
104
- * metadata: { category: "database", source: "docs", year: 2024, verified: true },
105
- * },
105
+ * { id: "1", content: "LanceDB is a vector database", metadata: { category: "db" } },
106
106
  * ]);
107
107
  *
108
- * // Search with filters on metadata columns
108
+ * // Search with filters on user metadata columns
109
109
  * const results = await store.search("vector database", {
110
110
  * limit: 5,
111
- * filter: { category: "database", year: 2024 },
111
+ * filter: { category: "db" },
112
+ * });
113
+ * ```
114
+ *
115
+ * @example Pre-existing table mode — connect to externally managed table
116
+ * ```typescript
117
+ * const store = await LanceDBVectorStore.create({
118
+ * name: "my_store",
119
+ * uri: "./my-database",
120
+ * tableName: "existing_table", // table already exists with its own schema
121
+ * embeddings,
122
+ * // metadataFields omitted — schema is not managed by this class
112
123
  * });
113
124
  * ```
114
125
  */
@@ -124,14 +135,25 @@ export declare class LanceDBVectorStore extends VectorStore {
124
135
  /**
125
136
  * Create a new LanceDBVectorStore instance.
126
137
  *
127
- * This is an async factory method since LanceDB connection is asynchronous.
138
+ * - If the table already exists it is opened immediately.
139
+ * - If `metadataFields` is provided and the table does not exist yet, it
140
+ * will be created on the first insert with an explicit Arrow schema.
141
+ * - If `metadataFields` is **not** provided and the table does not exist,
142
+ * an error is thrown — the store cannot manage an unknown schema.
128
143
  *
129
144
  * @param config - Configuration for the store
130
145
  * @returns A configured LanceDBVectorStore instance
131
146
  *
132
147
  * @throws Error if @lancedb/lancedb is not installed
148
+ * @throws Error if the table does not exist and no metadataFields are provided
133
149
  */
134
150
  static create(config: LanceDBVectorStoreConfig): Promise<LanceDBVectorStore>;
151
+ /**
152
+ * Create the table with an explicit Arrow schema derived from `metadataFields`
153
+ * plus a `chunk_metadata` struct column.
154
+ * Called on the first insert when operating in managed mode.
155
+ */
156
+ private createManagedTable;
135
157
  /**
136
158
  * Add documents to the vector store.
137
159
  * If an embeddings provider is configured, embeddings are generated automatically.
@@ -139,8 +161,29 @@ export declare class LanceDBVectorStore extends VectorStore {
139
161
  addDocuments(documents: Document[], _options?: AddDocumentsOptions): Promise<string[]>;
140
162
  /**
141
163
  * Add documents with pre-computed embeddings.
164
+ *
165
+ * In managed mode, chunk metadata fields are packed into a `chunk_metadata`
166
+ * struct and user-defined fields are projected to their declared columns.
167
+ * The table is created on the first call; subsequent calls append directly.
168
+ *
169
+ * In pre-existing table mode all metadata is spread flat as-is.
142
170
  */
143
171
  addEmbeddedDocuments(documents: EmbeddedDocument[], _options?: AddDocumentsOptions): Promise<string[]>;
172
+ /**
173
+ * Pack chunk metadata fields from flat metadata into a struct object.
174
+ * Returns a plain object for the `chunk_metadata` column, or null if
175
+ * no chunk metadata fields are present.
176
+ */
177
+ private packChunkMetadata;
178
+ /**
179
+ * Unpack a chunk_metadata struct value back to flat metadata keys.
180
+ */
181
+ private unpackChunkMetadata;
182
+ /**
183
+ * Project a record to only the columns declared in the schema
184
+ * (id, text, vector, chunk_metadata, plus all metadataFields).
185
+ */
186
+ private projectToSchema;
144
187
  /**
145
188
  * Search for documents similar to the query.
146
189
  */
@@ -165,8 +208,9 @@ export declare class LanceDBVectorStore extends VectorStore {
165
208
  * Get existing documents by their content hashes.
166
209
  * Used for deduplication during ingestion.
167
210
  *
168
- * Note: If using metadataFields, include a "hash" field of type "string"
169
- * for efficient hash lookups. Otherwise, falls back to LIKE queries on JSON metadata.
211
+ * Requires that documents were stored with chunk metadata containing
212
+ * a `hash` field (automatically present when using chunkers from this library).
213
+ * Queries the `chunk_metadata.hash` struct sub-field.
170
214
  */
171
215
  getByHashes(hashes: string[], _options?: DeleteOptions): Promise<Map<string, string>>;
172
216
  /**
@@ -174,9 +218,9 @@ export declare class LanceDBVectorStore extends VectorStore {
174
218
  */
175
219
  getConnection(): Connection;
176
220
  /**
177
- * Get the underlying LanceDB table.
221
+ * Get the underlying LanceDB table, or null if no data has been inserted yet.
178
222
  */
179
- getTable(): Table;
223
+ getTable(): Table | null;
180
224
  /**
181
225
  * Get the configured embeddings provider.
182
226
  */
@@ -194,6 +238,10 @@ export declare class LanceDBVectorStore extends VectorStore {
194
238
  * Optimize the table for better performance.
195
239
  */
196
240
  optimize(): Promise<void>;
241
+ /**
242
+ * Get the configured metadata fields.
243
+ */
244
+ getMetadataFields(): MetadataFieldDefinition[] | undefined;
197
245
  /**
198
246
  * Build a SQL filter string from a filter object.
199
247
  */
@@ -203,12 +251,12 @@ export declare class LanceDBVectorStore extends VectorStore {
203
251
  */
204
252
  private processResults;
205
253
  /**
206
- * Extract metadata from a row based on metadataFields configuration.
254
+ * Extract metadata from a row.
255
+ *
256
+ * In managed mode: returns user-defined fields plus unpacked chunk_metadata.
257
+ * In pre-existing table mode: returns all non-system columns, with
258
+ * chunk_metadata unpacked if present.
207
259
  */
208
260
  private extractMetadata;
209
- /**
210
- * Get the configured metadata fields.
211
- */
212
- getMetadataFields(): MetadataFieldDefinition[] | undefined;
213
261
  }
214
262
  //# sourceMappingURL=LanceDBVectorStore.d.ts.map
@@ -44,67 +44,71 @@ var __importStar = (this && this.__importStar) || (function () {
44
44
  Object.defineProperty(exports, "__esModule", { value: true });
45
45
  exports.LanceDBVectorStore = void 0;
46
46
  const VectorStore_1 = require("./VectorStore");
47
+ /**
48
+ * All known ChunkMetadata field names.
49
+ * Used to separate chunk metadata from user metadata when packing/unpacking.
50
+ */
51
+ const CHUNK_METADATA_KEYS = [
52
+ "index", "total", "prev_id", "next_id",
53
+ "start", "end", "source_id", "source_path",
54
+ "char_count", "token_count", "hash", "section", "page",
55
+ ];
56
+ const CHUNK_METADATA_KEY_SET = new Set(CHUNK_METADATA_KEYS);
47
57
  /**
48
58
  * LanceDB implementation of the VectorStore interface.
49
59
  *
50
- * @example Basic usage with JSON metadata (legacy)
51
- * ```typescript
52
- * import { LanceDBVectorStore, OpenAIEmbeddings } from "@agentionai/agents";
60
+ * Supports two modes of operation:
53
61
  *
54
- * // Create with OpenAI embeddings
55
- * const embeddings = new OpenAIEmbeddings({
56
- * model: "text-embedding-3-small",
57
- * });
62
+ * **Managed mode** (`metadataFields` provided): The store creates the LanceDB
63
+ * table on first insert using an explicit Arrow schema derived from
64
+ * `metadataFields`. User-defined fields are stored as typed top-level columns.
65
+ * Chunk metadata (from chunkers) is automatically packed into a `chunk_metadata`
66
+ * struct column.
58
67
  *
59
- * const store = await LanceDBVectorStore.create({
60
- * name: "knowledge_base",
61
- * uri: "./my-database",
62
- * tableName: "documents",
63
- * embeddings,
64
- * });
68
+ * **Pre-existing table mode** (`metadataFields` omitted): The store connects
69
+ * to a table that was created independently (e.g. via LanceDB CLI or another
70
+ * tool). No schema management is performed; all non-system columns are returned
71
+ * as metadata on read.
65
72
  *
66
- * // Add documents (embeddings generated automatically)
67
- * await store.addDocuments([
68
- * { id: "1", content: "LanceDB is a vector database" },
69
- * { id: "2", content: "Vector search enables semantic queries" },
70
- * ]);
71
- *
72
- * // Search
73
- * const results = await store.search("What is LanceDB?", { limit: 5 });
73
+ * @example Managed mode — user-defined metadata fields
74
+ * ```typescript
75
+ * import { LanceDBVectorStore } from "@agentionai/agents";
76
+ * import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
74
77
  *
75
- * // Create a tool for agents
76
- * const searchTool = store.toRetrievalTool("Search the knowledge base");
77
- * ```
78
+ * const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
78
79
  *
79
- * @example With filterable metadata fields
80
- * ```typescript
81
80
  * const store = await LanceDBVectorStore.create({
82
81
  * name: "knowledge_base",
83
82
  * uri: "./my-database",
84
- * tableName: "documents",
83
+ * tableName: "chunks",
85
84
  * embeddings,
86
85
  * metadataFields: [
87
- * { name: "category", type: "string" },
88
- * { name: "source", type: "string" },
89
- * { name: "year", type: "number" },
90
- * { name: "verified", type: "boolean" },
91
- * { name: "hash", type: "string" }, // Enables efficient deduplication
86
+ * { name: "author", type: "string", nullable: true },
87
+ * { name: "category", type: "string", nullable: true },
92
88
  * ],
93
89
  * });
94
90
  *
95
- * // Add documents with metadata
91
+ * // Chunk metadata (index, hash, prev_id, etc.) is stored automatically
92
+ * // in a chunk_metadata struct column — no need to declare it.
96
93
  * await store.addDocuments([
97
- * {
98
- * id: "1",
99
- * content: "LanceDB is a vector database",
100
- * metadata: { category: "database", source: "docs", year: 2024, verified: true },
101
- * },
94
+ * { id: "1", content: "LanceDB is a vector database", metadata: { category: "db" } },
102
95
  * ]);
103
96
  *
104
- * // Search with filters on metadata columns
97
+ * // Search with filters on user metadata columns
105
98
  * const results = await store.search("vector database", {
106
99
  * limit: 5,
107
- * filter: { category: "database", year: 2024 },
100
+ * filter: { category: "db" },
101
+ * });
102
+ * ```
103
+ *
104
+ * @example Pre-existing table mode — connect to externally managed table
105
+ * ```typescript
106
+ * const store = await LanceDBVectorStore.create({
107
+ * name: "my_store",
108
+ * uri: "./my-database",
109
+ * tableName: "existing_table", // table already exists with its own schema
110
+ * embeddings,
111
+ * // metadataFields omitted — schema is not managed by this class
108
112
  * });
109
113
  * ```
110
114
  */
@@ -123,15 +127,19 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
123
127
  /**
124
128
  * Create a new LanceDBVectorStore instance.
125
129
  *
126
- * This is an async factory method since LanceDB connection is asynchronous.
130
+ * - If the table already exists it is opened immediately.
131
+ * - If `metadataFields` is provided and the table does not exist yet, it
132
+ * will be created on the first insert with an explicit Arrow schema.
133
+ * - If `metadataFields` is **not** provided and the table does not exist,
134
+ * an error is thrown — the store cannot manage an unknown schema.
127
135
  *
128
136
  * @param config - Configuration for the store
129
137
  * @returns A configured LanceDBVectorStore instance
130
138
  *
131
139
  * @throws Error if @lancedb/lancedb is not installed
140
+ * @throws Error if the table does not exist and no metadataFields are provided
132
141
  */
133
142
  static async create(config) {
134
- // Dynamic import to make lancedb an optional dependency
135
143
  let lancedb;
136
144
  try {
137
145
  lancedb = await Promise.resolve().then(() => __importStar(require("@lancedb/lancedb")));
@@ -141,55 +149,80 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
141
149
  }
142
150
  const connection = await lancedb.connect(config.uri, config.connectionOptions);
143
151
  const tableNames = await connection.tableNames();
144
- let table;
145
- const dimensions = config.dimensions ?? config.embeddings?.dimensions ?? 1536;
152
+ let table = null;
146
153
  if (tableNames.includes(config.tableName)) {
147
154
  table = await connection.openTable(config.tableName);
148
155
  }
149
- else {
150
- // Create table with schema
151
- let arrow;
152
- try {
153
- arrow = await Promise.resolve().then(() => __importStar(require("apache-arrow")));
156
+ else if (!config.metadataFields) {
157
+ throw new Error(`Table "${config.tableName}" does not exist and no metadataFields were provided. ` +
158
+ `Either create the table independently or provide metadataFields so the store can create it on first insert.`);
159
+ }
160
+ // Table doesn't exist but metadataFields provided → will be created on first insert.
161
+ return new LanceDBVectorStore(config, connection, table);
162
+ }
163
+ /**
164
+ * Create the table with an explicit Arrow schema derived from `metadataFields`
165
+ * plus a `chunk_metadata` struct column.
166
+ * Called on the first insert when operating in managed mode.
167
+ */
168
+ async createManagedTable(records) {
169
+ let arrow;
170
+ try {
171
+ arrow = await Promise.resolve().then(() => __importStar(require("apache-arrow")));
172
+ }
173
+ catch {
174
+ throw new Error("apache-arrow is not installed. Install it with: npm install apache-arrow");
175
+ }
176
+ const schemaFields = [
177
+ new arrow.Field("id", new arrow.Utf8(), false),
178
+ new arrow.Field("text", new arrow.Utf8(), false),
179
+ new arrow.Field("vector", new arrow.FixedSizeList(this.dimensions, new arrow.Field("item", new arrow.Float32(), true)), false),
180
+ ];
181
+ // Warn about non-snake_case field names (DataFusion normalizes SQL identifiers to lowercase)
182
+ for (const fieldDef of this.metadataFields) {
183
+ if (fieldDef.name !== fieldDef.name.toLowerCase()) {
184
+ console.warn(`[LanceDBVectorStore] Warning: metadata field "${fieldDef.name}" contains uppercase characters. ` +
185
+ `LanceDB uses DataFusion for SQL filtering, which normalizes unquoted identifiers to lowercase. ` +
186
+ `Use snake_case names (e.g. "${fieldDef.name.replace(/[A-Z]/g, (c) => "_" + c.toLowerCase()).replace(/^_/, "")}") to avoid filter issues.`);
154
187
  }
155
- catch {
156
- throw new Error("apache-arrow is not installed. Install it with: npm install apache-arrow");
188
+ }
189
+ // User-defined metadata columns
190
+ for (const fieldDef of this.metadataFields) {
191
+ const nullable = fieldDef.nullable !== false; // default true
192
+ let arrowType;
193
+ if (fieldDef.type === "number") {
194
+ arrowType = new arrow.Float64();
157
195
  }
158
- // Build schema fields - use explicit type to allow different Field types
159
- const schemaFields = [
160
- new arrow.Field("id", new arrow.Utf8(), false),
161
- new arrow.Field("text", new arrow.Utf8(), false),
162
- new arrow.Field("vector", new arrow.FixedSizeList(dimensions, new arrow.Field("item", new arrow.Float32(), true)), false),
163
- ];
164
- // Add metadata fields - either as separate columns or as a JSON string
165
- if (config.metadataFields && config.metadataFields.length > 0) {
166
- for (const field of config.metadataFields) {
167
- const nullable = field.nullable !== false;
168
- let arrowType;
169
- switch (field.type) {
170
- case "string":
171
- arrowType = new arrow.Utf8();
172
- break;
173
- case "number":
174
- arrowType = new arrow.Float64();
175
- break;
176
- case "boolean":
177
- arrowType = new arrow.Bool();
178
- break;
179
- default:
180
- throw new Error(`Unsupported metadata field type: ${field.type}`);
181
- }
182
- schemaFields.push(new arrow.Field(field.name, arrowType, nullable));
183
- }
196
+ else if (fieldDef.type === "boolean") {
197
+ arrowType = new arrow.Bool();
184
198
  }
185
199
  else {
186
- // Legacy: store metadata as JSON string
187
- schemaFields.push(new arrow.Field("metadata", new arrow.Utf8(), true));
200
+ arrowType = new arrow.Utf8();
188
201
  }
189
- const schema = new arrow.Schema(schemaFields);
190
- table = await connection.createEmptyTable(config.tableName, schema);
202
+ schemaFields.push(new arrow.Field(fieldDef.name, arrowType, nullable));
191
203
  }
192
- return new LanceDBVectorStore(config, connection, table);
204
+ // Chunk metadata struct column (always included, nullable for non-chunk docs)
205
+ schemaFields.push(new arrow.Field("chunk_metadata", new arrow.Struct([
206
+ new arrow.Field("index", new arrow.Float64(), true),
207
+ new arrow.Field("total", new arrow.Float64(), true),
208
+ new arrow.Field("prev_id", new arrow.Utf8(), true),
209
+ new arrow.Field("next_id", new arrow.Utf8(), true),
210
+ new arrow.Field("start", new arrow.Float64(), true),
211
+ new arrow.Field("end", new arrow.Float64(), true),
212
+ new arrow.Field("source_id", new arrow.Utf8(), true),
213
+ new arrow.Field("source_path", new arrow.Utf8(), true),
214
+ new arrow.Field("char_count", new arrow.Float64(), true),
215
+ new arrow.Field("token_count", new arrow.Float64(), true),
216
+ new arrow.Field("hash", new arrow.Utf8(), true),
217
+ new arrow.Field("section", new arrow.Utf8(), true),
218
+ new arrow.Field("page", new arrow.Float64(), true),
219
+ ]), true // nullable — non-chunk documents get null
220
+ ));
221
+ const schema = new arrow.Schema(schemaFields);
222
+ this.table = await this.connection.createTable(this.tableName, records, {
223
+ schema,
224
+ });
225
+ return this.table;
193
226
  }
194
227
  /**
195
228
  * Add documents to the vector store.
@@ -199,10 +232,8 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
199
232
  if (!this.embeddings) {
200
233
  throw new Error("No embeddings provider configured. Use addEmbeddedDocuments() with pre-computed embeddings, or configure an embeddings provider.");
201
234
  }
202
- // Generate embeddings for all documents
203
235
  const texts = documents.map((doc) => doc.content);
204
236
  const vectors = await this.embeddings.embed(texts);
205
- // Convert to embedded documents
206
237
  const embeddedDocs = documents.map((doc, i) => ({
207
238
  ...doc,
208
239
  embedding: vectors[i],
@@ -211,6 +242,12 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
211
242
  }
212
243
  /**
213
244
  * Add documents with pre-computed embeddings.
245
+ *
246
+ * In managed mode, chunk metadata fields are packed into a `chunk_metadata`
247
+ * struct and user-defined fields are projected to their declared columns.
248
+ * The table is created on the first call; subsequent calls append directly.
249
+ *
250
+ * In pre-existing table mode all metadata is spread flat as-is.
214
251
  */
215
252
  async addEmbeddedDocuments(documents, _options) {
216
253
  const records = documents.map((doc) => {
@@ -218,23 +255,69 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
218
255
  id: doc.id,
219
256
  text: doc.content,
220
257
  vector: doc.embedding,
258
+ ...doc.metadata,
221
259
  };
222
- if (this.metadataFields && this.metadataFields.length > 0) {
223
- // Store each metadata field as a separate column
224
- for (const field of this.metadataFields) {
225
- const value = doc.metadata?.[field.name];
226
- record[field.name] = value !== undefined ? value : null;
227
- }
228
- }
229
- else {
230
- // Legacy: store metadata as JSON string
231
- record.metadata = doc.metadata ? JSON.stringify(doc.metadata) : undefined;
260
+ // In managed mode, pack chunk metadata into struct and project to schema
261
+ if (this.metadataFields) {
262
+ const packed = this.packChunkMetadata(doc.metadata ?? {});
263
+ record.chunk_metadata = packed;
264
+ return this.projectToSchema(record);
232
265
  }
233
266
  return record;
234
267
  });
235
- await this.table.add(records);
268
+ if (this.table) {
269
+ await this.table.add(records);
270
+ }
271
+ else {
272
+ // Managed mode: metadataFields must be present (enforced in create())
273
+ await this.createManagedTable(records);
274
+ }
236
275
  return documents.map((d) => d.id);
237
276
  }
277
+ /**
278
+ * Pack chunk metadata fields from flat metadata into a struct object.
279
+ * Returns a plain object for the `chunk_metadata` column, or null if
280
+ * no chunk metadata fields are present.
281
+ */
282
+ packChunkMetadata(metadata) {
283
+ const struct = {};
284
+ let found = false;
285
+ for (const key of CHUNK_METADATA_KEYS) {
286
+ if (key in metadata) {
287
+ struct[key] = metadata[key] ?? null;
288
+ found = true;
289
+ }
290
+ }
291
+ return found ? struct : null;
292
+ }
293
+ /**
294
+ * Unpack a chunk_metadata struct value back to flat metadata keys.
295
+ */
296
+ unpackChunkMetadata(struct, target) {
297
+ for (const key of CHUNK_METADATA_KEYS) {
298
+ const value = struct[key];
299
+ if (value !== null && value !== undefined) {
300
+ target[key] = value;
301
+ }
302
+ }
303
+ }
304
+ /**
305
+ * Project a record to only the columns declared in the schema
306
+ * (id, text, vector, chunk_metadata, plus all metadataFields).
307
+ */
308
+ projectToSchema(record) {
309
+ const projected = { id: record.id, text: record.text };
310
+ if (record.vector !== undefined) {
311
+ projected.vector = record.vector;
312
+ }
313
+ // User-defined metadata fields
314
+ for (const f of this.metadataFields) {
315
+ projected[f.name] = record[f.name] ?? null;
316
+ }
317
+ // Chunk metadata struct
318
+ projected.chunk_metadata = record.chunk_metadata ?? null;
319
+ return projected;
320
+ }
238
321
  /**
239
322
  * Search for documents similar to the query.
240
323
  */
@@ -249,6 +332,9 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
249
332
  * Search using a pre-computed embedding vector.
250
333
  */
251
334
  async searchByVector(embedding, options) {
335
+ if (!this.table) {
336
+ return [];
337
+ }
252
338
  const limit = options?.limit ?? 10;
253
339
  const scoreThreshold = options?.scoreThreshold;
254
340
  let queryBuilder = this.table.vectorSearch(embedding).limit(limit);
@@ -265,6 +351,8 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
265
351
  * Delete documents by their IDs.
266
352
  */
267
353
  async delete(ids, _options) {
354
+ if (!this.table)
355
+ return 0;
268
356
  const idList = ids.map((id) => `'${id}'`).join(", ");
269
357
  const filter = `id IN (${idList})`;
270
358
  const countBefore = await this.table.countRows();
@@ -276,12 +364,16 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
276
364
  * Delete all documents.
277
365
  */
278
366
  async clear(_options) {
367
+ if (!this.table)
368
+ return;
279
369
  await this.table.delete("id IS NOT NULL");
280
370
  }
281
371
  /**
282
372
  * Get a document by its ID.
283
373
  */
284
374
  async getById(id, _options) {
375
+ if (!this.table)
376
+ return null;
285
377
  const results = await this.table
286
378
  .query()
287
379
  .where(`id = '${id}'`)
@@ -301,34 +393,21 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
301
393
  * Get existing documents by their content hashes.
302
394
  * Used for deduplication during ingestion.
303
395
  *
304
- * Note: If using metadataFields, include a "hash" field of type "string"
305
- * for efficient hash lookups. Otherwise, falls back to LIKE queries on JSON metadata.
396
+ * Requires that documents were stored with chunk metadata containing
397
+ * a `hash` field (automatically present when using chunkers from this library).
398
+ * Queries the `chunk_metadata.hash` struct sub-field.
306
399
  */
307
400
  async getByHashes(hashes, _options) {
308
401
  const hashMap = new Map();
309
- if (hashes.length === 0) {
402
+ if (hashes.length === 0 || !this.table) {
310
403
  return hashMap;
311
404
  }
312
- // Check if hash is a defined metadata field for efficient queries
313
- const hasHashField = this.metadataFields?.some((field) => field.name === "hash");
314
405
  for (const hash of hashes) {
315
- let results;
316
- if (hasHashField) {
317
- // Efficient direct column query
318
- results = await this.table
319
- .query()
320
- .where(`hash = '${hash}'`)
321
- .limit(1)
322
- .toArray();
323
- }
324
- else {
325
- // Legacy: search for hash string in JSON metadata
326
- results = await this.table
327
- .query()
328
- .where(`metadata LIKE '%${hash}%'`)
329
- .limit(1)
330
- .toArray();
331
- }
406
+ const results = await this.table
407
+ .query()
408
+ .where(`chunk_metadata.hash = '${hash}'`)
409
+ .limit(1)
410
+ .toArray();
332
411
  if (results.length > 0) {
333
412
  const record = results[0];
334
413
  hashMap.set(hash, record.id);
@@ -343,7 +422,7 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
343
422
  return this.connection;
344
423
  }
345
424
  /**
346
- * Get the underlying LanceDB table.
425
+ * Get the underlying LanceDB table, or null if no data has been inserted yet.
347
426
  */
348
427
  getTable() {
349
428
  return this.table;
@@ -365,14 +444,24 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
365
444
  * Recommended for tables with more than 10,000 rows.
366
445
  */
367
446
  async createIndex() {
447
+ if (!this.table)
448
+ throw new Error("Table not yet created — insert data first.");
368
449
  await this.table.createIndex("vector");
369
450
  }
370
451
  /**
371
452
  * Optimize the table for better performance.
372
453
  */
373
454
  async optimize() {
455
+ if (!this.table)
456
+ return;
374
457
  await this.table.optimize();
375
458
  }
459
+ /**
460
+ * Get the configured metadata fields.
461
+ */
462
+ getMetadataFields() {
463
+ return this.metadataFields;
464
+ }
376
465
  /**
377
466
  * Build a SQL filter string from a filter object.
378
467
  */
@@ -397,19 +486,16 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
397
486
  processResults(results, scoreThreshold) {
398
487
  const searchResults = [];
399
488
  for (const row of results) {
400
- // LanceDB returns _distance for vector search
401
489
  const distance = row._distance ?? 0;
402
- // Convert distance to similarity score (lower distance = higher similarity)
403
490
  const score = 1 / (1 + distance);
404
491
  if (scoreThreshold !== undefined && score < scoreThreshold) {
405
492
  continue;
406
493
  }
407
- const metadata = this.extractMetadata(row);
408
494
  searchResults.push({
409
495
  document: {
410
496
  id: row.id,
411
497
  content: row.text,
412
- metadata,
498
+ metadata: this.extractMetadata(row),
413
499
  },
414
500
  score,
415
501
  });
@@ -417,13 +503,18 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
417
503
  return searchResults;
418
504
  }
419
505
  /**
420
- * Extract metadata from a row based on metadataFields configuration.
506
+ * Extract metadata from a row.
507
+ *
508
+ * In managed mode: returns user-defined fields plus unpacked chunk_metadata.
509
+ * In pre-existing table mode: returns all non-system columns, with
510
+ * chunk_metadata unpacked if present.
421
511
  */
422
512
  extractMetadata(row) {
513
+ const SYSTEM_COLS = new Set(["id", "text", "vector", "_distance", "chunk_metadata"]);
514
+ const metadata = {};
515
+ let hasValue = false;
423
516
  if (this.metadataFields && this.metadataFields.length > 0) {
424
- // Reconstruct metadata from separate columns
425
- const metadata = {};
426
- let hasValue = false;
517
+ // Managed mode: collect declared user fields
427
518
  for (const field of this.metadataFields) {
428
519
  const value = row[field.name];
429
520
  if (value !== null && value !== undefined) {
@@ -431,18 +522,23 @@ class LanceDBVectorStore extends VectorStore_1.VectorStore {
431
522
  hasValue = true;
432
523
  }
433
524
  }
434
- return hasValue ? metadata : undefined;
435
525
  }
436
526
  else {
437
- // Legacy: parse metadata from JSON string
438
- return row.metadata ? JSON.parse(row.metadata) : undefined;
527
+ // Pre-existing table mode: return all non-system columns
528
+ for (const [key, value] of Object.entries(row)) {
529
+ if (!SYSTEM_COLS.has(key) && value !== null && value !== undefined) {
530
+ metadata[key] = value;
531
+ hasValue = true;
532
+ }
533
+ }
439
534
  }
440
- }
441
- /**
442
- * Get the configured metadata fields.
443
- */
444
- getMetadataFields() {
445
- return this.metadataFields;
535
+ // Unpack chunk_metadata struct if present
536
+ const chunkStruct = row.chunk_metadata;
537
+ if (chunkStruct && typeof chunkStruct === "object") {
538
+ this.unpackChunkMetadata(chunkStruct, metadata);
539
+ hasValue = true;
540
+ }
541
+ return hasValue ? metadata : undefined;
446
542
  }
447
543
  }
448
544
  exports.LanceDBVectorStore = LanceDBVectorStore;
@@ -235,7 +235,7 @@ export declare abstract class VectorStore {
235
235
  }>;
236
236
  /**
237
237
  * Create a tool that agents can use to retrieve a chunk by its ID.
238
- * Useful for navigating chunk chains using previousChunkId/nextChunkId metadata.
238
+ * Useful for navigating chunk chains using prev_id/next_id metadata.
239
239
  *
240
240
  * @param description - Description of what the tool does (e.g., "Get a specific chunk by ID to read adjacent context")
241
241
  * @param options - Configuration options for the tool
@@ -245,7 +245,7 @@ export declare abstract class VectorStore {
245
245
  * ```typescript
246
246
  * const store = new LanceDBVectorStore({ ... });
247
247
  * const tool = store.toGetChunkByIdTool(
248
- * "Retrieve a specific chunk by ID. Use previousChunkId or nextChunkId from search results to get surrounding context."
248
+ * "Retrieve a specific chunk by ID. Use prev_id or next_id from search results to get surrounding context."
249
249
  * );
250
250
  * agent.addTools([tool]);
251
251
  * ```
@@ -165,7 +165,7 @@ class VectorStore {
165
165
  }
166
166
  /**
167
167
  * Create a tool that agents can use to retrieve a chunk by its ID.
168
- * Useful for navigating chunk chains using previousChunkId/nextChunkId metadata.
168
+ * Useful for navigating chunk chains using prev_id/next_id metadata.
169
169
  *
170
170
  * @param description - Description of what the tool does (e.g., "Get a specific chunk by ID to read adjacent context")
171
171
  * @param options - Configuration options for the tool
@@ -175,7 +175,7 @@ class VectorStore {
175
175
  * ```typescript
176
176
  * const store = new LanceDBVectorStore({ ... });
177
177
  * const tool = store.toGetChunkByIdTool(
178
- * "Retrieve a specific chunk by ID. Use previousChunkId or nextChunkId from search results to get surrounding context."
178
+ * "Retrieve a specific chunk by ID. Use prev_id or next_id from search results to get surrounding context."
179
179
  * );
180
180
  * agent.addTools([tool]);
181
181
  * ```
@@ -187,7 +187,7 @@ class VectorStore {
187
187
  properties: {
188
188
  id: {
189
189
  type: "string",
190
- description: "The chunk ID to retrieve (e.g., from previousChunkId or nextChunkId metadata)",
190
+ description: "The chunk ID to retrieve (e.g., from prev_id or next_id metadata)",
191
191
  },
192
192
  },
193
193
  required: ["id"],
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agentionai/agents",
3
3
  "author": "Laurent Zuijdwijk",
4
- "version": "0.7.0",
4
+ "version": "0.8.1-beta",
5
5
  "description": "Agent Library",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",
@@ -33,6 +33,26 @@
33
33
  "./embeddings": {
34
34
  "types": "./dist/embeddings/index.d.ts",
35
35
  "default": "./dist/embeddings/index.js"
36
+ },
37
+ "./vectorstore": {
38
+ "types": "./dist/vectorstore/index.d.ts",
39
+ "default": "./dist/vectorstore/index.js"
40
+ },
41
+ "./mcp": {
42
+ "types": "./dist/mcp/index.d.ts",
43
+ "default": "./dist/mcp/index.js"
44
+ },
45
+ "./viz": {
46
+ "types": "./dist/viz/index.d.ts",
47
+ "default": "./dist/viz/index.js"
48
+ },
49
+ "./chunkers": {
50
+ "types": "./dist/chunkers/index.d.ts",
51
+ "default": "./dist/chunkers/index.js"
52
+ },
53
+ "./ingestion": {
54
+ "types": "./dist/ingestion/index.d.ts",
55
+ "default": "./dist/ingestion/index.js"
36
56
  }
37
57
  },
38
58
  "files": [
@@ -54,8 +74,8 @@
54
74
  "lint:fix": "eslint 'src/**/*.{js,ts}' --fix",
55
75
  "format": "prettier --write 'src/**/*.{js,ts,json,md}'",
56
76
  "prepare": "npm run build",
57
- "example": "ts-node examples/index.ts",
58
- "example:watch": "nodemon --watch examples --watch src --ext ts --exec 'ts-node' examples/index.ts",
77
+ "example": "tsx",
78
+ "example:watch": "nodemon --watch examples --watch src --ext ts --exec 'ts-node --project tsconfig.esm.json' examples/index.ts",
59
79
  "docs": "npm run docs:api && npm run docs:site",
60
80
  "docs:api": "typedoc",
61
81
  "docs:site": "vitepress build docs",