@agentionai/agents 0.10.2 → 0.12.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunkers/ElementChunker.d.ts +100 -0
- package/dist/chunkers/ElementChunker.js +242 -0
- package/dist/chunkers/index.d.ts +1 -0
- package/dist/chunkers/index.js +3 -1
- package/dist/ingestion/IngestionPipeline.d.ts +73 -1
- package/dist/ingestion/IngestionPipeline.js +110 -1
- package/dist/parsers/DocumentParser.d.ts +36 -0
- package/dist/parsers/DocumentParser.js +35 -0
- package/dist/parsers/LlamaIndexParser.d.ts +58 -0
- package/dist/parsers/LlamaIndexParser.js +71 -0
- package/dist/parsers/OllamaOCRParser.d.ts +98 -0
- package/dist/parsers/OllamaOCRParser.js +203 -0
- package/dist/parsers/UnstructuredAPIParser.d.ts +57 -0
- package/dist/parsers/UnstructuredAPIParser.js +131 -0
- package/dist/parsers/UnstructuredLocalParser.d.ts +42 -0
- package/dist/parsers/UnstructuredLocalParser.js +118 -0
- package/dist/parsers/index.d.ts +3 -0
- package/dist/parsers/index.js +6 -0
- package/dist/parsers/types.d.ts +50 -0
- package/dist/parsers/types.js +3 -0
- package/dist/vectorstore/LanceDBVectorStore.d.ts +1 -16
- package/dist/vectorstore/OpenSearchVectorStore.d.ts +259 -0
- package/dist/vectorstore/OpenSearchVectorStore.js +481 -0
- package/dist/vectorstore/VectorStore.d.ts +25 -0
- package/dist/vectorstore/index.d.ts +3 -2
- package/dist/vectorstore/index.js +3 -1
- package/package.json +50 -2
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* OpenSearch implementation of the VectorStore interface.
|
|
4
|
+
*
|
|
5
|
+
* Uses the OpenSearch k-NN plugin for approximate nearest-neighbour search
|
|
6
|
+
* via HNSW indexing. Supports cosine similarity, L2, and inner product
|
|
7
|
+
* space types.
|
|
8
|
+
*
|
|
9
|
+
* @requires @opensearch-project/opensearch - Install with: npm install @opensearch-project/opensearch
|
|
10
|
+
*/
|
|
11
|
+
// TypeScript-emitted interop helper: exposes property `k` of module object `m`
// on object `o` under the name `k2` (defaults to `k`). An already-defined
// `this.__createBinding` is reused if present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
12
|
+
if (k2 === undefined) k2 = k;
|
|
13
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
14
|
+
// Replace the descriptor with a forwarding getter when it is missing, when it
// is an accessor on a module not flagged `__esModule`, or when it is a data
// property that is writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
15
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
16
|
+
}
|
|
17
|
+
Object.defineProperty(o, k2, desc);
|
|
18
|
+
// Fallback used when `Object.create` is unavailable: plain value copy.
}) : (function(o, m, k, k2) {
|
|
19
|
+
if (k2 === undefined) k2 = k;
|
|
20
|
+
o[k2] = m[k];
|
|
21
|
+
}));
|
|
22
|
+
// TypeScript-emitted interop helper: stores `v` as the `default` property of
// `o` (an enumerable data property when `Object.create` exists, otherwise a
// plain assignment). An already-defined `this.__setModuleDefault` is reused.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
23
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
24
|
+
}) : function(o, v) {
|
|
25
|
+
o["default"] = v;
|
|
26
|
+
});
|
|
27
|
+
// TypeScript-emitted interop helper backing `import * as ns` / dynamic import:
// returns `mod` unchanged when it is flagged `__esModule`; otherwise builds a
// namespace object with a binding for every own property except "default" and
// sets `default` to the module itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
28
|
+
// Self-replacing function: the first call picks the key-enumeration strategy
// (`Object.getOwnPropertyNames` or a `for...in` + hasOwnProperty fallback)
// and overwrites `ownKeys` with it.
var ownKeys = function(o) {
|
|
29
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
30
|
+
var ar = [];
|
|
31
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
32
|
+
return ar;
|
|
33
|
+
};
|
|
34
|
+
return ownKeys(o);
|
|
35
|
+
};
|
|
36
|
+
return function (mod) {
|
|
37
|
+
if (mod && mod.__esModule) return mod;
|
|
38
|
+
var result = {};
|
|
39
|
+
// `mod != null` skips both null and undefined; "default" is set separately below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
40
|
+
__setModuleDefault(result, mod);
|
|
41
|
+
return result;
|
|
42
|
+
};
|
|
43
|
+
})();
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.OpenSearchVectorStore = void 0;
|
|
46
|
+
const VectorStore_1 = require("./VectorStore");
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Score normalisation helpers
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
/**
 * Map a raw OpenSearch `_score` from an approximate k-NN (`knn`) query onto
 * the score scale returned by this store.
 *
 * Per the OpenSearch k-NN documentation, approximate k-NN scores are already
 * bounded for the distance-based space types:
 * - cosinesimil: `(1 + cos(q, d)) / 2` → [0, 1] (older nmslib/faiss releases
 *   report `1 / (2 - cos(q, d))`, which is also within (0, 1]).
 * - l2: `1 / (1 + l2_dist)` → (0, 1].
 * - innerproduct: unbounded; passed through as-is (application-defined
 *   interpretation).
 *
 * The `1 + cos(q, d)` formula (range [0, 2]) only applies to the exact k-NN
 * scoring script, which this store does not use. Halving an approximate k-NN
 * cosine score would compress results into [0, 0.5] and make any
 * `scoreThreshold` above 0.5 unreachable.
 *
 * @param rawScore - The `_score` reported by OpenSearch for a hit.
 * @param spaceType - The k-NN space type the index was created with.
 * @returns The score; clamped to [0, 1] for `cosinesimil` and `l2` to absorb
 *   floating-point drift, unchanged for every other space type.
 */
function normalizeScore(rawScore, spaceType) {
    if (spaceType === "cosinesimil" || spaceType === "l2") {
        return Math.min(1, Math.max(0, rawScore));
    }
    return rawScore;
}
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// OpenSearchVectorStore
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
/**
 * Chunk metadata field names always declared explicitly in the mapping.
 * These are produced by the library's chunkers and used internally for
 * deduplication (hash) and chunk navigation (prev_id, next_id).
 * Every name in this set is mapped as `keyword` under `metadata`.
 */
const CHUNK_KEYWORD_FIELDS = new Set([
    "hash", "prev_id", "next_id", "source_id", "source_path", "section",
]);
/**
 * Chunk metadata field names that are mapped as `integer` under `metadata`.
 */
const CHUNK_NUMERIC_FIELDS = new Set([
    "index", "total", "start", "end", "char_count", "token_count", "page",
]);
/**
 * OpenSearch implementation of the VectorStore interface.
 *
 * Stores documents in an OpenSearch index with a `knn_vector` field and
 * performs approximate nearest-neighbour search using the k-NN plugin (HNSW).
 *
 * **Namespace support**: namespaces are stored as a top-level `namespace`
 * keyword field. `searchByVector()` / `search()` and `clear()` add a term
 * filter on this field when a namespace is supplied.
 *
 * **Metadata**: stored as a `metadata` object field (`type: "object"`) with
 * dynamic mapping. Chunk metadata fields produced by the library's chunkers
 * (e.g. `hash`, `prev_id`, `next_id`) live inside `metadata` and are
 * searchable via `metadata.<field>` queries.
 *
 * @example Basic setup with OpenAI embeddings
 * ```typescript
 * import { OpenSearchVectorStore } from "@agentionai/agents/vectorstore";
 * import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
 *
 * const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
 *
 * // NOTE: the credentials and `rejectUnauthorized: false` below are only
 * // suitable for a local development cluster.
 * const store = await OpenSearchVectorStore.create({
 *   name: "my_store",
 *   node: "https://localhost:9200",
 *   auth: { username: "admin", password: "admin" },
 *   ssl: { rejectUnauthorized: false },
 *   indexName: "knowledge_base",
 *   embeddings,
 * });
 *
 * await store.addDocuments([
 *   { id: "1", content: "OpenSearch is a distributed search engine.", metadata: { source: "docs" } },
 * ]);
 *
 * const results = await store.search("distributed search", { limit: 5 });
 * ```
 *
 * @example Use as an agent retrieval tool
 * ```typescript
 * const searchTool = store.toRetrievalTool("Search product documentation");
 * const agent = new ClaudeAgent({ tools: [searchTool], ... });
 * ```
 */
class OpenSearchVectorStore extends VectorStore_1.VectorStore {
    /**
     * Build a store around an already-constructed OpenSearch client.
     * Does not touch the cluster; use `OpenSearchVectorStore.create()` to also
     * create the index.
     *
     * @param config - Store configuration. Defaults applied here:
     *   `dimensions` → `config.embeddings.dimensions` → 1536,
     *   `spaceType` "cosinesimil", `engine` "lucene", `efSearch` 512,
     *   `efConstruction` 512, `m` 16.
     * @param client - OpenSearch client instance used for every request.
     */
    constructor(config, client) {
        super();
        this.name = config.name;
        this.client = client;
        this.indexName = config.indexName;
        this.embeddings = config.embeddings;
        this.dimensions =
            config.dimensions ?? config.embeddings?.dimensions ?? 1536;
        this.spaceType = config.spaceType ?? "cosinesimil";
        this.engine = config.engine ?? "lucene";
        this.efSearch = config.efSearch ?? 512;
        this.efConstruction = config.efConstruction ?? 512;
        this.m = config.m ?? 16;
        this.metadataFields = config.metadataFields;
        // Build the set of field names that are explicitly mapped as keyword.
        // Used by the filter builder to decide whether to append ".keyword".
        this.keywordFields = new Set(CHUNK_KEYWORD_FIELDS);
        for (const field of config.metadataFields ?? []) {
            if (field.type === "string") {
                this.keywordFields.add(field.name);
            }
        }
    }
    /**
     * Create a new OpenSearchVectorStore instance.
     *
     * Connects to the given OpenSearch node and creates the index (with k-NN
     * mapping) if it does not already exist.
     *
     * @param config - Store configuration
     * @returns A ready-to-use OpenSearchVectorStore instance
     * @throws Error if `@opensearch-project/opensearch` cannot be loaded; the
     *   underlying load failure is attached as `cause`.
     */
    static async create(config) {
        let ClientCtor;
        try {
            // Use a variable so TypeScript does not attempt static module resolution
            // for this optional peer dependency.
            const pkgName = "@opensearch-project/opensearch";
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const mod = (await Promise.resolve(`${pkgName}`).then(s => __importStar(require(s))));
            ClientCtor = mod.Client;
        }
        catch (err) {
            // Keep the original failure reachable: the load can fail for reasons
            // other than the package being absent.
            throw new Error("@opensearch-project/opensearch is not installed. " +
                "Install it with: npm install @opensearch-project/opensearch", { cause: err });
        }
        const clientCfg = { node: config.node };
        if (config.auth)
            clientCfg.auth = config.auth;
        if (config.ssl)
            clientCfg.ssl = config.ssl;
        const client = new ClientCtor(clientCfg);
        const store = new OpenSearchVectorStore(config, client);
        await store.ensureIndex();
        return store;
    }
    // -------------------------------------------------------------------------
    // Index management
    // -------------------------------------------------------------------------
    /**
     * Create the k-NN index if it does not already exist.
     *
     * The `metadata` object always includes explicit mappings for chunk metadata
     * fields (hash, prev_id, etc.) so they work correctly in term queries.
     * Any user-declared `metadataFields` are also mapped with proper types
     * ("number" → double, "boolean" → boolean, anything else → keyword).
     * All other metadata fields fall back to dynamic mapping.
     *
     * An existing index is left untouched; its mapping is not compared against
     * the current configuration.
     */
    async ensureIndex() {
        const { body: exists } = await this.client.indices.exists({
            index: this.indexName,
        });
        if (exists)
            return;
        // Build explicit sub-properties for the metadata object.
        const metadataProperties = {};
        // Chunk metadata fields — always declared with correct types.
        for (const field of CHUNK_KEYWORD_FIELDS) {
            metadataProperties[field] = { type: "keyword" };
        }
        for (const field of CHUNK_NUMERIC_FIELDS) {
            metadataProperties[field] = { type: "integer" };
        }
        // User-declared metadata fields (these win over the chunk defaults on a
        // name clash because they are assigned last).
        for (const field of this.metadataFields ?? []) {
            if (field.type === "number") {
                metadataProperties[field.name] = { type: "double" };
            }
            else if (field.type === "boolean") {
                metadataProperties[field.name] = { type: "boolean" };
            }
            else {
                metadataProperties[field.name] = { type: "keyword" };
            }
        }
        try {
            await this.client.indices.create({
                index: this.indexName,
                body: {
                    settings: {
                        index: {
                            knn: true,
                            "knn.algo_param.ef_search": this.efSearch,
                        },
                    },
                    mappings: {
                        properties: {
                            id: { type: "keyword" },
                            content: { type: "text" },
                            embedding: {
                                type: "knn_vector",
                                dimension: this.dimensions,
                                method: {
                                    name: "hnsw",
                                    space_type: this.spaceType,
                                    engine: this.engine,
                                    parameters: {
                                        ef_construction: this.efConstruction,
                                        m: this.m,
                                    },
                                },
                            },
                            namespace: { type: "keyword" },
                            metadata: {
                                type: "object",
                                dynamic: true, // undeclared fields still work via dynamic mapping
                                properties: metadataProperties,
                            },
                        },
                    },
                },
            });
        }
        catch (err) {
            // Another process may have created the index between the exists()
            // check and this call; that outcome is what we wanted anyway.
            const errorType = err?.body?.error?.type ?? err?.meta?.body?.error?.type;
            if (errorType === "resource_already_exists_exception")
                return;
            throw err;
        }
    }
    // -------------------------------------------------------------------------
    // VectorStore abstract method implementations
    // -------------------------------------------------------------------------
    /**
     * Add documents to the store.
     * Embeddings are generated automatically using the configured provider.
     *
     * @param documents - Documents to embed and index.
     * @param options - Forwarded to `addEmbeddedDocuments()` (e.g. `namespace`).
     * @returns The ids of the added documents (empty array for empty input).
     * @throws Error if no embeddings provider is configured, if the provider
     *   returns a different number of vectors than documents, or if indexing
     *   fails.
     */
    async addDocuments(documents, options) {
        if (!this.embeddings) {
            throw new Error("No embeddings provider configured. " +
                "Use addEmbeddedDocuments() with pre-computed embeddings, " +
                "or pass an embeddings provider in the config.");
        }
        // Nothing to embed: avoid a pointless (and possibly rejected) provider call.
        if (documents.length === 0)
            return [];
        const texts = documents.map((d) => d.content);
        const vectors = await this.embeddings.embed(texts);
        // A short result would otherwise index documents with `embedding: undefined`.
        if (vectors?.length !== documents.length) {
            throw new Error(`Embeddings provider returned ${vectors?.length ?? 0} vector(s) ` +
                `for ${documents.length} document(s).`);
        }
        const embedded = documents.map((doc, i) => ({
            ...doc,
            embedding: vectors[i],
        }));
        return this.addEmbeddedDocuments(embedded, options);
    }
    /**
     * Add documents with pre-computed embeddings.
     * Uses OpenSearch bulk API for efficiency; each document is sent as a bulk
     * `index` action keyed by `doc.id`, and an immediate refresh is requested.
     *
     * @param documents - Documents carrying `id`, `content`, `embedding` and
     *   optional `metadata`.
     * @param options - Optional settings; `namespace` is stored on every document.
     * @returns The ids of the added documents (empty array for empty input).
     * @throws Error if the bulk response reports that any item failed. Items
     *   that succeeded in the same request remain indexed.
     */
    async addEmbeddedDocuments(documents, options) {
        if (documents.length === 0)
            return [];
        const namespace = options?.namespace;
        // Flatten documents into bulk request body
        const body = [];
        for (const doc of documents) {
            body.push({ index: { _index: this.indexName, _id: doc.id } });
            const osDoc = {
                id: doc.id,
                content: doc.content,
                embedding: doc.embedding,
                metadata: doc.metadata,
            };
            if (namespace)
                osDoc.namespace = namespace;
            body.push(osDoc);
        }
        const response = await this.client.bulk({ body, refresh: true });
        // The bulk endpoint answers successfully even when individual items were
        // rejected (mapping conflict, wrong vector dimension, ...); those are only
        // visible through the `errors` flag and the per-item `error` objects.
        const bulkResult = response?.body;
        if (bulkResult?.errors) {
            const failed = (bulkResult.items ?? [])
                .map((item) => item.index)
                .filter((op) => op?.error);
            const first = failed[0];
            const reason = first?.error?.reason ?? first?.error?.type ?? "unknown error";
            throw new Error(`OpenSearch bulk indexing failed for ${failed.length} of ` +
                `${documents.length} document(s). ` +
                `First failure (id "${first?._id}"): ${reason}`);
        }
        return documents.map((d) => d.id);
    }
    /**
     * Search for documents similar to the query text.
     * The query is embedded automatically using the configured embeddings provider.
     *
     * @param query - Query text.
     * @param options - Forwarded to `searchByVector()`.
     * @returns Matching documents with their scores.
     * @throws Error if no embeddings provider is configured.
     */
    async search(query, options) {
        if (!this.embeddings) {
            throw new Error("No embeddings provider configured. " +
                "Use searchByVector() with a pre-computed query embedding, " +
                "or pass an embeddings provider in the config.");
        }
        const queryVector = await this.embeddings.embedQuery(query);
        return this.searchByVector(queryVector, options);
    }
    /**
     * Search using a pre-computed embedding vector.
     * Executes a k-NN query against the OpenSearch index.
     *
     * @param embedding - Query vector.
     * @param options - Optional `limit` (default 10), `scoreThreshold`,
     *   `namespace` and `filter` (exact-match metadata key/value pairs).
     * @returns Hits as `{ document: { id, content, metadata }, score }`, with
     *   hits scoring below `scoreThreshold` dropped.
     */
    async searchByVector(embedding, options) {
        const limit = options?.limit ?? 10;
        const scoreThreshold = options?.scoreThreshold;
        const namespace = options?.namespace;
        const filter = options?.filter;
        // Build bool filters for namespace and metadata
        const filters = [];
        if (namespace) {
            filters.push({ term: { namespace } });
        }
        if (filter) {
            for (const [key, value] of Object.entries(filter)) {
                // Declared keyword fields can be queried directly.
                // Undeclared string values are dynamically mapped as text+keyword;
                // append ".keyword" to target the exact-match sub-field.
                const isUndeclaredString = typeof value === "string" && !this.keywordFields.has(key);
                const fieldPath = isUndeclaredString
                    ? `metadata.${key}.keyword`
                    : `metadata.${key}`;
                filters.push({ term: { [fieldPath]: value } });
            }
        }
        const knnClause = {
            embedding: { vector: embedding, k: limit },
        };
        // NOTE(review): combining `knn` with a bool `filter` looks like
        // post-filtering (filters applied to the k nearest neighbours), so a
        // filtered search may return fewer than `limit` hits — confirm whether
        // the k-NN plugin's in-query `filter` would be preferable here.
        const queryBody = filters.length > 0
            ? {
                bool: {
                    must: [{ knn: knnClause }],
                    filter: filters,
                },
            }
            : { knn: knnClause };
        const response = await this.client.search({
            index: this.indexName,
            body: { size: limit, query: queryBody },
        });
        const hits = response.body.hits?.hits ?? [];
        const results = [];
        for (const hit of hits) {
            const score = normalizeScore(hit._score, this.spaceType);
            if (scoreThreshold !== undefined && score < scoreThreshold)
                continue;
            results.push({
                document: {
                    id: hit._source.id,
                    content: hit._source.content,
                    metadata: hit._source.metadata,
                },
                score,
            });
        }
        return results;
    }
    /**
     * Delete documents by their IDs.
     *
     * @param ids - Document ids to delete.
     * @param _options - Currently unused; deletion is by id only and is not
     *   scoped to a namespace.
     * @returns Number of documents actually deleted.
     */
    async delete(ids, _options) {
        if (ids.length === 0)
            return 0;
        const body = ids.map((id) => ({
            delete: { _index: this.indexName, _id: id },
        }));
        const response = await this.client.bulk({ body, refresh: true });
        // Ids that did not exist come back with a different `result` and are not counted.
        return (response.body.items ?? []).filter((item) => item.delete?.result === "deleted").length;
    }
    /**
     * Delete all documents, optionally scoped to a namespace.
     *
     * @param options - When `namespace` is set only that namespace is cleared;
     *   otherwise every document in the index is removed (the index itself and
     *   its mapping are kept).
     */
    async clear(options) {
        const namespace = options?.namespace;
        const queryBody = namespace
            ? { query: { term: { namespace } } }
            : { query: { match_all: {} } };
        await this.client.deleteByQuery({
            index: this.indexName,
            body: queryBody,
            refresh: true,
        });
    }
    /**
     * Retrieve a document by its ID.
     *
     * @param id - Document id.
     * @param _options - Currently unused.
     * @returns The document, or `null` if not found.
     * @throws Any client error other than a 404 response.
     */
    async getById(id, _options) {
        try {
            const response = await this.client.get({
                index: this.indexName,
                id,
            });
            if (!response.body.found)
                return null;
            const src = response.body._source;
            return {
                id: src.id,
                content: src.content,
                metadata: src.metadata,
            };
        }
        catch (err) {
            // A 404 from the client means "no such document".
            if (err.statusCode === 404)
                return null;
            throw err;
        }
    }
    /**
     * Get existing documents by their content hashes.
     * Used by the ingestion pipeline for deduplication.
     *
     * Requires that documents were stored with chunk metadata containing a
     * `hash` field (automatically set by chunkers in this library).
     *
     * @param hashes - Content hashes to look up in `metadata.hash`.
     * @param _options - Currently unused; the lookup spans all namespaces.
     * @returns Map of hash → document ID for hashes that already exist.
     */
    async getByHashes(hashes, _options) {
        const hashMap = new Map();
        if (hashes.length === 0)
            return hashMap;
        // NOTE(review): `size` equals the number of requested hashes, so if
        // several stored documents share one hash some other hash could be
        // pushed out of the result window — confirm hashes are unique per index.
        const response = await this.client.search({
            index: this.indexName,
            body: {
                size: hashes.length,
                query: { terms: { "metadata.hash": hashes } },
                _source: ["id", "metadata.hash"],
            },
        });
        for (const hit of response.body.hits?.hits ?? []) {
            const hash = hit._source.metadata
                ?.hash;
            if (hash) {
                hashMap.set(hash, hit._source.id);
            }
        }
        return hashMap;
    }
    // -------------------------------------------------------------------------
    // OpenSearch-specific accessors
    // -------------------------------------------------------------------------
    /**
     * Delete the entire OpenSearch index.
     * WARNING: This permanently removes all indexed documents and the mapping.
     */
    async deleteIndex() {
        await this.client.indices.delete({ index: this.indexName });
    }
    /** The OpenSearch index name used by this store. */
    getIndexName() {
        return this.indexName;
    }
    /** The configured vector dimensions. */
    getDimensions() {
        return this.dimensions;
    }
    /** The configured embeddings provider, if any. */
    getEmbeddings() {
        return this.embeddings;
    }
    /** The underlying OpenSearch client instance. */
    getClient() {
        return this.client;
    }
}
|
|
480
|
+
exports.OpenSearchVectorStore = OpenSearchVectorStore;
|
|
481
|
+
//# sourceMappingURL=OpenSearchVectorStore.js.map
|
|
@@ -5,6 +5,31 @@
|
|
|
5
5
|
* enabling document storage with embeddings and semantic search capabilities.
|
|
6
6
|
*/
|
|
7
7
|
import { Tool } from "../tools/Tool";
|
|
8
|
+
/**
|
|
9
|
+
* Supported types for metadata field definitions.
|
|
10
|
+
*/
|
|
11
|
+
export type MetadataFieldType = "string" | "number" | "boolean";
|
|
12
|
+
/**
|
|
13
|
+
* Definition for a typed metadata field.
|
|
14
|
+
* Used by vector store implementations to declare explicit field types
|
|
15
|
+
* for metadata properties, enabling correct indexing and filtering.
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* const fields: MetadataFieldDefinition[] = [
|
|
20
|
+
* { name: "source", type: "string" },
|
|
21
|
+
* { name: "page", type: "number" },
|
|
22
|
+
* ];
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export interface MetadataFieldDefinition {
|
|
26
|
+
/** Name of the metadata field. Use snake_case (e.g. `tenant_id`). */
|
|
27
|
+
name: string;
|
|
28
|
+
/** Data type for the field */
|
|
29
|
+
type: MetadataFieldType;
|
|
30
|
+
/** Whether the field can be null (default: true) */
|
|
31
|
+
nullable?: boolean;
|
|
32
|
+
}
|
|
8
33
|
/**
|
|
9
34
|
* Represents a document with its content and optional metadata.
|
|
10
35
|
*/
|
|
@@ -22,8 +22,9 @@
|
|
|
22
22
|
* const addTool = store.toAddDocumentsTool("Save new documents");
|
|
23
23
|
* ```
|
|
24
24
|
*/
|
|
25
|
-
export { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, RetrievalToolOptions, AddDocumentsToolOptions, } from "./VectorStore";
|
|
26
|
-
export { LanceDBVectorStore, LanceDBVectorStoreConfig,
|
|
25
|
+
export { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, RetrievalToolOptions, AddDocumentsToolOptions, MetadataFieldType, MetadataFieldDefinition, } from "./VectorStore";
|
|
26
|
+
export { LanceDBVectorStore, LanceDBVectorStoreConfig, } from "./LanceDBVectorStore";
|
|
27
|
+
export { OpenSearchVectorStore, OpenSearchVectorStoreConfig, OpenSearchSpaceType, OpenSearchKnnEngine, } from "./OpenSearchVectorStore";
|
|
27
28
|
export { Embeddings, EmbeddingOptions } from "../embeddings/Embeddings";
|
|
28
29
|
export { OpenAIEmbeddings, OpenAIEmbeddingsConfig, OpenAIEmbeddingModel, } from "../embeddings/OpenAIEmbeddings";
|
|
29
30
|
export { VoyageAIEmbeddings, VoyageAIEmbeddingsConfig, VoyageAIEmbeddingModel, VoyageAIMultimodalModel, } from "../embeddings/VoyageAIEmbeddings";
|
|
@@ -24,11 +24,13 @@
|
|
|
24
24
|
* ```
|
|
25
25
|
*/
|
|
26
26
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
27
|
-
exports.VoyageAIEmbeddings = exports.OpenAIEmbeddings = exports.Embeddings = exports.LanceDBVectorStore = exports.VectorStore = void 0;
|
|
27
|
+
exports.VoyageAIEmbeddings = exports.OpenAIEmbeddings = exports.Embeddings = exports.OpenSearchVectorStore = exports.LanceDBVectorStore = exports.VectorStore = void 0;
|
|
28
28
|
var VectorStore_1 = require("./VectorStore");
|
|
29
29
|
Object.defineProperty(exports, "VectorStore", { enumerable: true, get: function () { return VectorStore_1.VectorStore; } });
|
|
30
30
|
var LanceDBVectorStore_1 = require("./LanceDBVectorStore");
|
|
31
31
|
Object.defineProperty(exports, "LanceDBVectorStore", { enumerable: true, get: function () { return LanceDBVectorStore_1.LanceDBVectorStore; } });
|
|
32
|
+
var OpenSearchVectorStore_1 = require("./OpenSearchVectorStore");
|
|
33
|
+
Object.defineProperty(exports, "OpenSearchVectorStore", { enumerable: true, get: function () { return OpenSearchVectorStore_1.OpenSearchVectorStore; } });
|
|
32
34
|
// Re-export embeddings for backward compatibility
|
|
33
35
|
var Embeddings_1 = require("../embeddings/Embeddings");
|
|
34
36
|
Object.defineProperty(exports, "Embeddings", { enumerable: true, get: function () { return Embeddings_1.Embeddings; } });
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentionai/agents",
|
|
3
3
|
"author": "Laurent Zuijdwijk",
|
|
4
|
-
"version": "0.10.2",
|
|
4
|
+
"version": "0.12.0-beta",
|
|
5
5
|
"description": "Agent Library",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -30,6 +30,10 @@
|
|
|
30
30
|
"types": "./dist/gemini.d.ts",
|
|
31
31
|
"default": "./dist/gemini.js"
|
|
32
32
|
},
|
|
33
|
+
"./ollama": {
|
|
34
|
+
"types": "./dist/agents/ollama/OllamaAgent.d.ts",
|
|
35
|
+
"default": "./dist/agents/ollama/OllamaAgent.js"
|
|
36
|
+
},
|
|
33
37
|
"./embeddings": {
|
|
34
38
|
"types": "./dist/embeddings/index.d.ts",
|
|
35
39
|
"default": "./dist/embeddings/index.js"
|
|
@@ -61,6 +65,26 @@
|
|
|
61
65
|
"./history/plugins": {
|
|
62
66
|
"types": "./dist/history/plugins/index.d.ts",
|
|
63
67
|
"default": "./dist/history/plugins/index.js"
|
|
68
|
+
},
|
|
69
|
+
"./parsers": {
|
|
70
|
+
"types": "./dist/parsers/index.d.ts",
|
|
71
|
+
"default": "./dist/parsers/index.js"
|
|
72
|
+
},
|
|
73
|
+
"./parsers/unstructured-local": {
|
|
74
|
+
"types": "./dist/parsers/UnstructuredLocalParser.d.ts",
|
|
75
|
+
"default": "./dist/parsers/UnstructuredLocalParser.js"
|
|
76
|
+
},
|
|
77
|
+
"./parsers/unstructured-api": {
|
|
78
|
+
"types": "./dist/parsers/UnstructuredAPIParser.d.ts",
|
|
79
|
+
"default": "./dist/parsers/UnstructuredAPIParser.js"
|
|
80
|
+
},
|
|
81
|
+
"./parsers/llamaindex": {
|
|
82
|
+
"types": "./dist/parsers/LlamaIndexParser.d.ts",
|
|
83
|
+
"default": "./dist/parsers/LlamaIndexParser.js"
|
|
84
|
+
},
|
|
85
|
+
"./parsers/ollama-ocr": {
|
|
86
|
+
"types": "./dist/parsers/OllamaOCRParser.d.ts",
|
|
87
|
+
"default": "./dist/parsers/OllamaOCRParser.js"
|
|
64
88
|
}
|
|
65
89
|
},
|
|
66
90
|
"files": [
|
|
@@ -141,7 +165,12 @@
|
|
|
141
165
|
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
142
166
|
"apache-arrow": "^18.0.0",
|
|
143
167
|
"openai": "^6.16.0",
|
|
144
|
-
"voyageai": "^0.0.3"
|
|
168
|
+
"voyageai": "^0.0.3",
|
|
169
|
+
"@epilogo/unstructured-io-node": "*",
|
|
170
|
+
"unstructured-client": "*",
|
|
171
|
+
"llamaindex": "*",
|
|
172
|
+
"@llamaindex/readers": "*",
|
|
173
|
+
"pdf-to-img": "*"
|
|
145
174
|
},
|
|
146
175
|
"peerDependenciesMeta": {
|
|
147
176
|
"@lancedb/lancedb": {
|
|
@@ -167,9 +196,28 @@
|
|
|
167
196
|
},
|
|
168
197
|
"voyageai": {
|
|
169
198
|
"optional": true
|
|
199
|
+
},
|
|
200
|
+
"@opensearch-project/opensearch": {
|
|
201
|
+
"optional": true
|
|
202
|
+
},
|
|
203
|
+
"@epilogo/unstructured-io-node": {
|
|
204
|
+
"optional": true
|
|
205
|
+
},
|
|
206
|
+
"unstructured-client": {
|
|
207
|
+
"optional": true
|
|
208
|
+
},
|
|
209
|
+
"llamaindex": {
|
|
210
|
+
"optional": true
|
|
211
|
+
},
|
|
212
|
+
"@llamaindex/readers": {
|
|
213
|
+
"optional": true
|
|
214
|
+
},
|
|
215
|
+
"pdf-to-img": {
|
|
216
|
+
"optional": true
|
|
170
217
|
}
|
|
171
218
|
},
|
|
172
219
|
"dependencies": {
|
|
220
|
+
"@opensearch-project/opensearch": "^3.5.1",
|
|
173
221
|
"tokenx": "^1.2.1"
|
|
174
222
|
}
|
|
175
223
|
}
|