@agentionai/agents 0.10.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/anthropic/ClaudeAgent.js +6 -0
- package/dist/agents/google/GeminiAgent.js +6 -0
- package/dist/agents/mistral/MistralAgent.js +6 -0
- package/dist/agents/openai/OpenAiAgent.js +6 -0
- package/dist/history/History.d.ts +17 -0
- package/dist/history/History.js +26 -0
- package/dist/vectorstore/LanceDBVectorStore.d.ts +1 -16
- package/dist/vectorstore/OpenSearchVectorStore.d.ts +259 -0
- package/dist/vectorstore/OpenSearchVectorStore.js +481 -0
- package/dist/vectorstore/VectorStore.d.ts +25 -0
- package/dist/vectorstore/index.d.ts +3 -2
- package/dist/vectorstore/index.js +3 -1
- package/package.json +5 -1
|
@@ -84,6 +84,9 @@ class ClaudeAgent extends BaseAgent_1.BaseAgent {
|
|
|
84
84
|
// Mark session boundary so transform plugins (e.g. toolResultMaskingPlugin)
|
|
85
85
|
// don't mask tool results produced within this execute() loop.
|
|
86
86
|
this.history.setSessionAnchor();
|
|
87
|
+
// Suspend auto-trimming so tool_use / tool_result pairs are never split
|
|
88
|
+
// mid-loop. endExecution() in the finally block enforces limits once.
|
|
89
|
+
this.history.beginExecution();
|
|
87
90
|
try {
|
|
88
91
|
const messages = transformers_1.anthropicTransformer.toProvider(this.history.getEntries());
|
|
89
92
|
const systemMessage = this.history.getSystemMessage();
|
|
@@ -125,6 +128,9 @@ class ClaudeAgent extends BaseAgent_1.BaseAgent {
|
|
|
125
128
|
throw executionError;
|
|
126
129
|
}
|
|
127
130
|
}
|
|
131
|
+
finally {
|
|
132
|
+
this.history.endExecution();
|
|
133
|
+
}
|
|
128
134
|
}
|
|
129
135
|
async handleResponse(response) {
|
|
130
136
|
const usage = this.parseUsage(response.usage);
|
|
@@ -184,6 +184,9 @@ class GeminiAgent extends BaseAgent_1.BaseAgent {
|
|
|
184
184
|
// Mark session boundary so transform plugins (e.g. toolResultMaskingPlugin)
|
|
185
185
|
// don't mask tool results produced within this execute() loop.
|
|
186
186
|
this.history.setSessionAnchor();
|
|
187
|
+
// Suspend auto-trimming so tool_use / tool_result pairs are never split
|
|
188
|
+
// mid-loop. endExecution() in the finally block enforces limits once.
|
|
189
|
+
this.history.beginExecution();
|
|
187
190
|
try {
|
|
188
191
|
const contents = transformers_1.geminiTransformer.toProvider(this.history.getEntries());
|
|
189
192
|
const systemMessage = this.history.getSystemMessage();
|
|
@@ -229,6 +232,9 @@ class GeminiAgent extends BaseAgent_1.BaseAgent {
|
|
|
229
232
|
throw executionError;
|
|
230
233
|
}
|
|
231
234
|
}
|
|
235
|
+
finally {
|
|
236
|
+
this.history.endExecution();
|
|
237
|
+
}
|
|
232
238
|
}
|
|
233
239
|
async handleResponse(response) {
|
|
234
240
|
const result = response.response;
|
|
@@ -94,6 +94,9 @@ class MistralAgent extends BaseAgent_1.BaseAgent {
|
|
|
94
94
|
// Mark session boundary so transform plugins (e.g. toolResultMaskingPlugin)
|
|
95
95
|
// don't mask tool results produced within this execute() loop.
|
|
96
96
|
this.history.setSessionAnchor();
|
|
97
|
+
// Suspend auto-trimming so tool_use / tool_result pairs are never split
|
|
98
|
+
// mid-loop. endExecution() in the finally block enforces limits once.
|
|
99
|
+
this.history.beginExecution();
|
|
97
100
|
try {
|
|
98
101
|
const messages = transformers_1.mistralTransformer.toProvider(this.history.getEntries());
|
|
99
102
|
const response = await this.client.chat.complete({
|
|
@@ -133,6 +136,9 @@ class MistralAgent extends BaseAgent_1.BaseAgent {
|
|
|
133
136
|
throw executionError;
|
|
134
137
|
}
|
|
135
138
|
}
|
|
139
|
+
finally {
|
|
140
|
+
this.history.endExecution();
|
|
141
|
+
}
|
|
136
142
|
}
|
|
137
143
|
async handleResponse(response) {
|
|
138
144
|
if (!response.choices || response.choices.length === 0) {
|
|
@@ -105,6 +105,9 @@ class OpenAiAgent extends BaseAgent_1.BaseAgent {
|
|
|
105
105
|
// Mark session boundary so transform plugins (e.g. toolResultMaskingPlugin)
|
|
106
106
|
// don't mask tool results produced within this execute() loop.
|
|
107
107
|
this.history.setSessionAnchor();
|
|
108
|
+
// Suspend auto-trimming so tool_use / tool_result pairs are never split
|
|
109
|
+
// mid-loop. endExecution() in the finally block enforces limits once.
|
|
110
|
+
this.history.beginExecution();
|
|
108
111
|
try {
|
|
109
112
|
const inputMessages = transformers_1.openAiTransformer.toProvider(this.history.getEntries());
|
|
110
113
|
const response = await this.client.responses.create({
|
|
@@ -150,6 +153,9 @@ class OpenAiAgent extends BaseAgent_1.BaseAgent {
|
|
|
150
153
|
throw executionError;
|
|
151
154
|
}
|
|
152
155
|
}
|
|
156
|
+
finally {
|
|
157
|
+
this.history.endExecution();
|
|
158
|
+
}
|
|
153
159
|
}
|
|
154
160
|
async handleResponse(response) {
|
|
155
161
|
if (!response.output || !response.output.length) {
|
|
@@ -123,6 +123,7 @@ export declare class History extends EventEmitter {
|
|
|
123
123
|
transient: boolean;
|
|
124
124
|
private _plugins;
|
|
125
125
|
private _reducing;
|
|
126
|
+
private _executing;
|
|
126
127
|
private _sessionAnchor;
|
|
127
128
|
constructor(entries?: HistoryEntry[], options?: HistoryOptions);
|
|
128
129
|
/**
|
|
@@ -247,6 +248,22 @@ export declare class History extends EventEmitter {
|
|
|
247
248
|
* Create a copy of this history
|
|
248
249
|
*/
|
|
249
250
|
clone(options?: HistoryOptions): History;
|
|
251
|
+
/**
|
|
252
|
+
* Signal the start of an agent execute() loop. While executing, automatic
|
|
253
|
+
* trimming on addEntry() is suspended so tool_use / tool_result pairs are
|
|
254
|
+
* never split mid-loop. Call endExecution() in a finally block to resume.
|
|
255
|
+
*/
|
|
256
|
+
beginExecution(): void;
|
|
257
|
+
/**
|
|
258
|
+
* Signal the end of an agent execute() loop. Resumes automatic trimming and
|
|
259
|
+
* immediately enforces maxLength / maxTokens limits on the accumulated history.
|
|
260
|
+
*/
|
|
261
|
+
endExecution(): void;
|
|
262
|
+
/**
|
|
263
|
+
* Explicitly enforce maxLength and maxTokens limits. Useful when using
|
|
264
|
+
* History standalone, outside of an agent execute() loop.
|
|
265
|
+
*/
|
|
266
|
+
trim(): void;
|
|
250
267
|
/**
|
|
251
268
|
* Apply maxLength and maxTokens trimming to the current entry list.
|
|
252
269
|
* Safe to call after bulk-loading entries (e.g. RedisHistory.load()).
|
package/dist/history/History.js
CHANGED
|
@@ -127,6 +127,7 @@ class History extends events_1.default {
|
|
|
127
127
|
this.transient = false;
|
|
128
128
|
this._plugins = [];
|
|
129
129
|
this._reducing = false;
|
|
130
|
+
this._executing = false;
|
|
130
131
|
this._sessionAnchor = null;
|
|
131
132
|
this.options = options;
|
|
132
133
|
this.transient = Boolean(options?.transient);
|
|
@@ -399,12 +400,37 @@ class History extends events_1.default {
|
|
|
399
400
|
// ===========================================================================
|
|
400
401
|
// Private helpers
|
|
401
402
|
// ===========================================================================
|
|
403
|
+
/**
|
|
404
|
+
* Signal the start of an agent execute() loop. While executing, automatic
|
|
405
|
+
* trimming on addEntry() is suspended so tool_use / tool_result pairs are
|
|
406
|
+
* never split mid-loop. Call endExecution() in a finally block to resume.
|
|
407
|
+
*/
|
|
408
|
+
beginExecution() {
|
|
409
|
+
this._executing = true;
|
|
410
|
+
}
|
|
411
|
+
/**
|
|
412
|
+
* Signal the end of an agent execute() loop. Resumes automatic trimming and
|
|
413
|
+
* immediately enforces maxLength / maxTokens limits on the accumulated history.
|
|
414
|
+
*/
|
|
415
|
+
endExecution() {
|
|
416
|
+
this._executing = false;
|
|
417
|
+
this.applyTrimming();
|
|
418
|
+
}
|
|
419
|
+
/**
|
|
420
|
+
* Explicitly enforce maxLength and maxTokens limits. Useful when using
|
|
421
|
+
* History standalone, outside of an agent execute() loop.
|
|
422
|
+
*/
|
|
423
|
+
trim() {
|
|
424
|
+
this.applyTrimming();
|
|
425
|
+
}
|
|
402
426
|
/**
|
|
403
427
|
* Apply maxLength and maxTokens trimming to the current entry list.
|
|
404
428
|
* Safe to call after bulk-loading entries (e.g. RedisHistory.load()).
|
|
405
429
|
* Subclasses may call this after directly manipulating _entries.
|
|
406
430
|
*/
|
|
407
431
|
applyTrimming() {
|
|
432
|
+
if (this._executing)
|
|
433
|
+
return;
|
|
408
434
|
if (this.options.maxLength && this._entries.length > this.options.maxLength) {
|
|
409
435
|
this._entries = this._entries.slice(this._entries.length - this.options.maxLength);
|
|
410
436
|
this.sanitizeToolPairs();
|
|
@@ -8,23 +8,8 @@
|
|
|
8
8
|
* @requires apache-arrow - Install with: npm install apache-arrow
|
|
9
9
|
*/
|
|
10
10
|
import type { Connection, Table, ConnectionOptions } from "@lancedb/lancedb";
|
|
11
|
-
import { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions } from "./VectorStore";
|
|
11
|
+
import { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, MetadataFieldDefinition } from "./VectorStore";
|
|
12
12
|
import { Embeddings } from "../embeddings/Embeddings";
|
|
13
|
-
/**
|
|
14
|
-
* Supported types for metadata fields.
|
|
15
|
-
*/
|
|
16
|
-
export type MetadataFieldType = "string" | "number" | "boolean";
|
|
17
|
-
/**
|
|
18
|
-
* Definition for a metadata field that will be stored as a separate column.
|
|
19
|
-
*/
|
|
20
|
-
export interface MetadataFieldDefinition {
|
|
21
|
-
/** Name of the metadata field. Use snake_case (e.g. `tenant_id`) to avoid SQL filter issues. */
|
|
22
|
-
name: string;
|
|
23
|
-
/** Data type for the field */
|
|
24
|
-
type: MetadataFieldType;
|
|
25
|
-
/** Whether the field can be null (default: true) */
|
|
26
|
-
nullable?: boolean;
|
|
27
|
-
}
|
|
28
13
|
/**
|
|
29
14
|
* Configuration for LanceDBVectorStore.
|
|
30
15
|
*/
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenSearch implementation of the VectorStore interface.
|
|
3
|
+
*
|
|
4
|
+
* Uses the OpenSearch k-NN plugin for approximate nearest-neighbour search
|
|
5
|
+
* via HNSW indexing. Supports cosine similarity, L2, and inner product
|
|
6
|
+
* space types.
|
|
7
|
+
*
|
|
8
|
+
* @requires @opensearch-project/opensearch - Install with: npm install @opensearch-project/opensearch
|
|
9
|
+
*/
|
|
10
|
+
import { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, MetadataFieldDefinition } from "./VectorStore";
|
|
11
|
+
import { Embeddings } from "../embeddings/Embeddings";
|
|
12
|
+
interface OpenSearchHit<T> {
|
|
13
|
+
_id: string;
|
|
14
|
+
_score: number;
|
|
15
|
+
_source: T;
|
|
16
|
+
}
|
|
17
|
+
interface OpenSearchClient {
|
|
18
|
+
indices: {
|
|
19
|
+
exists(params: {
|
|
20
|
+
index: string;
|
|
21
|
+
}): Promise<{
|
|
22
|
+
body: boolean;
|
|
23
|
+
}>;
|
|
24
|
+
create(params: {
|
|
25
|
+
index: string;
|
|
26
|
+
body: unknown;
|
|
27
|
+
}): Promise<unknown>;
|
|
28
|
+
delete(params: {
|
|
29
|
+
index: string;
|
|
30
|
+
}): Promise<unknown>;
|
|
31
|
+
};
|
|
32
|
+
bulk(params: {
|
|
33
|
+
body: unknown[];
|
|
34
|
+
refresh?: boolean | string;
|
|
35
|
+
}): Promise<{
|
|
36
|
+
body: {
|
|
37
|
+
items?: Array<{
|
|
38
|
+
delete?: {
|
|
39
|
+
result?: string;
|
|
40
|
+
};
|
|
41
|
+
index?: {
|
|
42
|
+
result?: string;
|
|
43
|
+
};
|
|
44
|
+
}>;
|
|
45
|
+
errors?: boolean;
|
|
46
|
+
};
|
|
47
|
+
}>;
|
|
48
|
+
search<T>(params: {
|
|
49
|
+
index: string;
|
|
50
|
+
body: unknown;
|
|
51
|
+
}): Promise<{
|
|
52
|
+
body: {
|
|
53
|
+
hits?: {
|
|
54
|
+
hits?: OpenSearchHit<T>[];
|
|
55
|
+
};
|
|
56
|
+
};
|
|
57
|
+
}>;
|
|
58
|
+
get<T>(params: {
|
|
59
|
+
index: string;
|
|
60
|
+
id: string;
|
|
61
|
+
}): Promise<{
|
|
62
|
+
body: {
|
|
63
|
+
found: boolean;
|
|
64
|
+
_source: T;
|
|
65
|
+
};
|
|
66
|
+
}>;
|
|
67
|
+
deleteByQuery(params: {
|
|
68
|
+
index: string;
|
|
69
|
+
body: unknown;
|
|
70
|
+
refresh?: boolean | string;
|
|
71
|
+
}): Promise<unknown>;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* k-NN vector space type used by the OpenSearch k-NN plugin.
|
|
75
|
+
* - `cosinesimil` — cosine similarity (default, normalised vectors recommended)
|
|
76
|
+
* - `l2` — Euclidean L2 distance
|
|
77
|
+
* - `innerproduct` — inner / dot product
|
|
78
|
+
*/
|
|
79
|
+
export type OpenSearchSpaceType = "cosinesimil" | "l2" | "innerproduct";
|
|
80
|
+
/**
|
|
81
|
+
* k-NN engine used by the OpenSearch k-NN plugin.
|
|
82
|
+
* - `lucene` — native Lucene ANN (cosinesimil and l2 only); default since OpenSearch 3.x
|
|
83
|
+
* - `faiss` — high-throughput GPU-accelerated (l2 and innerproduct only)
|
|
84
|
+
* - `nmslib` — deprecated and removed in OpenSearch 3.0; do not use
|
|
85
|
+
*/
|
|
86
|
+
export type OpenSearchKnnEngine = "lucene" | "faiss" | "nmslib";
|
|
87
|
+
/**
|
|
88
|
+
* Configuration for OpenSearchVectorStore.
|
|
89
|
+
*/
|
|
90
|
+
export interface OpenSearchVectorStoreConfig {
|
|
91
|
+
/** Name identifier for this store instance */
|
|
92
|
+
name: string;
|
|
93
|
+
/** OpenSearch node URL (e.g. `https://localhost:9200`) */
|
|
94
|
+
node: string;
|
|
95
|
+
/** Basic-auth credentials */
|
|
96
|
+
auth?: {
|
|
97
|
+
username: string;
|
|
98
|
+
password: string;
|
|
99
|
+
};
|
|
100
|
+
/** SSL options — set `rejectUnauthorized: false` for self-signed certs */
|
|
101
|
+
ssl?: {
|
|
102
|
+
rejectUnauthorized?: boolean;
|
|
103
|
+
};
|
|
104
|
+
/** OpenSearch index name to use for document storage */
|
|
105
|
+
indexName: string;
|
|
106
|
+
/** Embeddings provider for automatic embedding generation */
|
|
107
|
+
embeddings?: Embeddings;
|
|
108
|
+
/**
|
|
109
|
+
* Vector dimensions.
|
|
110
|
+
* Defaults to `embeddings.dimensions` when an embeddings provider is given,
|
|
111
|
+
* otherwise falls back to `1536`.
|
|
112
|
+
*/
|
|
113
|
+
dimensions?: number;
|
|
114
|
+
/**
|
|
115
|
+
* k-NN vector space type (default: `"cosinesimil"`).
|
|
116
|
+
* Must match the space type the embeddings model was trained for.
|
|
117
|
+
*/
|
|
118
|
+
spaceType?: OpenSearchSpaceType;
|
|
119
|
+
/**
|
|
120
|
+
* k-NN engine (default: `"lucene"`).
|
|
121
|
+
* `nmslib` was removed in OpenSearch 3.0 and cannot be used for new indices.
|
|
122
|
+
*/
|
|
123
|
+
engine?: OpenSearchKnnEngine;
|
|
124
|
+
/**
|
|
125
|
+
* HNSW `ef_search` parameter — controls recall vs. latency at query time.
|
|
126
|
+
* Higher values improve recall at the cost of latency. Default: `512`.
|
|
127
|
+
*/
|
|
128
|
+
efSearch?: number;
|
|
129
|
+
/**
|
|
130
|
+
* HNSW `ef_construction` parameter — controls graph quality at index time.
|
|
131
|
+
* Higher values improve recall at the cost of indexing speed. Default: `512`.
|
|
132
|
+
*/
|
|
133
|
+
efConstruction?: number;
|
|
134
|
+
/**
|
|
135
|
+
* HNSW `M` parameter — number of bidirectional links per node.
|
|
136
|
+
* Higher values improve recall but increase memory usage. Default: `16`.
|
|
137
|
+
*/
|
|
138
|
+
m?: number;
|
|
139
|
+
/**
|
|
140
|
+
* Optional user-defined metadata field definitions.
|
|
141
|
+
*
|
|
142
|
+
* When provided, these fields are declared in the index mapping with proper
|
|
143
|
+
* types (`keyword` for strings, `double` for numbers, `boolean` for booleans),
|
|
144
|
+
* which enables reliable exact-match filtering via `SearchOptions.filter`.
|
|
145
|
+
*
|
|
146
|
+
* Without this option, OpenSearch uses dynamic mapping for the `metadata`
|
|
147
|
+
* object. String fields are mapped as `text` with a `.keyword` sub-field —
|
|
148
|
+
* the store handles this automatically by appending `.keyword` to undeclared
|
|
149
|
+
* string filter values at query time.
|
|
150
|
+
*
|
|
151
|
+
* Chunk metadata fields produced by the library's chunkers (`hash`,
|
|
152
|
+
* `prev_id`, `next_id`, etc.) are always declared explicitly — you do not
|
|
153
|
+
* need to list them here.
|
|
154
|
+
*
|
|
155
|
+
* @example
|
|
156
|
+
* ```typescript
|
|
157
|
+
* metadataFields: [
|
|
158
|
+
* { name: "source", type: "string" },
|
|
159
|
+
* { name: "page", type: "number" },
|
|
160
|
+
* ]
|
|
161
|
+
* ```
|
|
162
|
+
*/
|
|
163
|
+
metadataFields?: MetadataFieldDefinition[];
|
|
164
|
+
}
|
|
165
|
+
export declare class OpenSearchVectorStore extends VectorStore {
|
|
166
|
+
readonly name: string;
|
|
167
|
+
private client;
|
|
168
|
+
private indexName;
|
|
169
|
+
private embeddings?;
|
|
170
|
+
private dimensions;
|
|
171
|
+
private spaceType;
|
|
172
|
+
private engine;
|
|
173
|
+
private efSearch;
|
|
174
|
+
private efConstruction;
|
|
175
|
+
private m;
|
|
176
|
+
private metadataFields?;
|
|
177
|
+
/** Set of metadata field names declared as keyword (string) type. */
|
|
178
|
+
private keywordFields;
|
|
179
|
+
private constructor();
|
|
180
|
+
/**
|
|
181
|
+
* Create a new OpenSearchVectorStore instance.
|
|
182
|
+
*
|
|
183
|
+
* Connects to the given OpenSearch node and creates the index (with k-NN
|
|
184
|
+
* mapping) if it does not already exist.
|
|
185
|
+
*
|
|
186
|
+
* @param config - Store configuration
|
|
187
|
+
* @returns A ready-to-use OpenSearchVectorStore instance
|
|
188
|
+
* @throws Error if `@opensearch-project/opensearch` is not installed
|
|
189
|
+
*/
|
|
190
|
+
static create(config: OpenSearchVectorStoreConfig): Promise<OpenSearchVectorStore>;
|
|
191
|
+
/**
|
|
192
|
+
* Create the k-NN index if it does not already exist.
|
|
193
|
+
*
|
|
194
|
+
* The `metadata` object always includes explicit mappings for chunk metadata
|
|
195
|
+
* fields (hash, prev_id, etc.) so they work correctly in term queries.
|
|
196
|
+
* Any user-declared `metadataFields` are also mapped with proper types.
|
|
197
|
+
* All other metadata fields fall back to dynamic mapping.
|
|
198
|
+
*/
|
|
199
|
+
private ensureIndex;
|
|
200
|
+
/**
|
|
201
|
+
* Add documents to the store.
|
|
202
|
+
* Embeddings are generated automatically using the configured provider.
|
|
203
|
+
*/
|
|
204
|
+
addDocuments(documents: Document[], options?: AddDocumentsOptions): Promise<string[]>;
|
|
205
|
+
/**
|
|
206
|
+
* Add documents with pre-computed embeddings.
|
|
207
|
+
* Uses OpenSearch bulk API for efficiency.
|
|
208
|
+
*/
|
|
209
|
+
addEmbeddedDocuments(documents: EmbeddedDocument[], options?: AddDocumentsOptions): Promise<string[]>;
|
|
210
|
+
/**
|
|
211
|
+
* Search for documents similar to the query text.
|
|
212
|
+
* The query is embedded automatically using the configured embeddings provider.
|
|
213
|
+
*/
|
|
214
|
+
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
215
|
+
/**
|
|
216
|
+
* Search using a pre-computed embedding vector.
|
|
217
|
+
* Executes a k-NN query against the OpenSearch index.
|
|
218
|
+
*/
|
|
219
|
+
searchByVector(embedding: number[], options?: SearchOptions): Promise<SearchResult[]>;
|
|
220
|
+
/**
|
|
221
|
+
* Delete documents by their IDs.
|
|
222
|
+
* @returns Number of documents actually deleted.
|
|
223
|
+
*/
|
|
224
|
+
delete(ids: string[], _options?: DeleteOptions): Promise<number>;
|
|
225
|
+
/**
|
|
226
|
+
* Delete all documents, optionally scoped to a namespace.
|
|
227
|
+
*/
|
|
228
|
+
clear(options?: DeleteOptions): Promise<void>;
|
|
229
|
+
/**
|
|
230
|
+
* Retrieve a document by its ID.
|
|
231
|
+
* @returns The document, or `null` if not found.
|
|
232
|
+
*/
|
|
233
|
+
getById(id: string, _options?: DeleteOptions): Promise<Document | null>;
|
|
234
|
+
/**
|
|
235
|
+
* Get existing documents by their content hashes.
|
|
236
|
+
* Used by the ingestion pipeline for deduplication.
|
|
237
|
+
*
|
|
238
|
+
* Requires that documents were stored with chunk metadata containing a
|
|
239
|
+
* `hash` field (automatically set by chunkers in this library).
|
|
240
|
+
*
|
|
241
|
+
* @returns Map of hash → document ID for hashes that already exist.
|
|
242
|
+
*/
|
|
243
|
+
getByHashes(hashes: string[], _options?: DeleteOptions): Promise<Map<string, string>>;
|
|
244
|
+
/**
|
|
245
|
+
* Delete the entire OpenSearch index.
|
|
246
|
+
* WARNING: This permanently removes all indexed documents and the mapping.
|
|
247
|
+
*/
|
|
248
|
+
deleteIndex(): Promise<void>;
|
|
249
|
+
/** The OpenSearch index name used by this store. */
|
|
250
|
+
getIndexName(): string;
|
|
251
|
+
/** The configured vector dimensions. */
|
|
252
|
+
getDimensions(): number;
|
|
253
|
+
/** The configured embeddings provider, if any. */
|
|
254
|
+
getEmbeddings(): Embeddings | undefined;
|
|
255
|
+
/** The underlying OpenSearch client instance. */
|
|
256
|
+
getClient(): OpenSearchClient;
|
|
257
|
+
}
|
|
258
|
+
export {};
|
|
259
|
+
//# sourceMappingURL=OpenSearchVectorStore.d.ts.map
|
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* OpenSearch implementation of the VectorStore interface.
|
|
4
|
+
*
|
|
5
|
+
* Uses the OpenSearch k-NN plugin for approximate nearest-neighbour search
|
|
6
|
+
* via HNSW indexing. Supports cosine similarity, L2, and inner product
|
|
7
|
+
* space types.
|
|
8
|
+
*
|
|
9
|
+
* @requires @opensearch-project/opensearch - Install with: npm install @opensearch-project/opensearch
|
|
10
|
+
*/
|
|
11
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
12
|
+
if (k2 === undefined) k2 = k;
|
|
13
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
14
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
15
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
16
|
+
}
|
|
17
|
+
Object.defineProperty(o, k2, desc);
|
|
18
|
+
}) : (function(o, m, k, k2) {
|
|
19
|
+
if (k2 === undefined) k2 = k;
|
|
20
|
+
o[k2] = m[k];
|
|
21
|
+
}));
|
|
22
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
23
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
24
|
+
}) : function(o, v) {
|
|
25
|
+
o["default"] = v;
|
|
26
|
+
});
|
|
27
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
28
|
+
var ownKeys = function(o) {
|
|
29
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
30
|
+
var ar = [];
|
|
31
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
32
|
+
return ar;
|
|
33
|
+
};
|
|
34
|
+
return ownKeys(o);
|
|
35
|
+
};
|
|
36
|
+
return function (mod) {
|
|
37
|
+
if (mod && mod.__esModule) return mod;
|
|
38
|
+
var result = {};
|
|
39
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
40
|
+
__setModuleDefault(result, mod);
|
|
41
|
+
return result;
|
|
42
|
+
};
|
|
43
|
+
})();
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.OpenSearchVectorStore = void 0;
|
|
46
|
+
const VectorStore_1 = require("./VectorStore");
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Score normalisation helpers
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
/**
|
|
51
|
+
* Raw OpenSearch k-NN scores are not in the [0, 1] range for all space types.
|
|
52
|
+
* This function normalises them:
|
|
53
|
+
* - cosinesimil: OpenSearch returns `1 + cos(q, d)` → range [0, 2]; divide by 2.
|
|
54
|
+
* - l2: OpenSearch returns `1 / (1 + l2_dist)` → already in (0, 1].
|
|
55
|
+
* - innerproduct: pass through as-is (application-defined interpretation).
|
|
56
|
+
*/
|
|
57
|
+
function normalizeScore(rawScore, spaceType) {
|
|
58
|
+
if (spaceType === "cosinesimil") {
|
|
59
|
+
return rawScore / 2;
|
|
60
|
+
}
|
|
61
|
+
return rawScore;
|
|
62
|
+
}
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// OpenSearchVectorStore
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
/**
|
|
67
|
+
* OpenSearch implementation of the VectorStore interface.
|
|
68
|
+
*
|
|
69
|
+
* Stores documents in an OpenSearch index with a `knn_vector` field and
|
|
70
|
+
* performs approximate nearest-neighbour search using the k-NN plugin (HNSW).
|
|
71
|
+
*
|
|
72
|
+
* **Namespace support**: namespaces are stored as a top-level `namespace`
|
|
73
|
+
* keyword field. All search / delete operations that receive a namespace
|
|
74
|
+
* automatically add a term filter on this field.
|
|
75
|
+
*
|
|
76
|
+
* **Metadata**: stored as a nested `metadata` object with dynamic mapping.
|
|
77
|
+
* Chunk metadata fields produced by the library's chunkers (e.g. `hash`,
|
|
78
|
+
* `prev_id`, `next_id`) live inside `metadata` and are searchable via
|
|
79
|
+
* `metadata.<field>` queries.
|
|
80
|
+
*
|
|
81
|
+
* @example Basic setup with OpenAI embeddings
|
|
82
|
+
* ```typescript
|
|
83
|
+
* import { OpenSearchVectorStore } from "@agentionai/agents/vectorstore";
|
|
84
|
+
* import { OpenAIEmbeddings } from "@agentionai/agents/embeddings";
|
|
85
|
+
*
|
|
86
|
+
* const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-small" });
|
|
87
|
+
*
|
|
88
|
+
* const store = await OpenSearchVectorStore.create({
|
|
89
|
+
* name: "my_store",
|
|
90
|
+
* node: "https://localhost:9200",
|
|
91
|
+
* auth: { username: "admin", password: "admin" },
|
|
92
|
+
* ssl: { rejectUnauthorized: false },
|
|
93
|
+
* indexName: "knowledge_base",
|
|
94
|
+
* embeddings,
|
|
95
|
+
* });
|
|
96
|
+
*
|
|
97
|
+
* await store.addDocuments([
|
|
98
|
+
* { id: "1", content: "OpenSearch is a distributed search engine.", metadata: { source: "docs" } },
|
|
99
|
+
* ]);
|
|
100
|
+
*
|
|
101
|
+
* const results = await store.search("distributed search", { limit: 5 });
|
|
102
|
+
* ```
|
|
103
|
+
*
|
|
104
|
+
* @example Use as an agent retrieval tool
|
|
105
|
+
* ```typescript
|
|
106
|
+
* const searchTool = store.toRetrievalTool("Search product documentation");
|
|
107
|
+
* const agent = new ClaudeAgent({ tools: [searchTool], ... });
|
|
108
|
+
* ```
|
|
109
|
+
*/
|
|
110
|
+
/**
|
|
111
|
+
* Chunk metadata field names always declared explicitly in the mapping.
|
|
112
|
+
* These are produced by the library's chunkers and used internally for
|
|
113
|
+
* deduplication (hash) and chunk navigation (prev_id, next_id).
|
|
114
|
+
*/
|
|
115
|
+
const CHUNK_KEYWORD_FIELDS = new Set([
|
|
116
|
+
"hash", "prev_id", "next_id", "source_id", "source_path", "section",
|
|
117
|
+
]);
|
|
118
|
+
const CHUNK_NUMERIC_FIELDS = new Set([
|
|
119
|
+
"index", "total", "start", "end", "char_count", "token_count", "page",
|
|
120
|
+
]);
|
|
121
|
+
class OpenSearchVectorStore extends VectorStore_1.VectorStore {
|
|
122
|
+
constructor(config, client) {
|
|
123
|
+
super();
|
|
124
|
+
this.name = config.name;
|
|
125
|
+
this.client = client;
|
|
126
|
+
this.indexName = config.indexName;
|
|
127
|
+
this.embeddings = config.embeddings;
|
|
128
|
+
this.dimensions =
|
|
129
|
+
config.dimensions ?? config.embeddings?.dimensions ?? 1536;
|
|
130
|
+
this.spaceType = config.spaceType ?? "cosinesimil";
|
|
131
|
+
this.engine = config.engine ?? "lucene";
|
|
132
|
+
this.efSearch = config.efSearch ?? 512;
|
|
133
|
+
this.efConstruction = config.efConstruction ?? 512;
|
|
134
|
+
this.m = config.m ?? 16;
|
|
135
|
+
this.metadataFields = config.metadataFields;
|
|
136
|
+
// Build the set of field names that are explicitly mapped as keyword.
|
|
137
|
+
// Used by the filter builder to decide whether to append ".keyword".
|
|
138
|
+
this.keywordFields = new Set(CHUNK_KEYWORD_FIELDS);
|
|
139
|
+
for (const field of config.metadataFields ?? []) {
|
|
140
|
+
if (field.type === "string") {
|
|
141
|
+
this.keywordFields.add(field.name);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Create a new OpenSearchVectorStore instance.
|
|
147
|
+
*
|
|
148
|
+
* Connects to the given OpenSearch node and creates the index (with k-NN
|
|
149
|
+
* mapping) if it does not already exist.
|
|
150
|
+
*
|
|
151
|
+
* @param config - Store configuration
|
|
152
|
+
* @returns A ready-to-use OpenSearchVectorStore instance
|
|
153
|
+
* @throws Error if `@opensearch-project/opensearch` is not installed
|
|
154
|
+
*/
|
|
155
|
+
static async create(config) {
|
|
156
|
+
let ClientCtor;
|
|
157
|
+
try {
|
|
158
|
+
// Use a variable so TypeScript does not attempt static module resolution
|
|
159
|
+
// for this optional peer dependency.
|
|
160
|
+
const pkgName = "@opensearch-project/opensearch";
|
|
161
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
162
|
+
const mod = (await Promise.resolve(`${pkgName}`).then(s => __importStar(require(s))));
|
|
163
|
+
ClientCtor = mod.Client;
|
|
164
|
+
}
|
|
165
|
+
catch {
|
|
166
|
+
throw new Error("@opensearch-project/opensearch is not installed. " +
|
|
167
|
+
"Install it with: npm install @opensearch-project/opensearch");
|
|
168
|
+
}
|
|
169
|
+
const clientCfg = { node: config.node };
|
|
170
|
+
if (config.auth)
|
|
171
|
+
clientCfg.auth = config.auth;
|
|
172
|
+
if (config.ssl)
|
|
173
|
+
clientCfg.ssl = config.ssl;
|
|
174
|
+
const client = new ClientCtor(clientCfg);
|
|
175
|
+
const store = new OpenSearchVectorStore(config, client);
|
|
176
|
+
await store.ensureIndex();
|
|
177
|
+
return store;
|
|
178
|
+
}
|
|
179
|
+
// -------------------------------------------------------------------------
|
|
180
|
+
// Index management
|
|
181
|
+
// -------------------------------------------------------------------------
|
|
182
|
+
/**
|
|
183
|
+
* Create the k-NN index if it does not already exist.
|
|
184
|
+
*
|
|
185
|
+
* The `metadata` object always includes explicit mappings for chunk metadata
|
|
186
|
+
* fields (hash, prev_id, etc.) so they work correctly in term queries.
|
|
187
|
+
* Any user-declared `metadataFields` are also mapped with proper types.
|
|
188
|
+
* All other metadata fields fall back to dynamic mapping.
|
|
189
|
+
*/
|
|
190
|
+
async ensureIndex() {
|
|
191
|
+
const { body: exists } = await this.client.indices.exists({
|
|
192
|
+
index: this.indexName,
|
|
193
|
+
});
|
|
194
|
+
if (exists)
|
|
195
|
+
return;
|
|
196
|
+
// Build explicit sub-properties for the metadata object.
|
|
197
|
+
const metadataProperties = {};
|
|
198
|
+
// Chunk metadata fields — always declared with correct types.
|
|
199
|
+
for (const field of CHUNK_KEYWORD_FIELDS) {
|
|
200
|
+
metadataProperties[field] = { type: "keyword" };
|
|
201
|
+
}
|
|
202
|
+
for (const field of CHUNK_NUMERIC_FIELDS) {
|
|
203
|
+
metadataProperties[field] = { type: "integer" };
|
|
204
|
+
}
|
|
205
|
+
// User-declared metadata fields.
|
|
206
|
+
for (const field of this.metadataFields ?? []) {
|
|
207
|
+
if (field.type === "number") {
|
|
208
|
+
metadataProperties[field.name] = { type: "double" };
|
|
209
|
+
}
|
|
210
|
+
else if (field.type === "boolean") {
|
|
211
|
+
metadataProperties[field.name] = { type: "boolean" };
|
|
212
|
+
}
|
|
213
|
+
else {
|
|
214
|
+
metadataProperties[field.name] = { type: "keyword" };
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
await this.client.indices.create({
|
|
218
|
+
index: this.indexName,
|
|
219
|
+
body: {
|
|
220
|
+
settings: {
|
|
221
|
+
index: {
|
|
222
|
+
knn: true,
|
|
223
|
+
"knn.algo_param.ef_search": this.efSearch,
|
|
224
|
+
},
|
|
225
|
+
},
|
|
226
|
+
mappings: {
|
|
227
|
+
properties: {
|
|
228
|
+
id: { type: "keyword" },
|
|
229
|
+
content: { type: "text" },
|
|
230
|
+
embedding: {
|
|
231
|
+
type: "knn_vector",
|
|
232
|
+
dimension: this.dimensions,
|
|
233
|
+
method: {
|
|
234
|
+
name: "hnsw",
|
|
235
|
+
space_type: this.spaceType,
|
|
236
|
+
engine: this.engine,
|
|
237
|
+
parameters: {
|
|
238
|
+
ef_construction: this.efConstruction,
|
|
239
|
+
m: this.m,
|
|
240
|
+
},
|
|
241
|
+
},
|
|
242
|
+
},
|
|
243
|
+
namespace: { type: "keyword" },
|
|
244
|
+
metadata: {
|
|
245
|
+
type: "object",
|
|
246
|
+
dynamic: true, // undeclared fields still work via dynamic mapping
|
|
247
|
+
properties: metadataProperties,
|
|
248
|
+
},
|
|
249
|
+
},
|
|
250
|
+
},
|
|
251
|
+
},
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
// -------------------------------------------------------------------------
|
|
255
|
+
// VectorStore abstract method implementations
|
|
256
|
+
// -------------------------------------------------------------------------
|
|
257
|
+
/**
|
|
258
|
+
* Add documents to the store.
|
|
259
|
+
* Embeddings are generated automatically using the configured provider.
|
|
260
|
+
*/
|
|
261
|
+
async addDocuments(documents, options) {
|
|
262
|
+
if (!this.embeddings) {
|
|
263
|
+
throw new Error("No embeddings provider configured. " +
|
|
264
|
+
"Use addEmbeddedDocuments() with pre-computed embeddings, " +
|
|
265
|
+
"or pass an embeddings provider in the config.");
|
|
266
|
+
}
|
|
267
|
+
const texts = documents.map((d) => d.content);
|
|
268
|
+
const vectors = await this.embeddings.embed(texts);
|
|
269
|
+
const embedded = documents.map((doc, i) => ({
|
|
270
|
+
...doc,
|
|
271
|
+
embedding: vectors[i],
|
|
272
|
+
}));
|
|
273
|
+
return this.addEmbeddedDocuments(embedded, options);
|
|
274
|
+
}
|
|
275
|
+
/**
|
|
276
|
+
* Add documents with pre-computed embeddings.
|
|
277
|
+
* Uses OpenSearch bulk API for efficiency.
|
|
278
|
+
*/
|
|
279
|
+
async addEmbeddedDocuments(documents, options) {
|
|
280
|
+
if (documents.length === 0)
|
|
281
|
+
return [];
|
|
282
|
+
const namespace = options?.namespace;
|
|
283
|
+
// Flatten documents into bulk request body
|
|
284
|
+
const body = [];
|
|
285
|
+
for (const doc of documents) {
|
|
286
|
+
body.push({ index: { _index: this.indexName, _id: doc.id } });
|
|
287
|
+
const osDoc = {
|
|
288
|
+
id: doc.id,
|
|
289
|
+
content: doc.content,
|
|
290
|
+
embedding: doc.embedding,
|
|
291
|
+
metadata: doc.metadata,
|
|
292
|
+
};
|
|
293
|
+
if (namespace)
|
|
294
|
+
osDoc.namespace = namespace;
|
|
295
|
+
body.push(osDoc);
|
|
296
|
+
}
|
|
297
|
+
await this.client.bulk({ body, refresh: true });
|
|
298
|
+
return documents.map((d) => d.id);
|
|
299
|
+
}
|
|
300
|
+
/**
|
|
301
|
+
* Search for documents similar to the query text.
|
|
302
|
+
* The query is embedded automatically using the configured embeddings provider.
|
|
303
|
+
*/
|
|
304
|
+
async search(query, options) {
|
|
305
|
+
if (!this.embeddings) {
|
|
306
|
+
throw new Error("No embeddings provider configured. " +
|
|
307
|
+
"Use searchByVector() with a pre-computed query embedding, " +
|
|
308
|
+
"or pass an embeddings provider in the config.");
|
|
309
|
+
}
|
|
310
|
+
const queryVector = await this.embeddings.embedQuery(query);
|
|
311
|
+
return this.searchByVector(queryVector, options);
|
|
312
|
+
}
|
|
313
|
+
/**
|
|
314
|
+
* Search using a pre-computed embedding vector.
|
|
315
|
+
* Executes a k-NN query against the OpenSearch index.
|
|
316
|
+
*/
|
|
317
|
+
async searchByVector(embedding, options) {
|
|
318
|
+
const limit = options?.limit ?? 10;
|
|
319
|
+
const scoreThreshold = options?.scoreThreshold;
|
|
320
|
+
const namespace = options?.namespace;
|
|
321
|
+
const filter = options?.filter;
|
|
322
|
+
// Build bool filters for namespace and metadata
|
|
323
|
+
const filters = [];
|
|
324
|
+
if (namespace) {
|
|
325
|
+
filters.push({ term: { namespace } });
|
|
326
|
+
}
|
|
327
|
+
if (filter) {
|
|
328
|
+
for (const [key, value] of Object.entries(filter)) {
|
|
329
|
+
// Declared keyword fields can be queried directly.
|
|
330
|
+
// Undeclared string values are dynamically mapped as text+keyword;
|
|
331
|
+
// append ".keyword" to target the exact-match sub-field.
|
|
332
|
+
const isUndeclaredString = typeof value === "string" && !this.keywordFields.has(key);
|
|
333
|
+
const fieldPath = isUndeclaredString
|
|
334
|
+
? `metadata.${key}.keyword`
|
|
335
|
+
: `metadata.${key}`;
|
|
336
|
+
filters.push({ term: { [fieldPath]: value } });
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
const knnClause = {
|
|
340
|
+
embedding: { vector: embedding, k: limit },
|
|
341
|
+
};
|
|
342
|
+
const queryBody = filters.length > 0
|
|
343
|
+
? {
|
|
344
|
+
bool: {
|
|
345
|
+
must: [{ knn: knnClause }],
|
|
346
|
+
filter: filters,
|
|
347
|
+
},
|
|
348
|
+
}
|
|
349
|
+
: { knn: knnClause };
|
|
350
|
+
const response = await this.client.search({
|
|
351
|
+
index: this.indexName,
|
|
352
|
+
body: { size: limit, query: queryBody },
|
|
353
|
+
});
|
|
354
|
+
const hits = response.body.hits?.hits ?? [];
|
|
355
|
+
const results = [];
|
|
356
|
+
for (const hit of hits) {
|
|
357
|
+
const score = normalizeScore(hit._score, this.spaceType);
|
|
358
|
+
if (scoreThreshold !== undefined && score < scoreThreshold)
|
|
359
|
+
continue;
|
|
360
|
+
results.push({
|
|
361
|
+
document: {
|
|
362
|
+
id: hit._source.id,
|
|
363
|
+
content: hit._source.content,
|
|
364
|
+
metadata: hit._source.metadata,
|
|
365
|
+
},
|
|
366
|
+
score,
|
|
367
|
+
});
|
|
368
|
+
}
|
|
369
|
+
return results;
|
|
370
|
+
}
|
|
371
|
+
/**
|
|
372
|
+
* Delete documents by their IDs.
|
|
373
|
+
* @returns Number of documents actually deleted.
|
|
374
|
+
*/
|
|
375
|
+
async delete(ids, _options) {
|
|
376
|
+
if (ids.length === 0)
|
|
377
|
+
return 0;
|
|
378
|
+
const body = ids.map((id) => ({
|
|
379
|
+
delete: { _index: this.indexName, _id: id },
|
|
380
|
+
}));
|
|
381
|
+
const response = await this.client.bulk({ body, refresh: true });
|
|
382
|
+
return (response.body.items ?? []).filter((item) => item.delete?.result === "deleted").length;
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* Delete all documents, optionally scoped to a namespace.
|
|
386
|
+
*/
|
|
387
|
+
async clear(options) {
|
|
388
|
+
const namespace = options?.namespace;
|
|
389
|
+
const queryBody = namespace
|
|
390
|
+
? { query: { term: { namespace } } }
|
|
391
|
+
: { query: { match_all: {} } };
|
|
392
|
+
await this.client.deleteByQuery({
|
|
393
|
+
index: this.indexName,
|
|
394
|
+
body: queryBody,
|
|
395
|
+
refresh: true,
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
/**
|
|
399
|
+
* Retrieve a document by its ID.
|
|
400
|
+
* @returns The document, or `null` if not found.
|
|
401
|
+
*/
|
|
402
|
+
async getById(id, _options) {
|
|
403
|
+
try {
|
|
404
|
+
const response = await this.client.get({
|
|
405
|
+
index: this.indexName,
|
|
406
|
+
id,
|
|
407
|
+
});
|
|
408
|
+
if (!response.body.found)
|
|
409
|
+
return null;
|
|
410
|
+
const src = response.body._source;
|
|
411
|
+
return {
|
|
412
|
+
id: src.id,
|
|
413
|
+
content: src.content,
|
|
414
|
+
metadata: src.metadata,
|
|
415
|
+
};
|
|
416
|
+
}
|
|
417
|
+
catch (err) {
|
|
418
|
+
if (err.statusCode === 404)
|
|
419
|
+
return null;
|
|
420
|
+
throw err;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
/**
|
|
424
|
+
* Get existing documents by their content hashes.
|
|
425
|
+
* Used by the ingestion pipeline for deduplication.
|
|
426
|
+
*
|
|
427
|
+
* Requires that documents were stored with chunk metadata containing a
|
|
428
|
+
* `hash` field (automatically set by chunkers in this library).
|
|
429
|
+
*
|
|
430
|
+
* @returns Map of hash → document ID for hashes that already exist.
|
|
431
|
+
*/
|
|
432
|
+
async getByHashes(hashes, _options) {
|
|
433
|
+
const hashMap = new Map();
|
|
434
|
+
if (hashes.length === 0)
|
|
435
|
+
return hashMap;
|
|
436
|
+
const response = await this.client.search({
|
|
437
|
+
index: this.indexName,
|
|
438
|
+
body: {
|
|
439
|
+
size: hashes.length,
|
|
440
|
+
query: { terms: { "metadata.hash": hashes } },
|
|
441
|
+
_source: ["id", "metadata.hash"],
|
|
442
|
+
},
|
|
443
|
+
});
|
|
444
|
+
for (const hit of response.body.hits?.hits ?? []) {
|
|
445
|
+
const hash = hit._source.metadata
|
|
446
|
+
?.hash;
|
|
447
|
+
if (hash) {
|
|
448
|
+
hashMap.set(hash, hit._source.id);
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
return hashMap;
|
|
452
|
+
}
|
|
453
|
+
// -------------------------------------------------------------------------
// OpenSearch-specific accessors
// -------------------------------------------------------------------------
/**
 * Delete the entire OpenSearch index backing this store.
 * WARNING: This permanently removes all indexed documents and the mapping;
 * the store cannot serve queries afterwards until the index is recreated.
 */
async deleteIndex() {
    await this.client.indices.delete({ index: this.indexName });
}
/**
 * The OpenSearch index name used by this store.
 * @returns The configured index name.
 */
getIndexName() {
    return this.indexName;
}
/**
 * The configured vector dimensions used in the knn_vector mapping.
 * @returns Number of dimensions each embedding must have.
 */
getDimensions() {
    return this.dimensions;
}
/**
 * The configured embeddings provider, if any.
 * @returns The provider, or a nullish value when the store was configured
 *          for pre-computed embeddings only — presumably set (or not) by
 *          the constructor config; confirm against the class constructor.
 */
getEmbeddings() {
    return this.embeddings;
}
/**
 * The underlying OpenSearch client instance, for direct/advanced use.
 * @returns The raw client this store issues all requests through.
 */
getClient() {
    return this.client;
}
|
|
479
|
+
}
|
|
480
|
+
exports.OpenSearchVectorStore = OpenSearchVectorStore;
|
|
481
|
+
//# sourceMappingURL=OpenSearchVectorStore.js.map
|
|
@@ -5,6 +5,31 @@
|
|
|
5
5
|
* enabling document storage with embeddings and semantic search capabilities.
|
|
6
6
|
*/
|
|
7
7
|
import { Tool } from "../tools/Tool";
|
|
8
|
+
/**
 * Supported data types for typed metadata field definitions.
 */
export type MetadataFieldType = "string" | "number" | "boolean";
/**
 * Definition for a typed metadata field.
 * Used by vector store implementations to declare explicit field types
 * for metadata properties, enabling correct indexing and filtering.
 *
 * @example
 * ```typescript
 * const fields: MetadataFieldDefinition[] = [
 *   { name: "source", type: "string" },
 *   { name: "page", type: "number" },
 * ];
 * ```
 */
export interface MetadataFieldDefinition {
    /** Name of the metadata field. Use snake_case (e.g. `tenant_id`). */
    name: string;
    /** Data type for the field. */
    type: MetadataFieldType;
    /** Whether the field may be null/absent (default: true). */
    nullable?: boolean;
}
|
|
8
33
|
/**
|
|
9
34
|
* Represents a document with its content and optional metadata.
|
|
10
35
|
*/
|
|
@@ -22,8 +22,9 @@
|
|
|
22
22
|
* const addTool = store.toAddDocumentsTool("Save new documents");
|
|
23
23
|
* ```
|
|
24
24
|
*/
|
|
25
|
-
export { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, RetrievalToolOptions, AddDocumentsToolOptions, } from "./VectorStore";
|
|
26
|
-
export { LanceDBVectorStore, LanceDBVectorStoreConfig,
|
|
25
|
+
export { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, RetrievalToolOptions, AddDocumentsToolOptions, MetadataFieldType, MetadataFieldDefinition, } from "./VectorStore";
|
|
26
|
+
export { LanceDBVectorStore, LanceDBVectorStoreConfig, } from "./LanceDBVectorStore";
|
|
27
|
+
export { OpenSearchVectorStore, OpenSearchVectorStoreConfig, OpenSearchSpaceType, OpenSearchKnnEngine, } from "./OpenSearchVectorStore";
|
|
27
28
|
export { Embeddings, EmbeddingOptions } from "../embeddings/Embeddings";
|
|
28
29
|
export { OpenAIEmbeddings, OpenAIEmbeddingsConfig, OpenAIEmbeddingModel, } from "../embeddings/OpenAIEmbeddings";
|
|
29
30
|
export { VoyageAIEmbeddings, VoyageAIEmbeddingsConfig, VoyageAIEmbeddingModel, VoyageAIMultimodalModel, } from "../embeddings/VoyageAIEmbeddings";
|
|
@@ -24,11 +24,13 @@
|
|
|
24
24
|
* ```
|
|
25
25
|
*/
|
|
26
26
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
27
|
-
exports.VoyageAIEmbeddings = exports.OpenAIEmbeddings = exports.Embeddings = exports.LanceDBVectorStore = exports.VectorStore = void 0;
|
|
27
|
+
exports.VoyageAIEmbeddings = exports.OpenAIEmbeddings = exports.Embeddings = exports.OpenSearchVectorStore = exports.LanceDBVectorStore = exports.VectorStore = void 0;
|
|
28
28
|
var VectorStore_1 = require("./VectorStore");
|
|
29
29
|
Object.defineProperty(exports, "VectorStore", { enumerable: true, get: function () { return VectorStore_1.VectorStore; } });
|
|
30
30
|
var LanceDBVectorStore_1 = require("./LanceDBVectorStore");
|
|
31
31
|
Object.defineProperty(exports, "LanceDBVectorStore", { enumerable: true, get: function () { return LanceDBVectorStore_1.LanceDBVectorStore; } });
|
|
32
|
+
var OpenSearchVectorStore_1 = require("./OpenSearchVectorStore");
|
|
33
|
+
Object.defineProperty(exports, "OpenSearchVectorStore", { enumerable: true, get: function () { return OpenSearchVectorStore_1.OpenSearchVectorStore; } });
|
|
32
34
|
// Re-export embeddings for backward compatibility
|
|
33
35
|
var Embeddings_1 = require("../embeddings/Embeddings");
|
|
34
36
|
Object.defineProperty(exports, "Embeddings", { enumerable: true, get: function () { return Embeddings_1.Embeddings; } });
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentionai/agents",
|
|
3
3
|
"author": "Laurent Zuijdwijk",
|
|
4
|
-
"version": "0.10.1",
|
|
4
|
+
"version": "0.11.0",
|
|
5
5
|
"description": "Agent Library",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -167,9 +167,13 @@
|
|
|
167
167
|
},
|
|
168
168
|
"voyageai": {
|
|
169
169
|
"optional": true
|
|
170
|
+
},
|
|
171
|
+
"@opensearch-project/opensearch": {
|
|
172
|
+
"optional": true
|
|
170
173
|
}
|
|
171
174
|
},
|
|
172
175
|
"dependencies": {
|
|
176
|
+
"@opensearch-project/opensearch": "^3.5.1",
|
|
173
177
|
"tokenx": "^1.2.1"
|
|
174
178
|
}
|
|
175
179
|
}
|