@galdor/memory-s3vectors 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -0
- package/dist/index.d.ts +172 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +358 -0
- package/dist/index.js.map +1 -0
- package/package.json +37 -0
- package/src/index.ts +470 -0
- package/src/s3vectors.test.ts +241 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@galdor/memory-s3vectors` — an Amazon S3 Vectors long-term memory store.
|
|
3
|
+
*
|
|
4
|
+
* Implements the core {@link Store} interface from `@galdor/core/memory` over the
|
|
5
|
+
* Amazon S3 Vectors API, so it drops in behind a {@link Retriever} exactly like
|
|
6
|
+
* the bundled `InMemoryStore` — but persists vectors in an S3 Vectors index.
|
|
7
|
+
*
|
|
8
|
+
* Credentials are resolved by the AWS SDK's default provider chain (environment
|
|
9
|
+
* variables → shared `~/.aws` config → ECS container credentials → EC2 IMDS /
|
|
10
|
+
* task role); no static keys are accepted here. The index is created on first
|
|
11
|
+
* use if it does not already exist.
|
|
12
|
+
*
|
|
13
|
+
* Construct one with {@link openS3Vectors}, which probes (and auto-creates) the
|
|
14
|
+
* index before returning a ready store.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* const store = await openS3Vectors({ bucket: "my-vectors", index: "galdor-chunks", region: "us-east-1", dim: 1024 });
|
|
19
|
+
* await store.add([{ id: "c1", documentId: "d1", index: 0, text: "…", embedding: vec, metadata: { lang: "es" } }]);
|
|
20
|
+
* const hits = await store.retrieve({ embedding: queryVec, k: 5, filter: { lang: "es" } });
|
|
21
|
+
* await store.close();
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import {
|
|
26
|
+
CreateIndexCommand,
|
|
27
|
+
DataType,
|
|
28
|
+
DeleteVectorsCommand,
|
|
29
|
+
DistanceMetric,
|
|
30
|
+
GetIndexCommand,
|
|
31
|
+
ListVectorsCommand,
|
|
32
|
+
PutVectorsCommand,
|
|
33
|
+
type PutVectorsCommandInput,
|
|
34
|
+
QueryVectorsCommand,
|
|
35
|
+
type QueryVectorsCommandInput,
|
|
36
|
+
S3VectorsClient,
|
|
37
|
+
} from "@aws-sdk/client-s3vectors";
|
|
38
|
+
import type { Chunk, Query, Result, Store } from "@galdor/core/memory";
|
|
39
|
+
|
|
40
|
+
// ── Reserved metadata keys (stable names for cross-store compatibility) ──
|
|
41
|
+
const META_DOCUMENT_ID = "__document_id";
|
|
42
|
+
const META_INDEX = "__index";
|
|
43
|
+
const META_TEXT = "__text";
|
|
44
|
+
const RESERVED_PREFIX = "__";
|
|
45
|
+
|
|
46
|
+
/** S3 Vectors batch/page limits. */
|
|
47
|
+
const MAX_PUT_BATCH = 500;
|
|
48
|
+
const MAX_DELETE_BATCH = 500;
|
|
49
|
+
const LIST_PAGE_SIZE = 1000;
|
|
50
|
+
|
|
51
|
+
const DEFAULT_INDEX = "galdor-chunks";
|
|
52
|
+
const DEFAULT_K = 5;
|
|
53
|
+
|
|
54
|
+
/** A JSON metadata document, as the S3 Vectors API models it. */
|
|
55
|
+
type Metadata = Record<string, unknown>;
|
|
56
|
+
|
|
57
|
+
/**
 * The narrow slice of the S3 Vectors API that the store depends on.
 *
 * Declared as an interface so tests can substitute an in-process fake for a
 * live AWS client. Inputs and outputs are plain JSON-shaped objects.
 */
export interface S3VectorsAPI {
  /** Look up an index. The store only reads `index.dimension` from the reply. */
  getIndex(input: { vectorBucketName: string; indexName: string }): Promise<{ index?: { dimension?: number } }>;

  /** Create an index with the given element type, dimension and distance metric. */
  createIndex(input: {
    vectorBucketName: string;
    indexName: string;
    dataType: DataType;
    dimension: number;
    distanceMetric: DistanceMetric;
    metadataConfiguration?: { nonFilterableMetadataKeys: string[] };
  }): Promise<unknown>;

  /** Write a batch of keyed vectors, each with an optional metadata document. */
  putVectors(
    input: {
      vectorBucketName: string;
      indexName: string;
      vectors: { key: string; data: { float32: number[] }; metadata?: Metadata }[];
    },
    opts?: SendOptions,
  ): Promise<unknown>;

  /** Run a nearest-neighbour query, optionally restricted by a metadata filter. */
  queryVectors(
    input: {
      vectorBucketName: string;
      indexName: string;
      topK: number;
      queryVector: { float32: number[] };
      returnMetadata: boolean;
      returnDistance: boolean;
      filter?: Metadata;
      nextToken?: string;
    },
    opts?: SendOptions,
  ): Promise<{
    vectors?: { key?: string; distance?: number; metadata?: Metadata }[];
    distanceMetric?: DistanceMetric;
    nextToken?: string;
  }>;

  /** List one page of vectors; `nextToken` in the reply signals a further page. */
  listVectors(
    input: {
      vectorBucketName: string;
      indexName: string;
      returnMetadata: boolean;
      maxResults: number;
      nextToken?: string;
    },
    opts?: SendOptions,
  ): Promise<{ vectors?: { key?: string; metadata?: Metadata }[]; nextToken?: string }>;

  /** Delete the vectors stored under the given keys. */
  deleteVectors(input: { vectorBucketName: string; indexName: string; keys: string[] }, opts?: SendOptions): Promise<unknown>;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Options for a single API call. They are passed through unchanged as the
 * second argument of the AWS SDK `send` call.
 */
export interface SendOptions {
  /** The caller's abort signal for this request, when one was supplied. */
  abortSignal?: AbortSignal;
}
|
|
113
|
+
|
|
114
|
+
/** Options accepted by {@link openS3Vectors} when constructing a store. */
export interface Config {
  /** Name of the S3 Vectors bucket. Required; the bucket is expected to exist already. */
  bucket: string;
  /** Name of the index inside the bucket. Falls back to `"galdor-chunks"`; created when missing. */
  index?: string;
  /** AWS region. When omitted or empty, the SDK's default resolution is used. */
  region?: string;
  /** Dimensionality of the embeddings, e.g. `1024`. Required; must be a positive integer. */
  dim: number;
  /** Distance metric applied when the index is created (default `"cosine"`). Has no effect on an existing index. */
  distance?: DistanceMetric;
  /** Replacement API implementation, intended for tests. When omitted a real {@link S3VectorsClient} is created. */
  api?: S3VectorsAPI;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Adapt a concrete {@link S3VectorsClient} to the {@link S3VectorsAPI} contract.
 *
 * Every method builds the matching SDK command and dispatches it through
 * `client.send`. The per-call options are forwarded for the put/query/list/delete
 * operations; `getIndex` and `createIndex` are sent without options.
 */
function clientAPI(client: S3VectorsClient): S3VectorsAPI {
  // The store models metadata and filters as plain JSON objects, while the SDK
  // types them as its recursive `DocumentType`. The casts are confined to this
  // adapter so the SDK's document type never appears in the store's contract.
  const adapter = {
    getIndex: (input) => client.send(new GetIndexCommand(input)),
    createIndex: (input) => client.send(new CreateIndexCommand(input)),
    putVectors: (input, options) =>
      client.send(new PutVectorsCommand(input as unknown as PutVectorsCommandInput), options),
    queryVectors: (input, options) =>
      client.send(new QueryVectorsCommand(input as unknown as QueryVectorsCommandInput), options),
    listVectors: (input, options) => client.send(new ListVectorsCommand(input), options),
    deleteVectors: (input, options) => client.send(new DeleteVectorsCommand(input), options),
  } as S3VectorsAPI;
  return adapter;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Open an S3 Vectors-backed {@link Store}, probing and auto-creating the index.
 *
 * When no `cfg.api` is injected, a real {@link S3VectorsClient} is created and
 * handed to the store, which destroys it in `close()`. If the index probe or
 * creation fails, that client is released before the error is rethrown, so a
 * failed open does not leak it.
 *
 * @param cfg - The {@link Config}.
 * @returns A ready store.
 * @throws {Error} When `bucket` is empty, `dim` is not a positive integer, the
 *   index name is invalid, or the index exists with a different dimension.
 *   Errors from the underlying API calls are propagated unchanged.
 */
export async function openS3Vectors(cfg: Config): Promise<S3VectorsStore> {
  if (!cfg.bucket || cfg.bucket.trim() === "") throw new Error("memory/s3vectors: bucket is required");
  if (!Number.isInteger(cfg.dim) || cfg.dim <= 0) throw new Error("memory/s3vectors: dim must be a positive integer");
  const index = cfg.index && cfg.index.trim() !== "" ? cfg.index : DEFAULT_INDEX;
  validateIndexName(index);
  const distance = cfg.distance ?? DistanceMetric.COSINE;

  // Only create (and later destroy) a client when the caller did not inject an API.
  let client: S3VectorsClient | undefined;
  let api = cfg.api;
  if (api === undefined) {
    client = new S3VectorsClient(cfg.region ? { region: cfg.region } : {});
    api = clientAPI(client);
  }

  const store = new S3VectorsStore(api, cfg.bucket, index, cfg.dim, distance, client);
  try {
    await store.ensureIndex();
  } catch (err) {
    // The caller never receives the store on failure, so release the client here.
    await store.close();
    throw err;
  }
  return store;
}
|
|
168
|
+
|
|
169
|
+
/**
 * A {@link Store} backed by an Amazon S3 Vectors index.
 *
 * Prefer {@link openS3Vectors} to construct one; the constructor is exported for
 * advanced use and testing. The constructor performs no I/O; call
 * {@link S3VectorsStore.ensureIndex} before using a hand-built instance.
 */
export class S3VectorsStore implements Store {
  readonly #api: S3VectorsAPI;
  readonly #bucket: string;
  readonly #index: string;
  readonly #dim: number;
  readonly #distance: DistanceMetric;
  /** Optional SDK client; when present it is destroyed by {@link S3VectorsStore.close}. */
  readonly #client?: S3VectorsClient;

  /**
   * @param api - API implementation every operation is issued through.
   * @param bucket - Vector bucket name.
   * @param index - Index name within the bucket.
   * @param dim - Expected embedding dimensionality.
   * @param distance - Metric used when creating the index and as a fallback when scoring.
   * @param client - Optional SDK client to destroy on `close()`.
   */
  constructor(
    api: S3VectorsAPI,
    bucket: string,
    index: string,
    dim: number,
    distance: DistanceMetric,
    client?: S3VectorsClient,
  ) {
    this.#api = api;
    this.#bucket = bucket;
    this.#index = index;
    this.#dim = dim;
    this.#distance = distance;
    // Assign only when provided so the optional field is never set to `undefined`.
    if (client) this.#client = client;
  }

  /**
   * Probe the index; create it (with this store's dim + metric) if it is missing.
   *
   * If creation fails with a `ConflictException` (presumably because another
   * opener created the index between the probe and the create), the index is
   * probed again, which also re-checks its dimension.
   *
   * @throws {Error} When the index exists with a different dimension. Other API
   *   errors are propagated unchanged.
   */
  async ensureIndex(): Promise<void> {
    if (await this.#indexExists()) return;
    try {
      // `__text` is large and not useful as a filter, so it is declared non-filterable.
      await this.#api.createIndex({
        vectorBucketName: this.#bucket,
        indexName: this.#index,
        dataType: DataType.FLOAT32,
        dimension: this.#dim,
        distanceMetric: this.#distance,
        metadataConfiguration: { nonFilterableMetadataKeys: [META_TEXT] },
      });
    } catch (err) {
      const lostRace =
        typeof err === "object" && err !== null && (err as { name?: string }).name === "ConflictException";
      // Accept the conflict only if the index is now visible (and dimension-compatible).
      if (!lostRace || !(await this.#indexExists())) throw err;
    }
  }

  /**
   * Upsert chunks, keyed by their `id`. Each chunk's embedding is stored as the
   * vector; `documentId`, `index`, `text` and any chunk metadata are stored as
   * metadata. All chunks are validated before the first write, then written in
   * batches of at most `MAX_PUT_BATCH`.
   *
   * @param chunks - Chunks to store; an empty array is a no-op.
   * @param signal - Optional abort signal forwarded to every API call.
   * @throws {Error} When a chunk has an empty `id`, a missing/mismatched-dimension
   *   embedding, or metadata using the reserved `__` prefix.
   */
  async add(chunks: Chunk[], signal?: AbortSignal): Promise<void> {
    if (chunks.length === 0) return;
    const vectors = chunks.map((c) => {
      if (c.id === "") throw new Error("memory/s3vectors: Chunk.id is empty (caller must assign ids)");
      if (c.embedding === undefined || c.embedding.length !== this.#dim) {
        throw new Error(
          `memory/s3vectors: chunk "${c.id}" has ${c.embedding?.length ?? 0}-dim embedding; index is ${this.#dim}-dim`,
        );
      }
      // Copy the embedding so the request does not alias the caller's array.
      return { key: c.id, data: { float32: [...c.embedding] }, metadata: buildMetadata(c) };
    });
    for (let start = 0; start < vectors.length; start += MAX_PUT_BATCH) {
      await this.#api.putVectors(
        {
          vectorBucketName: this.#bucket,
          indexName: this.#index,
          vectors: vectors.slice(start, start + MAX_PUT_BATCH),
        },
        sendOpts(signal),
      );
    }
  }

  /**
   * Return up to `k` nearest chunks for the query embedding, in the order the
   * service returns them. `query.filter` is applied as an exact-match AND over
   * metadata.
   *
   * `query.k` is normalised to a positive integer: a missing, non-finite or
   * sub-1 value falls back to `DEFAULT_K`, and a fractional value is floored.
   *
   * Scores: for the Euclidean metric `1 / (1 + distance)`; otherwise
   * `1 - distance`, with negative scores dropped.
   *
   * @param query - Query embedding plus optional `k` and metadata filter.
   * @param signal - Optional abort signal forwarded to every API call.
   * @throws {Error} When `query.embedding` is absent or its dimension mismatches.
   */
  async retrieve(query: Query, signal?: AbortSignal): Promise<Result[]> {
    if (query.embedding === undefined || query.embedding.length === 0) {
      throw new Error("memory/s3vectors: Query.embedding is required (this backend is vector-only)");
    }
    if (query.embedding.length !== this.#dim) {
      throw new Error(`memory/s3vectors: query has ${query.embedding.length}-dim embedding; index is ${this.#dim}-dim`);
    }
    // A fractional k would make the truncation below throw RangeError, and NaN
    // would defeat both the loop-exit and truncation comparisons.
    const requested = query.k ?? 0;
    const k = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : DEFAULT_K;

    const filter = buildFilter(query.filter);
    const results: Result[] = [];
    let nextToken: string | undefined;
    for (;;) {
      const out = await this.#api.queryVectors(
        {
          vectorBucketName: this.#bucket,
          indexName: this.#index,
          topK: k,
          queryVector: { float32: [...query.embedding] },
          returnMetadata: true,
          returnDistance: true,
          ...(filter ? { filter } : {}),
          ...(nextToken ? { nextToken } : {}),
        },
        sendOpts(signal),
      );
      // Prefer the metric reported by the service; fall back to the configured one.
      const metric = out.distanceMetric || this.#distance;
      for (const v of out.vectors ?? []) {
        const chunk = chunkFromVector(v.key, v.metadata);
        const dist = v.distance ?? 0;
        if (metric === DistanceMetric.EUCLIDEAN) {
          // Assumes a non-negative distance: maps [0, ∞) to (0, 1], monotone-decreasing.
          results.push({ chunk, score: 1 / (1 + dist) });
          continue;
        }
        // Cosine distance ∈ [0, 2] → similarity = 1 - distance ∈ [-1, 1].
        // Anti-correlated hits (negative similarity) are dropped.
        const score = 1 - dist;
        if (score < 0) continue;
        results.push({ chunk, score });
      }
      if (!out.nextToken || results.length >= k) break;
      nextToken = out.nextToken;
    }
    return results.length > k ? results.slice(0, k) : results;
  }

  /**
   * Remove every chunk whose `documentId` metadata matches. Scans the whole
   * index page by page for matching keys, then deletes them in batches of at
   * most `MAX_DELETE_BATCH`. A no-op when nothing matches.
   *
   * @param documentId - Document whose chunks should be removed.
   * @param signal - Optional abort signal forwarded to every API call.
   * @throws {Error} When `documentId` is empty.
   */
  async delete(documentId: string, signal?: AbortSignal): Promise<void> {
    if (documentId === "") throw new Error("memory/s3vectors: delete called with empty documentId");
    const keys: string[] = [];
    let nextToken: string | undefined;
    for (;;) {
      const out = await this.#api.listVectors(
        {
          vectorBucketName: this.#bucket,
          indexName: this.#index,
          returnMetadata: true,
          maxResults: LIST_PAGE_SIZE,
          ...(nextToken ? { nextToken } : {}),
        },
        sendOpts(signal),
      );
      for (const v of out.vectors ?? []) {
        if (v.key && metadataString(v.metadata, META_DOCUMENT_ID) === documentId) keys.push(v.key);
      }
      if (!out.nextToken) break;
      nextToken = out.nextToken;
    }
    for (let start = 0; start < keys.length; start += MAX_DELETE_BATCH) {
      await this.#api.deleteVectors(
        {
          vectorBucketName: this.#bucket,
          indexName: this.#index,
          keys: keys.slice(start, start + MAX_DELETE_BATCH),
        },
        sendOpts(signal),
      );
    }
  }

  /**
   * Number of vectors in the index. Scans the index (paginated `ListVectors`), so
   * it is O(index size). Useful for tests; not part of {@link Store}.
   *
   * @param signal - Optional abort signal forwarded to every API call.
   */
  async len(signal?: AbortSignal): Promise<number> {
    let count = 0;
    let nextToken: string | undefined;
    for (;;) {
      const out = await this.#api.listVectors(
        {
          vectorBucketName: this.#bucket,
          indexName: this.#index,
          returnMetadata: false,
          maxResults: LIST_PAGE_SIZE,
          ...(nextToken ? { nextToken } : {}),
        },
        sendOpts(signal),
      );
      count += out.vectors?.length ?? 0;
      if (!out.nextToken) break;
      nextToken = out.nextToken;
    }
    return count;
  }

  /** Destroy the SDK client if this store holds one; otherwise a no-op. */
  async close(): Promise<void> {
    this.#client?.destroy();
  }

  /**
   * Probe the index once.
   *
   * @returns `true` when the index exists, `false` when the probe reports "not found".
   * @throws {Error} When the index exists with a different dimension, or for any
   *   other API error.
   */
  async #indexExists(): Promise<boolean> {
    try {
      const out = await this.#api.getIndex({ vectorBucketName: this.#bucket, indexName: this.#index });
      const got = out.index?.dimension;
      if (typeof got === "number" && got !== this.#dim) {
        throw new Error(`memory/s3vectors: index "${this.#index}" is ${got}-dim; Config.dim is ${this.#dim}`);
      }
      return true;
    } catch (err) {
      // The dimension-mismatch error above is not a "not found" error, so it is rethrown here.
      if (!isNotFound(err)) throw err;
      return false;
    }
  }
}
|
|
375
|
+
|
|
376
|
+
// ── helpers ──────────────────────────────────────────────────────────────────
|
|
377
|
+
|
|
378
|
+
/**
 * Assemble the metadata document stored alongside a chunk's vector.
 *
 * The reserved keys carry the chunk's `documentId`, `index` and `text`; every
 * entry of `c.metadata` is then copied in under its own key.
 *
 * @throws {Error} When a caller-supplied key starts with the reserved `__` prefix.
 */
function buildMetadata(c: Chunk): Metadata {
  const doc: Metadata = {
    [META_DOCUMENT_ID]: c.documentId,
    [META_INDEX]: c.index,
    [META_TEXT]: c.text,
  };
  const extras = c.metadata ? Object.entries(c.metadata) : [];
  for (const [key, value] of extras) {
    if (key.startsWith(RESERVED_PREFIX)) {
      throw new Error(`memory/s3vectors: chunk "${c.id}" metadata key "${key}" uses the reserved "${RESERVED_PREFIX}" prefix`);
    }
    doc[key] = value;
  }
  return doc;
}
|
|
391
|
+
|
|
392
|
+
/**
 * Build the S3 Vectors filter document from a metadata filter:
 * - 0 usable keys → no filter (`undefined`)
 * - 1 usable key  → bare `{ k: v }` (implicit equals)
 * - >1 usable key → explicit `{ "$and": [{ k1: v1 }, …] }` (top-level implicit AND is rejected by the API)
 *
 * Entries whose value is `undefined` are skipped: they would otherwise be
 * dropped during JSON serialisation and leave an empty clause behind.
 *
 * @param filter - Exact-match constraints keyed by metadata field, or `undefined`.
 * @returns The filter document, or `undefined` when there is nothing to filter on.
 */
function buildFilter(filter: Record<string, string> | undefined): Metadata | undefined {
  if (!filter) return undefined;
  const clauses = Object.entries(filter)
    .filter(([, value]) => value !== undefined)
    .map(([key, value]): Metadata => ({ [key]: value }));
  if (clauses.length === 0) return undefined;
  if (clauses.length === 1) return clauses[0];
  return { $and: clauses };
}
|
|
408
|
+
|
|
409
|
+
/**
 * Rebuild a {@link Chunk} from a vector's key and its returned metadata.
 *
 * Missing or wrongly typed reserved fields fall back to `""` / `0`. Only
 * string-valued, non-reserved entries are surfaced as `chunk.metadata`, and
 * that property is left unset when there are none.
 */
function chunkFromVector(key: string | undefined, meta: Metadata | undefined): Chunk {
  const chunk: Chunk = { id: key ?? "", documentId: "", index: 0, text: "" };
  if (!meta) return chunk;

  const documentId = meta[META_DOCUMENT_ID];
  if (typeof documentId === "string") chunk.documentId = documentId;
  const text = meta[META_TEXT];
  if (typeof text === "string") chunk.text = text;
  chunk.index = toInt(meta[META_INDEX]);

  const extras = Object.entries(meta).filter(
    (entry): entry is [string, string] => !entry[0].startsWith(RESERVED_PREFIX) && typeof entry[1] === "string",
  );
  if (extras.length > 0) chunk.metadata = Object.fromEntries(extras);
  return chunk;
}
|
|
425
|
+
|
|
426
|
+
/** Look up `key` in `meta` and return it if it is a string; otherwise return `""`. */
function metadataString(meta: Metadata | undefined, key: string): string {
  if (meta == null) return "";
  const value = meta[key];
  if (typeof value !== "string") return "";
  return value;
}
|
|
431
|
+
|
|
432
|
+
/**
 * Coerce a metadata value to an integer.
 *
 * Numbers and non-empty numeric strings are truncated toward zero. Everything
 * else yields `0`, including `NaN`, `±Infinity` and strings that parse to them.
 *
 * @param v - The raw metadata value.
 * @returns A finite integer, or `0` when `v` cannot be interpreted as one.
 */
function toInt(v: unknown): number {
  let n: number;
  if (typeof v === "number") n = v;
  else if (typeof v === "string" && v !== "") n = Number(v);
  else return 0;
  // Math.trunc would pass NaN/Infinity straight through, so gate on finiteness.
  return Number.isFinite(n) ? Math.trunc(n) : 0;
}
|
|
438
|
+
|
|
439
|
+
/**
 * Report whether `err` is an object whose `name` is `"NotFoundException"`.
 * Matching on the name rather than `instanceof` keeps the check working when
 * more than one copy of the SDK is loaded.
 */
function isNotFound(err: unknown): boolean {
  if (typeof err !== "object" || err === null) return false;
  return (err as { name?: string }).name === "NotFoundException";
}
|
|
443
|
+
|
|
444
|
+
/** Wrap an abort signal as per-call {@link SendOptions}; yields `undefined` when no signal was given. */
function sendOpts(signal: AbortSignal | undefined): SendOptions | undefined {
  if (!signal) return undefined;
  return { abortSignal: signal };
}
|
|
448
|
+
|
|
449
|
+
/**
 * Check an index name against the S3 Vectors DNS-style naming rules and throw
 * on the first violation:
 * 1. length between 3 and 63 characters;
 * 2. first and last character are a lowercase letter or a digit;
 * 3. every character is a lowercase letter, a digit, '-' or '.'.
 *
 * Underscores and uppercase letters are rejected — an easy mistake when reusing
 * pgvector/qdrant table or collection names.
 *
 * @throws {Error} Describing the first rule the name breaks.
 */
function validateIndexName(name: string): void {
  if (name.length < 3 || name.length > 63) {
    throw new Error(`memory/s3vectors: index name "${name}" must be 3–63 characters`);
  }

  const edgeChar = /^[a-z0-9]$/;
  const first = name.charAt(0);
  const last = name.charAt(name.length - 1);
  if (!(edgeChar.test(first) && edgeChar.test(last))) {
    throw new Error(`memory/s3vectors: index name "${name}" must begin and end with a lowercase letter or digit`);
  }

  // Walk the name by code point so the offending character is reported whole.
  const bodyChar = /^[a-z0-9.-]$/;
  const offender = Array.from(name).find((ch) => !bodyChar.test(ch));
  if (offender !== undefined) {
    throw new Error(
      `memory/s3vectors: index name "${name}" has invalid character "${offender}" (allowed: a-z, 0-9, '-', '.'; no underscores or uppercase)`,
    );
  }
}
|