@galdor/memory-s3vectors 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,241 @@
1
+ /**
2
+ * Behavioral tests for the S3 Vectors store, driven by an injected fake API so
3
+ * they run without AWS. They mirror the reference adapter's coverage: index
4
+ * auto-create, round-trip with metadata, the filter shape, distance→score
5
+ * conversion, delete-by-documentId across pages, and idempotent add by id.
6
+ */
7
+ import { describe, expect, test } from "bun:test";
8
+ import type { Chunk } from "@galdor/core/memory";
9
+ import { Retriever } from "@galdor/core/memory";
10
+ import { openS3Vectors, type S3VectorsAPI } from "./index.ts";
11
+
12
/** A vector record as handed to `putVectors`: its key, float32 payload, and optional metadata. */
type Vec = { key: string; data: { float32: number[] }; metadata?: Record<string, unknown> };

/**
 * An in-memory fake of the S3 Vectors API, recording calls for assertions.
 *
 * Vectors live in a `Map` keyed by vector key, so `putVectors` is an upsert.
 * `listVectors` pages through the stored vectors in ascending key order and
 * honours `maxResults` / `nextToken`; pass `pageSize` to the constructor to
 * cap every page and force multi-page listings in a test.
 */
class FakeAPI implements S3VectorsAPI {
  /** Stored vectors, keyed by vector key. */
  vectors = new Map<string, Vec>();
  /** Arguments of the last `createIndex` call; `undefined` until it is called. */
  created: { dimension: number; distanceMetric: string; nonFilterable?: string[] } | undefined;
  /** Whether `getIndex` should report the index as present. */
  indexExists: boolean;
  /** Dimension reported by `getIndex` for an existing index, when set. */
  existingDim: number | undefined;
  /** Optional upper bound on the number of vectors `listVectors` returns per page. */
  pageSize: number | undefined;
  // Override query behavior per-test (default returns all stored, distance 0).
  onQuery?: (input: { topK: number; filter?: Record<string, unknown> }) => {
    vectors?: { key?: string; distance?: number; metadata?: Record<string, unknown> }[];
    distanceMetric?: string;
    nextToken?: string;
  };

  /**
   * @param opts.indexExists Start with the index already present (default `false`).
   * @param opts.existingDim Dimension `getIndex` reports for the existing index.
   * @param opts.pageSize Cap on vectors per `listVectors` page (default: no cap).
   */
  constructor(opts: { indexExists?: boolean; existingDim?: number; pageSize?: number } = {}) {
    this.indexExists = opts.indexExists ?? false;
    // Assign only when provided so the fields never receive an explicit `undefined`.
    if (opts.existingDim !== undefined) this.existingDim = opts.existingDim;
    if (opts.pageSize !== undefined) this.pageSize = opts.pageSize;
  }

  /**
   * Reports the index, or throws when it does not exist.
   *
   * @returns `{ index: { dimension } }` when `existingDim` is set, otherwise `{}`.
   * @throws Error named `"NotFoundException"` while `indexExists` is `false`.
   */
  async getIndex(_i: { vectorBucketName: string; indexName: string }) {
    if (!this.indexExists) {
      const e = new Error("not found");
      e.name = "NotFoundException";
      throw e;
    }
    return this.existingDim !== undefined ? { index: { dimension: this.existingDim } } : {};
  }

  /** Records the requested index configuration in `created` and marks the index as existing. */
  async createIndex(i: {
    dimension: number;
    distanceMetric: string;
    metadataConfiguration?: { nonFilterableMetadataKeys: string[] };
  }) {
    const c: { dimension: number; distanceMetric: string; nonFilterable?: string[] } = {
      dimension: i.dimension,
      distanceMetric: i.distanceMetric,
    };
    if (i.metadataConfiguration) c.nonFilterable = i.metadataConfiguration.nonFilterableMetadataKeys;
    this.created = c;
    this.indexExists = true;
    return {};
  }

  /** Stores each vector under its key, replacing any vector already stored there. */
  async putVectors(i: { vectors: Vec[] }) {
    for (const v of i.vectors) this.vectors.set(v.key, v); // upsert by key = idempotent
    return {};
  }

  /**
   * Delegates to `onQuery` when a test has set it; otherwise returns every
   * stored vector at distance 0 under the "cosine" metric.
   */
  async queryVectors(i: { topK: number; filter?: Record<string, unknown> }) {
    if (this.onQuery) return this.onQuery(i);
    const vectors = [...this.vectors.values()].map((v) => ({ key: v.key, distance: 0, metadata: v.metadata }));
    return { vectors, distanceMetric: "cosine" as const };
  }

  /**
   * Lists stored vectors (key + metadata) one page at a time, in ascending key order.
   *
   * A page holds at most `maxResults` vectors, further capped by `pageSize`
   * when set; a missing or non-positive limit means "no limit". `nextToken`
   * is present only while more vectors remain.
   */
  async listVectors(i: { maxResults: number; nextToken?: string }) {
    const cursor = i.nextToken;
    // The token is the last key already served. Resuming by key comparison
    // (not by offset) keeps paging correct when the caller deletes vectors
    // between pages.
    const pending = [...this.vectors.values()]
      .sort((a, b) => (a.key < b.key ? -1 : a.key > b.key ? 1 : 0))
      .filter((v) => cursor === undefined || v.key > cursor);

    const caps = [i.maxResults, this.pageSize].filter(
      (n): n is number => typeof n === "number" && Number.isFinite(n) && n >= 1,
    );
    const limit = caps.length > 0 ? Math.floor(Math.min(...caps)) : Infinity;

    const page = pending.slice(0, limit);
    const vectors = page.map((v) => ({ key: v.key, metadata: v.metadata }));
    const result: { vectors: typeof vectors; nextToken?: string } = { vectors };
    const last = page[page.length - 1];
    if (last !== undefined && page.length < pending.length) result.nextToken = last.key;
    return result;
  }

  /** Removes the given keys; keys that are not stored are ignored. */
  async deleteVectors(i: { keys: string[] }) {
    for (const k of i.keys) this.vectors.delete(k);
    return {};
  }
}
72
+
73
/** Spells out an embedding literal: returns its arguments as a number array. */
const vec = (...components: number[]): number[] => components;
74
/**
 * Builds a Chunk for tests. `id` and `embedding` are mandatory; the remaining
 * fields default to documentId "d1", index 0, text "t" unless `over` replaces them.
 */
const chunk = (over: Partial<Chunk> & Pick<Chunk, "id" | "embedding">): Chunk => {
  const defaults = { documentId: "d1", index: 0, text: "t" };
  // Later spread wins, so anything present in `over` overrides the defaults.
  return { ...defaults, ...over };
};
80
+
81
// Index bootstrap: opening the store creates, reuses, or rejects the index and validates its options.
describe("openS3Vectors / ensureIndex", () => {
  test("creates the index when missing, with dim, cosine, and __text non-filterable", async () => {
    const fake = new FakeAPI({ indexExists: false });

    await openS3Vectors({ bucket: "b", index: "galdor-chunks", dim: 4, api: fake });

    // FakeAPI.createIndex stashes the request it received in `created`.
    const recorded = fake.created;
    expect(recorded).toBeDefined();
    expect(recorded!.dimension).toBe(4);
    expect(recorded!.distanceMetric).toBe("cosine");
    expect(recorded!.nonFilterable).toEqual(["__text"]);
  });

  test("does not create when the index already exists at the same dim", async () => {
    const fake = new FakeAPI({ indexExists: true, existingDim: 4 });

    await openS3Vectors({ bucket: "b", dim: 4, api: fake });

    expect(fake.created).toBeUndefined();
  });

  test("throws when the existing index has a different dimension", async () => {
    const fake = new FakeAPI({ indexExists: true, existingDim: 8 });

    const opening = openS3Vectors({ bucket: "b", dim: 4, api: fake });

    await expect(opening).rejects.toThrow(/8-dim/);
  });

  test("validates bucket, dim, and index name", async () => {
    // One invalid option at a time; each attempt gets its own fresh fake.
    const emptyBucket = openS3Vectors({ bucket: "", dim: 4, api: new FakeAPI() });
    await expect(emptyBucket).rejects.toThrow(/bucket/);

    const zeroDim = openS3Vectors({ bucket: "b", dim: 0, api: new FakeAPI() });
    await expect(zeroDim).rejects.toThrow(/dim/);

    const badIndexName = openS3Vectors({ bucket: "b", dim: 4, index: "AB", api: new FakeAPI() });
    await expect(badIndexName).rejects.toThrow(/index name/);
  });
});
108
+
109
// Writing chunks and reading them back through the same store instance.
describe("add / retrieve round-trip", () => {
  test("stores embedding + metadata and reconstructs the chunk on retrieve", async () => {
    const fake = new FakeAPI();
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });
    const stored = chunk({
      id: "c1",
      documentId: "doc1",
      index: 3,
      text: "hello",
      embedding: vec(1, 0, 0, 0),
      metadata: { lang: "es" },
    });

    await store.add([stored]);
    const hits = await store.retrieve({ embedding: vec(1, 0, 0, 0), k: 5 });

    expect(hits).toHaveLength(1);
    const top = hits[0]!;
    const roundTripped = top.chunk;
    expect(roundTripped.id).toBe("c1");
    expect(roundTripped.documentId).toBe("doc1");
    expect(roundTripped.index).toBe(3);
    expect(roundTripped.text).toBe("hello");
    expect(roundTripped.metadata).toEqual({ lang: "es" });
    // The fake reports distance 0, which the cosine conversion turns into 1 - 0.
    expect(top.score).toBeCloseTo(1, 6);
  });

  test("add is idempotent by chunk id (re-add overwrites, no duplicate)", async () => {
    const fake = new FakeAPI();
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });

    // Same id twice, different text: the second write must replace the first.
    for (const text of ["v1", "v2"]) {
      await store.add([chunk({ id: "c1", text, embedding: vec(1, 0, 0, 0) })]);
    }

    expect(fake.vectors.size).toBe(1);
    const hits = await store.retrieve({ embedding: vec(1, 0, 0, 0) });
    expect(hits).toHaveLength(1);
    expect(hits[0]!.chunk.text).toBe("v2");
  });

  test("rejects empty id, mismatched dimension, and reserved metadata keys", async () => {
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: new FakeAPI() });

    const emptyId = store.add([chunk({ id: "", embedding: vec(1, 0, 0, 0) })]);
    await expect(emptyId).rejects.toThrow(/id is empty/);

    const wrongDim = store.add([chunk({ id: "c", embedding: vec(1, 0) })]);
    await expect(wrongDim).rejects.toThrow(/2-dim/);

    const reservedKey = store.add([
      chunk({ id: "c", embedding: vec(1, 0, 0, 0), metadata: { __document_id: "x" } }),
    ]);
    await expect(reservedKey).rejects.toThrow(/reserved/);
  });
});
147
+
148
// Score conversion per distance metric, the filter shape sent to the API, and query validation.
describe("retrieve scoring + filter", () => {
  test("cosine: converts distance to similarity and drops anti-correlated hits", async () => {
    const fake = new FakeAPI();
    fake.onQuery = () => {
      return {
        distanceMetric: "cosine",
        vectors: [
          { key: "a", distance: 0.1, metadata: { __text: "near" } },
          // 1 - 1.8 = -0.8, a negative similarity, so this hit must be dropped.
          { key: "b", distance: 1.8, metadata: { __text: "anti" } },
        ],
      };
    };
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });

    const hits = await store.retrieve({ embedding: vec(1, 0, 0, 0), k: 5 });

    expect(hits).toHaveLength(1);
    const survivor = hits[0]!;
    expect(survivor.chunk.id).toBe("a");
    expect(survivor.score).toBeCloseTo(0.9, 6);
  });

  test("euclidean: keeps all, score is 1/(1+distance), monotone-decreasing", async () => {
    const fake = new FakeAPI();
    fake.onQuery = () => {
      return {
        distanceMetric: "euclidean",
        vectors: [
          { key: "near", distance: 0, metadata: { __text: "n" } },
          { key: "far", distance: 9, metadata: { __text: "f" } },
        ],
      };
    };
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });

    const hits = await store.retrieve({ embedding: vec(1, 0, 0, 0), k: 5 });

    expect(hits).toHaveLength(2);
    const nearScore = hits[0]!.score; // 1 / (1 + 0)
    const farScore = hits[1]!.score; // 1 / (1 + 9)
    expect(nearScore).toBeCloseTo(1, 6);
    expect(farScore).toBeCloseTo(0.1, 6);
    expect(nearScore).toBeGreaterThan(farScore);
  });

  test("filter: single key is bare, multiple keys use $and", async () => {
    const fake = new FakeAPI();
    // Every filter handed to queryVectors, in call order; assertions look at the newest one.
    const filters: (Record<string, unknown> | undefined)[] = [];
    const latestFilter = () => filters[filters.length - 1];
    fake.onQuery = (input) => {
      filters.push(input.filter);
      return { vectors: [], distanceMetric: "cosine" };
    };
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });

    await store.retrieve({ embedding: vec(1, 0, 0, 0), filter: { lang: "es" } });
    expect(latestFilter()).toEqual({ lang: "es" });

    await store.retrieve({ embedding: vec(1, 0, 0, 0), filter: { type: "skill", category: "backend" } });
    const combined = latestFilter();
    expect(combined).toHaveProperty("$and");
    const conditions = (combined as { $and: Record<string, string>[] }).$and;
    expect(conditions).toHaveLength(2);
    // Clause order is not part of the contract, so merge the clauses before comparing.
    const merged = conditions.reduce<Record<string, string>>((acc, clause) => ({ ...acc, ...clause }), {});
    expect(merged).toEqual({ type: "skill", category: "backend" });
  });

  test("requires an embedding and a matching dimension", async () => {
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: new FakeAPI() });

    const textOnly = store.retrieve({ text: "x" });
    await expect(textOnly).rejects.toThrow(/embedding is required/);

    const wrongDim = store.retrieve({ embedding: vec(1, 0) });
    await expect(wrongDim).rejects.toThrow(/2-dim/);
  });
});
207
+
208
// Deleting by documentId, observed through the vectors left in the fake.
describe("delete", () => {
  test("removes exactly the chunks of the given documentId", async () => {
    const fake = new FakeAPI();
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });
    // Two chunks belong to d1 and one to d2; only the d2 chunk should survive.
    const seeded = [
      chunk({ id: "d1#0", documentId: "d1", embedding: vec(1, 0, 0, 0) }),
      chunk({ id: "d1#1", documentId: "d1", embedding: vec(0, 1, 0, 0) }),
      chunk({ id: "d2#0", documentId: "d2", embedding: vec(0, 0, 1, 0) }),
    ];
    await store.add(seeded);

    await store.delete("d1");

    const remainingKeys = Array.from(fake.vectors.keys()).sort();
    expect(remainingKeys).toEqual(["d2#0"]);
  });

  test("delete with no matches is a no-op; empty id throws", async () => {
    const fake = new FakeAPI();
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });
    await store.add([chunk({ id: "d2#0", documentId: "d2", embedding: vec(1, 0, 0, 0) })]);

    // No chunk carries this documentId, so the stored vector must be left alone.
    await store.delete("missing");
    expect(fake.vectors.size).toBe(1);

    const emptyId = store.delete("");
    await expect(emptyId).rejects.toThrow(/empty documentId/);
  });
});
230
+
231
// The store plugged into the core Retriever, the way callers would use it.
describe("behind a Retriever (same path as InMemoryStore)", () => {
  test("Retriever.retrieve delegates to the store and returns hits", async () => {
    const fake = new FakeAPI();
    const store = await openS3Vectors({ bucket: "b", dim: 4, api: fake });
    const query = vec(1, 0, 0, 0);
    await store.add([chunk({ id: "c1", text: "hola", embedding: vec(1, 0, 0, 0) })]);

    const retriever = new Retriever({ store, defaultK: 5 });
    const hits = await retriever.retrieve({ embedding: query });

    expect(hits).toHaveLength(1);
    const only = hits[0]!;
    expect(only.chunk.text).toBe("hola");
  });
});