@framers/agentos-ext-topicality 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
1
+ /**
2
+ * @fileoverview Unit tests for TopicEmbeddingIndex.
3
+ *
4
+ * All tests use a deterministic mock embeddingFn that returns fixed
5
+ * pre-defined vectors, eliminating network calls and making assertions exact.
6
+ *
7
+ * Coverage:
8
+ * - isBuilt transitions (false → true)
9
+ * - match returns results sorted descending by similarity
10
+ * - matchByVector does not invoke embeddingFn again after build
11
+ * - isOnTopic respects threshold comparisons
12
+ * - build with empty topics array
13
+ * - similarity values are clamped to [0, 1] (no negatives)
14
+ * - centroid averaging across description + examples (assumed by the fixture centroids; no dedicated test case in this file)
15
+ */
16
+
17
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
18
+ import { TopicEmbeddingIndex } from '../src/TopicEmbeddingIndex';
19
+ import type { TopicDescriptor } from '../src/types';
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Test fixtures
23
+ // ---------------------------------------------------------------------------
24
+
25
+ /**
26
+ * Minimal two-topic fixture with controlled, non-degenerate embeddings (see EMBED_MAP for the vectors).
27
+ * Centroids below assume the index averages the description embedding with every example embedding.
28
+ * Topic A ("sports") centroid ≈ mean of [1,0,0], [0.9,0.1,0] and [0.95,0.05,0] = [0.95, 0.05, 0]
29
+ * Topic B ("cooking") centroid ≈ mean of [0,1,0], [0.1,0.9,0] and [0.05,0.95,0] = [0.05, 0.95, 0]
30
+ */
31
+ const TOPIC_A: TopicDescriptor = {
32
+ id: 'sports', // asserted as matches[n].topicId in the tests
33
+ name: 'Sports', // asserted as topicName in the match() tests
34
+ description: 'Sports and athletics discussion.', // must be a key of EMBED_MAP or the mock throws
35
+ examples: ['football game results', 'basketball scores'], // each example must also be a key of EMBED_MAP
36
+ };
37
+
38
+ const TOPIC_B: TopicDescriptor = { // second fixture topic; its EMBED_MAP vectors lie along the y axis
39
+ id: 'cooking', // asserted as matches[0].topicId for the cooking-aligned vector test
40
+ name: 'Cooking',
41
+ description: 'Food and cooking discussion.', // must be a key of EMBED_MAP or the mock throws
42
+ examples: ['recipe for pasta', 'how to bake bread'], // each example must also be a key of EMBED_MAP
43
+ };
44
+
45
+ /**
46
+ * Embedding map used by the mock function (makeMockEmbeddingFn looks texts up here).
47
+ *
48
+ * Key: exact text string passed to the embedding function.
49
+ * Value: 3-dimensional vector (values are not normalised to unit length).
50
+ *
51
+ * Topic A texts are aligned with [1, 0, 0].
52
+ * Topic B texts are aligned with [0, 1, 0].
53
+ * Query vectors for assertions are also defined here.
54
+ */
55
+ const EMBED_MAP: Record<string, number[]> = {
56
+ // Topic A (sports) — description + 2 examples
57
+ 'Sports and athletics discussion.': [1, 0, 0],
58
+ 'football game results': [0.9, 0.1, 0],
59
+ 'basketball scores': [0.95, 0.05, 0],
60
+
61
+ // Topic B (cooking) — description + 2 examples
62
+ 'Food and cooking discussion.': [0, 1, 0],
63
+ 'recipe for pasta': [0.1, 0.9, 0],
64
+ 'how to bake bread': [0.05, 0.95, 0],
65
+
66
+ // Query texts used in tests (keys must match the test strings character for character)
67
+ 'Who won the game last night?': [0.92, 0.08, 0], // close to sports
68
+ 'How do I make carbonara?': [0.05, 0.95, 0], // close to cooking; not referenced by any test in this file
69
+ 'Unrelated topic XYZ': [0, 0, 1], // orthogonal to both
70
+ };
71
+
72
+ /**
73
+ * Deterministic mock embedding function, wrapped in a fresh vi.fn spy on every call.
74
+ * Returns fixed vectors from EMBED_MAP; throws for unknown text so tests fail
75
+ * loudly if an unexpected string is embedded. Output order follows input order.
76
+ */
77
+ function makeMockEmbeddingFn() {
78
+ return vi.fn(async (texts: string[]): Promise<number[][]> => {
79
+ return texts.map((t) => {
80
+ const vec = EMBED_MAP[t]; // exact-string lookup; undefined when the text is not a key
81
+ if (!vec) throw new Error(`Mock embeddingFn: unknown text "${t}"`); // thrown inside the async fn, so the returned promise rejects
82
+ return vec; // the array stored in EMBED_MAP itself, not a copy
83
+ });
84
+ });
85
+ }
86
+
87
+ // ---------------------------------------------------------------------------
88
+ // isBuilt — transitions
89
+ // ---------------------------------------------------------------------------
90
+
91
+ describe('TopicEmbeddingIndex.isBuilt', () => { // covers the false → true transition of the isBuilt flag
92
+ it('is false before build() is called', () => {
93
+ const index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
94
+ expect(index.isBuilt).toBe(false); // freshly constructed index, build() never invoked
95
+ });
96
+
97
+ it('is true after build() completes with topics', async () => {
98
+ const index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
99
+ await index.build([TOPIC_A, TOPIC_B]);
100
+ expect(index.isBuilt).toBe(true);
101
+ });
102
+
103
+ it('is true after build() completes with an empty topic array', async () => {
104
+ const index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
105
+ await index.build([]); // zero topics is still expected to count as a completed build
106
+ expect(index.isBuilt).toBe(true);
107
+ });
108
+ });
109
+
110
+ // ---------------------------------------------------------------------------
111
+ // build — batch embedding call count
112
+ // ---------------------------------------------------------------------------
113
+
114
+ describe('TopicEmbeddingIndex.build', () => { // build() batching behaviour and the empty-topics case
115
+ it('calls embeddingFn exactly once with all texts batched', async () => {
116
+ const fn = makeMockEmbeddingFn();
117
+ const index = new TopicEmbeddingIndex(fn);
118
+ await index.build([TOPIC_A, TOPIC_B]);
119
+
120
+ // Exactly one embeddingFn call is expected even though two topics were passed.
121
+ expect(fn).toHaveBeenCalledTimes(1);
122
+
123
+ // The batch must contain description + all examples for every topic (order is not asserted).
124
+ const calledWith: string[] = fn.mock.calls[0][0]; // first argument of the first recorded call
125
+ expect(calledWith).toContain(TOPIC_A.description);
126
+ expect(calledWith).toContain(TOPIC_A.examples[0]);
127
+ expect(calledWith).toContain(TOPIC_A.examples[1]);
128
+ expect(calledWith).toContain(TOPIC_B.description);
129
+ expect(calledWith).toContain(TOPIC_B.examples[0]);
130
+ expect(calledWith).toContain(TOPIC_B.examples[1]);
131
+ });
132
+
133
+ it('returns empty match list when built with no topics', async () => {
134
+ const index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
135
+ await index.build([]);
136
+ const matches = index.matchByVector([1, 0, 0]); // any vector works; there are no topics to compare against
137
+ expect(matches).toEqual([]);
138
+ });
139
+ });
140
+
141
+ // ---------------------------------------------------------------------------
142
+ // match — results are sorted descending
143
+ // ---------------------------------------------------------------------------
144
+
145
+ describe('TopicEmbeddingIndex.match', () => { // text-based matching: ordering, topic names and embedding call count
146
+ let index: TopicEmbeddingIndex;
147
+ let fn: ReturnType<typeof makeMockEmbeddingFn>;
148
+
149
+ beforeEach(async () => {
150
+ fn = makeMockEmbeddingFn(); // fresh spy per test so call counts do not leak between tests
151
+ index = new TopicEmbeddingIndex(fn);
152
+ await index.build([TOPIC_A, TOPIC_B]);
153
+ // Drop the calls recorded during build() so match() calls can be counted independently.
154
+ fn.mockClear();
155
+ });
156
+
157
+ it('returns matches sorted descending by similarity', async () => {
158
+ // The query embeds to [0.92, 0.08, 0] (see EMBED_MAP), so TOPIC_A should rank first.
159
+ const matches = await index.match('Who won the game last night?');
160
+ expect(matches.length).toBe(2); // one entry per built topic
161
+ // First result must have the highest similarity.
162
+ expect(matches[0].topicId).toBe('sports');
163
+ expect(matches[0].similarity).toBeGreaterThan(matches[1].similarity);
164
+ });
165
+
166
+ it('returns the correct topicName in each match', async () => {
167
+ const matches = await index.match('Who won the game last night?');
168
+ const sportsMatch = matches.find((m) => m.topicId === 'sports');
169
+ expect(sportsMatch?.topicName).toBe('Sports'); // only the sports entry is checked here
170
+ });
171
+
172
+ it('calls embeddingFn once for the query text', async () => {
173
+ await index.match('Who won the game last night?');
174
+ // One call for the single query text; build() calls were cleared in beforeEach.
175
+ expect(fn).toHaveBeenCalledTimes(1);
176
+ });
177
+ });
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // matchByVector — no extra embedding calls
181
+ // ---------------------------------------------------------------------------
182
+
183
+ describe('TopicEmbeddingIndex.matchByVector', () => { // vector-based matching, called synchronously in these tests
184
+ it('does not call embeddingFn after build is complete', async () => {
185
+ const fn = makeMockEmbeddingFn();
186
+ const index = new TopicEmbeddingIndex(fn);
187
+ await index.build([TOPIC_A, TOPIC_B]);
188
+
189
+ // Clear the calls recorded during build() to isolate matchByVector behaviour.
190
+ fn.mockClear();
191
+
192
+ // A pre-computed sports-like vector; the return value is intentionally ignored.
193
+ index.matchByVector([1, 0, 0]);
194
+
195
+ expect(fn).not.toHaveBeenCalled();
196
+ });
197
+
198
+ it('returns matches sorted descending for a sports-aligned vector', async () => {
199
+ const index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
200
+ await index.build([TOPIC_A, TOPIC_B]);
201
+
202
+ const matches = index.matchByVector([1, 0, 0]); // same vector as the sports description embedding
203
+ expect(matches[0].topicId).toBe('sports');
204
+ expect(matches[0].similarity).toBeGreaterThan(matches[1].similarity);
205
+ });
206
+
207
+ it('returns matches sorted descending for a cooking-aligned vector', async () => {
208
+ const index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
209
+ await index.build([TOPIC_A, TOPIC_B]);
210
+
211
+ const matches = index.matchByVector([0, 1, 0]); // same vector as the cooking description embedding
212
+ expect(matches[0].topicId).toBe('cooking');
213
+ expect(matches[0].similarity).toBeGreaterThan(matches[1].similarity);
214
+ });
215
+ });
216
+
217
+ // ---------------------------------------------------------------------------
218
+ // isOnTopic — threshold comparison
219
+ // ---------------------------------------------------------------------------
220
+
221
+ describe('TopicEmbeddingIndex.isOnTopic', () => { // text-based threshold checks; each query is embedded through the mock
222
+ let index: TopicEmbeddingIndex;
223
+
224
+ beforeEach(async () => {
225
+ index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
226
+ await index.build([TOPIC_A, TOPIC_B]);
227
+ });
228
+
229
+ it('returns true when best similarity exceeds threshold', async () => {
230
+ // The query embeds to [0.92, 0.08, 0], which should score well above 0.35 against TOPIC_A.
231
+ const result = await index.isOnTopic('Who won the game last night?', 0.35);
232
+ expect(result).toBe(true);
233
+ });
234
+
235
+ it('returns false when best similarity is below a strict threshold', async () => {
236
+ // Completely unrelated text maps to [0,0,1], orthogonal to both topics, so the best similarity is 0.
237
+ const result = await index.isOnTopic('Unrelated topic XYZ', 0.35);
238
+ expect(result).toBe(false);
239
+ });
240
+
241
+ it('returns false when threshold is 1.0 (impossible to exceed)', async () => {
242
+ // Cosine similarity is at most 1.0 and the comparison is expected to be strict, so even an exact centroid match cannot exceed this threshold.
243
+ const result = await index.isOnTopic('Who won the game last night?', 1.0);
244
+ expect(result).toBe(false);
245
+ });
246
+ });
247
+
248
+ // ---------------------------------------------------------------------------
249
+ // isOnTopicByVector — threshold comparison without embedding
250
+ // ---------------------------------------------------------------------------
251
+
252
+ describe('TopicEmbeddingIndex.isOnTopicByVector', () => { // threshold checks on pre-computed vectors, called synchronously
253
+ let index: TopicEmbeddingIndex;
254
+
255
+ beforeEach(async () => {
256
+ index = new TopicEmbeddingIndex(makeMockEmbeddingFn());
257
+ await index.build([TOPIC_A, TOPIC_B]);
258
+ });
259
+
260
+ it('returns true for a vector well-aligned with a topic centroid', () => {
261
+ // [1, 0, 0] is the sports description embedding — very high similarity.
262
+ expect(index.isOnTopicByVector([1, 0, 0], 0.35)).toBe(true);
263
+ });
264
+
265
+ it('returns false for an orthogonal vector', () => {
266
+ // [0, 0, 1] is orthogonal to both topic centroids → similarity = 0.
267
+ expect(index.isOnTopicByVector([0, 0, 1], 0.35)).toBe(false);
268
+ });
269
+
270
+ it('respects an exact threshold boundary (similarity must be strictly greater)', () => {
271
+ // Directly use a cooking vector — read the exact similarity from matchByVector first.
272
+ const matches = index.matchByVector([0, 1, 0]);
273
+ const bestSim = matches[0].similarity; // top-ranked entry, i.e. the highest similarity
274
+ // Threshold equal to bestSim → NOT on-topic (must be strictly greater).
275
+ expect(index.isOnTopicByVector([0, 1, 0], bestSim)).toBe(false);
276
+ // Threshold just below bestSim (0.001 margin) → on-topic.
277
+ expect(index.isOnTopicByVector([0, 1, 0], bestSim - 0.001)).toBe(true);
278
+ });
279
+ });
280
+
281
+ // ---------------------------------------------------------------------------
282
+ // Similarity clamping — no negative values
283
+ // ---------------------------------------------------------------------------
284
+
285
+ describe('TopicEmbeddingIndex similarity clamping', () => { // negative cosine values must surface as 0
286
+ it('clamps negative cosine similarities to 0', async () => {
287
+ // Use a custom embed map whose single topic points along +x; the opposing vector is passed to matchByVector directly.
288
+ const customMap: Record<string, number[]> = {
289
+ 'Topic description.': [1, 0, 0],
290
+ 'example one': [1, 0, 0],
291
+ 'opposite query': [-1, 0, 0], // exact opposite → raw cosine = -1; this key is never embedded by the test
292
+ };
293
+ const fn = vi.fn(async (texts: string[]) => texts.map((t) => customMap[t])); // unlike makeMockEmbeddingFn, unknown text yields undefined instead of throwing
294
+
295
+ const topic: TopicDescriptor = {
296
+ id: 'topic',
297
+ name: 'Topic',
298
+ description: 'Topic description.',
299
+ examples: ['example one'],
300
+ };
301
+
302
+ const index = new TopicEmbeddingIndex(fn);
303
+ await index.build([topic]);
304
+
305
+ // Directly test matchByVector with an opposite vector (no embedding call needed).
306
+ const matches = index.matchByVector([-1, 0, 0]);
307
+ // Raw cosine would be -1; clamped to 0.
308
+ expect(matches[0].similarity).toBe(0);
309
+ });
310
+ });