@crashbytes/semantic-text-toolkit 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,354 @@
1
+ /**
2
+ * Vector Mathematics Test Suite
3
+ *
4
+ * Comprehensive validation of mathematical operations fundamental to
5
+ * semantic similarity computation and vector search functionality.
6
+ *
7
+ * Test Architecture:
8
+ * - Unit tests for pure mathematical functions
9
+ * - Edge case validation for robustness
10
+ * - Error handling verification
11
+ * - Performance characteristic validation
12
+ */
13
+
14
+ import {
15
+ dotProduct,
16
+ magnitude,
17
+ cosineSimilarity,
18
+ euclideanDistance,
19
+ normalize,
20
+ centroid,
21
+ topKSimilar,
22
+ } from '../vector';
23
+ import { SemanticError, SemanticErrorCode } from '../../types';
24
+
25
+ describe('Vector Mathematics - Production Validation', () => {
26
/**
 * dotProduct test group.
 *
 * Pins the arithmetic result for positive, zero, negative and one-element
 * inputs, and the failure contract these tests expect: a SemanticError for
 * operands of different length and for an empty operand, with the mismatch
 * error carrying DIMENSION_MISMATCH and both lengths in `details.dimensions`.
 */
describe('dotProduct', () => {
  it('computes correctly for positive vectors', () => {
    // [1,2,3] · [4,5,6] = 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32
    expect(dotProduct([1, 2, 3], [4, 5, 6])).toBe(32);
  });

  it('handles zero vectors', () => {
    // A zero operand on either side must yield 0.
    expect(dotProduct([0, 0, 0], [1, 2, 3])).toBe(0);
    expect(dotProduct([1, 2, 3], [0, 0, 0])).toBe(0);
  });

  it('computes correctly with negative values', () => {
    // [1,-2,3] · [4,5,-6] = 1*4 + (-2)*5 + 3*(-6) = 4 - 10 - 18 = -24
    expect(dotProduct([1, -2, 3], [4, 5, -6])).toBe(-24);
  });

  it('handles single-dimensional vectors', () => {
    expect(dotProduct([5], [3])).toBe(15);
  });

  it('throws SemanticError on dimension mismatch', () => {
    expect(() => dotProduct([1, 2], [1, 2, 3]))
      .toThrow(SemanticError);
  });

  it('provides detailed error context on mismatch', () => {
    // Capture the thrown value and assert on it AFTER the try/catch.
    // This avoids the Jasmine-era `fail()` global (not defined under Jest's
    // default jest-circus runner) and avoids raising the "did not throw"
    // failure inside the very try block whose catch would swallow it.
    let caught: unknown;
    try {
      dotProduct([1, 2], [1, 2, 3, 4]);
    } catch (error) {
      caught = error;
    }

    // If nothing was thrown, `caught` is undefined and this assertion fails.
    expect(caught).toBeInstanceOf(SemanticError);
    const semanticError = caught as SemanticError;
    expect(semanticError.code).toBe(SemanticErrorCode.DIMENSION_MISMATCH);
    expect(semanticError.details).toMatchObject({
      dimensions: [2, 4],
    });
  });

  it('rejects empty arrays', () => {
    expect(() => dotProduct([], [1, 2]))
      .toThrow(SemanticError);
  });
});
70
+
71
/**
 * magnitude test group.
 *
 * Checks the Euclidean length for small known vectors, a 384-element vector
 * of ones, and that an empty input is rejected with a SemanticError.
 */
describe('magnitude', () => {
  it('computes correctly for standard vectors', () => {
    // 3-4-5 right triangle: √(3² + 4²) = √(9 + 16) = √25 = 5
    const length = magnitude([3, 4]);
    expect(length).toBe(5);
  });

  it('handles zero vector', () => {
    const origin = [0, 0, 0];
    expect(magnitude(origin)).toBe(0);
  });

  it('computes correctly for unit vectors', () => {
    const xAxis = [1, 0, 0];
    const yAxis = [0, 1, 0];
    expect(magnitude(xAxis)).toBe(1);
    expect(magnitude(yAxis)).toBe(1);
  });

  it('handles negative components', () => {
    // Signs vanish when squared: √((-3)² + (-4)²) = √(9 + 16) = 5
    const length = magnitude([-3, -4]);
    expect(length).toBe(5);
  });

  it('computes correctly for high-dimensional vectors', () => {
    // 384 ones: √(1² * 384) = √384 ≈ 19.595
    const ones = Array.from({ length: 384 }, () => 1);
    expect(magnitude(ones)).toBeCloseTo(19.595, 2);
  });

  it('rejects empty arrays', () => {
    const callWithEmpty = () => magnitude([]);
    expect(callWithEmpty).toThrow(SemanticError);
  });
});
102
+
103
/**
 * cosineSimilarity test group.
 *
 * Covers the three reference orientations (identical = 1, orthogonal = 0,
 * opposite = -1), one hand-computed arbitrary pair, a 384-element pair, and
 * the failure contract these tests expect: SemanticError for a zero-magnitude
 * operand and for operands of different length.
 */
describe('cosineSimilarity', () => {
  it('returns 1.0 for identical vectors', () => {
    // Axis-aligned unit vector: every intermediate value is exactly 1.
    expect(cosineSimilarity([1, 0, 0], [1, 0, 0])).toBe(1.0);
    // |[1,2,3]| = √14 is irrational, so the quotient may land one ULP away
    // from 1 depending on how the implementation orders its operations.
    // Compare with a tolerance, as the opposite-vector case does for the
    // same operands.
    expect(cosineSimilarity([1, 2, 3], [1, 2, 3])).toBeCloseTo(1.0, 10);
  });

  it('returns 0.0 for orthogonal vectors', () => {
    // Perpendicular vectors have zero dot product
    expect(cosineSimilarity([1, 0], [0, 1])).toBeCloseTo(0.0, 10);
    expect(cosineSimilarity([1, 0, 0], [0, 1, 0])).toBeCloseTo(0.0, 10);
  });

  it('returns -1.0 for opposite vectors', () => {
    expect(cosineSimilarity([1, 0], [-1, 0])).toBe(-1.0);
    expect(cosineSimilarity([1, 2, 3], [-1, -2, -3])).toBeCloseTo(-1.0, 10);
  });

  it('computes correctly for arbitrary vectors', () => {
    // cos(θ) for [1,2] and [2,1]:
    // dot = 1*2 + 2*1 = 4
    // |a| = √5, |b| = √5
    // cos(θ) = 4 / 5 = 0.8
    expect(cosineSimilarity([1, 2], [2, 1])).toBeCloseTo(0.8, 10);
  });

  it('handles high-dimensional embeddings', () => {
    // Two identical 384-element vectors with 1 at even indices, 0 at odd.
    const a = Array(384).fill(0).map((_, i) => i % 2 === 0 ? 1 : 0);
    const b = Array(384).fill(0).map((_, i) => i % 2 === 0 ? 1 : 0);
    // Use toBeCloseTo to handle floating point precision
    expect(cosineSimilarity(a, b)).toBeCloseTo(1.0, 10);
  });

  it('throws on zero-magnitude vectors', () => {
    expect(() => cosineSimilarity([0, 0], [1, 0]))
      .toThrow(SemanticError);
  });

  it('throws on dimension mismatch', () => {
    expect(() => cosineSimilarity([1, 2], [1, 2, 3]))
      .toThrow(SemanticError);
  });
});
145
+
146
/**
 * euclideanDistance test group.
 *
 * Checks zero distance between equal points, hand-computed distances, and
 * that operands of different length are rejected with a SemanticError.
 */
describe('euclideanDistance', () => {
  it('computes zero distance for identical vectors', () => {
    const point = [1, 2, 3];
    const samePoint = [1, 2, 3];
    expect(euclideanDistance(point, samePoint)).toBe(0);
  });

  it('computes correctly for standard cases', () => {
    // From the origin to (3, 4): √((3-0)² + (4-0)²) = √(9 + 16) = 5
    const distance = euclideanDistance([0, 0], [3, 4]);
    expect(distance).toBe(5);
  });

  it('computes correctly for negative differences', () => {
    // √((1-(-1))² + (1-(-1))²) = √8 ≈ 2.828
    const distance = euclideanDistance([1, 1], [-1, -1]);
    expect(distance).toBeCloseTo(2.828, 3);
  });

  it('handles unit distance in each dimension', () => {
    // √(1² + 1² + 1²) = √3 ≈ 1.732
    const distance = euclideanDistance([0, 0, 0], [1, 1, 1]);
    expect(distance).toBeCloseTo(1.732, 3);
  });

  it('throws on dimension mismatch', () => {
    const callWithMismatch = () => euclideanDistance([1, 2], [1, 2, 3]);
    expect(callWithMismatch).toThrow(SemanticError);
  });
});
170
+
171
/**
 * normalize test group.
 *
 * Checks that the result has unit length, points the same way as the input,
 * matches hand-computed components, leaves a unit vector unchanged, and that
 * the zero vector is rejected with a SemanticError.
 */
describe('normalize', () => {
  it('produces unit vector', () => {
    const unitLength = magnitude(normalize([3, 4]));
    expect(unitLength).toBeCloseTo(1.0, 10);
  });

  it('preserves direction', () => {
    const input = [3, 4];
    const scaled = normalize(input);

    // Parallel vectors have cosine similarity 1.
    const alignment = cosineSimilarity(input, scaled);
    expect(alignment).toBeCloseTo(1.0, 10);
  });

  it('computes correctly for known vectors', () => {
    // |[3,4]| = 5, so the result is [3/5, 4/5] = [0.6, 0.8]
    const result = normalize([3, 4]);
    const x = result[0];
    const y = result[1];
    expect(x).toBeCloseTo(0.6, 10);
    expect(y).toBeCloseTo(0.8, 10);
  });

  it('handles already normalized vectors', () => {
    const xAxis = [1, 0, 0];
    expect(normalize(xAxis)).toEqual(xAxis);
  });

  it('throws on zero vector', () => {
    const callWithZero = () => normalize([0, 0, 0]);
    expect(callWithZero).toThrow(SemanticError);
  });
});
203
+
204
/**
 * centroid test group.
 *
 * Checks the component-wise mean for identical, mixed, single and
 * sign-cancelling inputs, and that an empty list or vectors of different
 * length are rejected with a SemanticError.
 */
describe('centroid', () => {
  it('computes center of identical vectors', () => {
    const repeated = [[1, 2, 3], [1, 2, 3], [1, 2, 3]];
    expect(centroid(repeated)).toEqual([1, 2, 3]);
  });

  it('computes average position correctly', () => {
    // Corners of a 2x2 square anchored at the origin.
    const corners = [[0, 0], [2, 0], [0, 2], [2, 2]];
    // Mean: [(0+2+0+2)/4, (0+0+2+2)/4] = [1, 1]
    expect(centroid(corners)).toEqual([1, 1]);
  });

  it('handles single vector', () => {
    const only = [5, 10, 15];
    expect(centroid([only])).toEqual([5, 10, 15]);
  });

  it('handles negative values', () => {
    // Mirror-image points average to the origin.
    const mirrored = [[-1, -1], [1, 1]];
    expect(centroid(mirrored)).toEqual([0, 0]);
  });

  it('throws on empty array', () => {
    const callWithNone = () => centroid([]);
    expect(callWithNone).toThrow(SemanticError);
  });

  it('throws on dimension mismatch', () => {
    const ragged = [[1, 2], [1, 2, 3]];
    const callWithRagged = () => centroid(ragged);
    expect(callWithRagged).toThrow(SemanticError);
  });
});
251
+
252
/**
 * topKSimilar test group.
 *
 * The assertions read each result entry as an `[index, score]` pair. They
 * check result count, descending score order, the best match, clamping of k
 * to the candidate count, empty candidates, mismatched candidates being
 * scored -Infinity, rejection of k <= 0, and the default k of 10.
 */
describe('topKSimilar', () => {
  const query = [1, 0, 0];
  const candidates = [
    [1, 0, 0], // index 0 - perfect match: similarity = 1.0
    [0.9, 0.1, 0], // index 1 - close match: similarity ≈ 0.995
    [0, 1, 0], // index 2 - orthogonal: similarity = 0.0
    [-1, 0, 0], // index 3 - opposite: similarity = -1.0
  ];

  it('returns correct number of results', () => {
    const topTwo = topKSimilar(query, candidates, 2);
    const topThree = topKSimilar(query, candidates, 3);
    expect(topTwo).toHaveLength(2);
    expect(topThree).toHaveLength(3);
  });

  it('orders results by similarity descending', () => {
    const ranked = topKSimilar(query, candidates, 4);

    // Every entry must score at least as high as the one that follows it.
    ranked.slice(1).forEach((next, position) => {
      expect(ranked[position][1]).toBeGreaterThanOrEqual(next[1]);
    });
  });

  it('returns most similar vector first', () => {
    const [best] = topKSimilar(query, candidates, 1);

    expect(best[0]).toBe(0); // Index of perfect match
    expect(best[1]).toBe(1.0); // Perfect similarity
  });

  it('handles k larger than candidate count', () => {
    const everything = topKSimilar(query, candidates, 100);
    expect(everything).toHaveLength(candidates.length);
  });

  it('returns empty for empty candidates', () => {
    const none = topKSimilar(query, [], 10);
    expect(none).toEqual([]);
  });

  it('handles dimension mismatch gracefully', () => {
    // Only the middle candidate has the query's three dimensions.
    const mixedCandidates = [[1, 0], [1, 0, 0], [0, 1]];

    const ranked = topKSimilar([1, 0, 0], mixedCandidates, 3);

    // Mismatched candidates are expected to be scored -Infinity, not thrown.
    const hasSentinel = ranked.some(([, score]) => score === -Infinity);
    expect(hasSentinel).toBe(true);
  });

  it('throws on invalid k', () => {
    const callWithZero = () => topKSimilar(query, candidates, 0);
    const callWithNegative = () => topKSimilar(query, candidates, -1);

    expect(callWithZero).toThrow(SemanticError);
    expect(callWithNegative).toThrow(SemanticError);
  });

  it('uses default k=10 when not specified', () => {
    // 15 candidates: more than the default k, so truncation is observable.
    const manyCandidates = Array.from({ length: 15 }, (_, i) =>
      [1, 0, 0].map(v => v + i * 0.01)
    );

    // No k argument - the default of 10 should apply.
    const ranked = topKSimilar(query, manyCandidates);

    expect(ranked).toHaveLength(10);
  });
});
324
+
325
/**
 * Performance smoke tests.
 *
 * Coarse wall-clock guards, not benchmarks: one cosineSimilarity call on
 * 384-element vectors must finish in under 50ms, and topKSimilar over 1000
 * such candidates in under 100ms.
 *
 * Inputs come from a fixed-seed generator instead of Math.random(), so a
 * failing run can be reproduced exactly. Each timed call is preceded by an
 * untimed warm-up call so one-off first-invocation cost is not measured.
 */
describe('Performance Characteristics', () => {
  const EMBEDDING_DIM = 384; // Standard embedding dimension

  // Deterministic pseudo-random vector with components in [0, 1).
  // Linear congruential generator (a = 1664525, c = 1013904223, m = 2^32).
  // The state stays below 2^32, so state * a + c stays below 2^53 and the
  // arithmetic is exact in double precision.
  const seededVector = (seed: number): number[] => {
    let state = seed;
    return Array(EMBEDDING_DIM).fill(0).map(() => {
      state = (state * 1664525 + 1013904223) % 4294967296;
      return state / 4294967296;
    });
  };

  it('handles high-dimensional vectors efficiently', () => {
    const a = seededVector(1);
    const b = seededVector(2);

    // Untimed warm-up call.
    cosineSimilarity(a, b);

    const startTime = performance.now();
    const similarity = cosineSimilarity(a, b);
    const endTime = performance.now();

    expect(similarity).toBeGreaterThanOrEqual(-1);
    expect(similarity).toBeLessThanOrEqual(1);
    expect(endTime - startTime).toBeLessThan(50); // < 50ms (allow for CI variance)
  });

  it('processes batch operations efficiently', () => {
    const query = seededVector(1);
    // Seeds start at 2 so no candidate repeats the query's sequence.
    const candidates = Array(1000).fill(null).map((_, i) =>
      seededVector(i + 2)
    );

    // Untimed warm-up on a small slice of the same data.
    topKSimilar(query, candidates.slice(0, 10), 10);

    const startTime = performance.now();
    topKSimilar(query, candidates, 10);
    const endTime = performance.now();

    // Should process 1000 candidates in reasonable time
    expect(endTime - startTime).toBeLessThan(100); // < 100ms
  });
});
354
+ });