@totalreclaw/totalreclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,594 @@
1
+ /**
2
+ * Unit tests for the TotalReclaw re-ranker module.
3
+ *
4
+ * Run with: npx tsx reranker.test.ts
5
+ *
6
+ * Uses TAP-style output (no test framework dependency).
7
+ */
8
+
9
+ import {
10
+ tokenize,
11
+ bm25Score,
12
+ cosineSimilarity,
13
+ rrfFuse,
14
+ rerank,
15
+ detectQueryIntent,
16
+ DEFAULT_WEIGHTS,
17
+ type RankedItem,
18
+ type RerankerCandidate,
19
+ type RankingWeights,
20
+ type QueryIntent,
21
+ } from './reranker.js';
22
+
23
// Running tallies for the TAP report: the sequence number of the most recent
// assertion, plus how many assertions passed and how many failed.
let testNum = 0,
  passed = 0,
  failed = 0;
26
+
27
/**
 * Records one assertion and prints its TAP result line.
 *
 * Advances the running test number, bumps either the pass or the fail tally,
 * and logs `ok N - message` or `not ok N - message` accordingly.
 *
 * @param condition - Outcome of the check; `true` counts as a pass.
 * @param message - Description appended to the TAP line.
 */
function assert(condition: boolean, message: string): void {
  testNum += 1;
  if (!condition) {
    failed += 1;
    console.log(`not ok ${testNum} - ${message}`);
    return;
  }
  passed += 1;
  console.log(`ok ${testNum} - ${message}`);
}
37
+
38
/**
 * Asserts that two numbers differ by strictly less than `epsilon`.
 *
 * The expected value, the actual value and their absolute difference are
 * appended to the message so a failing line shows how far off the result was.
 *
 * @param actual - Value produced by the code under test.
 * @param expected - Value the result should approximate.
 * @param epsilon - Exclusive upper bound on the allowed absolute difference.
 * @param message - Description used as the prefix of the TAP line.
 */
function assertClose(actual: number, expected: number, epsilon: number, message: string): void {
  const delta = Math.abs(expected - actual);
  const withinTolerance = delta < epsilon;
  assert(withinTolerance, `${message} (expected ~${expected}, got ${actual}, diff=${delta})`);
}
42
+
43
// ---------------------------------------------------------------------------
// Tokenization tests
// ---------------------------------------------------------------------------

console.log('# Tokenization');

{
  // Plain two-word input: both words are expected to come back, lowercased.
  const helloWorld = tokenize('Hello World');
  assert(helloWorld.length === 2, 'keeps both "hello" and "world" (neither is a stop word)');
  for (const word of ['hello', 'world']) {
    assert(helloWorld.includes(word), `"${word}" is kept`);
  }
}

{
  // Sentence containing "the" twice: "the" must be dropped as a stop word
  // while ordinary content words survive.
  const pangram = tokenize('The quick brown fox jumps over the lazy dog');
  assert(!pangram.includes('the'), 'removes "the" stop word');
  for (const word of ['quick', 'fox']) {
    assert(pangram.includes(word), `keeps "${word}"`);
  }
}

{
  // Second argument `false` turns stop-word removal off, so "am" stays;
  // single-character tokens ("I", "a") are still expected to be filtered.
  const unfiltered = tokenize('I am a test!', false);
  const keepsAm = unfiltered.includes('am');
  const keepsTest = unfiltered.includes('test');
  const dropsSingleChar = !unfiltered.includes('a');
  assert(keepsAm, 'keeps short words without stop word removal');
  assert(keepsTest, 'keeps "test"');
  assert(dropsSingleChar, 'filters single-char tokens');
}

{
  // Punctuation is stripped first, then stop words ("how", "are") are removed.
  const greeting = tokenize('Hello, World! How are you?');
  assert(greeting.includes('hello'), 'punctuation removed, "hello" kept');
  assert(greeting.includes('world'), '"world" kept after punctuation removal');
  for (const stopWord of ['how', 'are']) {
    assert(!greeting.includes(stopWord), `"${stopWord}" removed as stop word`);
  }
}

{
  // Degenerate input: nothing to tokenize.
  const nothing = tokenize('');
  assert(nothing.length === 0, 'empty string returns empty array');
}
88
+
89
// ---------------------------------------------------------------------------
// BM25 tests
// ---------------------------------------------------------------------------

console.log('# BM25 Scoring');

{
  // Single-document corpus in which every query term occurs in the document.
  const documentFrequencies = new Map<string, number>([
    ['alex', 1], ['works', 1], ['nexus', 1], ['labs', 1],
  ]);
  const matchScore = bm25Score(
    ['alex', 'works'],
    ['alex', 'works', 'nexus', 'labs'],
    4, // average document length
    1, // number of documents
    documentFrequencies,
  );
  assert(matchScore > 0, `BM25: matching query gets positive score (${matchScore.toFixed(4)})`);
}

{
  // Same corpus, but none of the query terms occur in the document.
  const documentFrequencies = new Map<string, number>([
    ['alex', 1], ['works', 1], ['nexus', 1], ['labs', 1],
  ]);
  const missScore = bm25Score(
    ['python', 'programming'],
    ['alex', 'works', 'nexus', 'labs'],
    4, // average document length
    1, // number of documents
    documentFrequencies,
  );
  assert(missScore === 0, 'BM25: no overlap gives zero score');
}

{
  // Two documents of equal length; only the frequency of "test" differs.
  const query = ['test'];
  const corpusFrequencies = new Map<string, number>([['test', 2], ['document', 2], ['about', 1], ['testing', 1]]);
  const scoreFor = (docTerms: string[]): number => bm25Score(query, docTerms, 4, 2, corpusFrequencies);

  const onceScore = scoreFor(['test', 'document', 'about', 'testing']); // "test" appears 1x
  const thriceScore = scoreFor(['test', 'test', 'test', 'document']); // "test" appears 3x
  assert(thriceScore > onceScore, `BM25: higher TF gives higher score (${thriceScore.toFixed(4)} > ${onceScore.toFixed(4)})`);
}

{
  // Same document and query; only the document frequency of "rare" changes,
  // so any score difference comes from the IDF component.
  const query = ['rare'];
  const doc = ['rare', 'word'];
  const scoreWith = (frequencies: Map<string, number>): number => bm25Score(query, doc, 4, 100, frequencies);

  const rareScore = scoreWith(new Map<string, number>([['rare', 1], ['word', 50]]));
  const commonScore = scoreWith(new Map<string, number>([['rare', 50], ['word', 50]]));
  assert(rareScore > commonScore, `BM25: rare terms score higher (${rareScore.toFixed(4)} > ${commonScore.toFixed(4)})`);
}

{
  // Degenerate inputs must all score exactly zero.
  const degenerateCases: Array<{
    query: string[];
    doc: string[];
    avgDocLen: number;
    frequencies: Map<string, number>;
    label: string;
  }> = [
    { query: ['hello'], doc: [], avgDocLen: 4, frequencies: new Map<string, number>(), label: 'BM25: empty doc returns 0' },
    { query: [], doc: ['hello'], avgDocLen: 4, frequencies: new Map<string, number>(), label: 'BM25: empty query returns 0' },
    { query: ['hello'], doc: ['hello'], avgDocLen: 0, frequencies: new Map<string, number>([['hello', 1]]), label: 'BM25: avgDocLen=0 returns 0' },
  ];
  for (const { query, doc, avgDocLen, frequencies, label } of degenerateCases) {
    assert(bm25Score(query, doc, avgDocLen, 10, frequencies) === 0, label);
  }
}
157
+
158
{
  // Known BM25 computation, verified by hand with k1 = 1.2 and b = 0.75
  // (the constants used in the worked formula below):
  //   query = ["test"], doc = ["test", "document"], N = 10, n("test") = 3, avgdl = 5
  //   IDF   = ln((10 - 3 + 0.5) / (3 + 0.5) + 1) = ln(7.5 / 3.5 + 1) = ln(3.14286) = 1.14513
  //   TF    = (1 * (1.2 + 1)) / (1 + 1.2 * (1 - 0.75 + 0.75 * 2 / 5)) = 2.2 / (1 + 1.2 * 0.55) = 2.2 / 1.66 = 1.32530
  //   score = 1.14513 * 1.32530 = 1.51765
  // The previous expected value (1.51849) came from a mis-evaluated logarithm;
  // it only passed because the tolerance is loose. The tolerance is kept as-is.
  const queryTerms = ['test'];
  const docTerms = ['test', 'document'];
  const score = bm25Score(queryTerms, docTerms, 5, 10, new Map<string, number>([['test', 3]]));
  assertClose(score, 1.51765, 0.01, 'BM25: known computation matches expected value');
}
169
+
170
// ---------------------------------------------------------------------------
// Cosine similarity tests
// ---------------------------------------------------------------------------

console.log('# Cosine Similarity');

{
  // Used for the "vector with itself" case, where both arguments are the
  // very same array instance.
  const selfVector = [3, 4];

  // Each row: the two input vectors, the expected similarity, and the label.
  const cosineCases: Array<{ a: number[]; b: number[]; expected: number; label: string }> = [
    // Parallel vectors -> 1.0
    { a: [1, 2, 3], b: [2, 4, 6], expected: 1.0, label: 'Cosine: parallel vectors = 1.0' },
    // Orthogonal vectors -> 0.0
    { a: [1, 0], b: [0, 1], expected: 0.0, label: 'Cosine: orthogonal vectors = 0.0' },
    // Opposite vectors -> -1.0
    { a: [1, 2, 3], b: [-1, -2, -3], expected: -1.0, label: 'Cosine: opposite vectors = -1.0' },
    // Same vector -> 1.0
    { a: selfVector, b: selfVector, expected: 1.0, label: 'Cosine: vector with itself = 1.0' },
    // One zero vector -> 0.0
    { a: [0, 0, 0], b: [1, 2, 3], expected: 0.0, label: 'Cosine: zero vector = 0.0' },
    // Both zero vectors -> 0.0
    { a: [0, 0], b: [0, 0], expected: 0.0, label: 'Cosine: both zero vectors = 0.0' },
    // Empty vector on either side -> 0.0
    { a: [], b: [1, 2], expected: 0.0, label: 'Cosine: empty vector a = 0.0' },
    { a: [1, 2], b: [], expected: 0.0, label: 'Cosine: empty vector b = 0.0' },
    // [1,1] vs [1,0] -> cos(45 deg) = sqrt(2)/2 ~ 0.7071
    { a: [1, 1], b: [1, 0], expected: Math.SQRT2 / 2, label: 'Cosine: 45-degree angle = sqrt(2)/2' },
  ];

  for (const { a, b, expected, label } of cosineCases) {
    assertClose(cosineSimilarity(a, b), expected, 1e-10, label);
  }
}
229
+
230
// ---------------------------------------------------------------------------
// RRF tests
// ---------------------------------------------------------------------------

console.log('# Reciprocal Rank Fusion');

/** Builds a ranking from `[id, score]` pairs, keeping the given order. */
const rankedList = (...entries: Array<[string, number]>): RankedItem[] =>
  entries.map(([id, score]) => ({ id, score }));

{
  // Two rankings over the same items, in different orders.
  const firstRanking = rankedList(['A', 10], ['B', 8], ['C', 6]);
  const secondRanking = rankedList(['C', 10], ['A', 8], ['B', 6]);

  const fused = rrfFuse([firstRanking, secondRanking], 60);

  // Expected RRF scores (k=60, 1-based ranks):
  //   A: 1/(60+1) + 1/(60+2) = 0.01639 + 0.01613 = 0.03253
  //   B: 1/(60+2) + 1/(60+3) = 0.01613 + 0.01587 = 0.03200
  //   C: 1/(60+3) + 1/(60+1) = 0.01587 + 0.01639 = 0.03226
  assert(fused.length === 3, 'RRF: all items present');

  const expectedOrder: Array<[string, string]> = [
    ['A', 'RRF: A ranked first (appears in rank 1 + rank 2)'],
    ['C', 'RRF: C ranked second (appears in rank 3 + rank 1)'],
    ['B', 'RRF: B ranked third'],
  ];
  expectedOrder.forEach(([id, label], position) => {
    assert(fused[position].id === id, label);
  });
}

{
  // Some items appear in only one of the two rankings.
  const fused = rrfFuse(
    [rankedList(['X', 10], ['Y', 5]), rankedList(['Z', 10], ['X', 5])],
    60,
  );

  assert(fused.length === 3, 'RRF: all unique items present (X, Y, Z)');
  // X is the only item contributing from both rankings, so it should lead.
  assert(fused[0].id === 'X', 'RRF: X ranked first (in both lists)');
}

{
  // With a single input ranking, fusion should keep that ranking's order.
  const fused = rrfFuse([rankedList(['A', 10], ['B', 5])], 60);
  const labelByPosition: Array<[string, string]> = [
    ['A', 'RRF: single ranking preserves order'],
    ['B', 'RRF: single ranking preserves order (second)'],
  ];
  labelByPosition.forEach(([id, label], position) => {
    assert(fused[position].id === id, label);
  });
}

{
  // No rankings at all.
  const nothingFused = rrfFuse([], 60);
  assert(nothingFused.length === 0, 'RRF: empty input returns empty');
}

{
  // Numeric check: mirrored rankings give both items rank 1 once and rank 2
  // once, i.e. 1/(60+1) + 1/(60+2) each.
  const forward = rankedList(['D1', 1], ['D2', 0.5]);
  const mirrored = rankedList(['D2', 1], ['D1', 0.5]);

  const fused = rrfFuse([forward, mirrored], 60);
  const [top, runnerUp] = fused;

  // D1: 1/61 + 1/62 = 0.03252
  // D2: 1/62 + 1/61 = 0.03252
  assertClose(top.score, runnerUp.score, 1e-10, 'RRF: symmetric rankings produce equal scores');
  assertClose(top.score, 1 / 61 + 1 / 62, 1e-10, 'RRF: correct numeric score');
}
317
+
318
// ---------------------------------------------------------------------------
// End-to-end rerank tests
// ---------------------------------------------------------------------------

console.log('# Rerank (end-to-end)');

{
  // BM25-only: no query embedding and no candidate embeddings.
  // The texts use exact-match terms because BM25 here has no stemmer.
  const corpus: RerankerCandidate[] = [
    'Alex works Nexus Labs senior engineer',
    'The weather today is sunny and warm',
    'Bob enjoys hiking in the mountains on weekends',
  ].map((text, index) => ({ id: String(index + 1), text }));

  // Query tokens: ["alex", "nexus", "labs"]. Candidate 1 contains all three;
  // candidates 2 and 3 contain none.
  const topTwo = rerank('Alex Nexus Labs', [], corpus, 2);

  assert(topTwo.length === 2, 'Rerank: returns topK=2 results');
  assert(topTwo[0].id === '1', 'Rerank: BM25-only ranks matching document first');
}

{
  // Lexical and semantic signals disagree on purpose: candidate 1 matches the
  // query terms but has an orthogonal embedding, candidate 2 shares no terms
  // but has an embedding almost parallel to the query's.
  const queryVector = [1, 0, 0, 0];
  const lexicalMatch: RerankerCandidate = {
    id: '1',
    text: 'Alex employed Nexus Labs senior engineer', // matches "alex" + "employed"
    embedding: [0, 1, 0, 0], // orthogonal to the query embedding
  };
  const semanticMatch: RerankerCandidate = {
    id: '2',
    text: 'career position company staff', // no overlapping terms
    embedding: [0.99, 0.1, 0, 0], // very close to the query embedding
  };
  const unrelated: RerankerCandidate = {
    id: '3',
    text: 'sunny weather forecast today',
    embedding: [0, 0, 0, 1], // orthogonal
  };

  const ranked = rerank('Alex employed somewhere', queryVector, [lexicalMatch, semanticMatch, unrelated], 3);
  assert(ranked.length === 3, 'Rerank: returns all 3 candidates');

  // The relative order of candidates 1 and 2 depends on the fusion, so only
  // membership in the top two is checked; candidate 3 has neither signal.
  const leaderIds = [ranked[0].id, ranked[1].id];
  assert(leaderIds.includes('1'), 'Rerank: BM25-strong candidate in top 2');
  assert(leaderIds.includes('2'), 'Rerank: cosine-strong candidate in top 2');
  assert(ranked[2].id === '3', 'Rerank: irrelevant candidate ranked last');
}

{
  // Nothing to rank.
  const none = rerank('test query', [1, 0, 0], [], 5);
  assert(none.length === 0, 'Rerank: empty candidates returns empty');
}

{
  // Asking for more results than there are candidates.
  const lone: RerankerCandidate[] = [{ id: '1', text: 'only candidate' }];
  const ranked = rerank('only', [], lone, 10);
  assert(ranked.length === 1, 'Rerank: returns all candidates when topK > count');
}

{
  // Backward compatibility: a candidate without an embedding (v1 fact) must
  // still be ranked alongside one that has an embedding (v2 fact), even when
  // a query embedding is supplied.
  const queryVector = [1, 0, 0];
  const withoutEmbedding: RerankerCandidate = { id: 'v1', text: 'Alex works at Nexus Labs' };
  const withEmbedding: RerankerCandidate = { id: 'v2', text: 'Alex works at Nexus Labs', embedding: [0.9, 0.1, 0] };

  const ranked = rerank('Alex Nexus Labs', queryVector, [withoutEmbedding, withEmbedding], 2);
  assert(ranked.length === 2, 'Rerank: backward compat - both v1 and v2 facts returned');
  // Identical text, so the candidate that also has a cosine signal should win.
  assert(ranked[0].id === 'v2', 'Rerank: v2 fact with embedding ranks above v1 (benefits from cosine + BM25)');
}
406
+
407
// ---------------------------------------------------------------------------
// RerankResult.cosineSimilarity tests
// ---------------------------------------------------------------------------

console.log('# RerankResult cosine similarity field');

{
  // Candidates that carry embeddings should come back with cosineSimilarity set.
  const queryEmb = [1, 0, 0];
  const candidates: RerankerCandidate[] = [
    { id: 'a', text: 'first document', embedding: [0.9, 0.1, 0] },
    { id: 'b', text: 'second document', embedding: [0, 1, 0] },
  ];

  const results = rerank('first document', queryEmb, candidates, 2);
  assert(results.length === 2, 'RerankResult: returns 2 results');

  const resultA = results.find((r) => r.id === 'a');
  const resultB = results.find((r) => r.id === 'b');
  assert(resultA !== undefined, 'RerankResult: result a exists');
  assert(resultB !== undefined, 'RerankResult: result b exists');

  // Read the field through optional chaining rather than non-null assertions:
  // a missing result is then reported as a failed assertion instead of
  // throwing a TypeError that would abort the remaining test sections.
  const cosineA = resultA?.cosineSimilarity;
  const cosineB = resultB?.cosineSimilarity;
  assert(cosineA !== undefined, 'RerankResult: a has cosineSimilarity');
  assert(cosineB !== undefined, 'RerankResult: b has cosineSimilarity');
  assert(
    cosineA !== undefined && cosineB !== undefined && cosineA > cosineB,
    'RerankResult: a has higher cosine similarity than b',
  );
}

{
  // A candidate without an embedding should come back with cosineSimilarity undefined.
  const candidates: RerankerCandidate[] = [
    { id: 'no-emb', text: 'a document without embedding' },
  ];

  const results = rerank('document', [], candidates, 1);
  assert(results.length === 1, 'RerankResult: returns 1 result without embedding');

  // Guard against an empty result list so the check fails cleanly rather than
  // throwing on `results[0]`.
  const onlyResult = results[0];
  assert(
    onlyResult !== undefined && onlyResult.cosineSimilarity === undefined,
    'RerankResult: no embedding -> cosineSimilarity is undefined',
  );
}
442
+
443
// ---------------------------------------------------------------------------
// Weighted reranking tests
// ---------------------------------------------------------------------------

console.log('# Weighted Reranking');

{
  // Recency-heavy weights: two near-identical notes, one a month old and one
  // an hour old. Timestamps are in seconds.
  const SECONDS_PER_HOUR = 60 * 60;
  const SECONDS_PER_DAY = 24 * SECONDS_PER_HOUR;
  const nowSeconds = Math.floor(Date.now() / 1000);

  const notes: RerankerCandidate[] = [
    { id: 'old', text: 'meeting notes from project kickoff', createdAt: nowSeconds - 30 * SECONDS_PER_DAY, importance: 0.5 },
    { id: 'new', text: 'meeting notes from project update', createdAt: nowSeconds - 1 * SECONDS_PER_HOUR, importance: 0.5 },
  ];
  const recencyHeavy: RankingWeights = { bm25: 0.15, cosine: 0.20, importance: 0.20, recency: 0.45 };

  const ranked = rerank('meeting notes', [], notes, 2, recencyHeavy);

  assert(ranked.length === 2, 'Weighted: returns both candidates');
  assert(ranked[0].id === 'new', 'Weighted: temporal weights promote newer fact to top');
}

{
  // Importance-heavy weights: same age, different importance.
  const anHourAgo = Math.floor(Date.now() / 1000) - 3600;
  const facts: RerankerCandidate[] = [
    { id: 'low-imp', text: 'Alex mentioned liking coffee', importance: 0.2, createdAt: anHourAgo },
    { id: 'high-imp', text: 'Alex mentioned liking tea', importance: 0.9, createdAt: anHourAgo },
  ];
  const importanceHeavy: RankingWeights = { bm25: 0.10, cosine: 0.10, importance: 0.70, recency: 0.10 };

  const ranked = rerank('Alex likes', [], facts, 2, importanceHeavy);

  assert(ranked.length === 2, 'Weighted: returns both candidates with importance weights');
  assert(ranked[0].id === 'high-imp', 'Weighted: importance-heavy weights promote high-importance fact');
}

{
  // BM25-heavy ("factual") weights: the candidate sharing exact terms with
  // the query should beat the vaguely related one.
  const exactMatch: RerankerCandidate = { id: 'exact', text: "Alex's email is alex@example.com" };
  const vagueMatch: RerankerCandidate = { id: 'vague', text: 'Someone once mentioned contact information for reaching out' };
  const bm25Heavy: RankingWeights = { bm25: 0.40, cosine: 0.20, importance: 0.25, recency: 0.15 };

  const ranked = rerank("What is Alex's email?", [], [exactMatch, vagueMatch], 2, bm25Heavy);

  assert(ranked[0].id === 'exact', 'Weighted: factual weights promote exact term match (BM25-heavy)');
}

{
  // Passing DEFAULT_WEIGHTS explicitly should pick the same top result as
  // omitting the weights argument.
  const corpus: RerankerCandidate[] = [
    'Alex works Nexus Labs senior engineer',
    'The weather today is sunny and warm',
  ].map((text, index) => ({ id: String(index + 1), text }));

  const explicitDefaults = rerank('Alex Nexus Labs', [], corpus, 2, DEFAULT_WEIGHTS);
  const implicitDefaults = rerank('Alex Nexus Labs', [], corpus, 2);

  assert(explicitDefaults[0].id === implicitDefaults[0].id, 'Weighted: default weights match no-weights behavior');
}
504
+
505
// ---------------------------------------------------------------------------
// Query Intent Detection tests
// ---------------------------------------------------------------------------

console.log('# Query Intent Detection');

{
  // Short question-style queries are expected to be classified as factual.
  [
    "What's Alex's email?",
    "Who is the project lead?",
    "Where does Sarah live?",
    "How many people are on the team?",
    "Is the project using TypeScript?",
    "Does Alex work at Nexus?",
  ].forEach((query) => {
    const detected = detectQueryIntent(query);
    assert(detected === 'factual', `Intent: "${query}" => factual (got ${detected})`);
  });
}

{
  // Queries mentioning a time reference are expected to be classified as temporal.
  [
    "What did we discuss yesterday?",
    "What happened last week?",
    "Any recent updates?",
    "What was mentioned earlier today?",
    "Tell me what changed since Monday",
    "What did Alex say this morning?",
  ].forEach((query) => {
    const detected = detectQueryIntent(query);
    assert(detected === 'temporal', `Intent: "${query}" => temporal (got ${detected})`);
  });
}

{
  // Everything else is expected to fall back to semantic (the default).
  [
    "Tell me about Alex's work preferences",
    "Explain the project architecture",
    "Summarize the project architecture and its main design decisions and tradeoffs that were discussed",
    "Alex personality traits and communication style",
  ].forEach((query) => {
    const detected = detectQueryIntent(query);
    assert(detected === 'semantic', `Intent: "${query}" => semantic (got ${detected})`);
  });
}

{
  // Precedence: this query starts with a factual pattern ("What") and also
  // contains a temporal keyword ("yesterday"); temporal should win.
  const detected = detectQueryIntent("What did we discuss yesterday?");
  assert(detected === 'temporal', 'Intent: temporal overrides factual when both match');
}

{
  // A factual-pattern query longer than 80 characters should fall through
  // to semantic.
  const detected = detectQueryIntent(
    "What are all the different design patterns and architectural decisions that were discussed in the project?",
  );
  assert(detected === 'semantic', `Intent: long factual-pattern query => semantic (got ${detected})`);
}

{
  // Starts with "what" and is under 80 characters, so it counts as factual.
  const detected = detectQueryIntent("What do you know about hiking?");
  assert(detected === 'factual', `Intent: "What do you know about..." => factual (got ${detected})`);
}
579
+
580
// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------

// TAP plan line (emitted at the end because the assertion count is only known
// once every section has run), followed by the pass/fail tallies.
console.log(`\n1..${testNum}`);
console.log(`# pass: ${passed}`);
console.log(`# fail: ${failed}`);

const anyFailed = failed > 0;
console.log(anyFailed ? '\nFAILED' : '\nALL TESTS PASSED');

// Set the exit code instead of calling process.exit(): exit() terminates the
// process immediately and can cut off stdout writes that are still pending,
// which would truncate the TAP report when it is piped to a consumer. With
// exitCode the process ends on its own once the output has been flushed, and
// the status is still 1 on failure and 0 on success.
process.exitCode = anyFailed ? 1 : 0;