context-mode 1.0.24 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/store.js CHANGED
@@ -116,6 +116,52 @@ export function cleanupStaleDBs() {
116
116
  catch { /* ignore readdir errors */ }
117
117
  return cleaned;
118
118
  }
119
+ // ── Proximity helpers (pure functions) ──
120
/**
 * Find every index at which `term` occurs in `text`.
 *
 * The scan restarts one character after each hit, so overlapping occurrences
 * are all reported (e.g. "aa" in "aaa" yields [0, 1]). Matching is
 * case-sensitive; callers that want case-insensitive matching must normalise
 * both arguments first.
 *
 * @param text - Haystack to scan.
 * @param term - Substring to look for.
 * @returns Ascending start indices of each match; empty when `term` is empty
 *          or does not occur.
 */
function findAllPositions(text, term) {
    const positions = [];
    // An empty needle "matches" at every index, and indexOf("", from) clamps
    // `from` to text.length instead of returning -1, so without this guard the
    // loop below would never terminate.
    if (term.length === 0)
        return positions;
    let idx = text.indexOf(term);
    while (idx !== -1) {
        positions.push(idx);
        idx = text.indexOf(term, idx + 1);
    }
    return positions;
}
130
/**
 * Compute the width of the narrowest window that contains at least one
 * position from every list.
 *
 * Each list is copied and sorted ascending (the inputs are not mutated), then
 * one cursor per list is kept. On every step the window is the range between
 * the smallest and largest value under the cursors; the cursor sitting on the
 * smallest value is advanced, and the scan stops once that cursor runs off the
 * end of its list.
 *
 * @param positionLists - One array of numeric positions per term.
 * @returns `max - min` of the tightest window found; `0` for a single
 *          non-empty list; `Infinity` when there are no lists or when any list
 *          is empty (no window can cover a term that never occurs).
 */
function findMinSpan(positionLists) {
    if (positionLists.length === 0)
        return Infinity;
    // An empty list would read `undefined` under its cursor, which fails both
    // comparisons below and is silently skipped, producing a bogus span.
    if (positionLists.some((list) => list.length === 0))
        return Infinity;
    if (positionLists.length === 1)
        return 0;
    const sorted = positionLists.map((p) => [...p].sort((a, b) => a - b));
    const ptrs = new Array(sorted.length).fill(0);
    let minSpan = Infinity;
    while (true) {
        let curMin = Infinity;
        let curMax = -Infinity;
        let minIdx = 0;
        for (let i = 0; i < sorted.length; i++) {
            const val = sorted[i][ptrs[i]];
            if (val < curMin) {
                curMin = val;
                minIdx = i;
            }
            if (val > curMax) {
                curMax = val;
            }
        }
        const span = curMax - curMin;
        if (span < minSpan)
            minSpan = span;
        // Only moving the smallest value forward can shrink the window.
        ptrs[minIdx]++;
        if (ptrs[minIdx] >= sorted[minIdx].length)
            break;
    }
    return minSpan;
}
119
165
  export class ContentStore {
120
166
  #db;
121
167
  #dbPath;
@@ -138,6 +184,10 @@ export class ContentStore {
138
184
  #stmtSearchTrigram;
139
185
  #stmtSearchTrigramFiltered;
140
186
  #stmtFuzzyVocab;
187
+ #stmtSearchPorterContentType;
188
+ #stmtSearchPorterFilteredContentType;
189
+ #stmtSearchTrigramContentType;
190
+ #stmtSearchTrigramFilteredContentType;
141
191
  // Read path
142
192
  #stmtListSources;
143
193
  #stmtChunksBySource;
@@ -217,7 +267,7 @@ export class ContentStore {
217
267
  chunks.content,
218
268
  chunks.content_type,
219
269
  sources.label,
220
- bm25(chunks, 2.0, 1.0) AS rank,
270
+ bm25(chunks, 5.0, 1.0) AS rank,
221
271
  highlight(chunks, 1, char(2), char(3)) AS highlighted
222
272
  FROM chunks
223
273
  JOIN sources ON sources.id = chunks.source_id
@@ -231,7 +281,7 @@ export class ContentStore {
231
281
  chunks.content,
232
282
  chunks.content_type,
233
283
  sources.label,
234
- bm25(chunks, 2.0, 1.0) AS rank,
284
+ bm25(chunks, 5.0, 1.0) AS rank,
235
285
  highlight(chunks, 1, char(2), char(3)) AS highlighted
236
286
  FROM chunks
237
287
  JOIN sources ON sources.id = chunks.source_id
@@ -245,7 +295,7 @@ export class ContentStore {
245
295
  chunks_trigram.content,
246
296
  chunks_trigram.content_type,
247
297
  sources.label,
248
- bm25(chunks_trigram, 2.0, 1.0) AS rank,
298
+ bm25(chunks_trigram, 5.0, 1.0) AS rank,
249
299
  highlight(chunks_trigram, 1, char(2), char(3)) AS highlighted
250
300
  FROM chunks_trigram
251
301
  JOIN sources ON sources.id = chunks_trigram.source_id
@@ -259,13 +309,70 @@ export class ContentStore {
259
309
  chunks_trigram.content,
260
310
  chunks_trigram.content_type,
261
311
  sources.label,
262
- bm25(chunks_trigram, 2.0, 1.0) AS rank,
312
+ bm25(chunks_trigram, 5.0, 1.0) AS rank,
263
313
  highlight(chunks_trigram, 1, char(2), char(3)) AS highlighted
264
314
  FROM chunks_trigram
265
315
  JOIN sources ON sources.id = chunks_trigram.source_id
266
316
  WHERE chunks_trigram MATCH ? AND sources.label LIKE ?
267
317
  ORDER BY rank
268
318
  LIMIT ?
319
+ `);
320
+ // Content-type filtered variants
321
+ this.#stmtSearchPorterContentType = this.#db.prepare(`
322
+ SELECT
323
+ chunks.title,
324
+ chunks.content,
325
+ chunks.content_type,
326
+ sources.label,
327
+ bm25(chunks, 5.0, 1.0) AS rank,
328
+ highlight(chunks, 1, char(2), char(3)) AS highlighted
329
+ FROM chunks
330
+ JOIN sources ON sources.id = chunks.source_id
331
+ WHERE chunks MATCH ? AND chunks.content_type = ?
332
+ ORDER BY rank
333
+ LIMIT ?
334
+ `);
335
+ this.#stmtSearchPorterFilteredContentType = this.#db.prepare(`
336
+ SELECT
337
+ chunks.title,
338
+ chunks.content,
339
+ chunks.content_type,
340
+ sources.label,
341
+ bm25(chunks, 5.0, 1.0) AS rank,
342
+ highlight(chunks, 1, char(2), char(3)) AS highlighted
343
+ FROM chunks
344
+ JOIN sources ON sources.id = chunks.source_id
345
+ WHERE chunks MATCH ? AND sources.label LIKE ? AND chunks.content_type = ?
346
+ ORDER BY rank
347
+ LIMIT ?
348
+ `);
349
+ this.#stmtSearchTrigramContentType = this.#db.prepare(`
350
+ SELECT
351
+ chunks_trigram.title,
352
+ chunks_trigram.content,
353
+ chunks_trigram.content_type,
354
+ sources.label,
355
+ bm25(chunks_trigram, 5.0, 1.0) AS rank,
356
+ highlight(chunks_trigram, 1, char(2), char(3)) AS highlighted
357
+ FROM chunks_trigram
358
+ JOIN sources ON sources.id = chunks_trigram.source_id
359
+ WHERE chunks_trigram MATCH ? AND chunks_trigram.content_type = ?
360
+ ORDER BY rank
361
+ LIMIT ?
362
+ `);
363
+ this.#stmtSearchTrigramFilteredContentType = this.#db.prepare(`
364
+ SELECT
365
+ chunks_trigram.title,
366
+ chunks_trigram.content,
367
+ chunks_trigram.content_type,
368
+ sources.label,
369
+ bm25(chunks_trigram, 5.0, 1.0) AS rank,
370
+ highlight(chunks_trigram, 1, char(2), char(3)) AS highlighted
371
+ FROM chunks_trigram
372
+ JOIN sources ON sources.id = chunks_trigram.source_id
373
+ WHERE chunks_trigram MATCH ? AND sources.label LIKE ? AND chunks_trigram.content_type = ?
374
+ ORDER BY rank
375
+ LIMIT ?
269
376
  `);
270
377
  // Fuzzy path
271
378
  this.#stmtFuzzyVocab = this.#db.prepare("SELECT word FROM vocabulary WHERE length(word) BETWEEN ? AND ?");
@@ -374,14 +481,26 @@ export class ContentStore {
374
481
  };
375
482
  }
376
483
  // ── Search ──
377
- search(query, limit = 3, source, mode = "AND") {
484
+ search(query, limit = 3, source, mode = "AND", contentType) {
378
485
  const sanitized = sanitizeQuery(query, mode);
379
- const stmt = source
380
- ? this.#stmtSearchPorterFiltered
381
- : this.#stmtSearchPorter;
382
- const params = source
383
- ? [sanitized, `%${source}%`, limit]
384
- : [sanitized, limit];
486
+ let stmt;
487
+ let params;
488
+ if (source && contentType) {
489
+ stmt = this.#stmtSearchPorterFilteredContentType;
490
+ params = [sanitized, `%${source}%`, contentType, limit];
491
+ }
492
+ else if (source) {
493
+ stmt = this.#stmtSearchPorterFiltered;
494
+ params = [sanitized, `%${source}%`, limit];
495
+ }
496
+ else if (contentType) {
497
+ stmt = this.#stmtSearchPorterContentType;
498
+ params = [sanitized, contentType, limit];
499
+ }
500
+ else {
501
+ stmt = this.#stmtSearchPorter;
502
+ params = [sanitized, limit];
503
+ }
385
504
  const rows = stmt.all(...params);
386
505
  return rows.map((r) => ({
387
506
  title: r.title,
@@ -393,16 +512,28 @@ export class ContentStore {
393
512
  }));
394
513
  }
395
514
  // ── Trigram Search (Layer 2) ──
396
- searchTrigram(query, limit = 3, source, mode = "AND") {
515
+ searchTrigram(query, limit = 3, source, mode = "AND", contentType) {
397
516
  const sanitized = sanitizeTrigramQuery(query, mode);
398
517
  if (!sanitized)
399
518
  return [];
400
- const stmt = source
401
- ? this.#stmtSearchTrigramFiltered
402
- : this.#stmtSearchTrigram;
403
- const params = source
404
- ? [sanitized, `%${source}%`, limit]
405
- : [sanitized, limit];
519
+ let stmt;
520
+ let params;
521
+ if (source && contentType) {
522
+ stmt = this.#stmtSearchTrigramFilteredContentType;
523
+ params = [sanitized, `%${source}%`, contentType, limit];
524
+ }
525
+ else if (source) {
526
+ stmt = this.#stmtSearchTrigramFiltered;
527
+ params = [sanitized, `%${source}%`, limit];
528
+ }
529
+ else if (contentType) {
530
+ stmt = this.#stmtSearchTrigramContentType;
531
+ params = [sanitized, contentType, limit];
532
+ }
533
+ else {
534
+ stmt = this.#stmtSearchTrigram;
535
+ params = [sanitized, limit];
536
+ }
406
537
  const rows = stmt.all(...params);
407
538
  return rows.map((r) => ({
408
539
  title: r.title,
@@ -433,35 +564,72 @@ export class ContentStore {
433
564
  }
434
565
  return bestDist <= maxDist ? bestWord : null;
435
566
  }
567
+ // ── Reciprocal Rank Fusion (Cormack et al. 2009) ──
568
+ #rrfSearch(query, limit, source, contentType) {
569
+ const K = 60; // Standard RRF constant
570
+ const fetchLimit = Math.max(limit * 2, 10);
571
+ const porterResults = this.search(query, fetchLimit, source, "OR", contentType);
572
+ const trigramResults = this.searchTrigram(query, fetchLimit, source, "OR", contentType);
573
+ const scoreMap = new Map();
574
+ const key = (r) => `${r.source}::${r.title}`;
575
+ for (const [i, r] of porterResults.entries()) {
576
+ const k = key(r);
577
+ const existing = scoreMap.get(k);
578
+ if (existing) {
579
+ existing.score += 1 / (K + i + 1);
580
+ }
581
+ else {
582
+ scoreMap.set(k, { result: r, score: 1 / (K + i + 1) });
583
+ }
584
+ }
585
+ for (const [i, r] of trigramResults.entries()) {
586
+ const k = key(r);
587
+ const existing = scoreMap.get(k);
588
+ if (existing) {
589
+ existing.score += 1 / (K + i + 1);
590
+ }
591
+ else {
592
+ scoreMap.set(k, { result: r, score: 1 / (K + i + 1) });
593
+ }
594
+ }
595
+ return Array.from(scoreMap.values())
596
+ .sort((a, b) => b.score - a.score)
597
+ .slice(0, limit)
598
+ .map(({ result, score }) => ({ ...result, rank: -score }));
599
+ }
600
+ // ── Proximity Reranking ──
601
+ #applyProximityReranking(results, query) {
602
+ const terms = query
603
+ .toLowerCase()
604
+ .split(/\s+/)
605
+ .filter((w) => w.length >= 2);
606
+ // Single-term queries: no reranking needed
607
+ if (terms.length < 2)
608
+ return results;
609
+ return results
610
+ .map((r) => {
611
+ const content = r.content.toLowerCase();
612
+ const positions = terms.map((t) => findAllPositions(content, t));
613
+ // If any term is missing from content, no proximity boost
614
+ if (positions.some((p) => p.length === 0)) {
615
+ return { result: r, boost: 0 };
616
+ }
617
+ const minSpan = findMinSpan(positions);
618
+ const boost = 1 / (1 + minSpan / Math.max(content.length, 1));
619
+ return { result: r, boost };
620
+ })
621
+ .sort((a, b) => b.boost - a.boost || a.result.rank - b.result.rank)
622
+ .map(({ result }) => result);
623
+ }
436
624
  // ── Unified Fallback Search ──
437
- searchWithFallback(query, limit = 3, source) {
438
- // Layer 1a: Porter + AND (most precise)
439
- const porterAnd = this.search(query, limit, source, "AND");
440
- if (porterAnd.length > 0) {
441
- return porterAnd.map((r) => ({ ...r, matchLayer: "porter" }));
442
- }
443
- // Layer 1b: Porter + OR (fallback when AND finds nothing)
444
- const porterOr = this.search(query, limit, source, "OR");
445
- if (porterOr.length > 0) {
446
- return porterOr.map((r) => ({ ...r, matchLayer: "porter" }));
447
- }
448
- // Layer 2a: Trigram + AND
449
- const trigramAnd = this.searchTrigram(query, limit, source, "AND");
450
- if (trigramAnd.length > 0) {
451
- return trigramAnd.map((r) => ({
452
- ...r,
453
- matchLayer: "trigram",
454
- }));
455
- }
456
- // Layer 2b: Trigram + OR
457
- const trigramOr = this.searchTrigram(query, limit, source, "OR");
458
- if (trigramOr.length > 0) {
459
- return trigramOr.map((r) => ({
460
- ...r,
461
- matchLayer: "trigram",
462
- }));
463
- }
464
- // Layer 3: Fuzzy correction + re-search (AND then OR)
625
+ searchWithFallback(query, limit = 3, source, contentType) {
626
+ // Step 1: RRF fusion (porter OR + trigram OR → merge)
627
+ const rrfResults = this.#rrfSearch(query, limit, source, contentType);
628
+ if (rrfResults.length > 0) {
629
+ const reranked = this.#applyProximityReranking(rrfResults, query);
630
+ return reranked.map((r) => ({ ...r, matchLayer: "rrf" }));
631
+ }
632
+ // Step 2: Fuzzy correction RRF re-run
465
633
  const words = query
466
634
  .toLowerCase()
467
635
  .trim()
@@ -471,21 +639,10 @@ export class ContentStore {
471
639
  const correctedWords = words.map((w) => this.fuzzyCorrect(w) ?? w);
472
640
  const correctedQuery = correctedWords.join(" ");
473
641
  if (correctedQuery !== original) {
474
- const fuzzyPorterAnd = this.search(correctedQuery, limit, source, "AND");
475
- if (fuzzyPorterAnd.length > 0) {
476
- return fuzzyPorterAnd.map((r) => ({ ...r, matchLayer: "fuzzy" }));
477
- }
478
- const fuzzyPorterOr = this.search(correctedQuery, limit, source, "OR");
479
- if (fuzzyPorterOr.length > 0) {
480
- return fuzzyPorterOr.map((r) => ({ ...r, matchLayer: "fuzzy" }));
481
- }
482
- const fuzzyTrigramAnd = this.searchTrigram(correctedQuery, limit, source, "AND");
483
- if (fuzzyTrigramAnd.length > 0) {
484
- return fuzzyTrigramAnd.map((r) => ({ ...r, matchLayer: "fuzzy" }));
485
- }
486
- const fuzzyTrigramOr = this.searchTrigram(correctedQuery, limit, source, "OR");
487
- if (fuzzyTrigramOr.length > 0) {
488
- return fuzzyTrigramOr.map((r) => ({ ...r, matchLayer: "fuzzy" }));
642
+ const fuzzyResults = this.#rrfSearch(correctedQuery, limit, source, contentType);
643
+ if (fuzzyResults.length > 0) {
644
+ const reranked = this.#applyProximityReranking(fuzzyResults, correctedQuery);
645
+ return reranked.map((r) => ({ ...r, matchLayer: "rrf-fuzzy" }));
489
646
  }
490
647
  }
491
648
  return [];
package/build/types.d.ts CHANGED
@@ -64,7 +64,7 @@ export interface SearchResult {
64
64
  source: string;
65
65
  rank: number;
66
66
  contentType: "code" | "prose";
67
- matchLayer?: "porter" | "trigram" | "fuzzy";
67
+ matchLayer?: "porter" | "trigram" | "fuzzy" | "rrf" | "rrf-fuzzy";
68
68
  highlighted?: string;
69
69
  }
70
70
  /**