sweet-search 2.4.2 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/core/cli.js +43 -5
  2. package/core/embedding/embedding-cache.js +266 -18
  3. package/core/embedding/embedding-service.js +45 -9
  4. package/core/graph/graph-expansion.js +52 -12
  5. package/core/graph/graph-extractor.js +30 -1
  6. package/core/indexing/ast-chunker.js +331 -16
  7. package/core/indexing/chunking/chunk-builder.js +34 -1
  8. package/core/indexing/index-codebase-v21.js +31 -2
  9. package/core/indexing/index.js +6 -3
  10. package/core/indexing/indexer-ann.js +45 -6
  11. package/core/indexing/indexer-build.js +9 -1
  12. package/core/indexing/indexer-phases.js +6 -4
  13. package/core/indexing/indexing-file-policy.js +140 -0
  14. package/core/indexing/li-skip-policy.js +11 -220
  15. package/core/infrastructure/codebase-repository.js +21 -0
  16. package/core/infrastructure/config/embedding.js +20 -1
  17. package/core/infrastructure/config/graph.js +2 -2
  18. package/core/infrastructure/config/ranking.js +10 -0
  19. package/core/infrastructure/config/vector-store.js +1 -1
  20. package/core/infrastructure/coreml-cascade.js +236 -30
  21. package/core/infrastructure/coreml-cascade.json +25 -0
  22. package/core/infrastructure/index.js +17 -0
  23. package/core/infrastructure/init-config.js +216 -0
  24. package/core/infrastructure/language-patterns/registry-core.js +18 -0
  25. package/core/infrastructure/model-registry.js +12 -0
  26. package/core/infrastructure/native-inference.js +143 -51
  27. package/core/infrastructure/tree-sitter-provider.js +92 -2
  28. package/core/ranking/cascaded-scorer.js +6 -2
  29. package/core/ranking/file-kind-ranking.js +264 -0
  30. package/core/ranking/late-interaction-index.js +10 -4
  31. package/core/ranking/late-interaction-policy.js +304 -0
  32. package/core/search/context-expander.js +267 -28
  33. package/core/search/index.js +4 -0
  34. package/core/search/search-cli.js +3 -1
  35. package/core/search/search-pattern.js +4 -3
  36. package/core/search/search-postprocess.js +189 -8
  37. package/core/search/search-read-semantic.js +734 -0
  38. package/core/search/search-read.js +481 -0
  39. package/core/search/search-server.js +153 -5
  40. package/core/search/sweet-search.js +133 -16
  41. package/core/start-server.js +13 -2
  42. package/mcp/server.js +41 -0
  43. package/mcp/tool-handlers.js +117 -6
  44. package/package.json +9 -7
  45. package/scripts/init.js +386 -5
  46. package/scripts/uninstall.js +152 -6
@@ -14,6 +14,7 @@ import { expandResults } from '../graph/graph-expansion.js';
14
14
  import { int8CosineSimilarity } from '../embedding/embedding-service.js';
15
15
  import { QualityScorer } from '../ranking/quality-scorer.js';
16
16
  import { classifyIntent, getIntentPolicy } from '../query/intent-router.js';
17
+ import { applyFileKindRanking, classifyFileKindIntent } from '../ranking/file-kind-ranking.js';
17
18
  import { recordQueryTelemetry } from '../embedding/embedding-cache.js';
18
19
  import { expandAliases } from './dedup/sibling-expander.js';
19
20
 
@@ -175,10 +176,20 @@ export async function applyPostRetrieval(results, query, options, searchContext)
175
176
  ...(intentEdgeTypes && !graphExpandOptions.edgeTypes ? { edgeTypes: intentEdgeTypes } : {}),
176
177
  ...graphExpandOptions,
177
178
  });
179
+
180
+ // Attach LI chunk ids to expanded entities so they can participate
181
+ // in the post-expansion MaxSim rerank pool. The graph stores entities
182
+ // (entity_id keyed by code-graph.db) while LI is keyed by chunk id;
183
+ // without this bridge expanded entries fall through hasTokens() and
184
+ // are appended to the result tail without ever competing for top-K.
185
+ const expandedAttached = attachChunkIdsToExpanded(results, this.codebaseRepo);
186
+
178
187
  stats.graphExpansion = {
179
188
  mode: effectiveGraphExpand,
180
189
  latency_ms: Date.now() - expandStart,
181
190
  total: results.length,
191
+ expanded: results.filter(r => r.is_expanded).length,
192
+ expandedWithLiChunk: expandedAttached,
182
193
  };
183
194
  }
184
195
  } catch (err) {
@@ -255,8 +266,22 @@ export async function applyPostRetrieval(results, query, options, searchContext)
255
266
  if (shouldRunLateInteraction) {
256
267
  try {
257
268
  const liStart = performance.now();
258
- const liCandidateCount = this.stage3Candidates || 20;
259
- const topCandidates = results.slice(0, liCandidateCount);
269
+ // Pool size and original/expanded split are overridable per call so
270
+ // the graph-2hop sweep can compare allocations without forking the
271
+ // pipeline. Defaults preserve production behaviour.
272
+ const liCandidateCount =
273
+ options.liPoolSize ?? this.stage3Candidates ?? 20;
274
+ const liExpandedFraction = options.liExpandedFraction; // undefined → builder default
275
+
276
+ // Build a bounded MIXED rerank pool: top originals + top expanded.
277
+ // Without this, expanded entries always sit behind the originals'
278
+ // tail and the LI rerank only re-orders the original head — graph
279
+ // expansion has zero effect on top-K. Reserve a slice of the rerank
280
+ // pool for the highest-scoring expanded candidates so they actually
281
+ // compete for top-K positions.
282
+ const { topCandidates, expandedQuotaUsed } = buildMixedRerankPool(
283
+ results, liCandidateCount, liExpandedFraction,
284
+ );
260
285
 
261
286
  const { encodeQuery } = await import('../ranking/late-interaction-model.js');
262
287
  const queryTokens = await encodeQuery(query);
@@ -276,20 +301,21 @@ export async function applyPostRetrieval(results, query, options, searchContext)
276
301
 
277
302
  scored.sort((a, b) => b.score - a.score);
278
303
 
279
- results = [
280
- ...scored,
281
- ...results.slice(liCandidateCount),
282
- ];
304
+ // Anything not in the rerank pool keeps original ordering at the tail.
305
+ const pickedKeys = new Set(topCandidates.map(c => c.id || c.entity_id));
306
+ const tail = results.filter(r => !pickedKeys.has(r.id || r.entity_id));
307
+ results = [...scored, ...tail];
283
308
  }
284
309
 
285
310
  stats.lateInteraction = {
286
311
  position: 'post-expansion',
287
- mode: 'pure-reranker',
312
+ mode: 'pure-reranker-mixed-pool',
288
313
  latency_us: Math.round((performance.now() - liStart) * 1000),
289
314
  candidates: topCandidates.length,
315
+ expandedInPool: expandedQuotaUsed,
290
316
  queryTokens: queryTokens?.length || 0,
291
317
  };
292
- this.log(`LateInteraction (pure reranker): ${stats.lateInteraction.latency_us}us for ${topCandidates.length} candidates (${queryTokens?.length || 0} query tokens)`);
318
+ this.log(`LateInteraction (mixed-pool): ${stats.lateInteraction.latency_us}us for ${topCandidates.length} candidates (${expandedQuotaUsed} expanded, ${queryTokens?.length || 0} query tokens)`);
293
319
  } catch (err) {
294
320
  this.log(`LateInteraction rerank failed: ${err.message}`);
295
321
  stats.lateInteraction = { position: 'post-expansion', error: err.message };
@@ -374,6 +400,32 @@ export async function applyPostRetrieval(results, query, options, searchContext)
374
400
  }
375
401
  }
376
402
 
403
+ // =========================================================================
404
+ // Intent-aware file-kind ranking
405
+ // =========================================================================
406
+ // Soft-demote docs/tests/types files when the query is confidently
407
+ // implementation-seeking AND the top-N window contains both docs/tests/
408
+ // types and implementation candidates. No-op otherwise. Disable with
409
+ // SWEET_SEARCH_FILE_KIND_RANKING=0; tune SWEET_SEARCH_FILE_KIND_FACTOR.
410
+ if (Array.isArray(results) && results.length > 0) {
411
+ const fileKindIntent = classifyFileKindIntent(query);
412
+ const beforeTop = results[0];
413
+ const afterFK = applyFileKindRanking(results, { intent: fileKindIntent });
414
+ if (afterFK !== results) {
415
+ results = afterFK;
416
+ stats.fileKindRanking = {
417
+ intent: fileKindIntent,
418
+ applied: true,
419
+ top1Changed: !!beforeTop && results[0] && (beforeTop !== results[0]),
420
+ };
421
+ } else {
422
+ stats.fileKindRanking = {
423
+ intent: fileKindIntent,
424
+ applied: false,
425
+ };
426
+ }
427
+ }
428
+
377
429
  stats.total_ms = Date.now() - start;
378
430
  stats.results_count = Array.isArray(results) ? results.length : 0;
379
431
 
@@ -450,3 +502,132 @@ export function computeCacheHit(mode, {
450
502
 
451
503
  return { lexSubLatency, lexHit, semHit, cacheHit };
452
504
  }
505
+
506
+ // =============================================================================
507
+ // Mixed rerank pool helpers (post-expansion LI / cascade)
508
+ // =============================================================================
509
+
510
/** Matches the `:<start>-<end>` segment of a chunk id shaped like `<path>:<start>-<end>:<n>`. */
const CHUNK_ID_LINE_RANGE_RE = /:(\d+)-(\d+)(?::|$)/;

/**
 * Tell whether a value can serve as a chunk id.
 *
 * A plain truthiness test would reject the numeric id `0`, so only
 * null/undefined and the empty string are treated as "no id".
 *
 * @param {*} id
 * @returns {boolean}
 */
function hasUsableChunkId(id) {
  return id != null && id !== '';
}

/**
 * Resolve the line range covered by a chunk.
 *
 * Chunk metadata (object or JSON string) is the primary signal; the
 * `:<start>-<end>` segment of a string chunk id is the fallback when the
 * metadata is missing, unparsable, or lacks either bound.
 *
 * @param {{ id?: *, metadata?: (object|string|null) }} chunk
 * @returns {{ start: number, end: number } | null} null when no numeric range can be derived
 */
function resolveChunkLineRange(chunk) {
  let meta = chunk.metadata;
  if (typeof meta === 'string') {
    try {
      meta = JSON.parse(meta);
    } catch {
      // Unparsable metadata is expected for sparse rows; fall back to the id pattern.
      meta = null;
    }
  }

  let start = meta?.start_line ?? meta?.startLine;
  let end = meta?.end_line ?? meta?.endLine;

  if ((start == null || end == null) && typeof chunk.id === 'string') {
    const match = CHUNK_ID_LINE_RANGE_RE.exec(chunk.id);
    if (match) {
      start = Number.parseInt(match[1], 10);
      end = Number.parseInt(match[2], 10);
    }
  }
  if (start == null || end == null) return null;

  const range = { start: Number(start), end: Number(end) };
  return Number.isNaN(range.start) || Number.isNaN(range.end) ? null : range;
}

/**
 * For each `is_expanded` result with a known file path and start line, find
 * the codebase chunk that best covers it and stash that chunk's id under
 * `_liChunkId` (the result objects are mutated in place).
 *
 * Why: graph expansion produces results keyed by entity_id (from code-graph.db)
 * but the LI index is keyed by chunk id (from codebase.db). Without bridging
 * the two ID spaces, expanded results can never participate in MaxSim rerank.
 *
 * Best-effort: malformed entries, lookup failures and zero-overlap results are
 * left untouched and will fall through to the unscored path. Nothing here
 * throws for bad data.
 *
 * @param {Array} results
 * @param {import('../infrastructure/codebase-repository.js').CodebaseRepository} codebaseRepo
 * @returns {number} count of expanded results that received a _liChunkId
 */
export function attachChunkIdsToExpanded(results, codebaseRepo) {
  if (!Array.isArray(results) || results.length === 0 || !codebaseRepo) return 0;

  // file path -> chunk rows, so each file is looked up at most once per call
  // (failed or empty lookups are cached as [] too).
  const chunksByFile = new Map();
  let attached = 0;

  for (const r of results) {
    if (!r || !r.is_expanded || hasUsableChunkId(r._liChunkId)) continue;

    const filePath = r.file_path || r.file || r.metadata?.file || r.metadata?.path;
    const rawStart = r.start_line ?? r.startLine ?? r.metadata?.start_line ?? r.metadata?.startLine;
    if (!filePath || rawStart == null) continue;
    const rawEnd = r.end_line ?? r.endLine ?? r.metadata?.end_line ?? r.metadata?.endLine ?? rawStart;

    const startLine = Number(rawStart);
    const endLine = Number(rawEnd);
    // A non-numeric range can never overlap anything; skip before hitting the repository.
    if (Number.isNaN(startLine) || Number.isNaN(endLine)) continue;

    let chunks = chunksByFile.get(filePath);
    if (chunks === undefined) {
      try {
        const fetched = codebaseRepo.getChunksByFilePath(filePath);
        // Anything other than an array (null, a Promise, ...) cannot be iterated safely.
        chunks = Array.isArray(fetched) ? fetched : [];
      } catch {
        // Deliberate best-effort: a failed lookup just means this file cannot be bridged.
        chunks = [];
      }
      chunksByFile.set(filePath, chunks);
    }
    if (chunks.length === 0) continue;

    // Greatest line-range overlap with the entity wins; ties are broken by the
    // smaller chunk (tighter match).
    let bestId;
    let bestOverlap = 0;
    let bestSize = Infinity;
    for (const chunk of chunks) {
      // A chunk without an id cannot be referenced by the LI index, so it must
      // not be allowed to win and block a usable candidate.
      if (!chunk || !hasUsableChunkId(chunk.id)) continue;
      const range = resolveChunkLineRange(chunk);
      if (!range) continue;

      const overlap = Math.min(endLine, range.end) - Math.max(startLine, range.start) + 1;
      if (!(overlap > 0)) continue;

      const size = range.end - range.start + 1;
      if (overlap > bestOverlap || (overlap === bestOverlap && size < bestSize)) {
        bestOverlap = overlap;
        bestSize = size;
        bestId = chunk.id;
      }
    }

    if (bestOverlap > 0) {
      r._liChunkId = bestId;
      attached += 1;
    }
  }
  return attached;
}
581
+
582
/** Share of the rerank pool reserved for expanded candidates when none is given. */
const DEFAULT_EXPANDED_FRACTION = 0.4;

/**
 * Build a bounded LI rerank pool that mixes top originals and top expanded.
 *
 * Reserves `expandedQuota = floor(slot * expandedFraction)` of the rerank
 * slots for the highest-scoring expanded candidates (ranked by
 * `expansion.adaptiveScore`, then `score`), with the remainder going to the
 * highest-scoring originals (ranked by `score`, then `int8Score`, then
 * `hybridScore`).
 *
 * If there are fewer expanded (or fewer originals) than the quota, the
 * unused slots flow to the other side.
 *
 * When the list contains no expanded entries at all, originals are NOT
 * re-sorted: the first `slot` originals are returned in their incoming order.
 *
 * Input hardening:
 * - a non-array `results` yields an empty pool; null/undefined entries are ignored;
 * - `slot` is coerced to a number; NaN or negative values yield an empty pool
 *   (a negative value would otherwise make `slice` drop items from the end);
 * - `expandedFraction` is coerced and clamped to [0, 1]; null or NaN falls
 *   back to the default instead of collapsing the pool.
 *
 * @param {Array} results - Combined original + expanded result list
 * @param {number} slot - Total rerank slots (e.g. stage3Candidates)
 * @param {number} [expandedFraction=0.4] - Fraction of pool reserved for expanded
 * @returns {{ topCandidates: Array, expandedQuotaUsed: number }}
 */
export function buildMixedRerankPool(results, slot, expandedFraction = DEFAULT_EXPANDED_FRACTION) {
  if (!Array.isArray(results)) {
    return { topCandidates: [], expandedQuotaUsed: 0 };
  }

  const requestedSlots = Number(slot);
  const totalSlots = Number.isNaN(requestedSlots) ? 0 : Math.max(0, requestedSlots);

  const requestedFraction = expandedFraction == null ? Number.NaN : Number(expandedFraction);
  const fraction = Number.isNaN(requestedFraction)
    ? DEFAULT_EXPANDED_FRACTION
    : Math.max(0, Math.min(1, requestedFraction));

  // Single pass partition; holes and null entries cannot be reranked.
  const originals = [];
  const expanded = [];
  for (const r of results) {
    if (r == null) continue;
    (r.is_expanded ? expanded : originals).push(r);
  }

  if (expanded.length === 0) {
    return { topCandidates: originals.slice(0, totalSlots), expandedQuotaUsed: 0 };
  }

  const expandedScore = (r) => r.expansion?.adaptiveScore ?? r.score ?? 0;
  const originalScore = (r) => r.score ?? r.int8Score ?? r.hybridScore ?? 0;

  const sortedOriginals = [...originals].sort((a, b) => originalScore(b) - originalScore(a));
  const sortedExpanded = [...expanded].sort((a, b) => expandedScore(b) - expandedScore(a));

  // `Infinity * 0` is NaN; treat that as "nothing reserved" rather than poisoning the quotas.
  const reserved = Math.floor(totalSlots * fraction);
  const expandedQuota = Math.min(Number.isNaN(reserved) ? 0 : reserved, sortedExpanded.length);
  const originalQuota = Math.min(totalSlots - expandedQuota, sortedOriginals.length);

  // If originals can't fill their quota, redirect the surplus to expanded.
  const originalShort = totalSlots - expandedQuota - originalQuota;
  const finalExpandedQuota = Math.min(expandedQuota + originalShort, sortedExpanded.length);

  const topOriginals = sortedOriginals.slice(0, originalQuota);
  const topExpanded = sortedExpanded.slice(0, finalExpandedQuota);

  return {
    topCandidates: [...topOriginals, ...topExpanded],
    expandedQuotaUsed: topExpanded.length,
  };
}