npm - sweet-search - Versions diffs - 2.4.2 → 2.5.1 - Mend

sweet-search 2.4.2 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/core/cli.js +19 -5
package/core/embedding/embedding-cache.js +177 -15
package/core/embedding/embedding-service.js +18 -4
package/core/graph/graph-expansion.js +52 -12
package/core/graph/graph-extractor.js +30 -1
package/core/indexing/ast-chunker.js +331 -16
package/core/indexing/chunking/chunk-builder.js +34 -1
package/core/indexing/index.js +6 -3
package/core/indexing/indexer-ann.js +45 -6
package/core/indexing/indexer-build.js +9 -1
package/core/indexing/indexer-phases.js +6 -4
package/core/indexing/indexing-file-policy.js +140 -0
package/core/indexing/li-skip-policy.js +11 -220
package/core/infrastructure/codebase-repository.js +21 -0
package/core/infrastructure/config/embedding.js +20 -1
package/core/infrastructure/config/graph.js +2 -2
package/core/infrastructure/config/ranking.js +10 -0
package/core/infrastructure/config/vector-store.js +1 -1
package/core/infrastructure/coreml-cascade.js +236 -30
package/core/infrastructure/coreml-cascade.json +25 -0
package/core/infrastructure/index.js +15 -0
package/core/infrastructure/init-config.js +78 -0
package/core/infrastructure/language-patterns/registry-core.js +18 -0
package/core/infrastructure/model-registry.js +12 -0
package/core/infrastructure/native-inference.js +143 -51
package/core/infrastructure/tree-sitter-provider.js +92 -2
package/core/ranking/cascaded-scorer.js +6 -2
package/core/ranking/file-kind-ranking.js +264 -0
package/core/ranking/late-interaction-index.js +10 -4
package/core/ranking/late-interaction-policy.js +304 -0
package/core/search/context-expander.js +267 -28
package/core/search/index.js +4 -0
package/core/search/search-cli.js +3 -1
package/core/search/search-pattern.js +4 -3
package/core/search/search-postprocess.js +189 -8
package/core/search/search-read-semantic.js +717 -0
package/core/search/search-read.js +481 -0
package/core/search/search-server.js +6 -4
package/core/search/sweet-search.js +119 -15
package/mcp/server.js +41 -0
package/mcp/tool-handlers.js +117 -6
package/package.json +9 -7
package/scripts/init.js +386 -5

package/core/infrastructure/model-registry.js CHANGED Viewed

@@ -103,6 +103,18 @@ export const MODEL_REGISTRY = {
     ],
   },
+  'lateon-code-edge-fp32': {
+    hfId: 'lightonai/LateOn-Code-edge',
+    profile: 'full',
+    description: 'Late interaction edge model (FP32 safetensors, backbone 256d, 2-stage projection) for native inference',
+    files: [
+      { path: 'model.safetensors', sizeBytes: 67195976, sha256: '7ffc36b8ff71367249cd5220dbdd4bdbe177bc0e305b2e978a8b598bd8296f04' },
+      { path: '1_Dense/model.safetensors', sizeBytes: 524376, sha256: '9efb17fcb2106cd8fcb01d57a9cd9c997a487ad20630ec8e44ce3f9d89efe0a7' },
+      { path: '2_Dense/model.safetensors', sizeBytes: 98392, sha256: 'a7a388138b3c4bb1a81c8c3bcb9de123f1e652b9e9464a72707ca19ee86a26b1' },
+      { path: 'config.json', sizeBytes: 1252, sha256: null },
+    ],
+  },
   'ms-marco-tinybert': {
     hfId: 'Xenova/ms-marco-TinyBERT-L-2-v2',
     profile: 'full',

package/core/infrastructure/native-inference.js CHANGED Viewed

@@ -55,6 +55,7 @@ import { getModelCacheDir, fetchModel } from './model-fetcher.js';
 import { getModelEntry } from './model-registry.js';
 import { getCoremlCascadeResolvedDirs } from './coreml-cascade.js';
 import { detectHardwareCapability } from './hardware-capability.js';
+import { LATE_INTERACTION_CONFIG } from './config/ranking.js';
 const require = createRequire(import.meta.url);
@@ -63,12 +64,21 @@ const require = createRequire(import.meta.url);
 let _addon = null;
 let _embeddingModel = null;
 let _embeddingModelLoadPromise = null; // race-gate for concurrent first calls
-let _liModel = null;
-let _liModelLoadPromise = null;
+// Per-variant LI model cache. Keyed by FP32 registry key
+// ('lateon-code-fp32' or 'lateon-code-edge-fp32') so a variant swap
+// inside a single process (e.g. ORT-eval session followed by native
+// indexing) doesn't return a stale model. Each entry is
+// `{ model, promise }` where `promise` race-gates concurrent first
+// calls and `model` becomes non-null on resolution.
+const _liModels = new Map();
 let _embTokenizer = null;
 let _embTokenizerLoadPromise = null;
-let _liTokenizer = null;
-let _liTokenizerLoadPromise = null;
+// Per-variant LI tokenizer cache. Keyed by tokenizer source key
+// (matches the ORT-side registry key — `lateon-code` /
+// `lateon-code-edge`). Standard and edge tokenizer.json files are
+// byte-identical today but per-variant resolution is correct and
+// future-proof.
+const _liTokenizers = new Map();
 let _available = null;
 let _coremlCascadeLogged = false;
@@ -123,12 +133,17 @@ function propagateCudaComputeCapToAddonEnv() {
  * Logged exactly once per process so a mis-configured cascade surfaces
  * at startup instead of silently falling through on every call.
  *
+ * Routes the LI cascade dir to `coreml-cascade/li/` (standard) or
+ * `coreml-cascade/li-edge/` (edge) based on the active variant in
+ * `LATE_INTERACTION_CONFIG`. The embed cascade is shared.
+ *
  * Always returns an object — never throws. The returned dirs can be
  * `null`, which the Rust addon treats as "CoreML path disabled" and
  * falls back to candle unconditionally.
  */
 function resolveCoremlCascadeForAddon() {
-  const resolved = getCoremlCascadeResolvedDirs();
+  const liVariantKey = LATE_INTERACTION_CONFIG.model;
+  const resolved = getCoremlCascadeResolvedDirs(liVariantKey);
   if (!_coremlCascadeLogged) {
     _coremlCascadeLogged = true;
     const hw = detectHardwareCapability();
@@ -137,7 +152,7 @@ function resolveCoremlCascadeForAddon() {
     if (resolved.embedDir || resolved.liDir) {
       process.stderr.write(
         `[NativeInference] CoreML cascade: ${resolved.status}` +
-        ` (embed=${resolved.embedDir ? 'yes' : 'no'}, li=${resolved.liDir ? 'yes' : 'no'},` +
+        ` (embed=${resolved.embedDir ? 'yes' : 'no'}, li=${resolved.liDir ? 'yes' : 'no'} [${liVariantKey}],` +
         ` chip=${hw.brandString || 'unknown'})\n`
       );
     } else if (hw.coremlCascadeEligible) {
@@ -327,57 +342,117 @@ export async function nativeEmbed(texts, options = {}) {
 // ─── Late Interaction Model ───
 /**
- * Load the native LI model (LateOn-Code FP32 safetensors + projection).
- * Returns the model instance or null if unavailable. Race-gated.
+ * Resolve the active LI variant from `LATE_INTERACTION_CONFIG`. Returns
+ * the manifest the native loaders need (registry keys + projection
+ * paths and dims). Pure helper — no I/O, no caching.
+ *
+ * Throws if the configured model key has no `activeModel` entry. If that
+ * entry lacks `nativeRegistryKey`, the FP32 key defaults to `${cfgKey}-fp32`.
  */
-export async function getNativeLiModel() {
-  if (_liModel) return _liModel;
-  if (_liModelLoadPromise) return _liModelLoadPromise;
-  _liModelLoadPromise = (async () => {
+export function resolveNativeLiVariant() {
+  const cfg = LATE_INTERACTION_CONFIG.activeModel;
+  const cfgKey = LATE_INTERACTION_CONFIG.model;
+  if (!cfg) {
+    throw new Error(
+      `[NativeInference] LATE_INTERACTION_CONFIG.model='${cfgKey}' is not a known variant`,
+    );
+  }
+  const fp32RegistryKey = cfg.nativeRegistryKey || `${cfgKey}-fp32`;
+  return {
+    cfgKey,                                   // 'lateon-code' | 'lateon-code-edge'
+    fp32RegistryKey,                          // 'lateon-code-fp32' | 'lateon-code-edge-fp32'
+    tokenizerKey: cfgKey,                     // tokenizer lives next to the ORT model
+    projectionPaths: cfg.projectionPaths,     // ['1_Dense/...'] | ['1_Dense/...', '2_Dense/...']
+    projectionDims: cfg.projectionDims,       // [128] | [512, 48]
+    tokenDimension: cfg.tokenDimension,       // 128 | 48
+  };
+}
+/**
+ * Internal: load the native LI model for a specific variant on the
+ * default device. Race-gated per variant via the `_liModels` Map so
+ * concurrent first callers share one load. Returns null if the addon
+ * isn't available or required files are missing.
+ */
+async function loadNativeLiVariantOnDefaultDevice(variant) {
+  const cached = _liModels.get(variant.fp32RegistryKey);
+  if (cached?.model) return cached.model;
+  if (cached?.promise) return cached.promise;
+  const promise = (async () => {
     const addon = loadAddon();
     if (!addon?.NativeLateInteractionModel) return null;
-    await fetchModel('lateon-code-fp32');
+    await fetchModel(variant.fp32RegistryKey);
-    const entry = getModelEntry('lateon-code-fp32');
+    const entry = getModelEntry(variant.fp32RegistryKey);
     const modelDir = getModelCacheDir(entry.hfId);
     const backbonePath = join(modelDir, 'model.safetensors');
-    const projPath = join(modelDir, '1_Dense', 'model.safetensors');
     const configPath = join(modelDir, 'config.json');
+    const projAbsPaths = variant.projectionPaths.map((p) => join(modelDir, p));
-    if (!existsSync(backbonePath) || !existsSync(projPath) || !existsSync(configPath)) return null;
+    if (!existsSync(backbonePath) || !existsSync(configPath)) return null;
+    if (!projAbsPaths.every(existsSync)) return null;
     // Resolve the CoreML cascade dir for ModernBERT LI. Same contract
-    // as the embedding model above — see that comment.
+    // as the embedding model above — see that comment. The dir
+    // depends on the active variant (`coreml-cascade/li/` vs
+    // `coreml-cascade/li-edge/`).
     const cascade = resolveCoremlCascadeForAddon();
     const t0 = Date.now();
-    _liModel = addon.NativeLateInteractionModel.load(
+    const model = addon.NativeLateInteractionModel.load(
       backbonePath,
-      projPath,
+      projAbsPaths,
+      variant.projectionDims,
       configPath,
       cascade.liDir || undefined,
     );
-    console.log(`[NativeInference] LI model loaded in ${Date.now() - t0}ms (dim: ${_liModel.dim}, device: ${addon.nativeInferenceDevice()})`);
+    console.log(
+      `[NativeInference] LI model '${variant.cfgKey}' loaded in ${Date.now() - t0}ms `
+      + `(dim: ${model.dim}, device: ${addon.nativeInferenceDevice()})`,
+    );
-    return _liModel;
+    const slot = _liModels.get(variant.fp32RegistryKey);
+    if (slot) slot.model = model;
+    return model;
   })();
-  return _liModelLoadPromise;
+  _liModels.set(variant.fp32RegistryKey, { model: null, promise });
+  return promise;
+}
+/**
+ * Load the native LI model for the currently-configured variant.
+ * Returns the model instance or null if unavailable. Race-gated per
+ * variant.
+ */
+export async function getNativeLiModel() {
+  const variant = resolveNativeLiVariant();
+  return loadNativeLiVariantOnDefaultDevice(variant);
 }
 /**
- * Get or create the LI tokenizer. Race-gated.
+ * Get or create the LI tokenizer for the currently-configured variant.
+ * Race-gated per variant via the `_liTokenizers` Map.
  */
 async function getLiTokenizer() {
-  if (_liTokenizer) return _liTokenizer;
-  if (_liTokenizerLoadPromise) return _liTokenizerLoadPromise;
-  _liTokenizerLoadPromise = (async () => {
-    const entry = getModelEntry('lateon-code');
+  const variant = resolveNativeLiVariant();
+  const cached = _liTokenizers.get(variant.tokenizerKey);
+  if (cached?.tokenizer) return cached.tokenizer;
+  if (cached?.promise) return cached.promise;
+  const promise = (async () => {
+    const entry = getModelEntry(variant.tokenizerKey);
     const tokenizerPath = join(getModelCacheDir(entry.hfId), 'tokenizer.json');
-    _liTokenizer = await createTokenizer(tokenizerPath);
-    return _liTokenizer;
+    const tokenizer = await createTokenizer(tokenizerPath);
+    const slot = _liTokenizers.get(variant.tokenizerKey);
+    if (slot) slot.tokenizer = tokenizer;
+    return tokenizer;
   })();
-  return _liTokenizerLoadPromise;
+  _liTokenizers.set(variant.tokenizerKey, { tokenizer: null, promise });
+  return promise;
 }
 /**
@@ -457,7 +532,11 @@ export function isNativeEmbeddingModelLoaded() {
 }
 export function isNativeLiModelLoaded() {
-  return _liModel != null;
+  // True only when the *active* variant is loaded — a stale standard
+  // model lingering after a config swap to edge would otherwise
+  // mask the fact that edge encoding still has to load.
+  const variant = resolveNativeLiVariant();
+  return _liModels.get(variant.fp32RegistryKey)?.model != null;
 }
 // ─── Device-explicit loading ───
@@ -518,28 +597,32 @@ export async function loadNativeEmbeddingModelWithDevice(deviceKind, cascadeDirO
 }
 /**
- * Load the native LI model on a specific device.
+ * Load the native LI model on a specific device for the
+ * currently-configured variant. Race-gated per variant.
  */
 export async function loadNativeLiModelWithDevice(deviceKind, cascadeDirOverride) {
-  if (_liModel) return _liModel;
-  if (_liModelLoadPromise) return _liModelLoadPromise;
+  const variant = resolveNativeLiVariant();
+  const cached = _liModels.get(variant.fp32RegistryKey);
+  if (cached?.model) return cached.model;
+  if (cached?.promise) return cached.promise;
-  _liModelLoadPromise = (async () => {
+  const promise = (async () => {
     const addon = loadAddon();
     if (!addon?.NativeLateInteractionModel?.loadWithDevice) return null;
     // See loadNativeEmbeddingModelWithDevice for why this is CUDA-only.
     if (deviceKind === 'cuda') propagateCudaComputeCapToAddonEnv();
-    await fetchModel('lateon-code-fp32');
+    await fetchModel(variant.fp32RegistryKey);
-    const entry = getModelEntry('lateon-code-fp32');
+    const entry = getModelEntry(variant.fp32RegistryKey);
     const modelDir = getModelCacheDir(entry.hfId);
     const backbonePath = join(modelDir, 'model.safetensors');
-    const projPath = join(modelDir, '1_Dense', 'model.safetensors');
     const configPath = join(modelDir, 'config.json');
+    const projAbsPaths = variant.projectionPaths.map((p) => join(modelDir, p));
-    if (!existsSync(backbonePath) || !existsSync(projPath) || !existsSync(configPath)) return null;
+    if (!existsSync(backbonePath) || !existsSync(configPath)) return null;
+    if (!projAbsPaths.every(existsSync)) return null;
     // CUDA has no cascade — see the matching comment in
     // loadNativeEmbeddingModelWithDevice.
@@ -550,19 +633,26 @@ export async function loadNativeLiModelWithDevice(deviceKind, cascadeDirOverride
     );
     const t0 = Date.now();
-    _liModel = addon.NativeLateInteractionModel.loadWithDevice(
+    const model = addon.NativeLateInteractionModel.loadWithDevice(
       backbonePath,
-      projPath,
+      projAbsPaths,
+      variant.projectionDims,
       configPath,
       cascadeDir,
       deviceKind,
     );
-    console.log(`[NativeInference] LI model loaded in ${Date.now() - t0}ms (dim: ${_liModel.dim}, device: ${deviceKind})`);
+    console.log(
+      `[NativeInference] LI model '${variant.cfgKey}' loaded in ${Date.now() - t0}ms `
+      + `(dim: ${model.dim}, device: ${deviceKind})`,
+    );
-    return _liModel;
+    const slot = _liModels.get(variant.fp32RegistryKey);
+    if (slot) slot.model = model;
+    return model;
   })();
-  return _liModelLoadPromise;
+  _liModels.set(variant.fp32RegistryKey, { model: null, promise });
+  return promise;
 }
 // ─── Warmup primitives ───
@@ -575,10 +665,14 @@ export async function warmupNativeEmbeddingModel() {
 }
 export async function warmupNativeLiModel() {
-  if (!_liModel?.warmupForward) return;
+  // Warm up only the *active* variant — warming up an unused stale
+  // variant would be wasted Metal queue time.
+  const variant = resolveNativeLiVariant();
+  const model = _liModels.get(variant.fp32RegistryKey)?.model;
+  if (!model?.warmupForward) return;
   const t0 = Date.now();
-  await _liModel.warmupForward();
-  console.log(`[NativeInference] LI warmup forward in ${Date.now() - t0}ms`);
+  await model.warmupForward();
+  console.log(`[NativeInference] LI warmup forward (${variant.cfgKey}) in ${Date.now() - t0}ms`);
 }
 // ─── Cleanup ───
@@ -586,12 +680,10 @@ export async function warmupNativeLiModel() {
 export function unloadNativeModels() {
   _embeddingModel = null;
   _embeddingModelLoadPromise = null;
-  _liModel = null;
-  _liModelLoadPromise = null;
+  _liModels.clear();
   _embTokenizer = null;
   _embTokenizerLoadPromise = null;
-  _liTokenizer = null;
-  _liTokenizerLoadPromise = null;
+  _liTokenizers.clear();
   _addon = null;
   _available = null;
   _coremlCascadeLogged = false;

package/core/infrastructure/tree-sitter-provider.js CHANGED Viewed

@@ -69,6 +69,28 @@ const BOUNDARY_TYPES = new Set([
   'class_specifier', 'namespace_definition',
 ]);
+// AST node types that represent function/class bodies. Used by
+// extractSignature() to find where the declaration's body starts so
+// the signature span is everything before it (decorators + name +
+// parameters + return type, excluding body).
+const BODY_TYPES = new Set([
+  // JS/TS, Java, Go, Rust, Kotlin, Swift, C#, Ruby (sometimes)
+  'block', 'statement_block', 'class_body', 'function_body',
+  // C / C++ — function bodies (compound_statement) and struct/class member lists (field_declaration_list)
+  'compound_statement', 'field_declaration_list',
+  // Python uses `block` (already covered) but `:` precedes it
+  // PHP — function/method body
+  'compound_statement_php',
+  // Swift / Kotlin — sometimes labelled differently
+  'enum_class_body', 'enum_body', 'interface_body',
+  // Rust impl/trait bodies
+  'declaration_list',
+]);
+// Maximum signature length (chars) after whitespace normalization.
+// Signatures longer than this get truncated with `…`.
+const MAX_SIGNATURE_LENGTH = 200;
 // Map tree-sitter node type -> our chunk type label
 const NODE_TYPE_MAP = {
   'function_declaration': 'function',
@@ -410,12 +432,23 @@ export class TreeSitterProvider {
   /**
    * Parse file content into semantic chunks using the cAST recursive algorithm.
    * Returns array of chunk objects or null if tree-sitter can't handle it.
+   *
+   * Header-aware budget (research-only ablation, May 2026): set
+   * SWEET_SEARCH_CHUNK_HEADER_OVERHEAD=N to subtract N chars from the
+   * cAST max chunk size, leaving room for the embedding-text headers
+   * (path / parent / symbol / language ≈ 50–100 chars) without spilling
+   * past the embedding cap. Default 0 = byte-identical to shipped. The
+   * audit motivating this lever lives in eval/results/chunk-overflow-audit.md.
    */
   async parseFileToChunks(content, languageId, options = {}) {
     const tree = await this.parse(content, languageId);
     if (!tree) return null;
-    const maxChunkSize = options.maxChunkSize || 2000;
+    const headerOverhead = (() => {
+      const v = parseInt(process.env.SWEET_SEARCH_CHUNK_HEADER_OVERHEAD || '', 10);
+      return Number.isFinite(v) && v >= 0 ? v : 0;
+    })();
+    const maxChunkSize = (options.maxChunkSize || 2000) - headerOverhead;
     this._chunkCounter = 0;
     const children = this._getChildren(tree.rootNode);
@@ -467,6 +500,7 @@ export class TreeSitterProvider {
         const firstBoundary = buffer.find(n => BOUNDARY_TYPES.has(n.type));
         const name = firstBoundary ? this._extractNodeName(firstBoundary) : null;
         const type = firstBoundary ? (NODE_TYPE_MAP[firstBoundary.type] || 'code') : 'code';
+        const signature = firstBoundary ? this._extractSignature(firstBoundary, content) : null;
         chunks.push({
           chunkId: this._nextChunkId(),
@@ -478,6 +512,7 @@ export class TreeSitterProvider {
           endLine: buffer[buffer.length - 1].endPosition.row,
           type,
           name: name || (buffer.length === 1 ? null : null),
+          signature,
         });
       }
       buffer = [];
@@ -536,6 +571,7 @@ export class TreeSitterProvider {
               endLine: node.endPosition.row,
               type: NODE_TYPE_MAP[node.type] || 'code',
               name: this._extractNodeName(node),
+              signature: this._extractSignature(node, content),
             });
           }
         }
@@ -546,6 +582,60 @@ export class TreeSitterProvider {
     return chunks;
   }
+  /**
+   * Extract a compact, single-line signature for a boundary AST node.
+   *
+   * Strategy: find the first body-like child (block / statement_block /
+   * compound_statement / class_body / declaration_list / …), and return
+   * the source span [node.startIndex, body.startIndex) with whitespace
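+   * normalized to single spaces. If no body child is found (e.g.
+   * declarations without a body, abstract methods, interface members),
+   * use the node's full source text instead. Either way the result is
+   * capped at MAX_SIGNATURE_LENGTH characters, ending in `…` when cut.
+   *
+   * Returns null when `node` or `content` is missing, when the node's
+   * type is not in BOUNDARY_TYPES, or when the normalized text is empty.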
+   *
+   * Used by the `signature` R1 embedding-text variant. Intentionally
+   * does NOT alter `text`, `li_text`, or `li_greedy_text` — signature
+   * surface is research-only on `embedding_text`.
+   */
+  _extractSignature(node, content) {
+    if (!node || !content) return null;
+    if (!BOUNDARY_TYPES.has(node.type)) return null;
+    let bodyStart = null;
+    // Try field-name lookup first (works for most modern grammars).
+    const bodyField = node.childForFieldName?.('body');
+    if (bodyField && BODY_TYPES.has(bodyField.type)) {
+      bodyStart = bodyField.startIndex;
+    } else {
+      // Fall back to scanning children for a body-shaped child.
+      for (let i = 0; i < node.childCount; i++) {
+        const child = node.child(i);
+        if (BODY_TYPES.has(child.type)) {
+          bodyStart = child.startIndex;
+          break;
+        }
+      }
+    }
+    let raw;
+    if (bodyStart != null && bodyStart > node.startIndex) {
+      raw = content.substring(node.startIndex, bodyStart);
+    } else {
+      // No body found — declaration only (e.g. abstract method, type
+      // alias). Take the whole node text.
+      raw = content.substring(node.startIndex, node.endIndex);
+    }
+    // Normalize: collapse runs of whitespace (including newlines) to a
+    // single space, drop leading/trailing whitespace.
+    const normalized = raw.replace(/\s+/g, ' ').trim();
+    if (!normalized) return null;
+    if (normalized.length <= MAX_SIGNATURE_LENGTH) return normalized;
+    return normalized.slice(0, MAX_SIGNATURE_LENGTH - 1) + '…';
+  }
   /** Extract symbol name from an AST node */
   _extractNodeName(node) {
     // Try field name first (most reliable)
@@ -662,4 +752,4 @@ export function resetTreeSitterProvider() {
 }
 // Re-export constants for testing
-export { GRAMMAR_MAP, IDENT_TYPES, BOUNDARY_TYPES, NODE_TYPE_MAP, TAGS_QUERIES, CAPTURE_TO_ENTITY_TYPE };
+export { GRAMMAR_MAP, IDENT_TYPES, BOUNDARY_TYPES, BODY_TYPES, MAX_SIGNATURE_LENGTH, NODE_TYPE_MAP, TAGS_QUERIES, CAPTURE_TO_ENTITY_TYPE };

package/core/ranking/cascaded-scorer.js CHANGED Viewed

@@ -121,11 +121,15 @@ function partitionByTokenAvailability(candidates, liIndex) {
   if (!liIndex) {
     return { withTokens: [], withoutTokens: [...candidates] };
   }
-  const available = liIndex.hasTokens(candidates.map(c => c.id || c.entity_id));
+  // Graph-expanded candidates have entity_id-based public ids that don't
+  // match LI-indexed chunk ids; they carry the resolved chunk id under
+  // _liChunkId. Honour it so expanded candidates can participate in MaxSim.
+  const lookupId = (c) => c._liChunkId || c.id || c.entity_id;
+  const available = liIndex.hasTokens(candidates.map(lookupId));
   const withTokens = [];
   const withoutTokens = [];
   for (const c of candidates) {
-    (available.has(c.id || c.entity_id) ? withTokens : withoutTokens).push(c);
+    (available.has(lookupId(c)) ? withTokens : withoutTokens).push(c);
   }
   return { withTokens, withoutTokens };
 }