sweet-search 2.5.1 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/core/cli.js +45 -0
  2. package/core/embedding/embedding-cache.js +90 -4
  3. package/core/embedding/embedding-service.js +27 -5
  4. package/core/graph/graph-expansion.js +215 -36
  5. package/core/graph/graph-extractor.js +196 -11
  6. package/core/graph/graph-search.js +395 -92
  7. package/core/graph/hcgs-generator.js +2 -1
  8. package/core/graph/index.js +2 -0
  9. package/core/graph/repo-map.js +28 -6
  10. package/core/graph/structural-answer-cues.js +168 -0
  11. package/core/graph/structural-callsite-hints.js +40 -0
  12. package/core/graph/structural-context-format.js +40 -0
  13. package/core/graph/structural-context.js +450 -0
  14. package/core/graph/structural-forward-push.js +156 -0
  15. package/core/graph/structural-header-context.js +19 -0
  16. package/core/graph/structural-importance.js +148 -0
  17. package/core/graph/structural-pagerank.js +197 -0
  18. package/core/graph/summary-manager.js +13 -9
  19. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  20. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  21. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  22. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  23. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  24. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  25. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  26. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  27. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  28. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  29. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  30. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  31. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  32. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  33. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  34. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  35. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  36. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  37. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  38. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  39. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  40. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  41. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  42. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  43. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  44. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  45. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  46. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  47. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  48. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  49. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  50. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  51. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  52. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  53. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  54. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  55. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  56. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  57. package/core/indexing/admission-policy.js +139 -0
  58. package/core/indexing/artifact-builder.js +29 -12
  59. package/core/indexing/ast-chunker.js +107 -30
  60. package/core/indexing/dedup/exemplar-selector.js +19 -1
  61. package/core/indexing/gitignore-filter.js +223 -0
  62. package/core/indexing/incremental-tracker.js +99 -30
  63. package/core/indexing/index-codebase-v21.js +37 -7
  64. package/core/indexing/index-maintainer.mjs +698 -6
  65. package/core/indexing/indexer-ann.js +99 -15
  66. package/core/indexing/indexer-build.js +158 -45
  67. package/core/indexing/indexer-empty-baseline.js +80 -0
  68. package/core/indexing/indexer-manifest.js +66 -0
  69. package/core/indexing/indexer-phases.js +56 -23
  70. package/core/indexing/indexer-sparse-gram.js +54 -13
  71. package/core/indexing/indexer-utils.js +26 -208
  72. package/core/indexing/indexing-file-policy.js +32 -7
  73. package/core/indexing/maintainer-launcher.mjs +137 -0
  74. package/core/indexing/merkle-tracker.js +251 -244
  75. package/core/indexing/model-pool.js +46 -5
  76. package/core/infrastructure/code-graph-repository.js +758 -6
  77. package/core/infrastructure/code-graph-visibility.js +157 -0
  78. package/core/infrastructure/codebase-repository.js +100 -13
  79. package/core/infrastructure/config/search.js +1 -1
  80. package/core/infrastructure/db-utils.js +118 -0
  81. package/core/infrastructure/dedup-hashing.js +10 -13
  82. package/core/infrastructure/hardware-capability.js +17 -7
  83. package/core/infrastructure/index.js +10 -2
  84. package/core/infrastructure/init-config.js +138 -0
  85. package/core/infrastructure/language-patterns/maps.js +4 -1
  86. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  87. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  88. package/core/infrastructure/language-patterns.js +69 -0
  89. package/core/infrastructure/model-registry.js +20 -0
  90. package/core/infrastructure/native-inference.js +7 -12
  91. package/core/infrastructure/native-resolver.js +52 -37
  92. package/core/infrastructure/native-sparse-gram.js +261 -20
  93. package/core/infrastructure/native-tokenizer.js +6 -15
  94. package/core/infrastructure/simd-distance.js +10 -16
  95. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  96. package/core/infrastructure/structural-alias-resolver.js +122 -0
  97. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  98. package/core/infrastructure/structural-context-repository.js +472 -0
  99. package/core/infrastructure/structural-context-utils.js +51 -0
  100. package/core/infrastructure/structural-graph-signals.js +121 -0
  101. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  102. package/core/infrastructure/structural-source-definitions.js +100 -0
  103. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  104. package/core/infrastructure/tree-sitter-provider.js +811 -37
  105. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  106. package/core/query/query-router.js +55 -5
  107. package/core/ranking/file-kind-ranking.js +2192 -15
  108. package/core/ranking/late-interaction-index.js +87 -12
  109. package/core/search/cli-decoration.js +290 -0
  110. package/core/search/context-expander.js +988 -78
  111. package/core/search/index.js +1 -0
  112. package/core/search/output-policy.js +275 -0
  113. package/core/search/search-anchor.js +499 -0
  114. package/core/search/search-boost.js +93 -1
  115. package/core/search/search-cli.js +61 -204
  116. package/core/search/search-hybrid.js +250 -10
  117. package/core/search/search-pattern-chunks.js +57 -8
  118. package/core/search/search-pattern-planner.js +68 -9
  119. package/core/search/search-pattern-prefilter.js +30 -10
  120. package/core/search/search-pattern-ripgrep.js +40 -4
  121. package/core/search/search-pattern-sparse-overlay.js +256 -0
  122. package/core/search/search-pattern.js +117 -29
  123. package/core/search/search-postprocess.js +479 -5
  124. package/core/search/search-read-semantic.js +277 -23
  125. package/core/search/search-read.js +82 -64
  126. package/core/search/search-reader-pin.js +71 -0
  127. package/core/search/search-rrf.js +279 -0
  128. package/core/search/search-semantic.js +110 -5
  129. package/core/search/search-server.js +273 -54
  130. package/core/search/search-trace.js +107 -0
  131. package/core/search/server-identity.js +93 -0
  132. package/core/search/session-daemon-prewarm.mjs +33 -10
  133. package/core/search/sweet-search.js +414 -9
  134. package/core/skills/sweet-index/SKILL.md +8 -6
  135. package/core/start-server.js +13 -2
  136. package/core/vector-store/binary-hnsw-index.js +194 -30
  137. package/core/vector-store/float-vector-store.js +96 -6
  138. package/core/vector-store/hnsw-index.js +220 -49
  139. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  140. package/eval/agent-read-workflows/bin/ss-find +15 -0
  141. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  142. package/eval/agent-read-workflows/bin/ss-read +14 -0
  143. package/eval/agent-read-workflows/bin/ss-search +18 -0
  144. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  145. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  146. package/mcp/read-tool.js +109 -0
  147. package/mcp/server.js +55 -15
  148. package/mcp/tool-handlers.js +14 -124
  149. package/mcp/trace-tool.js +81 -0
  150. package/package.json +25 -10
  151. package/scripts/hooks/intercept-read.mjs +55 -0
  152. package/scripts/hooks/remind-tools.mjs +40 -0
  153. package/scripts/init.js +698 -54
  154. package/scripts/inject-agent-instructions.js +431 -0
  155. package/scripts/install-prompt-reminders.js +188 -0
  156. package/scripts/install-tool-enforcement.js +220 -0
  157. package/scripts/smoke-test.js +12 -9
  158. package/scripts/uninstall.js +427 -23
  159. package/scripts/write-claude-rules.js +110 -0
package/core/cli.js CHANGED
@@ -20,6 +20,9 @@ if (args[0] === 'init') {
20
20
  } else if (args[0] === 'prewarm-vocab') {
21
21
  const { handlePrewarmVocabCli } = await import('./vocabulary/index.js');
22
22
  await handlePrewarmVocabCli(args.slice(1));
23
+ } else if (args[0] === 'reconcile' || args[0] === 'rebuild') {
24
+ const { handleIncrementalCli } = await import('./incremental-indexing/application/operator-cli.mjs');
25
+ await handleIncrementalCli(args[0], args.slice(1));
23
26
  } else if (args[0] === 'read') {
24
27
  // Filesystem-grounded reader; runs in JS (no native equivalent yet).
25
28
  const { handleReadCli } = await import('./search/search-read.js');
@@ -28,6 +31,30 @@ if (args[0] === 'init') {
28
31
  // Hybrid span-selection reader; runs in JS (depends on LI index + ranking).
29
32
  const { handleReadSemanticCli } = await import('./search/search-read-semantic.js');
30
33
  await handleReadSemanticCli(args.slice(1));
34
+ } else if (args[0] === 'trace') {
35
+ // Unified structural code context: callers, callees, and impact.
36
+ const { handleTraceCli } = await import('./search/search-trace.js');
37
+ await handleTraceCli(args.slice(1));
38
+ } else if (args[0] === 'index') {
39
+ // Indexing pipeline. Forwarded to index-codebase-v21.js::main(), which
40
+ // reads its own flags via process.argv. Setting argv here is required
41
+ // because the indexer's parseArgs reads process.argv.slice(2) by default.
42
+ // Without this subcommand, npm-installed users had no way to invoke
43
+ // indexing — `node ./node_modules/sweet-search/core/indexing/index-codebase-v21.js`
44
+ // was a silent no-op (direct-run guard mismatched under symlinked installs)
45
+ // and the bin had no `index` entry at all. Forwards every argument after
46
+ // `index` so existing flag combos (--full / --graph-only / --vectors-only /
47
+ // --files-from-stdin / --late-interaction-model=… / etc.) all work.
48
+ const indexerArgs = args.slice(1);
49
+ const hasAddHint = indexerArgs.includes('--add') || indexerArgs.some((arg) => arg.startsWith('--add='));
50
+ if (hasAddHint) {
51
+ const { handleIndexAddCli } = await import('./incremental-indexing/application/operator-cli.mjs');
52
+ await handleIndexAddCli(indexerArgs);
53
+ } else {
54
+ process.argv = [process.argv[0], 'index-codebase-v21.js', ...indexerArgs];
55
+ const { main: runIndexer } = await import('./indexing/index-codebase-v21.js');
56
+ await runIndexer();
57
+ }
31
58
  } else if (args[0] === '--serve' || args[0] === '--stop') {
32
59
  // Warm search server lifecycle is implemented in JS.
33
60
  const { runCli } = await import('./search/index.js');
@@ -37,8 +64,16 @@ if (args[0] === 'init') {
37
64
 
38
65
  Usage:
39
66
  sweet-search <query> Search the indexed codebase
67
+ sweet-search trace <symbol> Structural context: callers, callees, impact
40
68
  sweet-search read <file...> Filesystem-grounded read (1-20 files)
41
69
  sweet-search read-semantic <f> <q> Return only file spans relevant to a query
70
+ sweet-search index [options] Build / update the codebase index
71
+ sweet-search index --add <path> Hint a file as dirty
72
+ sweet-search reconcile status Show incremental epoch and dirty status
73
+ sweet-search reconcile inspect <path> Explain why a file is dirty or clean
74
+ sweet-search reconcile pause|resume Pause or resume automatic reconcile work
75
+ sweet-search rebuild status Show incremental maintenance queue
76
+ sweet-search rebuild force <tier> Queue maintenance for a tier
42
77
  sweet-search init [options] Set up runtime assets and models
43
78
  sweet-search uninstall [opts] Remove local state created by init
44
79
  sweet-search prewarm-vocab [file] Pre-warm vocabulary cache with terms
@@ -50,6 +85,16 @@ Options:
50
85
  --json Output results as JSON
51
86
  --cold Force cold start (skip warm server)
52
87
 
88
+ Indexing flags (sweet-search index ...):
89
+ --full Full reindex from scratch
90
+ --graph-only Build code graph only
91
+ --vectors-only Build vectors + HNSW only (skips code graph)
92
+ --files-from-stdin Read newline-delimited paths from stdin
93
+ --add <path> Queue a dirty-file hint without running the indexer
94
+ --late-interaction-model=ID Override the LI variant for this run
95
+ --no-late-interaction Skip LI index build
96
+ --quiet | --verbose Logging verbosity
97
+
53
98
  Run 'sweet-search init --help' or 'sweet-search uninstall --help' for subcommand options.`);
54
99
  } else {
55
100
  const { resolveNativeBinary } = await import('./infrastructure/index.js');
package/core/embedding/embedding-cache.js CHANGED
@@ -45,6 +45,7 @@ export class LRUCache {
45
45
  }
46
46
 
47
47
  has(key) { return this.cache.has(key); }
48
+ delete(key) { this.hitCount.delete(key); return this.cache.delete(key); }
48
49
  getHitCount(key) { return this.hitCount.get(key) || 0; }
49
50
  size() { return this.cache.size; }
50
51
  clear() { this.cache.clear(); this.hitCount.clear(); }
@@ -191,6 +192,62 @@ export class QueryStats {
191
192
  // model is not silently served when a different model is active.
192
193
  const VOCAB_SCHEMA_VERSION = 3;
193
194
 
195
+ /**
196
+ * Coerce an input value into a Float32Array suitable for downstream embedding
197
+ * math (truncateForHNSW, late-interaction MaxSim, cosine similarity).
198
+ *
199
+ * Why this exists: persisted vocabularies are JSON-serialised. JSON.stringify
200
+ * on a Float32Array produces an indexed object `{"0": v0, "1": v1, ...}`,
201
+ * not an array. After `JSON.parse`, the value has `.length === undefined`,
202
+ * `.slice === undefined`, and crashes any downstream consumer that calls
203
+ * vector methods. This helper repairs the value at the cache boundary so
204
+ * the rest of the embedding pipeline can rely on a uniform vector contract.
205
+ *
206
+ * Accepted inputs:
207
+ * - Float32Array → returned as-is
208
+ * - Array<number> → wrapped in Float32Array
209
+ * - Float64Array / Int*Array etc. → copied into Float32Array
210
+ * - Plain object with stringly-keyed numeric indices ("0","1",...,"N-1")
211
+ * → reconstructed as Float32Array of length N
212
+ *
213
+ * Returns null when the input cannot be sensibly interpreted as a vector
214
+ * (callers should drop the cache entry and re-derive).
215
+ *
216
+ * @param {*} value
217
+ * @returns {Float32Array|null}
218
+ */
219
+ export function coerceToFloat32Vector(value) {
220
+ if (value == null) return null;
221
+ if (value instanceof Float32Array) return value;
222
+ if (Array.isArray(value)) return Float32Array.from(value);
223
+ // Other typed arrays: copy values into a Float32Array.
224
+ if (ArrayBuffer.isView(value) && typeof value.length === 'number') {
225
+ return Float32Array.from(value);
226
+ }
227
+ // Plain object form from JSON-deserialised Float32Array.
228
+ if (typeof value === 'object') {
229
+ const keys = Object.keys(value);
230
+ if (keys.length === 0) return null;
231
+ // All keys must be string-encoded non-negative integers and contiguous
232
+ // from 0 to length-1. (We do not try to "fill gaps" — that would silently
233
+ // mask a real bug.)
234
+ const indices = new Array(keys.length);
235
+ for (let i = 0; i < keys.length; i++) {
236
+ const k = keys[i];
237
+ // Reject anything that isn't an integer-shaped key.
238
+ if (!/^\d+$/.test(k)) return null;
239
+ const n = +k;
240
+ if (!Number.isInteger(n) || n < 0 || n >= keys.length) return null;
241
+ indices[n] = value[k];
242
+ }
243
+ for (let i = 0; i < indices.length; i++) {
244
+ if (typeof indices[i] !== 'number' || !Number.isFinite(indices[i])) return null;
245
+ }
246
+ return Float32Array.from(indices);
247
+ }
248
+ return null;
249
+ }
250
+
194
251
  /** Build the embedding-fingerprint we expect a vocabulary file to match. */
195
252
  function currentVocabFingerprint() {
196
253
  return {
@@ -269,10 +326,27 @@ export class Vocabulary {
269
326
  this.terms.clear();
270
327
  } else {
271
328
  this.metadata = { ...this.metadata, ...(data.metadata || {}) };
272
- for (const [term, embedding] of Object.entries(data.terms || {})) {
273
- this.terms.set(term, embedding);
329
+ let normalized = 0;
330
+ let dropped = 0;
331
+ for (const [term, raw] of Object.entries(data.terms || {})) {
332
+ // Coerce to Float32Array. Persisted vocabs JSON-serialise typed
333
+ // arrays as indexed objects (`{"0": v0, ...}`), which otherwise
334
+ // crash downstream `embedding.slice(...)` calls (see
335
+ // `truncateForHNSW`). Reject any entry we cannot interpret as a
336
+ // vector — better to re-embed than to surface a corrupt vector.
337
+ const vec = coerceToFloat32Vector(raw);
338
+ if (vec) {
339
+ this.terms.set(term, vec);
340
+ normalized++;
341
+ } else {
342
+ dropped++;
343
+ }
344
+ }
345
+ if (dropped > 0) {
346
+ console.log(`Vocabulary: Loaded ${normalized} pre-computed embeddings (dropped ${dropped} unrecognised)`);
347
+ } else {
348
+ console.log(`Vocabulary: Loaded ${normalized} pre-computed embeddings`);
274
349
  }
275
- console.log(`Vocabulary: Loaded ${this.terms.size} pre-computed embeddings`);
276
350
  }
277
351
  }
278
352
  } catch (err) {
@@ -292,7 +366,18 @@ export class Vocabulary {
292
366
  this.metadata.model = EMBEDDING_CONFIG.model;
293
367
  this.metadata.dimension = EMBEDDING_CONFIG.dimension;
294
368
  if (!this.metadata.created) this.metadata.created = this.metadata.lastUpdated;
295
- const data = { metadata: this.metadata, terms: Object.fromEntries(this.terms) };
369
+ // Normalise to plain arrays so JSON.stringify produces a compact,
370
+ // round-trippable form. Float32Array would otherwise serialise as
371
+ // an indexed object ({"0": v0, "1": v1, ...}) which load() can read
372
+ // (via coerceToFloat32Vector) but which is wasteful and was the
373
+ // shape that originally caused the `embedding.slice` bug.
374
+ const termsOut = {};
375
+ for (const [term, vec] of this.terms.entries()) {
376
+ termsOut[term] = vec instanceof Float32Array || ArrayBuffer.isView(vec)
377
+ ? Array.from(vec)
378
+ : vec;
379
+ }
380
+ const data = { metadata: this.metadata, terms: termsOut };
296
381
  await writeJsonAtomic(this.vocabPath, JSON.stringify(data, null, 2));
297
382
  });
298
383
  }
@@ -303,6 +388,7 @@ export class Vocabulary {
303
388
  }
304
389
  set(term, embedding) { this.terms.set(this.normalize(term), embedding); }
305
390
  has(term) { return this.terms.has(this.normalize(term)); }
391
+ delete(term) { return this.terms.delete(this.normalize(term)); }
306
392
  normalize(term) { return term.toLowerCase().trim(); }
307
393
  size() { return this.terms.size; }
308
394
 
package/core/embedding/embedding-service.js CHANGED
@@ -45,6 +45,7 @@ import {
45
45
  queryDeduplicator,
46
46
  queryStats,
47
47
  cacheStats,
48
+ coerceToFloat32Vector,
48
49
  getCacheStats as _getCacheStats,
49
50
  getSemanticCacheStats,
50
51
  clearCache,
@@ -205,17 +206,38 @@ export async function getEmbedding(text, options = {}) {
205
206
  if (useCache && EMBEDDING_CONFIG.cache?.enabled) {
206
207
  const cached = queryCache.get(cacheKey);
207
208
  if (cached) {
208
- cacheStats.hits++;
209
- return { embedding: cached, cached: true, source: 'lru', latency_us: Math.round((performance.now() - start) * 1000) };
209
+ // Defensive guard: a cache value MUST be a vector with .length and
210
+ // .slice. Persisted vocabularies that round-tripped through JSON
211
+ // produce indexed-object shapes which crash downstream consumers.
212
+ // Coerce; if unrecoverable, drop the entry and fall through.
213
+ const cachedVec = coerceToFloat32Vector(cached);
214
+ if (cachedVec) {
215
+ if (cachedVec !== cached) queryCache.set(cacheKey, cachedVec);
216
+ cacheStats.hits++;
217
+ return { embedding: cachedVec, cached: true, source: 'lru', latency_us: Math.round((performance.now() - start) * 1000) };
218
+ }
219
+ queryCache.delete?.(cacheKey);
220
+ console.warn(`[embedding] LRU cache held non-vector for "${cacheKey.slice(0, 60)}"; regenerating`);
210
221
  }
211
222
 
212
223
  if (isQuery && EMBEDDING_CONFIG.cache?.useVocabulary !== false) {
213
224
  await vocabulary.load();
214
225
  const vocabHit = vocabulary.get(text);
215
226
  if (vocabHit) {
216
- cacheStats.vocabularyHits++;
217
- queryCache.set(cacheKey, vocabHit);
218
- return { embedding: vocabHit, cached: true, source: 'vocabulary', latency_us: Math.round((performance.now() - start) * 1000) };
227
+ const vocabVec = coerceToFloat32Vector(vocabHit);
228
+ if (vocabVec) {
229
+ // Backfill the in-memory vocab map with the typed-array form so
230
+ // subsequent hits skip re-coercion.
231
+ if (vocabVec !== vocabHit) vocabulary.set?.(text, vocabVec);
232
+ cacheStats.vocabularyHits++;
233
+ queryCache.set(cacheKey, vocabVec);
234
+ return { embedding: vocabVec, cached: true, source: 'vocabulary', latency_us: Math.round((performance.now() - start) * 1000) };
235
+ }
236
+ // Unrecoverable vocab entry — drop it and continue. (load() now
237
+ // normalises on read, so this branch should be unreachable in
238
+ // practice; it is the belt-and-braces for older code paths.)
239
+ vocabulary.delete?.(text);
240
+ console.warn(`[embedding] vocabulary held non-vector for "${text.slice(0, 60)}"; dropping and regenerating`);
219
241
  }
220
242
  }
221
243
  }