npm - sweet-search - Versions diffs - 0.0.1 → 2.3.0 - Mend

sweet-search 0.0.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161) hide show

package/LICENSE +190 -0
package/NOTICE +23 -0
package/core/cli.js +51 -0
package/core/config.js +27 -0
package/core/embedding/embedding-cache.js +467 -0
package/core/embedding/embedding-local-model.js +845 -0
package/core/embedding/embedding-remote.js +492 -0
package/core/embedding/embedding-service.js +712 -0
package/core/embedding/embedding-telemetry.js +219 -0
package/core/embedding/index.js +40 -0
package/core/graph/community-detector.js +294 -0
package/core/graph/graph-expansion.js +839 -0
package/core/graph/graph-extractor.js +2304 -0
package/core/graph/graph-search.js +2148 -0
package/core/graph/hcgs-generator.js +666 -0
package/core/graph/index.js +16 -0
package/core/graph/leiden-algorithm.js +547 -0
package/core/graph/relationship-resolver.js +366 -0
package/core/graph/repo-map.js +408 -0
package/core/graph/summary-manager.js +549 -0
package/core/indexing/artifact-builder.js +1054 -0
package/core/indexing/ast-chunker.js +709 -0
package/core/indexing/chunking/chunk-builder.js +170 -0
package/core/indexing/chunking/markdown-chunker.js +503 -0
package/core/indexing/chunking/plaintext-chunker.js +104 -0
package/core/indexing/dedup/dedup-phase.js +159 -0
package/core/indexing/dedup/exemplar-selector.js +65 -0
package/core/indexing/document-chunker.js +56 -0
package/core/indexing/incremental-parser.js +390 -0
package/core/indexing/incremental-tracker.js +761 -0
package/core/indexing/index-codebase-v21.js +472 -0
package/core/indexing/index-maintainer.mjs +1674 -0
package/core/indexing/index.js +90 -0
package/core/indexing/indexer-ann.js +1077 -0
package/core/indexing/indexer-build.js +742 -0
package/core/indexing/indexer-phases.js +800 -0
package/core/indexing/indexer-pool.js +764 -0
package/core/indexing/indexer-sparse-gram.js +98 -0
package/core/indexing/indexer-utils.js +536 -0
package/core/indexing/indexer-worker.js +148 -0
package/core/indexing/li-skip-policy.js +225 -0
package/core/indexing/merkle-tracker.js +244 -0
package/core/indexing/model-pool.js +166 -0
package/core/infrastructure/code-graph-repository.js +120 -0
package/core/infrastructure/codebase-repository.js +131 -0
package/core/infrastructure/config/dedup.js +54 -0
package/core/infrastructure/config/embedding.js +298 -0
package/core/infrastructure/config/graph.js +80 -0
package/core/infrastructure/config/index.js +82 -0
package/core/infrastructure/config/indexing.js +8 -0
package/core/infrastructure/config/platform.js +254 -0
package/core/infrastructure/config/ranking.js +221 -0
package/core/infrastructure/config/search.js +396 -0
package/core/infrastructure/config/translation.js +89 -0
package/core/infrastructure/config/vector-store.js +114 -0
package/core/infrastructure/constants.js +86 -0
package/core/infrastructure/coreml-cascade.js +909 -0
package/core/infrastructure/coreml-cascade.json +46 -0
package/core/infrastructure/coreml-provider.js +81 -0
package/core/infrastructure/db-utils.js +69 -0
package/core/infrastructure/dedup-hashing.js +83 -0
package/core/infrastructure/hardware-capability.js +332 -0
package/core/infrastructure/index.js +104 -0
package/core/infrastructure/language-patterns/maps.js +121 -0
package/core/infrastructure/language-patterns/registry-core.js +323 -0
package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
package/core/infrastructure/language-patterns/registry.js +19 -0
package/core/infrastructure/language-patterns.js +141 -0
package/core/infrastructure/llm-provider.js +733 -0
package/core/infrastructure/manifest.json +46 -0
package/core/infrastructure/maxsim.wasm +0 -0
package/core/infrastructure/model-fetcher.js +423 -0
package/core/infrastructure/model-registry.js +214 -0
package/core/infrastructure/native-inference.js +587 -0
package/core/infrastructure/native-resolver.js +187 -0
package/core/infrastructure/native-sparse-gram.js +257 -0
package/core/infrastructure/native-tokenizer.js +160 -0
package/core/infrastructure/onnx-mutex.js +45 -0
package/core/infrastructure/onnx-session-utils.js +261 -0
package/core/infrastructure/ort-pipeline.js +111 -0
package/core/infrastructure/project-detector.js +102 -0
package/core/infrastructure/quantization.js +410 -0
package/core/infrastructure/simd-distance.js +502 -0
package/core/infrastructure/simd-distance.wasm +0 -0
package/core/infrastructure/tree-sitter-provider.js +665 -0
package/core/infrastructure/webgpu-maxsim.js +222 -0
package/core/query/index.js +35 -0
package/core/query/intent-detector.js +201 -0
package/core/query/intent-router.js +156 -0
package/core/query/query-router-catboost.js +222 -0
package/core/query/query-router-ml.js +266 -0
package/core/query/query-router.js +213 -0
package/core/ranking/cascaded-scorer.js +379 -0
package/core/ranking/flashrank.js +810 -0
package/core/ranking/index.js +49 -0
package/core/ranking/late-interaction-index.js +2383 -0
package/core/ranking/late-interaction-model.js +812 -0
package/core/ranking/local-reranker.js +374 -0
package/core/ranking/mmr.js +379 -0
package/core/ranking/quality-scorer.js +363 -0
package/core/search/context-expander.js +1167 -0
package/core/search/dedup/sibling-expander.js +327 -0
package/core/search/index.js +16 -0
package/core/search/search-boost.js +259 -0
package/core/search/search-cli.js +544 -0
package/core/search/search-format.js +282 -0
package/core/search/search-fusion.js +327 -0
package/core/search/search-hybrid.js +204 -0
package/core/search/search-pattern-chunks.js +337 -0
package/core/search/search-pattern-planner.js +439 -0
package/core/search/search-pattern-prefilter.js +412 -0
package/core/search/search-pattern-ripgrep.js +663 -0
package/core/search/search-pattern.js +463 -0
package/core/search/search-postprocess.js +452 -0
package/core/search/search-semantic.js +706 -0
package/core/search/search-server.js +554 -0
package/core/search/session-daemon-prewarm.mjs +164 -0
package/core/search/session-warmup.js +595 -0
package/core/search/sweet-search.js +632 -0
package/core/search/warmup-metrics.js +532 -0
package/core/start-server.js +6 -0
package/core/training/query-router/features/extractor.js +762 -0
package/core/training/query-router/features/multilingual-patterns.js +431 -0
package/core/training/query-router/features/text-segmenter.js +303 -0
package/core/training/query-router/features/unicode-utils.js +383 -0
package/core/training/query-router/output/v45_router_d4.js +11521 -0
package/core/training/query-router/output/v46_router_d4.js +11498 -0
package/core/vector-store/binary-heap.js +227 -0
package/core/vector-store/binary-hnsw-index.js +1004 -0
package/core/vector-store/float-vector-store.js +234 -0
package/core/vector-store/hnsw-index.js +580 -0
package/core/vector-store/index.js +39 -0
package/core/vector-store/seismic-index.js +498 -0
package/core/vocabulary/index.js +84 -0
package/core/vocabulary/vocab-constants.js +20 -0
package/core/vocabulary/vocab-miner-extractors.js +375 -0
package/core/vocabulary/vocab-miner-nl.js +404 -0
package/core/vocabulary/vocab-miner-utils.js +146 -0
package/core/vocabulary/vocab-miner.js +574 -0
package/core/vocabulary/vocab-prewarm-cli.js +110 -0
package/core/vocabulary/vocab-ranker.js +492 -0
package/core/vocabulary/vocab-warmer.js +523 -0
package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
package/core/vocabulary/vocabulary-utils.js +704 -0
package/crates/wasm-router/pkg/package.json +13 -0
package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
package/mcp/config-gen.js +121 -0
package/mcp/server.js +335 -0
package/mcp/tool-handlers.js +476 -0
package/package.json +131 -9
package/scripts/benchmark-harness.js +794 -0
package/scripts/init.js +1058 -0
package/scripts/smoke-test.js +435 -0
package/scripts/uninstall.js +478 -0
package/scripts/verify-runtime.js +176 -0

package/core/embedding/embedding-service.js ADDED Viewed

@@ -0,0 +1,712 @@
+#!/usr/bin/env node
+/**
+ * Embedding Service v2.2 (SOTA December 2025)
+ *
+ * Facade module - imports from specialized sub-modules and re-exports all public APIs.
+ * Hub functions (generateEmbedding, getEmbedding, etc.) live here because they
+ * orchestrate across remote, local-model, and cache sub-modules.
+ */
+import { pathToFileURL } from 'node:url';
+import { EMBEDDING_CONFIG, EMBEDDING_PROVIDERS } from '../infrastructure/config/index.js';
+import { wasmHammingDistance, wasmInt8Cosine, wasmAsymmetricDistance, wasmInt8BatchDot, isWasmAvailable } from '../infrastructure/simd-distance.js';
+// --- Sub-module imports (no circular deps) ---
+import {
+  circuitBreaker,
+  _providerCompressionSupport,
+  looksLikeJson,
+  RateLimiter,
+  TimeWindowRateLimiter,
+  rateLimiters,
+  timeWindowLimiters,
+  callVoyageAPI,
+  callMistralAPI,
+  callJinaAPI,
+} from './embedding-remote.js';
+import {
+  INDEXING_MAX_LENGTH,
+  QUERY_MAX_LENGTH,
+  callLocalModel,
+  callLocalModelBucketed,
+  applyLocalQueryPrefix,
+  getLocalPipeline,
+  unloadLocalModel,
+  isLocalModelLoaded,
+  configureLocalModelRuntime,
+  resetLocalModelRuntime,
+} from './embedding-local-model.js';
+import {
+  queryCache,
+  vocabulary,
+  semanticCache,
+  queryDeduplicator,
+  queryStats,
+  cacheStats,
+  getCacheStats as _getCacheStats,
+  getSemanticCacheStats,
+  clearCache,
+  getFrequentQueries,
+  addToVocabulary as _addToVocabulary,
+  expandVocabulary as _expandVocabulary,
+  autoPersistFrequentQueries,
+  registerAutoPersistOnExit,
+} from './embedding-cache.js';
+// Re-export sub-module symbols that were previously named exports
+export { circuitBreaker };
+export { TimeWindowRateLimiter };
+// =============================================================================
+// UNIFIED EMBEDDING SERVICE (hub functions)
+// =============================================================================
+/** Generate embedding using the active provider with circuit breaker */
+async function generateEmbedding(text, provider = EMBEDDING_CONFIG.provider, isQuery = false) {
+  const localText = isQuery ? applyLocalQueryPrefix(text) : text;
+  const localMaxLength = isQuery ? QUERY_MAX_LENGTH : INDEXING_MAX_LENGTH;
+  const config = EMBEDDING_PROVIDERS[provider];
+  if (!config || !config.enabled) {
+    return (await callLocalModel([localText], { maxLength: localMaxLength }))[0];
+  }
+  if (provider !== 'local') {
+    const circuitCheck = circuitBreaker.canRequest();
+    if (!circuitCheck.allowed) {
+      console.warn(`[embedding-service] Circuit breaker blocked request: ${circuitCheck.reason}, falling back to local`);
+      return (await callLocalModel([localText], { maxLength: localMaxLength }))[0];
+    }
+  }
+  const rateLimit = config.rateLimit;
+  let lastError = null;
+  for (let attempt = 0; attempt < (rateLimit?.maxRetries || 3); attempt++) {
+    try {
+      if (rateLimiters[provider]) {
+        await rateLimiters[provider].waitForSlot(text.length);
+      }
+      cacheStats.apiCalls++;
+      let result;
+      switch (provider) {
+        case 'voyage':
+          result = (await callVoyageAPI([text], config, { inputType: isQuery ? 'query' : 'document' }))[0];
+          break;
+        case 'mistral':
+          result = (await callMistralAPI([text], config))[0];
+          break;
+        case 'jina':
+          result = (await callJinaAPI([text], config, { task: isQuery ? 'retrieval.query' : 'retrieval.passage' }))[0];
+          break;
+        case 'local':
+          result = (await callLocalModel([localText], { maxLength: localMaxLength }))[0];
+          break;
+        default:
+          throw new Error(`Unknown provider: ${provider}`);
+      }
+      if (provider !== 'local') circuitBreaker.recordSuccess();
+      return result;
+    } catch (err) {
+      lastError = err;
+      if (provider !== 'local') circuitBreaker.recordFailure();
+      const delay = (rateLimit?.retryDelay || 1000) * Math.pow(rateLimit?.backoffMultiplier || 2, attempt);
+      console.warn(`Embedding attempt ${attempt + 1} failed: ${err.message}, retrying in ${delay}ms`);
+      await new Promise(resolve => setTimeout(resolve, delay));
+    }
+  }
+  console.warn(`All attempts failed for ${provider}, falling back to local model`);
+  return (await callLocalModel([localText], { maxLength: localMaxLength }))[0];
+}
+/** Generate embeddings for multiple texts (batched, V2 concurrent) */
+async function generateEmbeddings(texts, provider = EMBEDDING_CONFIG.provider, options = {}) {
+  if (!texts || texts.length === 0) return [];
+  const localBucketOptions = {
+    maxLength: options.maxLength,
+    hardCap: options.hardCap,
+    resolveHardCap: options.resolveHardCap,
+    batchingSafety: options.batchingSafety,
+    onProgress: options.onProgress,
+  };
+  const config = EMBEDDING_PROVIDERS[provider];
+  // Local model: always use direct bucketed path for global length-sorting.
+  // The API-style batched path (below) splits texts into batchSize=32 chunks
+  // which destroys length-sorting and causes massive padding waste (5.5x slower).
+  if (!config || !config.enabled || provider === 'local') {
+    return callLocalModelBucketed(texts, localBucketOptions);
+  }
+  const batchSize = config.batchSize || 32;
+  const concurrency = options.concurrency || 4;
+  const apiOptions = {};
+  if (options.outputDimension) apiOptions.outputDimension = options.outputDimension;
+  if (options.inputType) apiOptions.inputType = options.inputType;
+  const batches = [];
+  for (let i = 0; i < texts.length; i += batchSize) {
+    batches.push(texts.slice(i, i + batchSize));
+  }
+  const results = [];
+  async function embedBatch(batch) {
+    if (timeWindowLimiters[provider]) {
+      await timeWindowLimiters[provider].acquire();
+    }
+    cacheStats.apiCalls++;
+    switch (provider) {
+      case 'voyage': return callVoyageAPI(batch, config, apiOptions);
+      case 'mistral': return callMistralAPI(batch, config, apiOptions);
+      case 'jina': return callJinaAPI(batch, config, apiOptions);
+      case 'local': return callLocalModelBucketed(batch, localBucketOptions);
+      default: throw new Error(`Unknown provider: ${provider}`);
+    }
+  }
+  for (let i = 0; i < batches.length; i += concurrency) {
+    const concurrent = batches.slice(i, i + concurrency);
+    const batchResults = await Promise.all(
+      concurrent.map(async (batch) => {
+        try {
+          return await embedBatch(batch);
+        } catch (err) {
+          console.warn(`Batch embedding failed: ${err.message}, falling back to local`);
+          return callLocalModelBucketed(batch, localBucketOptions);
+        }
+      })
+    );
+    for (const batchResult of batchResults) {
+      results.push(...batchResult);
+    }
+  }
+  return results;
+}
+/** Get embedding with caching (LRU -> Vocabulary -> Semantic Cache -> API) */
+export async function getEmbedding(text, options = {}) {
+  const { useCache = true, useSemanticCache = true, isQuery = false } = options;
+  const start = performance.now();
+  const cacheKey = isQuery ? `q:${text}` : text;
+  if (useCache && EMBEDDING_CONFIG.cache?.enabled) {
+    const cached = queryCache.get(cacheKey);
+    if (cached) {
+      cacheStats.hits++;
+      return { embedding: cached, cached: true, source: 'lru', latency_us: Math.round((performance.now() - start) * 1000) };
+    }
+    if (isQuery) {
+      await vocabulary.load();
+      const vocabHit = vocabulary.get(text);
+      if (vocabHit) {
+        cacheStats.vocabularyHits++;
+        queryCache.set(cacheKey, vocabHit);
+        return { embedding: vocabHit, cached: true, source: 'vocabulary', latency_us: Math.round((performance.now() - start) * 1000) };
+      }
+    }
+  }
+  if (isQuery && useSemanticCache && EMBEDDING_CONFIG.isRemote) {
+    const semanticResult = await semanticCache.findSimilar(text);
+    if (semanticResult?.voyageEmb) {
+      cacheStats.hits++;
+      if (useCache && EMBEDDING_CONFIG.cache?.enabled) {
+        queryCache.set(cacheKey, semanticResult.voyageEmb);
+      }
+      return {
+        embedding: semanticResult.voyageEmb,
+        cached: true,
+        source: 'semantic-cache',
+        similarity: semanticResult.similarity,
+        matchedQuery: semanticResult.matchedQuery,
+        latency_us: Math.round((performance.now() - start) * 1000),
+      };
+    }
+    var localEmbForCache = semanticResult?.localEmb;
+  }
+  cacheStats.misses++;
+  const inflight = queryDeduplicator.get(cacheKey);
+  if (inflight) {
+    const result = await inflight;
+    return { ...result, source: 'deduplicated', latency_us: Math.round((performance.now() - start) * 1000) };
+  }
+  const embeddingPromise = generateEmbedding(text, EMBEDDING_CONFIG.provider, isQuery);
+  queryDeduplicator.set(cacheKey, embeddingPromise.then(emb => ({ embedding: emb })));
+  const embedding = await embeddingPromise;
+  if (useCache && EMBEDDING_CONFIG.cache?.enabled) {
+    queryCache.set(cacheKey, embedding);
+    if (localEmbForCache) {
+      semanticCache.add(text, localEmbForCache, embedding);
+    }
+    if (isQuery && EMBEDDING_CONFIG.cache?.autoExpand) {
+      await queryStats.load();
+      const usageCount = queryStats.increment(text);
+      queryStats.save().catch(() => {});
+      const threshold = EMBEDDING_CONFIG.cache?.expansionThreshold || 3;
+      if (usageCount >= threshold && !vocabulary.has(text)) {
+        vocabulary.set(text, embedding);
+        vocabulary.save().catch(() => {});
+        console.log(`Vocabulary: Auto-added "${text}" (used ${usageCount}x)`);
+      }
+    }
+  }
+  return {
+    embedding,
+    cached: false,
+    source: EMBEDDING_CONFIG.provider,
+    latency_us: Math.round((performance.now() - start) * 1000),
+  };
+}
+export async function embed(text, options = {}) {
+  const result = await getEmbedding(text, options);
+  return result.embedding;
+}
+export async function getEmbeddings(texts, options = {}) {
+  const {
+    useCache = true,
+    provider = EMBEDDING_CONFIG.provider,
+    providerOptions = {},
+    onProgress,
+  } = options;
+  const hasShapeAffectingProviderOptions =
+    provider !== EMBEDDING_CONFIG.provider ||
+    providerOptions.maxLength !== undefined ||
+    providerOptions.outputDimension !== undefined ||
+    providerOptions.outputDtype !== undefined ||
+    providerOptions.inputType !== undefined;
+  const allowCache = useCache && !hasShapeAffectingProviderOptions;
+  const results = new Array(texts.length);
+  const uncachedIndices = [];
+  const uncachedTexts = [];
+  if (allowCache && EMBEDDING_CONFIG.cache?.enabled) {
+    for (let i = 0; i < texts.length; i++) {
+      const cached = queryCache.get(texts[i]);
+      if (cached) {
+        results[i] = { embedding: cached, cached: true };
+        cacheStats.hits++;
+      } else {
+        uncachedIndices.push(i);
+        uncachedTexts.push(texts[i]);
+        cacheStats.misses++;
+      }
+    }
+  } else {
+    for (let i = 0; i < texts.length; i++) {
+      uncachedIndices.push(i);
+      uncachedTexts.push(texts[i]);
+    }
+  }
+  if (uncachedTexts.length > 0) {
+    const newEmbeddings = await generateEmbeddings(uncachedTexts, provider, { ...providerOptions, onProgress });
+    for (let i = 0; i < uncachedIndices.length; i++) {
+      const idx = uncachedIndices[i];
+      results[idx] = { embedding: newEmbeddings[i], cached: false };
+      if (allowCache && EMBEDDING_CONFIG.cache?.enabled) {
+        queryCache.set(texts[idx], newEmbeddings[i]);
+      }
+    }
+  }
+  return results;
+}
+// =============================================================================
+// QUANTIZATION / MATH RE-EXPORTS (canonical impls in infrastructure/quantization.js)
+// =============================================================================
+import {
+  truncateForHNSW as _truncateForHNSW,
+  fisherYatesShuffle,
+  floatToBinary,
+  computeCentroid,
+  generateSignVector,
+  walshHadamardTransform,
+  fastRotate,
+  asymmetricDocEncode,
+  asymmetricQueryEncode,
+  floatToInt8,
+  normalizedFloatToInt8,
+} from '../infrastructure/quantization.js';
+export { fisherYatesShuffle, floatToBinary, computeCentroid, generateSignVector, walshHadamardTransform, fastRotate, asymmetricDocEncode, asymmetricQueryEncode };
+/**
+ * Truncate embedding to target dimension and L2 re-normalize.
+ * Thin wrapper that supplies the domain-specific default dimension.
+ * @param {number[]} embedding
+ * @param {number} [targetDim] - defaults to EMBEDDING_CONFIG.hnswDimension
+ */
+export function truncateForHNSW(embedding, targetDim = EMBEDDING_CONFIG.hnswDimension) {
+  return _truncateForHNSW(embedding, targetDim);
+}
+/**
+ * Asymmetric distance: 1-bit document vs 4-bit query.
+ * Approximates squared distance via asymmetric dot product + correction.
+ */
+export function asymmetricDistance(docBinary, queryInt4, queryNorm) {
+  // Delegate to WASM when available
+  if (isWasmAvailable()) {
+    // WASM version expects integer-scaled queryNorm
+    return wasmAsymmetricDistance(docBinary, queryInt4, Math.round(queryNorm));
+  }
+  let approxDot = 0;
+  const dim = queryInt4.length;
+  for (let byteIdx = 0; byteIdx < docBinary.length; byteIdx++) {
+    let byte = docBinary[byteIdx];
+    const baseIdx = byteIdx * 8;
+    for (let bit = 7; bit >= 0; bit--) {
+      const idx = baseIdx + (7 - bit);
+      if (idx >= dim) break;
+      if (byte & (1 << bit)) {
+        approxDot += queryInt4[idx];
+      } else {
+        approxDot -= queryInt4[idx];
+      }
+    }
+  }
+  return queryNorm - 2 * approxDot;
+}
+export { floatToInt8, normalizedFloatToInt8 };
+/**
+ * Batch int8 dot product scoring for normalized vectors.
+ * Returns scores in cosine-similarity scale [~-1, ~1].
+ *
+ * For int8 vectors from L2-normalized floats:
+ *   rawDot ≈ 127² × cos(a_float, b_float)
+ *   normalizedScore = rawDot / (127 * 127)
+ *
+ * @param {Int8Array} query - Query int8 vector (from normalizedFloatToInt8)
+ * @param {Int8Array[]} candidates - Candidate int8 vectors
+ * @returns {Float64Array} Cosine-approximation scores
+ */
+export function int8BatchDotScores(query, candidates) {
+  const rawDots = wasmInt8BatchDot(query, candidates);
+  const scale = 1.0 / (127 * 127);
+  const scores = new Float64Array(rawDots.length);
+  for (let i = 0; i < rawDots.length; i++) {
+    scores[i] = rawDots[i] * scale;
+  }
+  return scores;
+}
+const POPCOUNT_TABLE = new Uint8Array(256);
+for (let i = 0; i < 256; i++) {
+  POPCOUNT_TABLE[i] = (i & 1) + POPCOUNT_TABLE[i >> 1];
+}
+export function hammingDistance(a, b) {
+  if (a.length !== b.length) {
+    throw new Error(`Hamming dimension mismatch: ${a.length} vs ${b.length}`);
+  }
+  if (isWasmAvailable()) {
+    return wasmHammingDistance(a, b);
+  }
+  let distance = 0;
+  for (let i = 0; i < a.length; i++) {
+    distance += POPCOUNT_TABLE[a[i] ^ b[i]];
+  }
+  return distance;
+}
+export function int8CosineSimilarity(a, b) {
+  if (a.length !== b.length) {
+    throw new Error(`int8CosineSimilarity dimension mismatch: ${a.length} vs ${b.length}`);
+  }
+  if (isWasmAvailable()) {
+    return wasmInt8Cosine(a, b);
+  }
+  let dot = 0, normA = 0, normB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dot += a[i] * b[i];
+    normA += a[i] * a[i];
+    normB += b[i] * b[i];
+  }
+  normA = Math.sqrt(normA);
+  normB = Math.sqrt(normB);
+  if (normA === 0 || normB === 0) return 0;
+  return dot / (normA * normB);
+}
+/** @deprecated Use int8CosineSimilarity (for cosine) or int8BatchDotScores (for normalized dot). */
+export { int8CosineSimilarity as int8DotProduct };
+// =============================================================================
+// BINARY / INT8 EMBEDDING
+// =============================================================================
+export async function getBinaryEmbedding(text) {
+  const start = performance.now();
+  const cacheKey = `binary:${text}`;
+  if (EMBEDDING_CONFIG.cache?.enabled) {
+    const cached = queryCache.get(cacheKey);
+    if (cached) {
+      cacheStats.hits++;
+      return { binary: cached.binary, float: cached.float, cached: true, source: 'cache', latency_us: Math.round((performance.now() - start) * 1000) };
+    }
+  }
+  // Voyage-native binary (outputDtype: 'ubinary') bypasses the asymmetric
+  // quantization pipeline. All providers use client-side quantization so
+  // query vectors match the index-time center→rotate→quantize encoding.
+  const floatResult = await getEmbedding(text, { isQuery: true });
+  const truncated = truncateForHNSW(floatResult.embedding);
+  const binary = floatToBinary(truncated);
+  const result = { binary, float: floatResult.embedding, cached: false, source: 'client-quantized', latency_us: Math.round((performance.now() - start) * 1000) };
+  if (EMBEDDING_CONFIG.cache?.enabled) queryCache.set(cacheKey, { binary: result.binary, float: result.float });
+  return result;
+}
+export async function getInt8Embedding(text) {
+  const start = performance.now();
+  const cacheKey = `int8:${text}`;
+  if (EMBEDDING_CONFIG.cache?.enabled) {
+    const cached = queryCache.get(cacheKey);
+    if (cached) {
+      cacheStats.hits++;
+      return { int8: cached.int8, float: cached.float, cached: true, source: 'cache', latency_us: Math.round((performance.now() - start) * 1000) };
+    }
+  }
+  // All providers use client-side int8 quantization for consistency.
+  const floatResult = await getEmbedding(text, { isQuery: true });
+  const truncated = truncateForHNSW(floatResult.embedding);
+  const int8 = floatToInt8(truncated);
+  const result = { int8, float: floatResult.embedding, cached: false, source: 'client-quantized', latency_us: Math.round((performance.now() - start) * 1000) };
+  if (EMBEDDING_CONFIG.cache?.enabled) queryCache.set(cacheKey, { int8: result.int8, float: result.float });
+  return result;
+}
+// =============================================================================
+// WARMUP / LIFECYCLE
+// =============================================================================
+export async function warmup(options = {}) {
+  const { initVocabulary = true, initSemanticCache = true } = options;
+  console.log(`\nWarming up embedding service...`);
+  console.log(`  Provider: ${EMBEDDING_CONFIG.provider} (${EMBEDDING_CONFIG.model})`);
+  console.log(`  Dimensions: ${EMBEDDING_CONFIG.dimension}d full, ${EMBEDDING_CONFIG.hnswDimension}d HNSW`);
+  const warmupStart = performance.now();
+  const warmupTasks = [];
+  if (EMBEDDING_CONFIG.provider === 'local') {
+    warmupTasks.push(
+      getLocalPipeline().then(() => console.log(`  ✓ Local embedding model loaded`))
+    );
+  }
+  if (initSemanticCache && EMBEDDING_CONFIG.isRemote) {
+    warmupTasks.push(
+      semanticCache.getLocalModel().then(() => console.log(`  ✓ SemanticCache local model loaded`))
+    );
+  }
+  if (EMBEDDING_CONFIG.isRemote) {
+    warmupTasks.push(
+      generateEmbedding('warmup')
+        .then(() => console.log(`  ✓ ${EMBEDDING_CONFIG.provider} API connection verified`))
+        .catch(err => console.log(`  ⚠ ${EMBEDDING_CONFIG.provider} API: ${err.message}`))
+    );
+  }
+  if (initVocabulary && EMBEDDING_CONFIG.cache?.enabled) {
+    warmupTasks.push(
+      vocabulary.load()
+        .then(() => console.log(`  ✓ Vocabulary loaded (${vocabulary.size()} terms)`))
+    );
+  }
+  await Promise.all(warmupTasks);
+  const elapsed = Math.round(performance.now() - warmupStart);
+  console.log(`Warmup complete in ${elapsed}ms\n`);
+  return true;
+}
+export function isWarm() {
+  return EMBEDDING_CONFIG.provider === 'local' ? isLocalModelLoaded() : true;
+}
+export function getModelInfo() {
+  return {
+    provider: EMBEDDING_CONFIG.provider,
+    model: EMBEDDING_CONFIG.model,
+    dimension: EMBEDDING_CONFIG.dimension,
+    hnswDimension: EMBEDDING_CONFIG.hnswDimension,
+    isRemote: EMBEDDING_CONFIG.isRemote,
+    isWarm: isWarm(),
+    cache: { enabled: EMBEDDING_CONFIG.cache?.enabled, ...getCacheStats() },
+    availableProviders: Object.entries(EMBEDDING_PROVIDERS)
+      .filter(([_, p]) => p.enabled)
+      .map(([name, p]) => ({ name, model: p.model, priority: p.priority })),
+  };
+}
+export async function unload() {
+  await unloadLocalModel();
+  console.log('Local model unloaded');
+}
+// =============================================================================
+// WRAPPED CACHE EXPORTS (inject dependencies to avoid circular imports)
+// =============================================================================
+export function getCacheStats() {
+  return _getCacheStats(circuitBreaker.getState());
+}
+export async function addToVocabulary(term) {
+  return _addToVocabulary(term, embed);
+}
+export async function expandVocabulary(terms) {
+  return _expandVocabulary(terms, embed);
+}
+// Re-export cache functions that need no wrapping
+export { getSemanticCacheStats, clearCache, getFrequentQueries, autoPersistFrequentQueries, registerAutoPersistOnExit };
+// Named exports for internal generator functions (used by vocab-warmer)
+export { generateEmbedding, generateEmbeddings };
+// Local model runtime configuration (Phase 2 — parallel indexing)
+// Pool lifecycle (initEmbeddingPool / shutdownEmbeddingPool) lives in
+// core/indexing/indexer-pool.js per the DDD dependency matrix.
+export {
+  configureLocalModelRuntime,
+  resetLocalModelRuntime,
+};
+// =============================================================================
+// CLI INTERFACE
+// =============================================================================
+if (import.meta.url === `file://${process.argv[1]}`) {
+  const args = process.argv.slice(2);
+  if (args.includes('--warmup') || args.includes('warmup')) {
+    await warmup({ initVocabulary: true });
+    console.log('--- Cache Performance Test ---');
+    const q1 = await getEmbedding('AuthService');
+    console.log(`Query 1 "AuthService": ${q1.latency_us}μs (${q1.source})`);
+    const q2 = await getEmbedding('AuthService');
+    console.log(`Query 2 "AuthService": ${q2.latency_us}μs (${q2.source})`);
+    const q3 = await getEmbedding('how does authentication work in this codebase');
+    console.log(`Query 3 "how does auth work": ${q3.latency_us}μs (${q3.source})`);
+    console.log('\nCache stats:', getCacheStats());
+  } else if (args.includes('--test') || args.includes('test')) {
+    console.log('Testing embedding service...\n');
+    console.log('Model info:', JSON.stringify(getModelInfo(), null, 2));
+    console.log('\nGenerating test embedding...');
+    const result = await getEmbedding('function calculateTotal(items) { return items.reduce((sum, i) => sum + i.price, 0); }');
+    console.log(`  Embedding dimension: ${result.embedding.length}`);
+    console.log(`  Source: ${result.source}`);
+    console.log(`  Latency: ${result.latency_us}μs`);
+    console.log(`\nHNSW truncation: ${result.embedding.length}d → ${truncateForHNSW(result.embedding).length}d`);
+  } else if (args.includes('--stats')) {
+    await vocabulary.load();
+    console.log('Embedding Service Stats:');
+    console.log(JSON.stringify(getModelInfo(), null, 2));
+  } else if (args.includes('--expand')) {
+    const terms = args.slice(args.indexOf('--expand') + 1);
+    if (terms.length === 0) {
+      console.log('Usage: node embedding-service.js --expand term1 term2 ...');
+    } else {
+      await warmup({ initVocabulary: false });
+      const added = await expandVocabulary(terms);
+      console.log(`Added ${added} new terms to vocabulary`);
+    }
+  } else {
+    console.log(`
+Embedding Service v2.2 (SOTA December 2025)
+Usage:
+  node embedding-service.js warmup     Preload model + initialize vocabulary
+  node embedding-service.js test       Test embedding generation
+  node embedding-service.js --stats    Show model and cache info
+  node embedding-service.js --expand term1 term2   Add terms to vocabulary
+Active Provider: ${EMBEDDING_CONFIG.provider} (${EMBEDDING_CONFIG.model})
+Dimensions: ${EMBEDDING_CONFIG.dimension}d full, ${EMBEDDING_CONFIG.hnswDimension}d HNSW
+Available Providers:
+${Object.entries(EMBEDDING_PROVIDERS)
+  .map(([name, p]) => `  ${p.enabled ? '✓' : '✗'} ${name}: ${p.model} (priority ${p.priority})`)
+  .join('\n')}
+`);
+  }
+}
+// Deprecated alias for backward compatibility: lets the default export below expose `int8DotProduct` as another name for int8CosineSimilarity.
+const int8DotProduct = int8CosineSimilarity;
+export default { // aggregate object: the service's public API plus selected sub-module symbols
+  getEmbedding,
+  embed,
+  getEmbeddings,
+  truncateForHNSW,
+  floatToBinary,
+  floatToInt8,
+  normalizedFloatToInt8,
+  hammingDistance,
+  int8CosineSimilarity,
+  int8DotProduct,
+  int8BatchDotScores,
+  getBinaryEmbedding,
+  getInt8Embedding,
+  warmup,
+  isWarm,
+  getModelInfo,
+  unload,
+  getCacheStats,
+  clearCache,
+  addToVocabulary,
+  expandVocabulary,
+  getFrequentQueries,
+  generateEmbedding,
+  generateEmbeddings,
+  INDEXING_MAX_LENGTH,
+  QUERY_MAX_LENGTH,
+  callLocalModelBucketed,
+  TimeWindowRateLimiter,
+  looksLikeJson,
+  _providerCompressionSupport,
+};