sweet-search 0.0.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/LICENSE +190 -0
  2. package/NOTICE +23 -0
  3. package/core/cli.js +51 -0
  4. package/core/config.js +27 -0
  5. package/core/embedding/embedding-cache.js +467 -0
  6. package/core/embedding/embedding-local-model.js +845 -0
  7. package/core/embedding/embedding-remote.js +492 -0
  8. package/core/embedding/embedding-service.js +712 -0
  9. package/core/embedding/embedding-telemetry.js +219 -0
  10. package/core/embedding/index.js +40 -0
  11. package/core/graph/community-detector.js +294 -0
  12. package/core/graph/graph-expansion.js +839 -0
  13. package/core/graph/graph-extractor.js +2304 -0
  14. package/core/graph/graph-search.js +2148 -0
  15. package/core/graph/hcgs-generator.js +666 -0
  16. package/core/graph/index.js +16 -0
  17. package/core/graph/leiden-algorithm.js +547 -0
  18. package/core/graph/relationship-resolver.js +366 -0
  19. package/core/graph/repo-map.js +408 -0
  20. package/core/graph/summary-manager.js +549 -0
  21. package/core/indexing/artifact-builder.js +1054 -0
  22. package/core/indexing/ast-chunker.js +709 -0
  23. package/core/indexing/chunking/chunk-builder.js +170 -0
  24. package/core/indexing/chunking/markdown-chunker.js +503 -0
  25. package/core/indexing/chunking/plaintext-chunker.js +104 -0
  26. package/core/indexing/dedup/dedup-phase.js +159 -0
  27. package/core/indexing/dedup/exemplar-selector.js +65 -0
  28. package/core/indexing/document-chunker.js +56 -0
  29. package/core/indexing/incremental-parser.js +390 -0
  30. package/core/indexing/incremental-tracker.js +761 -0
  31. package/core/indexing/index-codebase-v21.js +472 -0
  32. package/core/indexing/index-maintainer.mjs +1674 -0
  33. package/core/indexing/index.js +90 -0
  34. package/core/indexing/indexer-ann.js +1077 -0
  35. package/core/indexing/indexer-build.js +742 -0
  36. package/core/indexing/indexer-phases.js +800 -0
  37. package/core/indexing/indexer-pool.js +764 -0
  38. package/core/indexing/indexer-sparse-gram.js +98 -0
  39. package/core/indexing/indexer-utils.js +536 -0
  40. package/core/indexing/indexer-worker.js +148 -0
  41. package/core/indexing/li-skip-policy.js +225 -0
  42. package/core/indexing/merkle-tracker.js +244 -0
  43. package/core/indexing/model-pool.js +166 -0
  44. package/core/infrastructure/code-graph-repository.js +120 -0
  45. package/core/infrastructure/codebase-repository.js +131 -0
  46. package/core/infrastructure/config/dedup.js +54 -0
  47. package/core/infrastructure/config/embedding.js +298 -0
  48. package/core/infrastructure/config/graph.js +80 -0
  49. package/core/infrastructure/config/index.js +82 -0
  50. package/core/infrastructure/config/indexing.js +8 -0
  51. package/core/infrastructure/config/platform.js +254 -0
  52. package/core/infrastructure/config/ranking.js +221 -0
  53. package/core/infrastructure/config/search.js +396 -0
  54. package/core/infrastructure/config/translation.js +89 -0
  55. package/core/infrastructure/config/vector-store.js +114 -0
  56. package/core/infrastructure/constants.js +86 -0
  57. package/core/infrastructure/coreml-cascade.js +909 -0
  58. package/core/infrastructure/coreml-cascade.json +46 -0
  59. package/core/infrastructure/coreml-provider.js +81 -0
  60. package/core/infrastructure/db-utils.js +69 -0
  61. package/core/infrastructure/dedup-hashing.js +83 -0
  62. package/core/infrastructure/hardware-capability.js +332 -0
  63. package/core/infrastructure/index.js +104 -0
  64. package/core/infrastructure/language-patterns/maps.js +121 -0
  65. package/core/infrastructure/language-patterns/registry-core.js +323 -0
  66. package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
  67. package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
  68. package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
  69. package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
  70. package/core/infrastructure/language-patterns/registry.js +19 -0
  71. package/core/infrastructure/language-patterns.js +141 -0
  72. package/core/infrastructure/llm-provider.js +733 -0
  73. package/core/infrastructure/manifest.json +46 -0
  74. package/core/infrastructure/maxsim.wasm +0 -0
  75. package/core/infrastructure/model-fetcher.js +423 -0
  76. package/core/infrastructure/model-registry.js +214 -0
  77. package/core/infrastructure/native-inference.js +587 -0
  78. package/core/infrastructure/native-resolver.js +187 -0
  79. package/core/infrastructure/native-sparse-gram.js +257 -0
  80. package/core/infrastructure/native-tokenizer.js +160 -0
  81. package/core/infrastructure/onnx-mutex.js +45 -0
  82. package/core/infrastructure/onnx-session-utils.js +261 -0
  83. package/core/infrastructure/ort-pipeline.js +111 -0
  84. package/core/infrastructure/project-detector.js +102 -0
  85. package/core/infrastructure/quantization.js +410 -0
  86. package/core/infrastructure/simd-distance.js +502 -0
  87. package/core/infrastructure/simd-distance.wasm +0 -0
  88. package/core/infrastructure/tree-sitter-provider.js +665 -0
  89. package/core/infrastructure/webgpu-maxsim.js +222 -0
  90. package/core/query/index.js +35 -0
  91. package/core/query/intent-detector.js +201 -0
  92. package/core/query/intent-router.js +156 -0
  93. package/core/query/query-router-catboost.js +222 -0
  94. package/core/query/query-router-ml.js +266 -0
  95. package/core/query/query-router.js +213 -0
  96. package/core/ranking/cascaded-scorer.js +379 -0
  97. package/core/ranking/flashrank.js +810 -0
  98. package/core/ranking/index.js +49 -0
  99. package/core/ranking/late-interaction-index.js +2383 -0
  100. package/core/ranking/late-interaction-model.js +812 -0
  101. package/core/ranking/local-reranker.js +374 -0
  102. package/core/ranking/mmr.js +379 -0
  103. package/core/ranking/quality-scorer.js +363 -0
  104. package/core/search/context-expander.js +1167 -0
  105. package/core/search/dedup/sibling-expander.js +327 -0
  106. package/core/search/index.js +16 -0
  107. package/core/search/search-boost.js +259 -0
  108. package/core/search/search-cli.js +544 -0
  109. package/core/search/search-format.js +282 -0
  110. package/core/search/search-fusion.js +327 -0
  111. package/core/search/search-hybrid.js +204 -0
  112. package/core/search/search-pattern-chunks.js +337 -0
  113. package/core/search/search-pattern-planner.js +439 -0
  114. package/core/search/search-pattern-prefilter.js +412 -0
  115. package/core/search/search-pattern-ripgrep.js +663 -0
  116. package/core/search/search-pattern.js +463 -0
  117. package/core/search/search-postprocess.js +452 -0
  118. package/core/search/search-semantic.js +706 -0
  119. package/core/search/search-server.js +554 -0
  120. package/core/search/session-daemon-prewarm.mjs +164 -0
  121. package/core/search/session-warmup.js +595 -0
  122. package/core/search/sweet-search.js +632 -0
  123. package/core/search/warmup-metrics.js +532 -0
  124. package/core/start-server.js +6 -0
  125. package/core/training/query-router/features/extractor.js +762 -0
  126. package/core/training/query-router/features/multilingual-patterns.js +431 -0
  127. package/core/training/query-router/features/text-segmenter.js +303 -0
  128. package/core/training/query-router/features/unicode-utils.js +383 -0
  129. package/core/training/query-router/output/v45_router_d4.js +11521 -0
  130. package/core/training/query-router/output/v46_router_d4.js +11498 -0
  131. package/core/vector-store/binary-heap.js +227 -0
  132. package/core/vector-store/binary-hnsw-index.js +1004 -0
  133. package/core/vector-store/float-vector-store.js +234 -0
  134. package/core/vector-store/hnsw-index.js +580 -0
  135. package/core/vector-store/index.js +39 -0
  136. package/core/vector-store/seismic-index.js +498 -0
  137. package/core/vocabulary/index.js +84 -0
  138. package/core/vocabulary/vocab-constants.js +20 -0
  139. package/core/vocabulary/vocab-miner-extractors.js +375 -0
  140. package/core/vocabulary/vocab-miner-nl.js +404 -0
  141. package/core/vocabulary/vocab-miner-utils.js +146 -0
  142. package/core/vocabulary/vocab-miner.js +574 -0
  143. package/core/vocabulary/vocab-prewarm-cli.js +110 -0
  144. package/core/vocabulary/vocab-ranker.js +492 -0
  145. package/core/vocabulary/vocab-warmer.js +523 -0
  146. package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
  147. package/core/vocabulary/vocabulary-utils.js +704 -0
  148. package/crates/wasm-router/pkg/package.json +13 -0
  149. package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
  150. package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
  151. package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
  152. package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
  153. package/mcp/config-gen.js +121 -0
  154. package/mcp/server.js +335 -0
  155. package/mcp/tool-handlers.js +476 -0
  156. package/package.json +131 -9
  157. package/scripts/benchmark-harness.js +794 -0
  158. package/scripts/init.js +1058 -0
  159. package/scripts/smoke-test.js +435 -0
  160. package/scripts/uninstall.js +478 -0
  161. package/scripts/verify-runtime.js +176 -0
@@ -0,0 +1,254 @@
1
+ /**
2
+ * Platform Configuration — PROJECT_ROOT, DB_PATHS, platform detection, logging.
3
+ * Split from core/config.js during DDD migration.
4
+ */
5
+
6
+ import path from 'path';
7
+ import { existsSync, readFileSync } from 'fs';
8
+ import { fileURLToPath } from 'url';
9
+ import os from 'os';
10
+ import { estimateComputeCores } from '../onnx-session-utils.js';
11
+
12
+ const __filename = fileURLToPath(import.meta.url);
13
+ const __dirname = path.dirname(__filename);
14
+
15
/**
 * Determine the directory that should be treated as the project root.
 *
 * Resolution order:
 *   1. SWEET_SEARCH_PROJECT_ROOT (trimmed, resolved to an absolute path).
 *   2. The nearest ancestor of the current working directory (inclusive)
 *      that contains a `.git` entry or a `package.json`, so running from a
 *      subdirectory still locates the data dir and init config.
 *   3. The current working directory when no marker is found.
 *
 * @returns {string} Absolute path of the project root.
 */
function resolveProjectRoot() {
  const override = process.env.SWEET_SEARCH_PROJECT_ROOT?.trim();
  if (override) {
    return path.resolve(override);
  }

  const markers = ['.git', 'package.json'];
  const startDir = process.cwd();

  let current = startDir;
  let previous;
  do {
    if (markers.some((marker) => existsSync(path.join(current, marker)))) {
      return current;
    }
    previous = current;
    current = path.dirname(current);
    // path.dirname() returns its input at the filesystem root, which ends the walk.
  } while (current !== previous);

  // No project marker anywhere above cwd.
  return startDir;
}

// Project root, resolved once at module load.
export const PROJECT_ROOT = resolveProjectRoot();
38
+
39
+ // =============================================================================
40
+ // ENVIRONMENT & API KEYS
41
+ // =============================================================================
42
+
43
+ // Load .env file if exists (check both local and project root)
44
/**
 * Parse dotenv-style text into `[key, value]` pairs, in file order.
 *
 * - Accepts both LF and CRLF line endings.
 * - Skips blank lines and `#` comment lines.
 * - Trims whitespace around the key and around the `=` sign.
 * - Strips one optional leading and trailing quote (`"` or `'`) from the value.
 * - Lines with an empty value, or a value containing a quote character,
 *   do not match and are ignored.
 *
 * @param {string} content - Raw contents of a .env file.
 * @returns {Array<[string, string]>} Parsed key/value pairs.
 */
function parseDotenv(content) {
  const entries = [];
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === '' || line.startsWith('#')) continue;
    const match = line.match(/^([^=]+)=\s*["']?([^"'\n]+)["']?$/);
    if (!match) continue;
    entries.push([match[1].trim(), match[2]]);
  }
  return entries;
}

// Best-effort .env bootstrap: any failure (missing file, unreadable file)
// is deliberately ignored because .env loading is optional.
try {
  // Priority 1: .env next to the sweet-search package itself. This module
  // lives in core/infrastructure/config/, so the package root is 3 levels up.
  const localEnvPath = path.join(__dirname, '..', '..', '..', '.env');
  // Priority 2: .env in the detected project root.
  const projectEnvPath = path.join(PROJECT_ROOT, '.env');

  const dotenvPath = existsSync(localEnvPath) ? localEnvPath : projectEnvPath;

  if (existsSync(dotenvPath)) {
    for (const [key, value] of parseDotenv(readFileSync(dotenvPath, 'utf-8'))) {
      // Variables already set (to a non-empty value) in the real environment win.
      if (!process.env[key]) {
        process.env[key] = value;
      }
    }
  }
} catch { /* .env loading is optional */ }
65
+
66
+ // =============================================================================
67
+ // DATABASE PATHS
68
+ // =============================================================================
69
+
70
+ // Data directory: SWEET_SEARCH_DATA_DIR env or default .sweet-search
71
// Name (or path) of the data directory, joined onto PROJECT_ROOT below.
// An unset or empty SWEET_SEARCH_DATA_DIR falls back to '.sweet-search'.
const DATA_DIR_NAME = process.env.SWEET_SEARCH_DATA_DIR || '.sweet-search';
77
+
78
/**
 * Absolute locations of every on-disk artifact, all inside
 * `<PROJECT_ROOT>/<DATA_DIR_NAME>/`.
 */
export const DB_PATHS = (() => {
  const dataDir = path.join(PROJECT_ROOT, DATA_DIR_NAME);
  const inDataDir = (fileName) => path.join(dataDir, fileName);

  return {
    // Main codebase vectors
    codebase: inDataDir('codebase.db'),

    // Code graph (entities + relationships + FTS5 + summaries)
    codeGraph: inDataDir('code-graph.db'),

    // HNSW index (in-memory at query time)
    hnswIndex: inDataDir('codebase-hnsw.idx'),

    // Binary HNSW index (32x smaller, Hamming distance)
    binaryHnswIndex: inDataDir('codebase-binary-hnsw.idx'),

    // Int8 vectors for the rescore stage.
    // DEPRECATED: this SQLite DB path is no longer used. Int8 vectors are
    // stored in an .int8.json sidecar alongside the binary HNSW index (see
    // binary-hnsw-index.js). Kept for backward compatibility:
    // getArtifactStats() warns if this file exists.
    int8Vectors: inDataDir('codebase-int8.db'),

    // Late interaction token embeddings
    lateInteraction: inDataDir('codebase-late-interaction.db'),

    // Sparse gram artifact for pattern prefiltering
    sparseGramIndex: inDataDir('codebase-sparse-grams.idx'),

    // Merkle state for incremental indexing
    merkle: inDataDir('merkle-state.json'),

    // Query vocabulary cache
    vocabulary: inDataDir('query-vocabulary.json'),

    // HCGS summaries cache (hierarchical code graph summaries)
    summaries: inDataDir('code-summaries.json'),
  };
})();
113
+
114
+ // =============================================================================
115
+ // PLATFORM DETECTION
116
+ // =============================================================================
117
+
118
/**
 * Derive indexer defaults for local embedding from the host's resources.
 *
 * The batch size and DB flush cadence are picked from total memory and the
 * estimated number of compute cores; the first matching tier wins:
 *
 *   >= 24 GB and >= 8 cores -> batchSize 64, flushRows 1
 *   >= 12 GB and >= 6 cores -> batchSize 32, flushRows 8
 *   >=  8 GB and >= 4 cores -> batchSize 16, flushRows 32
 *   otherwise               -> batchSize  8, flushRows 64
 *
 * `platform`, `arch` and `isWSL` are detected and reported in the result but
 * do not influence tier selection here (`arch` is only forwarded to
 * `estimateComputeCores`).
 *
 * NOTE(review): SWEET_SEARCH_INDEXER_BATCH_SIZE / SWEET_SEARCH_INDEXER_WRITE_FLUSH_ROWS
 * are not read in this function — presumably callers apply them on top of
 * the returned profile; confirm.
 *
 * @param {object} [overrides] - Optional values replacing host detection:
 *   `platform`, `arch`, `isWSL`, `totalMemBytes`, `cpuCount`.
 * @returns {object} Profile with batchSize, flushRows, parallelLI,
 *   executionMode, logicalCores, computeCores, totalMemBytes, totalMemGB,
 *   isWSL, platform and arch.
 */
export function detectIndexerProfile(overrides) {
  const BYTES_PER_GB = 1024 ** 3;

  const platform = overrides?.platform ?? process.platform;
  const arch = overrides?.arch ?? process.arch;
  const isWSL = overrides?.isWSL
    ?? (Boolean(process.env.WSL_DISTRO_NAME) || os.release().toLowerCase().includes('microsoft'));
  const totalMemBytes = overrides?.totalMemBytes ?? os.totalmem();
  const logicalCores = overrides?.cpuCount ?? os.cpus().length;
  const computeCores = estimateComputeCores({ logicalCores, arch });
  const totalMemGB = totalMemBytes / BYTES_PER_GB;

  // Ordered from most to least capable; the first satisfied tier is used.
  const tiers = [
    { minMemGB: 24, minCores: 8, batchSize: 64, flushRows: 1 },
    { minMemGB: 12, minCores: 6, batchSize: 32, flushRows: 8 },
    { minMemGB: 8, minCores: 4, batchSize: 16, flushRows: 32 },
  ];
  const matched = tiers.find(
    (tier) => totalMemGB >= tier.minMemGB && computeCores >= tier.minCores,
  );
  const { batchSize, flushRows } = matched ?? { batchSize: 8, flushRows: 64 };

  // Embedding and late-interaction phases run sequentially by default: the
  // safe cross-machine choice when both models are CPU-bound and share
  // caches. This function never enables overlap itself.
  const parallelLI = false;
  const executionMode = parallelLI ? 'parallel-models' : 'sequential-phases';

  return {
    batchSize,
    flushRows,
    parallelLI,
    executionMode,
    logicalCores,
    computeCores,
    totalMemBytes,
    totalMemGB,
    isWSL,
    platform,
    arch,
  };
}
170
+
171
+ // =============================================================================
172
+ // MODEL DELIVERY CONFIGURATION
173
+ // =============================================================================
174
+
175
/**
 * Read a boolean flag from the environment.
 *
 * The value is trimmed and lower-cased; `0`/`false`/`off` yield false and
 * `1`/`true`/`on` yield true. Anything else (including an unset variable)
 * yields `defaultValue`.
 *
 * @param {string} name - Environment variable name.
 * @param {*} defaultValue - Returned when the variable is unset or unrecognised.
 * @returns {*} The parsed boolean, or `defaultValue`.
 */
function envBool(name, defaultValue) {
  const normalized = (process.env[name] ?? '').trim().toLowerCase();
  switch (normalized) {
    case '0':
    case 'false':
    case 'off':
      return false;
    case '1':
    case 'true':
    case 'on':
      return true;
    default:
      return defaultValue;
  }
}
181
+
182
/**
 * Decide whether models may be downloaded at runtime.
 *
 * Priority: explicit SWEET_SEARCH_ALLOW_RUNTIME_DOWNLOAD env var, then
 * `runtime.allowRuntimeModelDownload` from the init config
 * (`<PROJECT_ROOT>/<DATA_DIR_NAME>/config.json`), then `true`.
 *
 * @returns {*} The env-derived boolean, the value stored in the init config
 *   (returned as-is), or `true`.
 */
function resolveAllowRuntimeModelDownload() {
  // An explicitly set env var always wins.
  if (process.env.SWEET_SEARCH_ALLOW_RUNTIME_DOWNLOAD !== undefined) {
    return envBool('SWEET_SEARCH_ALLOW_RUNTIME_DOWNLOAD', true);
  }

  // Otherwise consult the config written by `sweet-search init`.
  try {
    const configFile = path.join(PROJECT_ROOT, DATA_DIR_NAME, 'config.json');
    if (existsSync(configFile)) {
      const persisted = JSON.parse(readFileSync(configFile, 'utf-8'))
        .runtime?.allowRuntimeModelDownload;
      if (persisted !== undefined) {
        return persisted;
      }
    }
  } catch { /* init config is optional — fall through to default */ }

  return true; // backward-compatible default
}

export const MODEL_DELIVERY_CONFIG = {
  // Whether models may be fetched at runtime (see resolver above).
  allowRuntimeModelDownload: resolveAllowRuntimeModelDownload(),

  // Root directory of the managed model cache.
  modelCacheRoot: process.env.SWEET_SEARCH_MODEL_CACHE
    || path.join(os.homedir(), '.cache', 'sweet-search', 'models'),

  // HuggingFace endpoint (overrideable for enterprise mirrors/proxies).
  hfEndpoint: process.env.SWEET_SEARCH_HF_ENDPOINT || 'https://huggingface.co',
};
210
+
211
+ // =============================================================================
212
+ // LOGGING
213
+ // =============================================================================
214
+
215
+ // Global quiet mode flag - set by CLI tools when --quiet is passed
216
// Module-private quiet flag; only changed through setQuietMode().
let quietModeEnabled = false;

/**
 * Turn quiet mode on or off for the whole process. While it is on, the
 * `verbose` and `timing` log switches report false; `debug` is unaffected.
 *
 * @param {boolean} enabled - Whether quiet mode should be enabled.
 */
export function setQuietMode(enabled) {
  quietModeEnabled = enabled;
}

/**
 * Report the value last passed to setQuietMode() (false initially).
 *
 * @returns {boolean} Current quiet-mode flag.
 */
export function isQuietMode() {
  return quietModeEnabled;
}

/**
 * Log-level switches. Each property is a getter, so changes to the
 * environment or to quiet mode are observed on the next read.
 */
export const LOGGING = {
  // Detailed operation info: SEARCH_VERBOSE=true and quiet mode off.
  get verbose() {
    return process.env.SEARCH_VERBOSE === 'true' && !quietModeEnabled;
  },

  // Performance measurements: SEARCH_TIMING=true and quiet mode off.
  get timing() {
    return process.env.SEARCH_TIMING === 'true' && !quietModeEnabled;
  },

  // Developer debugging: SEARCH_DEBUG=true, regardless of quiet mode.
  get debug() {
    return process.env.SEARCH_DEBUG === 'true';
  },
};
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Ranking Configuration — reranking, cascade scoring, late interaction.
3
+ * Split from core/config.js during DDD migration.
4
+ */
5
+
6
// API keys are read once at module load; an unset key becomes ''.
const VOYAGEAI_API_KEY = process.env.VOYAGEAI_API_KEY || '';
const JINA_API_KEY = process.env.JINA_API_KEY || '';

/**
 * Build the settings object for one remote reranker tier. A tier is enabled
 * exactly when its API key is non-empty.
 *
 * @param {object} tier - priority, model, endpoint, apiKey plus any extra fields.
 * @returns {object} Tier settings including enabled, maxDocuments and topK.
 */
function remoteRerankerTier({ priority, model, endpoint, apiKey, ...extra }) {
  return {
    enabled: apiKey.length > 0,
    priority,
    model,
    endpoint,
    apiKey,
    maxDocuments: 100,
    topK: 20,
    ...extra,
  };
}

// =============================================================================
// RERANKING CONFIGURATION (Tiered)
// =============================================================================

export const RERANK_CONFIG = {
  // Settings shared by the remote rerankers.
  timeout: 30_000, // 30s cap on API calls (prevents indefinite hangs)
  maxRetries: 2, // retry transient failures up to 2 times
  retryDelayMs: 100, // base delay for exponential backoff (100, 200, 400ms)
  maxDocTruncation: { // per-document truncation limits
    voyage: 4000, // Voyage has stricter token limits
    jina: 8000, // Jina v3 has 131K context, but limit per-doc for efficiency
  },

  // Tier 1: Voyage Rerank-2.5 (same quality as 2, 2x faster latency)
  // Reference: https://agentset.ai/rerankers (Dec 2025 benchmarks)
  voyage: remoteRerankerTier({
    priority: 1,
    model: 'rerank-2.5',
    endpoint: 'https://api.voyageai.com/v1/rerank',
    apiKey: VOYAGEAI_API_KEY,
  }),

  // Tier 2: Jina Reranker v3 (listwise, 0.6B params, SOTA BEIR 61.94)
  // "Last but not late" interaction - arXiv:2509.25085
  // Reference: https://jina.ai/reranker
  jina: remoteRerankerTier({
    priority: 2,
    model: 'jina-reranker-v3',
    endpoint: 'https://api.jina.ai/v1/rerank',
    apiKey: JINA_API_KEY,
    contextLength: 131072, // 131K context window (64 docs simultaneously)
  }),

  // Tier 3: FlashRank (manual fallback only)
  flashrank: {
    enabled: true,
    priority: 99,
    model: 'ms-marco-TinyBERT-L-2-v2',
    maxDocLength: 512,
  },
};
57
+
58
+ // =============================================================================
59
+ // LOCAL RERANKER CONFIGURATION (GTE ModernBERT INT8)
60
+ //
61
+ // Architecture:
62
+ // - Model: Alibaba-NLP/gte-reranker-modernbert-base (INT8 quantized)
63
+ // - Library: Direct onnxruntime-node + native-tokenizer.js
64
+ // - Inference: Sequential scoring with global ONNX mutex (onnx-mutex.js)
65
+ // - Latency: ~700ms for 50 docs (~14ms/doc after warmup), ~15s cold start
66
+ //
67
+ // Default direct reranker priority: Local ModernBERT → Jina API → Voyage API
68
+ // =============================================================================
69
+
70
export const LOCAL_RERANKER_CONFIG = {
  // Master switch for the local cross-encoder (CE) reranker. OFF by default.
  //
  // Rationale (gencodesearchnet, 6000 queries, 2026-04-22): CE reranking
  // changes the ordering (Kendall τ between −1.0 and +0.33 vs baseline) yet
  // brings ZERO MRR/Recall improvement, at roughly 3× the wall-clock cost
  // (3021s vs 1046s). The int8 HNSW rescore plus optional LI MaxSim already
  // place ground-truth answers near the top, so CE has nothing left to
  // rescue on this benchmark.
  //
  // The infrastructure stays in place: flip this to true (or set env
  // SWEET_SEARCH_ENABLE_LOCAL_RERANKER=1) to A/B a new CE model without
  // re-adding code. Remote Voyage/Jina still activate via their API keys.
  useLocalReranker: false,

  // Model settings
  model: {
    name: 'gte-reranker-modernbert-base-int8',
    huggingfaceId: 'Alibaba-NLP/gte-reranker-modernbert-base',
    dtype: 'q8', // INT8 quantization (~150MB, no AVX512 required)
    path: 'models/gte-reranker-int8', // HuggingFace cache directory
    maxLength: 512, // max tokens per query-document pair
  },

  // Direct reranker settings
  stage2: {
    candidateCount: 50,
    requestTimeout: 10_000, // 10s timeout per request
  },
};
100
+
101
/**
 * Decide whether the local reranker should be used.
 *
 * Evaluated on every call, in this order:
 *   1. SWEET_SEARCH_DISABLE_LOCAL_RERANKER set to 1/true/on -> false (kill switch).
 *   2. SWEET_SEARCH_ENABLE_LOCAL_RERANKER set to 1/true/on  -> true (opt-in).
 *   3. Otherwise LOCAL_RERANKER_CONFIG.useLocalReranker, which is false by
 *      default — i.e. the local reranker is OFF unless opted in.
 *
 * Env values are trimmed and compared case-insensitively.
 *
 * @returns {boolean} True if the local reranker should be used.
 */
export function shouldUseLocalReranker() {
  const isSwitchedOn = (name) => {
    const value = (process.env[name] ?? '').trim().toLowerCase();
    return value === '1' || value === 'true' || value === 'on';
  };

  // The kill switch outranks the opt-in flag.
  if (isSwitchedOn('SWEET_SEARCH_DISABLE_LOCAL_RERANKER')) {
    return false;
  }
  if (isSwitchedOn('SWEET_SEARCH_ENABLE_LOCAL_RERANKER')) {
    return true;
  }
  return LOCAL_RERANKER_CONFIG.useLocalReranker;
}
120
+
121
+ // Jina Reranker helpers (separate from embeddings)
122
/**
 * Whether the Jina reranker tier is enabled in RERANK_CONFIG.
 *
 * @returns {boolean} The `enabled` flag of the Jina tier.
 */
export function isJinaRerankerAvailable() {
  const { jina } = RERANK_CONFIG;
  return jina.enabled;
}
125
+
126
/**
 * API key configured for the Jina reranker tier in RERANK_CONFIG.
 *
 * @returns {string} The `apiKey` of the Jina tier.
 */
export function getJinaRerankerApiKey() {
  const { jina } = RERANK_CONFIG;
  return jina.apiKey;
}
129
+
130
+ // =============================================================================
131
+ // CASCADE SCORING CONFIGURATION (Section 26)
132
+ // =============================================================================
133
+ // Streamlined semantic pipeline: HNSW → expand → MaxSim → gate → conditional CE.
134
+ // Disabled by default. Set SWEET_SEARCH_CASCADE_ENABLED=true to opt in.
135
+
136
/**
 * Read a floating-point setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Used when the variable is unset or not a number.
 * @returns {number} The parsed value (an explicit 0 is honoured) or `fallback`.
 */
function envFloatOr(name, fallback) {
  const parsed = Number.parseFloat(process.env[name] ?? '');
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Read a positive base-10 integer setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Used when the variable is unset, not a number, or <= 0.
 * @returns {number} The parsed positive integer or `fallback`.
 */
function envPositiveIntOr(name, fallback) {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

export const CASCADE_CONFIG = {
  // Cascade = MaxSim gate → conditional CE rerank.
  // DISABLED by default alongside the CE reranker — benchmarks show the
  // cascade produces no MRR/Recall gain over plain HNSW+Int8 on
  // gencodesearchnet while costing ~3× latency.
  // Opt in with SWEET_SEARCH_CASCADE_ENABLED=true.
  enabled: process.env.SWEET_SEARCH_CASCADE_ENABLED === 'true',

  // MaxSim score gap threshold for decisive classification.
  // An explicit 0 is respected; only unset/non-numeric input falls back to 0.08.
  gateThreshold: envFloatOr('SWEET_SEARCH_CASCADE_GATE_THRESHOLD', 0.08),

  // K_max for adaptive-K candidate selection (actual K is 3..ceTopK based on
  // score distribution). Parsed as base 10; non-positive values fall back to 20.
  ceTopK: envPositiveIntOr('SWEET_SEARCH_CASCADE_CE_TOP_K', 20),

  // Force the cross-encoder on ALL candidates (bypass gate, for benchmarking).
  forceFullCrossEncoder: process.env.SWEET_SEARCH_FORCE_FULL_CE === 'true',

  // Shadow mode DISABLED by default — no background CE compute at all.
  // Opt in with SWEET_SEARCH_CASCADE_SHADOW=true to resume data collection
  // for identifying the CE-helpful query subset.
  shadowMode: process.env.SWEET_SEARCH_CASCADE_SHADOW === 'true',
};
158
+
159
+ // =============================================================================
160
+ // LATE INTERACTION CONFIGURATION
161
+ // =============================================================================
162
+
163
/**
 * Document-length cap shared by all late-interaction models.
 * SWEET_SEARCH_LI_MAX_DOC_LENGTH is parsed as a base-10 integer on every
 * call; anything that is not a positive number yields 2048.
 *
 * @returns {number} Maximum document length in tokens.
 */
function resolveLiMaxDocLength() {
  const requested = parseInt(process.env.SWEET_SEARCH_LI_MAX_DOC_LENGTH || '', 10);
  if (Number.isFinite(requested) && requested > 0) {
    return requested;
  }
  return 2048;
}

export const LATE_INTERACTION_CONFIG = {
  // 'false' = disabled, 'lateon-code' = full 149M, 'lateon-code-edge' = 17M.
  // Read from the environment once, at module load.
  model: process.env.SWEET_SEARCH_LATE_INTERACTION_MODEL || 'lateon-code',

  // Enabled for any non-empty model value other than the string 'false'.
  get enabled() {
    return Boolean(this.model) && this.model !== 'false';
  },

  // Per-model maxDocLength is overridable via SWEET_SEARCH_LI_MAX_DOC_LENGTH.
  // Lower caps (e.g. 1024) trade some long-context recall for ~4× less
  // attention compute on the long-chunk tail (attention is O(seq²)).
  // The A/B benchmark for the cap lives in `eval/run_benchmark.js`.
  models: {
    'lateon-code': {
      hfId: 'lightonai/LateOn-Code',
      onnxFile: 'model_int8.onnx', // 150 MB INT8
      backboneDim: 768, // raw ModernBERT hidden size
      tokenDimension: 128, // final output after projection
      projectionPaths: ['1_Dense/model.safetensors'], // 768→128 single stage
      maxQueryLength: 256,
      get maxDocLength() {
        return resolveLiMaxDocLength();
      },
      queryPrefix: '[Q] ',
      docPrefix: '[D] ',
    },
    'lateon-code-edge': {
      hfId: 'lightonai/LateOn-Code-edge',
      onnxFile: 'model.onnx', // 68 MB FP32
      backboneDim: 256, // raw ModernBERT hidden size
      tokenDimension: 48, // final output after 2-stage projection
      projectionPaths: [ // 256→512→48
        '1_Dense/model.safetensors',
        '2_Dense/model.safetensors',
      ],
      maxQueryLength: 256,
      get maxDocLength() {
        return resolveLiMaxDocLength();
      },
      queryPrefix: '[Q] ',
      docPrefix: '[D] ',
    },
  },

  // Entry of `models` selected by `model`, or null when there is none.
  get activeModel() {
    return this.models[this.model] || null;
  },
  // Token dimension of the active model; 128 when no model is active.
  get tokenDimension() {
    return this.activeModel?.tokenDimension || 128;
  },
  // HuggingFace id of the active model, or null.
  get hfModelId() {
    return this.activeModel?.hfId || null;
  },

  // Storage default: int4 per-token quantization for LI tokens.
  // Query-time behavior is restored from persisted index metadata.
  quantization: 'int4',
  blendWeight: 0.3, // tune per-model in Phase 5

  // 32 skiplist punctuation characters (filtered from doc tokens, NOT query tokens)
  skiplistChars: new Set([...'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~']),
};