sweet-search 0.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +23 -0
- package/core/cli.js +51 -0
- package/core/config.js +27 -0
- package/core/embedding/embedding-cache.js +467 -0
- package/core/embedding/embedding-local-model.js +845 -0
- package/core/embedding/embedding-remote.js +492 -0
- package/core/embedding/embedding-service.js +712 -0
- package/core/embedding/embedding-telemetry.js +219 -0
- package/core/embedding/index.js +40 -0
- package/core/graph/community-detector.js +294 -0
- package/core/graph/graph-expansion.js +839 -0
- package/core/graph/graph-extractor.js +2304 -0
- package/core/graph/graph-search.js +2148 -0
- package/core/graph/hcgs-generator.js +666 -0
- package/core/graph/index.js +16 -0
- package/core/graph/leiden-algorithm.js +547 -0
- package/core/graph/relationship-resolver.js +366 -0
- package/core/graph/repo-map.js +408 -0
- package/core/graph/summary-manager.js +549 -0
- package/core/indexing/artifact-builder.js +1054 -0
- package/core/indexing/ast-chunker.js +709 -0
- package/core/indexing/chunking/chunk-builder.js +170 -0
- package/core/indexing/chunking/markdown-chunker.js +503 -0
- package/core/indexing/chunking/plaintext-chunker.js +104 -0
- package/core/indexing/dedup/dedup-phase.js +159 -0
- package/core/indexing/dedup/exemplar-selector.js +65 -0
- package/core/indexing/document-chunker.js +56 -0
- package/core/indexing/incremental-parser.js +390 -0
- package/core/indexing/incremental-tracker.js +761 -0
- package/core/indexing/index-codebase-v21.js +472 -0
- package/core/indexing/index-maintainer.mjs +1674 -0
- package/core/indexing/index.js +90 -0
- package/core/indexing/indexer-ann.js +1077 -0
- package/core/indexing/indexer-build.js +742 -0
- package/core/indexing/indexer-phases.js +800 -0
- package/core/indexing/indexer-pool.js +764 -0
- package/core/indexing/indexer-sparse-gram.js +98 -0
- package/core/indexing/indexer-utils.js +536 -0
- package/core/indexing/indexer-worker.js +148 -0
- package/core/indexing/li-skip-policy.js +225 -0
- package/core/indexing/merkle-tracker.js +244 -0
- package/core/indexing/model-pool.js +166 -0
- package/core/infrastructure/code-graph-repository.js +120 -0
- package/core/infrastructure/codebase-repository.js +131 -0
- package/core/infrastructure/config/dedup.js +54 -0
- package/core/infrastructure/config/embedding.js +298 -0
- package/core/infrastructure/config/graph.js +80 -0
- package/core/infrastructure/config/index.js +82 -0
- package/core/infrastructure/config/indexing.js +8 -0
- package/core/infrastructure/config/platform.js +254 -0
- package/core/infrastructure/config/ranking.js +221 -0
- package/core/infrastructure/config/search.js +396 -0
- package/core/infrastructure/config/translation.js +89 -0
- package/core/infrastructure/config/vector-store.js +114 -0
- package/core/infrastructure/constants.js +86 -0
- package/core/infrastructure/coreml-cascade.js +909 -0
- package/core/infrastructure/coreml-cascade.json +46 -0
- package/core/infrastructure/coreml-provider.js +81 -0
- package/core/infrastructure/db-utils.js +69 -0
- package/core/infrastructure/dedup-hashing.js +83 -0
- package/core/infrastructure/hardware-capability.js +332 -0
- package/core/infrastructure/index.js +104 -0
- package/core/infrastructure/language-patterns/maps.js +121 -0
- package/core/infrastructure/language-patterns/registry-core.js +323 -0
- package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
- package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
- package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
- package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
- package/core/infrastructure/language-patterns/registry.js +19 -0
- package/core/infrastructure/language-patterns.js +141 -0
- package/core/infrastructure/llm-provider.js +733 -0
- package/core/infrastructure/manifest.json +46 -0
- package/core/infrastructure/maxsim.wasm +0 -0
- package/core/infrastructure/model-fetcher.js +423 -0
- package/core/infrastructure/model-registry.js +214 -0
- package/core/infrastructure/native-inference.js +587 -0
- package/core/infrastructure/native-resolver.js +187 -0
- package/core/infrastructure/native-sparse-gram.js +257 -0
- package/core/infrastructure/native-tokenizer.js +160 -0
- package/core/infrastructure/onnx-mutex.js +45 -0
- package/core/infrastructure/onnx-session-utils.js +261 -0
- package/core/infrastructure/ort-pipeline.js +111 -0
- package/core/infrastructure/project-detector.js +102 -0
- package/core/infrastructure/quantization.js +410 -0
- package/core/infrastructure/simd-distance.js +502 -0
- package/core/infrastructure/simd-distance.wasm +0 -0
- package/core/infrastructure/tree-sitter-provider.js +665 -0
- package/core/infrastructure/webgpu-maxsim.js +222 -0
- package/core/query/index.js +35 -0
- package/core/query/intent-detector.js +201 -0
- package/core/query/intent-router.js +156 -0
- package/core/query/query-router-catboost.js +222 -0
- package/core/query/query-router-ml.js +266 -0
- package/core/query/query-router.js +213 -0
- package/core/ranking/cascaded-scorer.js +379 -0
- package/core/ranking/flashrank.js +810 -0
- package/core/ranking/index.js +49 -0
- package/core/ranking/late-interaction-index.js +2383 -0
- package/core/ranking/late-interaction-model.js +812 -0
- package/core/ranking/local-reranker.js +374 -0
- package/core/ranking/mmr.js +379 -0
- package/core/ranking/quality-scorer.js +363 -0
- package/core/search/context-expander.js +1167 -0
- package/core/search/dedup/sibling-expander.js +327 -0
- package/core/search/index.js +16 -0
- package/core/search/search-boost.js +259 -0
- package/core/search/search-cli.js +544 -0
- package/core/search/search-format.js +282 -0
- package/core/search/search-fusion.js +327 -0
- package/core/search/search-hybrid.js +204 -0
- package/core/search/search-pattern-chunks.js +337 -0
- package/core/search/search-pattern-planner.js +439 -0
- package/core/search/search-pattern-prefilter.js +412 -0
- package/core/search/search-pattern-ripgrep.js +663 -0
- package/core/search/search-pattern.js +463 -0
- package/core/search/search-postprocess.js +452 -0
- package/core/search/search-semantic.js +706 -0
- package/core/search/search-server.js +554 -0
- package/core/search/session-daemon-prewarm.mjs +164 -0
- package/core/search/session-warmup.js +595 -0
- package/core/search/sweet-search.js +632 -0
- package/core/search/warmup-metrics.js +532 -0
- package/core/start-server.js +6 -0
- package/core/training/query-router/features/extractor.js +762 -0
- package/core/training/query-router/features/multilingual-patterns.js +431 -0
- package/core/training/query-router/features/text-segmenter.js +303 -0
- package/core/training/query-router/features/unicode-utils.js +383 -0
- package/core/training/query-router/output/v45_router_d4.js +11521 -0
- package/core/training/query-router/output/v46_router_d4.js +11498 -0
- package/core/vector-store/binary-heap.js +227 -0
- package/core/vector-store/binary-hnsw-index.js +1004 -0
- package/core/vector-store/float-vector-store.js +234 -0
- package/core/vector-store/hnsw-index.js +580 -0
- package/core/vector-store/index.js +39 -0
- package/core/vector-store/seismic-index.js +498 -0
- package/core/vocabulary/index.js +84 -0
- package/core/vocabulary/vocab-constants.js +20 -0
- package/core/vocabulary/vocab-miner-extractors.js +375 -0
- package/core/vocabulary/vocab-miner-nl.js +404 -0
- package/core/vocabulary/vocab-miner-utils.js +146 -0
- package/core/vocabulary/vocab-miner.js +574 -0
- package/core/vocabulary/vocab-prewarm-cli.js +110 -0
- package/core/vocabulary/vocab-ranker.js +492 -0
- package/core/vocabulary/vocab-warmer.js +523 -0
- package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
- package/core/vocabulary/vocabulary-utils.js +704 -0
- package/crates/wasm-router/pkg/package.json +13 -0
- package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
- package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
- package/mcp/config-gen.js +121 -0
- package/mcp/server.js +335 -0
- package/mcp/tool-handlers.js +476 -0
- package/package.json +131 -9
- package/scripts/benchmark-harness.js +794 -0
- package/scripts/init.js +1058 -0
- package/scripts/smoke-test.js +435 -0
- package/scripts/uninstall.js +478 -0
- package/scripts/verify-runtime.js +176 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Platform Configuration — PROJECT_ROOT, DB_PATHS, platform detection, logging.
|
|
3
|
+
* Split from core/config.js during DDD migration.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import { existsSync, readFileSync } from 'fs';
|
|
8
|
+
import { fileURLToPath } from 'url';
|
|
9
|
+
import os from 'os';
|
|
10
|
+
import { estimateComputeCores } from '../onnx-session-utils.js';
|
|
11
|
+
|
|
12
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
13
|
+
const __dirname = path.dirname(__filename);
|
|
14
|
+
|
|
15
|
+
/**
 * Determine which directory is treated as the project root.
 *
 * Resolution order:
 *   1. SWEET_SEARCH_PROJECT_ROOT, whitespace-trimmed and resolved to an
 *      absolute path, when it is set to a non-empty value.
 *   2. The nearest directory at or above the current working directory that
 *      contains a `.git` entry or a `package.json`, so that running from a
 *      subdirectory still finds the .sweet-search/ data dir and init config.
 *   3. The current working directory when no marker is found.
 *
 * @returns {string} Path of the project root.
 */
function resolveProjectRoot() {
  const override = process.env.SWEET_SEARCH_PROJECT_ROOT?.trim();
  if (override) {
    return path.resolve(override);
  }

  const markers = ['.git', 'package.json'];
  const hasMarker = (candidate) =>
    markers.some((marker) => existsSync(path.join(candidate, marker)));

  let current = process.cwd();
  for (;;) {
    if (hasMarker(current)) {
      return current;
    }
    const up = path.dirname(current);
    if (up === current) {
      // path.dirname() returns its input at the filesystem root: nothing
      // was found, so fall back to the working directory.
      return process.cwd();
    }
    current = up;
  }
}

// Project root detection (evaluated once, at module load)
export const PROJECT_ROOT = resolveProjectRoot();
|
|
38
|
+
|
|
39
|
+
// =============================================================================
|
|
40
|
+
// ENVIRONMENT & API KEYS
|
|
41
|
+
// =============================================================================
|
|
42
|
+
|
|
43
|
+
/**
 * Parse the text of a .env file into `[key, value]` pairs.
 *
 * Lines are split on LF or CRLF, so files saved with Windows line endings do
 * not leave a trailing `\r` on values. Blank lines and lines whose first
 * non-space character is `#` are skipped. Surrounding whitespace on the line
 * and around the key is removed; one optional leading and trailing quote
 * character is stripped from the value. Lines that do not look like
 * `KEY=value` are ignored.
 *
 * @param {string} content - Raw file contents.
 * @returns {Array<[string, string]>} Entries in file order.
 */
function parseDotenv(content) {
  const entries = [];
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line === '' || line.startsWith('#')) continue;
    const match = line.match(/^([^=]+)=["']?([^"'\n]+)["']?$/);
    if (!match) continue;
    entries.push([match[1].trim(), match[2]]);
  }
  return entries;
}

// Load .env file if exists (check both local and project root).
// Uses the module's top-level `fs` imports; no dynamic import is needed.
try {
  // Priority 1: Local .env (in sweet-search directory)
  // __dirname is now core/infrastructure/config/, so go up 3 levels
  const localEnvPath = path.join(__dirname, '..', '..', '..', '.env');
  // Priority 2: Project root .env
  const projectEnvPath = path.join(PROJECT_ROOT, '.env');

  const dotenvPath = existsSync(localEnvPath) ? localEnvPath : projectEnvPath;

  if (existsSync(dotenvPath)) {
    for (const [key, value] of parseDotenv(readFileSync(dotenvPath, 'utf-8'))) {
      // Variables already set to a non-empty value in the real environment win.
      if (!process.env[key]) {
        process.env[key] = value;
      }
    }
  }
} catch {
  // .env loading is optional and deliberately silent: a missing or
  // unreadable file must not break module load or pollute CLI output.
}
|
|
65
|
+
|
|
66
|
+
// =============================================================================
|
|
67
|
+
// DATABASE PATHS
|
|
68
|
+
// =============================================================================
|
|
69
|
+
|
|
70
|
+
// Name of the data directory. A non-empty SWEET_SEARCH_DATA_DIR overrides
// the default `.sweet-search`.
const DATA_DIR_NAME = process.env.SWEET_SEARCH_DATA_DIR || '.sweet-search';
|
|
77
|
+
|
|
78
|
+
/**
 * Locations of every on-disk artifact. Each entry is
 * `path.join(PROJECT_ROOT, DATA_DIR_NAME, <file name>)`.
 */
export const DB_PATHS = (() => {
  const inDataDir = (fileName) => path.join(PROJECT_ROOT, DATA_DIR_NAME, fileName);

  return {
    // Main codebase vectors
    codebase: inDataDir('codebase.db'),

    // Code graph (entities + relationships + FTS5 + summaries)
    codeGraph: inDataDir('code-graph.db'),

    // HNSW index (in-memory at query time)
    hnswIndex: inDataDir('codebase-hnsw.idx'),

    // Binary HNSW index (32x smaller, Hamming distance)
    binaryHnswIndex: inDataDir('codebase-binary-hnsw.idx'),

    // Int8 vectors for rescore stage.
    // DEPRECATED: this SQLite DB path is no longer used. Int8 vectors are
    // stored in a .int8.json sidecar alongside the binary HNSW index (see
    // binary-hnsw-index.js). Kept for backward compatibility —
    // getArtifactStats() warns if this file exists.
    int8Vectors: inDataDir('codebase-int8.db'),

    // Late interaction token embeddings
    lateInteraction: inDataDir('codebase-late-interaction.db'),

    // Sparse gram artifact for pattern prefiltering
    sparseGramIndex: inDataDir('codebase-sparse-grams.idx'),

    // Merkle state for incremental indexing
    merkle: inDataDir('merkle-state.json'),

    // Query vocabulary cache
    vocabulary: inDataDir('query-vocabulary.json'),

    // HCGS summaries cache (hierarchical code graph summaries)
    summaries: inDataDir('code-summaries.json'),
  };
})();
|
|
113
|
+
|
|
114
|
+
// =============================================================================
|
|
115
|
+
// PLATFORM DETECTION
|
|
116
|
+
// =============================================================================
|
|
117
|
+
|
|
118
|
+
/**
 * Derive indexer defaults for local embedding from the host's resources.
 *
 * Batch size and DB flush interval come from fixed tiers keyed on total RAM
 * and estimated compute cores: the first tier whose two minimums are both
 * met wins, otherwise the most conservative settings (batch 8, flush every
 * 64 rows) apply. Platform, architecture and WSL detection are reported in
 * the result; of those, only `arch` feeds the compute-core estimate.
 *
 * NOTE(review): SWEET_SEARCH_INDEXER_BATCH_SIZE and
 * SWEET_SEARCH_INDEXER_WRITE_FLUSH_ROWS are not read here — presumably
 * callers apply those overrides on top of this profile.
 *
 * @param {object} [overrides] - Values replacing host detection (for tests).
 * @param {string} [overrides.platform] - Defaults to `process.platform`.
 * @param {string} [overrides.arch] - Defaults to `process.arch`.
 * @param {boolean} [overrides.isWSL] - Defaults to WSL_DISTRO_NAME being set
 *   or `os.release()` containing "microsoft".
 * @param {number} [overrides.totalMemBytes] - Defaults to `os.totalmem()`.
 * @param {number} [overrides.cpuCount] - Defaults to `os.cpus().length`.
 * @returns {object} The chosen settings plus the inputs they were derived from.
 */
export function detectIndexerProfile(overrides) {
  const platform = overrides?.platform ?? process.platform;
  const arch = overrides?.arch ?? process.arch;
  const isWSL = overrides?.isWSL ??
    (!!process.env.WSL_DISTRO_NAME || os.release().toLowerCase().includes('microsoft'));
  const totalMemBytes = overrides?.totalMemBytes ?? os.totalmem();
  const logicalCores = overrides?.cpuCount ?? os.cpus().length;
  const computeCores = estimateComputeCores({ logicalCores, arch });
  const totalMemGB = totalMemBytes / (1024 ** 3);

  // Ordered from most to least capable; the first satisfied tier is used.
  const tiers = [
    { minMemGB: 24, minCores: 8, batchSize: 64, flushRows: 1 },
    { minMemGB: 12, minCores: 6, batchSize: 32, flushRows: 8 },
    { minMemGB: 8, minCores: 4, batchSize: 16, flushRows: 32 },
  ];
  const matched = tiers.find(
    (tier) => totalMemGB >= tier.minMemGB && computeCores >= tier.minCores,
  );
  const batchSize = matched ? matched.batchSize : 8;
  const flushRows = matched ? matched.flushRows : 64;

  // Default to sequential embedding -> LI phases. This is the safe
  // cross-machine choice when both models are CPU-bound and share caches.
  // Callers can still opt into overlap via env override for experimentation.
  const parallelLI = false;
  const executionMode = parallelLI ? 'parallel-models' : 'sequential-phases';

  return {
    batchSize,
    flushRows,
    parallelLI,
    executionMode,
    logicalCores,
    computeCores,
    totalMemBytes,
    totalMemGB,
    isWSL,
    platform,
    arch,
  };
}
|
|
170
|
+
|
|
171
|
+
// =============================================================================
|
|
172
|
+
// MODEL DELIVERY CONFIGURATION
|
|
173
|
+
// =============================================================================
|
|
174
|
+
|
|
175
|
+
/**
 * Read a boolean flag from the environment.
 *
 * The value is trimmed and lower-cased before matching: `0`, `false`, `off`
 * yield false; `1`, `true`, `on` yield true. Anything else, including an
 * unset variable, yields `defaultValue`.
 *
 * @param {string} name - Environment variable name.
 * @param {*} defaultValue - Returned when the variable is unset or unrecognised.
 * @returns {*} The parsed boolean, or `defaultValue`.
 */
function envBool(name, defaultValue) {
  const normalized = (process.env[name] ?? '').trim().toLowerCase();
  switch (normalized) {
    case '0':
    case 'false':
    case 'off':
      return false;
    case '1':
    case 'true':
    case 'on':
      return true;
    default:
      return defaultValue;
  }
}
|
|
181
|
+
|
|
182
|
+
/**
 * Decide whether models may be downloaded at runtime.
 *
 * Priority: explicit env var > init config (<data dir>/config.json, written
 * by `sweet-search init`) > default true.
 *
 * @returns {*} The env-derived boolean, the value stored under
 *   `runtime.allowRuntimeModelDownload` in the init config (returned as-is),
 *   or `true`.
 */
function resolveAllowRuntimeModelDownload() {
  // Explicit env var always wins
  if (process.env.SWEET_SEARCH_ALLOW_RUNTIME_DOWNLOAD !== undefined) {
    return envBool('SWEET_SEARCH_ALLOW_RUNTIME_DOWNLOAD', true);
  }

  try {
    const initConfigPath = path.join(PROJECT_ROOT, DATA_DIR_NAME, 'config.json');
    if (existsSync(initConfigPath)) {
      const parsed = JSON.parse(readFileSync(initConfigPath, 'utf-8'));
      const configured = parsed.runtime?.allowRuntimeModelDownload;
      if (configured !== undefined) {
        return configured;
      }
    }
  } catch {
    // init config is optional — fall through to default
  }

  return true; // backward-compatible default
}

export const MODEL_DELIVERY_CONFIG = {
  // Allow models to be downloaded at runtime (resolved once, at module load).
  allowRuntimeModelDownload: resolveAllowRuntimeModelDownload(),

  // Managed model cache root directory
  modelCacheRoot: process.env.SWEET_SEARCH_MODEL_CACHE
    || path.join(os.homedir(), '.cache', 'sweet-search', 'models'),

  // HuggingFace endpoint (overrideable for enterprise mirrors/proxies)
  hfEndpoint: process.env.SWEET_SEARCH_HF_ENDPOINT || 'https://huggingface.co',
};
|
|
210
|
+
|
|
211
|
+
// =============================================================================
|
|
212
|
+
// LOGGING
|
|
213
|
+
// =============================================================================
|
|
214
|
+
|
|
215
|
+
// Global quiet mode flag - set by CLI tools when --quiet is passed.
// Always holds a real boolean.
let _quietMode = false;

/**
 * Set quiet mode globally. When enabled:
 * - timing logs are suppressed
 * - verbose logs are suppressed
 * - Only errors and essential output are shown
 *
 * The argument is coerced with `Boolean()`, so passing an absent CLI option
 * (`undefined`) disables quiet mode instead of storing `undefined`.
 *
 * @param {boolean} enabled - Whether quiet mode should be enabled
 */
export function setQuietMode(enabled) {
  _quietMode = Boolean(enabled);
}

/**
 * Check if quiet mode is currently enabled.
 * @returns {boolean} - True if quiet mode is enabled
 */
export function isQuietMode() {
  return _quietMode;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Logging switches, evaluated lazily on every access so changes to the
 * environment or to quiet mode take effect immediately.
 */
export const LOGGING = {
  // Verbose logging (detailed operation info):
  // requires SEARCH_VERBOSE=true and quiet mode off.
  get verbose() {
    if (_quietMode) return false;
    return process.env.SEARCH_VERBOSE === 'true';
  },

  // Timing logs (performance measurements):
  // requires SEARCH_TIMING=true and quiet mode off.
  get timing() {
    if (_quietMode) return false;
    return process.env.SEARCH_TIMING === 'true';
  },

  // Debug logs (developer debugging): SEARCH_DEBUG=true, not affected by quiet mode.
  get debug() {
    const { SEARCH_DEBUG } = process.env;
    return SEARCH_DEBUG === 'true';
  },
};
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ranking Configuration — reranking, cascade scoring, late interaction.
|
|
3
|
+
* Split from core/config.js during DDD migration.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// API keys are read once at module load; an unset key becomes ''.
const VOYAGEAI_API_KEY = process.env.VOYAGEAI_API_KEY || '';
const JINA_API_KEY = process.env.JINA_API_KEY || '';

// =============================================================================
// RERANKING CONFIGURATION (Tiered)
// =============================================================================

/**
 * Settings for the tiered rerankers. The two remote tiers are enabled
 * exactly when their API key is non-empty.
 */
export const RERANK_CONFIG = {
  // --- Shared settings for remote rerankers ---
  timeout: 30000,       // 30s timeout for API calls (prevents indefinite hangs)
  maxRetries: 2,        // Retry transient failures up to 2 times
  retryDelayMs: 100,    // Base delay for exponential backoff
  maxDocTruncation: {   // Per-document truncation limits
    voyage: 4000,       // Voyage has stricter token limits
    jina: 8000,         // Jina v3 has 131K context, but limit per-doc for efficiency
  },

  // Tier 1: Voyage Rerank-2.5 (same quality as 2, 2x faster latency)
  // Reference: https://agentset.ai/rerankers (Dec 2025 benchmarks)
  voyage: {
    enabled: VOYAGEAI_API_KEY !== '',
    priority: 1,
    model: 'rerank-2.5',
    endpoint: 'https://api.voyageai.com/v1/rerank',
    apiKey: VOYAGEAI_API_KEY,
    maxDocuments: 100,
    topK: 20,
  },

  // Tier 2: Jina Reranker v3 (listwise, 0.6B params, SOTA BEIR 61.94)
  // "Last but not late" interaction - arXiv:2509.25085
  // Reference: https://jina.ai/reranker
  jina: {
    enabled: JINA_API_KEY !== '',
    priority: 2,
    model: 'jina-reranker-v3',
    endpoint: 'https://api.jina.ai/v1/rerank',
    apiKey: JINA_API_KEY,
    maxDocuments: 100,
    topK: 20,
    contextLength: 131072, // 131K context window (64 docs simultaneously)
  },

  // Tier 3: FlashRank (manual fallback only)
  flashrank: {
    enabled: true,
    priority: 99,
    model: 'ms-marco-TinyBERT-L-2-v2',
    maxDocLength: 512,
  },
};
|
|
57
|
+
|
|
58
|
+
// =============================================================================
|
|
59
|
+
// LOCAL RERANKER CONFIGURATION (GTE ModernBERT INT8)
|
|
60
|
+
//
|
|
61
|
+
// Architecture:
|
|
62
|
+
// - Model: Alibaba-NLP/gte-reranker-modernbert-base (INT8 quantized)
|
|
63
|
+
// - Library: Direct onnxruntime-node + native-tokenizer.js
|
|
64
|
+
// - Inference: Sequential scoring with global ONNX mutex (onnx-mutex.js)
|
|
65
|
+
// - Latency: ~700ms for 50 docs (~14ms/doc after warmup), ~15s cold start
|
|
66
|
+
//
|
|
67
|
+
// Default direct reranker priority: Local ModernBERT → Jina API → Voyage API
|
|
68
|
+
// =============================================================================
|
|
69
|
+
|
|
70
|
+
/**
 * Settings for the local cross-encoder (CE) reranker.
 */
export const LOCAL_RERANKER_CONFIG = {
  // Master switch — OFF by default.
  //
  // Rationale (gencodesearchnet, 6000 queries, 2026-04-22): CE reranking
  // reorders results (Kendall τ between −1.0 and +0.33 vs baseline) yet
  // yields ZERO MRR/Recall improvement at roughly 3× the wall-clock cost
  // (3021s vs 1046s). The int8 HNSW rescore plus optional LI MaxSim already
  // place ground-truth answers near the top, leaving CE nothing to rescue
  // on this benchmark.
  //
  // The infrastructure is kept so a new CE model can be A/B tested without
  // re-adding code: flip this to true, or set
  // SWEET_SEARCH_ENABLE_LOCAL_RERANKER=1. Remote Voyage/Jina rerankers still
  // activate through their API keys when present.
  useLocalReranker: false,

  // Model settings
  model: {
    name: 'gte-reranker-modernbert-base-int8',
    huggingfaceId: 'Alibaba-NLP/gte-reranker-modernbert-base',
    dtype: 'q8',                       // INT8 quantization (~150MB, no AVX512 required)
    path: 'models/gte-reranker-int8',  // HuggingFace cache directory
    maxLength: 512,                    // Max tokens per query-document pair
  },

  // Direct reranker settings
  stage2: {
    candidateCount: 50,
    requestTimeout: 10000,             // 10s timeout per request
  },
};
|
|
100
|
+
|
|
101
|
+
/**
 * Check if the local reranker should be used.
 *
 * Precedence (first match wins):
 *   1. SWEET_SEARCH_DISABLE_LOCAL_RERANKER set to 1/true/on → false (kill switch).
 *   2. SWEET_SEARCH_ENABLE_LOCAL_RERANKER set to 1/true/on → true (opt-in).
 *   3. LOCAL_RERANKER_CONFIG.useLocalReranker (false as shipped, so the local
 *      reranker is OFF unless explicitly enabled).
 *
 * Env values are trimmed and compared case-insensitively; any other value is
 * treated as "not set".
 *
 * @returns {boolean} True if local reranker should be used
 */
export function shouldUseLocalReranker() {
  const isFlagOn = (name) => {
    const raw = (process.env[name] ?? '').trim().toLowerCase();
    return raw === '1' || raw === 'true' || raw === 'on';
  };

  if (isFlagOn('SWEET_SEARCH_DISABLE_LOCAL_RERANKER')) return false;
  if (isFlagOn('SWEET_SEARCH_ENABLE_LOCAL_RERANKER')) return true;
  return LOCAL_RERANKER_CONFIG.useLocalReranker;
}
|
|
120
|
+
|
|
121
|
+
// Jina Reranker helpers (separate from embeddings)

/**
 * Report whether the Jina reranker tier is enabled in RERANK_CONFIG.
 * @returns {boolean} The value of `RERANK_CONFIG.jina.enabled`.
 */
export function isJinaRerankerAvailable() {
  const { jina } = RERANK_CONFIG;
  return jina.enabled;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Return the API key configured for the Jina reranker tier.
 * @returns {string} The value of `RERANK_CONFIG.jina.apiKey`.
 */
export function getJinaRerankerApiKey() {
  const { jina } = RERANK_CONFIG;
  return jina.apiKey;
}
|
|
129
|
+
|
|
130
|
+
// =============================================================================
|
|
131
|
+
// CASCADE SCORING CONFIGURATION (Section 26)
|
|
132
|
+
// =============================================================================
|
|
133
|
+
// Streamlined semantic pipeline: HNSW → expand → MaxSim → gate → conditional CE.
|
|
134
|
+
// Disabled by default. Set SWEET_SEARCH_CASCADE_ENABLED=true to opt in.
|
|
135
|
+
|
|
136
|
+
/**
 * Read a finite floating-point number from the environment.
 * An explicit `0` is honoured (a `||` fallback would discard it).
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Used when the variable is unset or not a finite number.
 * @returns {number}
 */
function cascadeEnvFloat(name, fallback) {
  const parsed = Number.parseFloat(process.env[name] ?? '');
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Read a strictly positive base-10 integer from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Used when the variable is unset, non-numeric, zero or negative.
 * @returns {number}
 */
function cascadeEnvPositiveInt(name, fallback) {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

export const CASCADE_CONFIG = {
  // Cascade = MaxSim gate → conditional CE rerank.
  // DISABLED by default alongside the CE reranker — benchmarks show the
  // cascade produces no MRR/Recall gain over plain HNSW+Int8 on
  // gencodesearchnet while costing ~3× latency.
  // Opt in with SWEET_SEARCH_CASCADE_ENABLED=true.
  enabled: process.env.SWEET_SEARCH_CASCADE_ENABLED === 'true',

  // MaxSim score gap threshold for decisive classification.
  // Any finite value from the env is accepted, including 0.
  gateThreshold: cascadeEnvFloat('SWEET_SEARCH_CASCADE_GATE_THRESHOLD', 0.08),

  // K_max for adaptive-K candidate selection (actual K is 3..ceTopK based on score distribution).
  // Must be a positive integer; anything else falls back to 20.
  ceTopK: cascadeEnvPositiveInt('SWEET_SEARCH_CASCADE_CE_TOP_K', 20),

  // Whether to force cross-encoder on ALL candidates (bypass gate, for benchmarking)
  forceFullCrossEncoder: process.env.SWEET_SEARCH_FORCE_FULL_CE === 'true',

  // Shadow mode DISABLED by default — no background CE compute at all.
  // Opt in with SWEET_SEARCH_CASCADE_SHADOW=true to resume data collection
  // for identifying the CE-helpful query subset.
  shadowMode: process.env.SWEET_SEARCH_CASCADE_SHADOW === 'true',
};
|
|
158
|
+
|
|
159
|
+
// =============================================================================
|
|
160
|
+
// LATE INTERACTION CONFIGURATION
|
|
161
|
+
// =============================================================================
|
|
162
|
+
|
|
163
|
+
/**
 * Resolve the per-document token cap for late-interaction models.
 * SWEET_SEARCH_LI_MAX_DOC_LENGTH wins when it parses to a positive base-10
 * integer; otherwise the cap is 2048. Read on every call.
 *
 * @returns {number}
 */
function resolveLiMaxDocLength() {
  const requested = parseInt(process.env.SWEET_SEARCH_LI_MAX_DOC_LENGTH || '', 10);
  if (!Number.isFinite(requested) || requested <= 0) {
    return 2048;
  }
  return requested;
}

export const LATE_INTERACTION_CONFIG = {
  // Selected model key: 'lateon-code' = full 149M, 'lateon-code-edge' = 17M.
  // An unset or empty SWEET_SEARCH_LATE_INTERACTION_MODEL selects
  // 'lateon-code'; the literal string 'false' disables late interaction.
  model: process.env.SWEET_SEARCH_LATE_INTERACTION_MODEL || 'lateon-code',

  get enabled() {
    return Boolean(this.model) && this.model !== 'false';
  },

  // Per-model maxDocLength is overridable via SWEET_SEARCH_LI_MAX_DOC_LENGTH.
  // Lower caps (e.g. 1024) trade some long-context recall for ~4× less
  // attention compute on the long-chunk tail (attention is O(seq²)).
  // The A/B benchmark for the cap lives in `eval/run_benchmark.js`.
  models: {
    'lateon-code': {
      hfId: 'lightonai/LateOn-Code',
      onnxFile: 'model_int8.onnx',                     // 150 MB INT8
      backboneDim: 768,                                // raw ModernBERT hidden size
      tokenDimension: 128,                             // final output after projection
      projectionPaths: ['1_Dense/model.safetensors'],  // 768→128 single stage
      maxQueryLength: 256,
      get maxDocLength() {
        return resolveLiMaxDocLength();
      },
      queryPrefix: '[Q] ',
      docPrefix: '[D] ',
    },
    'lateon-code-edge': {
      hfId: 'lightonai/LateOn-Code-edge',
      onnxFile: 'model.onnx',                          // 68 MB FP32
      backboneDim: 256,                                // raw ModernBERT hidden size
      tokenDimension: 48,                              // final output after 2-stage projection
      projectionPaths: ['1_Dense/model.safetensors', '2_Dense/model.safetensors'], // 256→512→48
      maxQueryLength: 256,
      get maxDocLength() {
        return resolveLiMaxDocLength();
      },
      queryPrefix: '[Q] ',
      docPrefix: '[D] ',
    },
  },

  // Entry of `models` for the selected key, or null when the key is unknown.
  get activeModel() {
    return this.models[this.model] || null;
  },

  // Token dimension of the active model; 128 when no model is active.
  get tokenDimension() {
    return this.activeModel?.tokenDimension || 128;
  },

  // HuggingFace id of the active model, or null when no model is active.
  get hfModelId() {
    return this.activeModel?.hfId || null;
  },

  // Storage default: int4 per-token quantization for LI tokens.
  // Query-time behavior is restored from persisted index metadata.
  quantization: 'int4',
  blendWeight: 0.3,  // tune per-model in Phase 5

  // 32 skiplist punctuation characters (filtered from doc tokens, NOT query tokens)
  skiplistChars: new Set('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'),
};
|