@hasna/search 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +602 -128
- package/dist/db/index-migrations.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -0
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/searches.d.ts +1 -0
- package/dist/db/searches.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +602 -126
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +1 -1
- package/dist/lib/local/find.d.ts.map +1 -1
- package/dist/lib/local/indexer.d.ts +6 -0
- package/dist/lib/local/indexer.d.ts.map +1 -1
- package/dist/lib/local/query.d.ts.map +1 -1
- package/dist/lib/router.d.ts +10 -0
- package/dist/lib/router.d.ts.map +1 -0
- package/dist/lib/search.d.ts +1 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/index.js +608 -130
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/server/index.js +611 -131
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/types/index.d.ts +22 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/mcp/index.js
CHANGED
|
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
|
|
|
11644
11644
|
var require_package = __commonJS((exports, module) => {
|
|
11645
11645
|
module.exports = {
|
|
11646
11646
|
name: "@hasna/search",
|
|
11647
|
-
version: "0.0.10",
|
|
11647
|
+
version: "0.0.11",
|
|
11648
11648
|
description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
|
|
11649
11649
|
type: "module",
|
|
11650
11650
|
main: "dist/index.js",
|
|
@@ -24742,12 +24742,19 @@ var DEFAULT_CONFIG = {
|
|
|
24742
24742
|
defaultLimit: 10,
|
|
24743
24743
|
defaultProviders: [],
|
|
24744
24744
|
defaultProfile: null,
|
|
24745
|
+
router: {
|
|
24746
|
+
enabled: false,
|
|
24747
|
+
model: "gpt-oss-120b",
|
|
24748
|
+
maxProviders: 3,
|
|
24749
|
+
timeoutMs: 1200
|
|
24750
|
+
},
|
|
24745
24751
|
transcriber: {
|
|
24746
24752
|
baseUrl: "http://localhost:19600",
|
|
24747
24753
|
fallbackCli: "microservice-transcriber"
|
|
24748
24754
|
},
|
|
24749
24755
|
dedup: true,
|
|
24750
24756
|
maxConcurrent: 5,
|
|
24757
|
+
providerTimeoutMs: 15000,
|
|
24751
24758
|
indexStaleMinutes: 5,
|
|
24752
24759
|
indexAutoRefresh: true,
|
|
24753
24760
|
recordLocalResults: false
|
|
@@ -25482,6 +25489,31 @@ var migrations = [
|
|
|
25482
25489
|
);
|
|
25483
25490
|
`);
|
|
25484
25491
|
}
|
|
25492
|
+
},
|
|
25493
|
+
{
|
|
25494
|
+
version: 2,
|
|
25495
|
+
description: "Local file index filter indexes",
|
|
25496
|
+
up: (db) => {
|
|
25497
|
+
db.exec(`
|
|
25498
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
|
|
25499
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
|
|
25500
|
+
`);
|
|
25501
|
+
}
|
|
25502
|
+
},
|
|
25503
|
+
{
|
|
25504
|
+
version: 3,
|
|
25505
|
+
description: "Local content short-token filter grams",
|
|
25506
|
+
up: (db) => {
|
|
25507
|
+
db.exec(`
|
|
25508
|
+
CREATE TABLE IF NOT EXISTS file_content_grams (
|
|
25509
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
25510
|
+
gram TEXT NOT NULL,
|
|
25511
|
+
PRIMARY KEY (file_id, gram)
|
|
25512
|
+
);
|
|
25513
|
+
CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
|
|
25514
|
+
ON file_content_grams(gram, file_id);
|
|
25515
|
+
`);
|
|
25516
|
+
}
|
|
25485
25517
|
}
|
|
25486
25518
|
];
|
|
25487
25519
|
function runIndexMigrations(db) {
|
|
@@ -25553,7 +25585,18 @@ function getConfig() {
|
|
|
25553
25585
|
try {
|
|
25554
25586
|
const raw = readFileSync(path, "utf-8");
|
|
25555
25587
|
const parsed = JSON.parse(raw);
|
|
25556
|
-
return {
|
|
25588
|
+
return {
|
|
25589
|
+
...DEFAULT_CONFIG,
|
|
25590
|
+
...parsed,
|
|
25591
|
+
router: {
|
|
25592
|
+
...DEFAULT_CONFIG.router,
|
|
25593
|
+
...parsed.router ?? {}
|
|
25594
|
+
},
|
|
25595
|
+
transcriber: {
|
|
25596
|
+
...DEFAULT_CONFIG.transcriber,
|
|
25597
|
+
...parsed.transcriber ?? {}
|
|
25598
|
+
}
|
|
25599
|
+
};
|
|
25557
25600
|
} catch {
|
|
25558
25601
|
return { ...DEFAULT_CONFIG };
|
|
25559
25602
|
}
|
|
@@ -25971,6 +26014,7 @@ function removeRoot(idOrPath, db) {
|
|
|
25971
26014
|
d.exec("BEGIN");
|
|
25972
26015
|
try {
|
|
25973
26016
|
d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
26017
|
+
d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
25974
26018
|
d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
|
|
25975
26019
|
d.exec("COMMIT");
|
|
25976
26020
|
} catch (err) {
|
|
@@ -25982,6 +26026,21 @@ function removeRoot(idOrPath, db) {
|
|
|
25982
26026
|
/**
 * Decides whether a scanned file's body should be added to the content index.
 *
 * A file qualifies only when content indexing is enabled on the root, the file
 * is non-empty and no larger than the root's `maxFileSize`, its extension is
 * not reported as binary by `hasBinaryExtension`, and its name is not rejected
 * by `isContentExcluded`.
 *
 * @param {{contentIndexing: unknown, maxFileSize: number}} root - Index root settings.
 * @param {{size: number, ext: string, name: string}} file - Scanned file entry.
 * @returns {boolean} `true` when the file's content should be indexed.
 */
function shouldIndexContent(root, file) {
  // Cheap checks first so the helper lookups only run for plausible candidates.
  if (!root.contentIndexing)
    return false;
  if (file.size <= 0 || file.size > root.maxFileSize)
    return false;
  return !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
}
|
|
26029
|
+
/**
 * Collects the distinct 1- and 2-character grams found in a file body.
 *
 * The body is lower-cased and split into runs of `[a-z0-9_$]`; every single
 * character and every adjacent character pair inside a run is recorded. Grams
 * never span two runs.
 *
 * @param {string} body - Raw file content.
 * @returns {string[]} Distinct grams in first-seen order.
 */
function contentShortGrams(body) {
  const grams = new Set();
  for (const [word] of body.toLowerCase().matchAll(/[a-z0-9_$]+/g)) {
    for (let i = 0; i < word.length; i++) {
      grams.add(word[i]);
      if (i + 1 < word.length)
        grams.add(word.slice(i, i + 2));
    }
    // Safety cap, checked once per word. With a 38-symbol alphabet there are
    // at most 38 + 38 * 38 = 1482 distinct grams, so this limit is not
    // expected to truncate the result.
    if (grams.size >= 2048)
      break;
  }
  return [...grams];
}
|
|
25985
26044
|
function indexRoot(idOrPath, opts = {}, db) {
|
|
25986
26045
|
const d = db ?? getIndexDb();
|
|
25987
26046
|
const root = getRoot(idOrPath, d);
|
|
@@ -26000,6 +26059,8 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
26000
26059
|
const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
|
|
26001
26060
|
const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
|
|
26002
26061
|
const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
|
|
26062
|
+
const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
|
|
26063
|
+
const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
|
|
26003
26064
|
const stats = {
|
|
26004
26065
|
rootId: root.id,
|
|
26005
26066
|
added: 0,
|
|
@@ -26010,38 +26071,57 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
26010
26071
|
skippedDirs: skippedDirs.length,
|
|
26011
26072
|
durationMs: 0
|
|
26012
26073
|
};
|
|
26074
|
+
const seen = new Set;
|
|
26075
|
+
const changes = [];
|
|
26076
|
+
for (const file of scanned) {
|
|
26077
|
+
seen.add(file.relPath);
|
|
26078
|
+
const prev = existing.get(file.relPath);
|
|
26079
|
+
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
26080
|
+
if (prev && !changed && !opts.force)
|
|
26081
|
+
continue;
|
|
26082
|
+
const wantContent = shouldIndexContent(root, file);
|
|
26083
|
+
const absPath = `${root.path}/${file.relPath}`;
|
|
26084
|
+
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
26085
|
+
let body = null;
|
|
26086
|
+
if (wantContent && !isBinary) {
|
|
26087
|
+
try {
|
|
26088
|
+
body = readFileSync3(absPath, "utf-8");
|
|
26089
|
+
} catch {
|
|
26090
|
+
isBinary = true;
|
|
26091
|
+
}
|
|
26092
|
+
}
|
|
26093
|
+
changes.push({
|
|
26094
|
+
file,
|
|
26095
|
+
prev,
|
|
26096
|
+
isBinary,
|
|
26097
|
+
body,
|
|
26098
|
+
grams: body !== null ? contentShortGrams(body) : [],
|
|
26099
|
+
contentIndexed: body !== null ? 1 : 0
|
|
26100
|
+
});
|
|
26101
|
+
}
|
|
26013
26102
|
d.exec("BEGIN");
|
|
26014
26103
|
try {
|
|
26015
|
-
const
|
|
26016
|
-
for (const file of scanned) {
|
|
26017
|
-
seen.add(file.relPath);
|
|
26018
|
-
const prev = existing.get(file.relPath);
|
|
26019
|
-
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
26020
|
-
if (prev && !changed && !opts.force)
|
|
26021
|
-
continue;
|
|
26022
|
-
const wantContent = shouldIndexContent(root, file);
|
|
26023
|
-
const absPath = `${root.path}/${file.relPath}`;
|
|
26024
|
-
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
26025
|
-
let body = null;
|
|
26026
|
-
if (wantContent && !isBinary) {
|
|
26027
|
-
try {
|
|
26028
|
-
body = readFileSync3(absPath, "utf-8");
|
|
26029
|
-
} catch {
|
|
26030
|
-
isBinary = true;
|
|
26031
|
-
}
|
|
26032
|
-
}
|
|
26033
|
-
const contentIndexed = body !== null ? 1 : 0;
|
|
26104
|
+
for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
|
|
26034
26105
|
if (prev) {
|
|
26035
|
-
if (prev.content_indexed)
|
|
26106
|
+
if (prev.content_indexed) {
|
|
26036
26107
|
deleteContent.run(prev.id);
|
|
26108
|
+
deleteContentGrams.run(prev.id);
|
|
26109
|
+
}
|
|
26037
26110
|
updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
|
|
26038
|
-
if (body !== null)
|
|
26111
|
+
if (body !== null) {
|
|
26039
26112
|
insertContent.run(prev.id, body);
|
|
26113
|
+
for (const gram of grams)
|
|
26114
|
+
insertContentGram.run(prev.id, gram);
|
|
26115
|
+
}
|
|
26040
26116
|
stats.updated++;
|
|
26041
26117
|
} else {
|
|
26042
26118
|
const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
|
|
26043
|
-
if (body !== null)
|
|
26044
|
-
|
|
26119
|
+
if (body !== null) {
|
|
26120
|
+
const fileId = Number(inserted.lastInsertRowid);
|
|
26121
|
+
insertContent.run(fileId, body);
|
|
26122
|
+
for (const gram of grams)
|
|
26123
|
+
insertContentGram.run(fileId, gram);
|
|
26124
|
+
}
|
|
26045
26125
|
stats.added++;
|
|
26046
26126
|
}
|
|
26047
26127
|
if (contentIndexed)
|
|
@@ -26050,8 +26130,10 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
26050
26130
|
for (const [relPath, row] of existing) {
|
|
26051
26131
|
if (seen.has(relPath))
|
|
26052
26132
|
continue;
|
|
26053
|
-
if (row.content_indexed)
|
|
26133
|
+
if (row.content_indexed) {
|
|
26054
26134
|
deleteContent.run(row.id);
|
|
26135
|
+
deleteContentGrams.run(row.id);
|
|
26136
|
+
}
|
|
26055
26137
|
deleteFile.run(row.id);
|
|
26056
26138
|
stats.deleted++;
|
|
26057
26139
|
}
|
|
@@ -26073,6 +26155,9 @@ function indexAllRoots(opts = {}, db) {
|
|
|
26073
26155
|
return listRoots(db).map((root) => indexRoot(root.id, opts, db));
|
|
26074
26156
|
}
|
|
26075
26157
|
// Module-level state for index refreshing.
var refreshing = new Set();
// Timestamp (ms since epoch) of the last auto-refresh check made without an explicit db handle.
var lastDefaultAutoRefreshCheckAt = 0;
// Minimum gap between two such checks, in milliseconds.
var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
// True while a deferred default-index refresh is queued and has not finished.
var defaultRefreshScheduled = false;
|
|
26076
26161
|
function refreshStaleRoots(staleMinutes, db) {
|
|
26077
26162
|
const cutoff = Date.now() - staleMinutes * 60000;
|
|
26078
26163
|
const stats = [];
|
|
@@ -26096,8 +26181,31 @@ function autoRefreshStaleRoots(db) {
|
|
|
26096
26181
|
const config2 = getConfig();
|
|
26097
26182
|
if (!config2.indexAutoRefresh)
|
|
26098
26183
|
return [];
|
|
26184
|
+
if (!db) {
|
|
26185
|
+
const now = Date.now();
|
|
26186
|
+
if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
|
|
26187
|
+
return [];
|
|
26188
|
+
lastDefaultAutoRefreshCheckAt = now;
|
|
26189
|
+
}
|
|
26099
26190
|
return refreshStaleRoots(config2.indexStaleMinutes, db);
|
|
26100
26191
|
}
|
|
26192
|
+
/**
 * Refreshes stale roots, deferring the work when no explicit db handle is given.
 *
 * With a `db` argument the refresh runs immediately through
 * `autoRefreshStaleRoots(db)` and its result is returned. Without one, a single
 * zero-delay timer is queued to call `autoRefreshStaleRoots()` later, and an
 * empty array is returned straight away.
 *
 * @param {object} [db] - Optional database handle.
 * @returns {Array} Result of `autoRefreshStaleRoots(db)` when `db` is given, otherwise `[]`.
 */
function scheduleAutoRefreshStaleRoots(db) {
  if (db)
    return autoRefreshStaleRoots(db);
  const config2 = getConfig();
  // Skip when auto-refresh is disabled or a deferred refresh is already queued.
  if (!config2.indexAutoRefresh || defaultRefreshScheduled)
    return [];
  defaultRefreshScheduled = true;
  const timer = setTimeout(() => {
    try {
      autoRefreshStaleRoots();
    } catch {
      // Deliberately best-effort: a failed background refresh is ignored here.
    } finally {
      // Always clear the flag so a later call can schedule again.
      defaultRefreshScheduled = false;
    }
  }, 0);
  // unref is optional-called because not every runtime's timer handle provides it.
  timer.unref?.();
  return [];
}
|
|
26101
26209
|
function startBackgroundRefresh() {
|
|
26102
26210
|
const minutes = Math.max(1, getConfig().indexStaleMinutes);
|
|
26103
26211
|
const timer = setInterval(() => {
|
|
@@ -26336,6 +26444,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
|
|
|
26336
26444
|
// src/lib/local/query.ts
// Maximum characters kept from a matching line.
var MAX_LINE_LENGTH = 200;
// Maximum matching lines recorded per file.
var MAX_MATCHES_PER_FILE = 5;
// Upper bounds on candidate rows pulled from the index per query type.
var MAX_PATH_CANDIDATES = 20000;
var MAX_CONTENT_CANDIDATES = 50000;
var MAX_REGEX_CANDIDATES = 50000;
|
|
26339
26450
|
/**
 * Splits a query into whitespace-separated tokens.
 *
 * ASCII control characters are removed first, except tab, line feed and
 * carriage return, which are left in place and therefore act as separators.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Non-empty tokens in their original order and case.
 */
function tokenize(query) {
  const cleaned = query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
  return cleaned.split(/\s+/).filter(Boolean);
}
|
|
@@ -26366,11 +26477,40 @@ function filterClauses(opts, db) {
|
|
|
26366
26477
|
}
|
|
26367
26478
|
if (opts.dir) {
|
|
26368
26479
|
clauses.push("f.dir LIKE ? ESCAPE '\\'");
|
|
26369
|
-
const dir = opts.dir.replace(/^\/|\/$/g, "")
|
|
26480
|
+
const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
|
|
26370
26481
|
params.push(`%${dir}%`);
|
|
26371
26482
|
}
|
|
26372
26483
|
return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
|
|
26373
26484
|
}
|
|
26485
|
+
/**
 * Escapes a string for use as a literal inside a SQL `LIKE ... ESCAPE '\'` pattern.
 *
 * Each backslash, `%` and `_` is prefixed with a backslash so it matches itself
 * rather than acting as a wildcard or escape character.
 *
 * @param {string} value - Text to embed in a LIKE pattern.
 * @returns {string} The escaped text.
 */
function escapeLike(value) {
  return value.replace(/[\\%_]/g, "\\$&");
}
|
|
26488
|
+
/**
 * Builds SQL that requires every given token to occur in `f.rel_path`.
 *
 * Produces one `LIKE ? ESCAPE '\'` clause per token, joined with `AND` and
 * prefixed with ` AND ` so it can be appended to an existing WHERE clause. The
 * matching parameters are the tokens, LIKE-escaped and wrapped in `%`.
 *
 * @param {string[]} tokens - Tokens to require.
 * @returns {{sql: string, params: string[]}} SQL fragment and bound parameters;
 *   both empty when there are no tokens.
 */
function shortTokenClauses(tokens) {
  if (tokens.length === 0)
    return { sql: "", params: [] };
  const clauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'");
  return {
    sql: ` AND ${clauses.join(" AND ")}`,
    params: tokens.map((token) => `%${escapeLike(token)}%`)
  };
}
|
|
26496
|
+
/**
 * Builds a SQL pre-filter on `file_content_grams` for short query tokens.
 *
 * Only tokens made of one or two `[a-z0-9_$]` characters are used. For each,
 * a file passes when it either has no rows in `file_content_grams` at all, or
 * has a row whose gram equals the token. Files without gram rows are therefore
 * never excluded by this filter.
 *
 * @param {string[]} tokens - Lower-cased short tokens from the query.
 * @returns {{sql: string, params: string[]}} SQL fragment (each clause prefixed
 *   with ` AND `) and bound parameters; both empty when no token qualifies.
 */
function contentGramClauses(tokens) {
  const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
  if (gramTokens.length === 0)
    return { sql: "", params: [] };
  // Table aliases carry the token index so repeated subqueries stay unambiguous.
  const clauses = gramTokens.map((_token, index) => ` AND (
        NOT EXISTS (
          SELECT 1 FROM file_content_grams cg_any_${index}
          WHERE cg_any_${index}.file_id = f.id
        )
        OR EXISTS (
          SELECT 1 FROM file_content_grams cg_${index}
          WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
        )
      )`);
  return {
    sql: clauses.join(""),
    params: gramTokens
  };
}
|
|
26374
26514
|
function rowToHit(row, score) {
|
|
26375
26515
|
return {
|
|
26376
26516
|
rootId: row.root_id,
|
|
@@ -26440,6 +26580,8 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
26440
26580
|
return [];
|
|
26441
26581
|
const ftsQuery = buildFtsQuery(query);
|
|
26442
26582
|
const filters = filterClauses(opts, d);
|
|
26583
|
+
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
26584
|
+
const shortFilters = shortTokenClauses(shortTokens);
|
|
26443
26585
|
const candidateLimit = Math.max(200, limit * 10);
|
|
26444
26586
|
let rows;
|
|
26445
26587
|
if (ftsQuery) {
|
|
@@ -26447,16 +26589,16 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
26447
26589
|
FROM files_fts fts
|
|
26448
26590
|
JOIN files f ON f.id = fts.rowid
|
|
26449
26591
|
JOIN index_roots r ON r.id = f.root_id
|
|
26450
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
26592
|
+
WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
|
|
26451
26593
|
ORDER BY bm25(files_fts, 10.0, 1.0)
|
|
26452
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
|
|
26453
|
-
const namePattern = `${query.trim()
|
|
26594
|
+
LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
26595
|
+
const namePattern = `${escapeLike(query.trim())}%`;
|
|
26454
26596
|
const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26455
26597
|
FROM files f
|
|
26456
26598
|
JOIN index_roots r ON r.id = f.root_id
|
|
26457
|
-
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
|
|
26599
|
+
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
|
|
26458
26600
|
ORDER BY length(f.name)
|
|
26459
|
-
LIMIT 100`).all(namePattern, ...filters.params);
|
|
26601
|
+
LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
|
|
26460
26602
|
const seen = new Set(rows.map((row) => row.id));
|
|
26461
26603
|
for (const row of nameRows) {
|
|
26462
26604
|
if (!seen.has(row.id))
|
|
@@ -26464,14 +26606,14 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
26464
26606
|
}
|
|
26465
26607
|
} else {
|
|
26466
26608
|
const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
|
|
26467
|
-
const likeParams = tokens.map((t) => `%${t
|
|
26609
|
+
const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
|
|
26468
26610
|
rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26469
26611
|
FROM files f
|
|
26470
26612
|
JOIN index_roots r ON r.id = f.root_id
|
|
26471
26613
|
WHERE ${likeClauses}${filters.sql}
|
|
26472
|
-
|
|
26614
|
+
ORDER BY length(f.name), length(f.rel_path), f.rel_path
|
|
26615
|
+
LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
26473
26616
|
}
|
|
26474
|
-
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
26475
26617
|
const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
|
|
26476
26618
|
return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
|
|
26477
26619
|
}
|
|
@@ -26509,24 +26651,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
|
|
|
26509
26651
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
|
|
26510
26652
|
}
|
|
26511
26653
|
const filters = filterClauses(opts, d);
|
|
26512
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26513
|
-
FROM files_fts fts
|
|
26514
|
-
JOIN files f ON f.id = fts.rowid
|
|
26515
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
26516
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
26517
|
-
ORDER BY fts.rank
|
|
26518
|
-
LIMIT 5000`).all(ftsQuery, ...filters.params);
|
|
26519
26654
|
const hits = [];
|
|
26520
|
-
|
|
26521
|
-
|
|
26522
|
-
|
|
26523
|
-
|
|
26524
|
-
|
|
26525
|
-
|
|
26526
|
-
|
|
26527
|
-
|
|
26528
|
-
|
|
26529
|
-
if (
|
|
26655
|
+
const pageSize = Math.max(500, limit * 20);
|
|
26656
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
26657
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26658
|
+
FROM files_fts fts
|
|
26659
|
+
JOIN files f ON f.id = fts.rowid
|
|
26660
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
26661
|
+
WHERE files_fts MATCH ?${filters.sql}
|
|
26662
|
+
ORDER BY fts.rank
|
|
26663
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
26664
|
+
if (rows.length === 0)
|
|
26665
|
+
break;
|
|
26666
|
+
for (const row of rows) {
|
|
26667
|
+
if (!regex.test(row.rel_path) && !regex.test(row.name))
|
|
26668
|
+
continue;
|
|
26669
|
+
const depth = row.rel_path.split("/").length - 1;
|
|
26670
|
+
const score = Math.max(0.05, 0.6 - depth * 0.02);
|
|
26671
|
+
const hit = rowToHit(row, score);
|
|
26672
|
+
if (!existsSync2(hit.absPath))
|
|
26673
|
+
continue;
|
|
26674
|
+
hits.push(hit);
|
|
26675
|
+
if (hits.length >= limit)
|
|
26676
|
+
break;
|
|
26677
|
+
}
|
|
26678
|
+
if (rows.length < pageSize)
|
|
26530
26679
|
break;
|
|
26531
26680
|
}
|
|
26532
26681
|
return hits;
|
|
@@ -26540,40 +26689,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
|
|
|
26540
26689
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
|
|
26541
26690
|
}
|
|
26542
26691
|
const filters = filterClauses(opts, d);
|
|
26543
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26544
|
-
FROM file_content_fts fts
|
|
26545
|
-
JOIN files f ON f.id = fts.rowid
|
|
26546
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
26547
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
26548
|
-
ORDER BY fts.rank
|
|
26549
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
|
|
26550
26692
|
const hits = [];
|
|
26551
|
-
|
|
26552
|
-
|
|
26553
|
-
const
|
|
26554
|
-
|
|
26555
|
-
|
|
26556
|
-
|
|
26557
|
-
|
|
26558
|
-
|
|
26559
|
-
|
|
26560
|
-
|
|
26693
|
+
const pageSize = Math.max(200, limit * 10);
|
|
26694
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
26695
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26696
|
+
FROM file_content_fts fts
|
|
26697
|
+
JOIN files f ON f.id = fts.rowid
|
|
26698
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
26699
|
+
WHERE file_content_fts MATCH ?${filters.sql}
|
|
26700
|
+
ORDER BY fts.rank
|
|
26701
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
26702
|
+
if (rows.length === 0)
|
|
26703
|
+
break;
|
|
26704
|
+
for (let i = 0;i < rows.length && hits.length < limit; i++) {
|
|
26705
|
+
const row = rows[i];
|
|
26706
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
26707
|
+
let content;
|
|
26708
|
+
try {
|
|
26709
|
+
content = readFileSync4(absPath, "utf-8");
|
|
26710
|
+
} catch {
|
|
26711
|
+
continue;
|
|
26712
|
+
}
|
|
26713
|
+
const lines = content.split(`
|
|
26561
26714
|
`);
|
|
26562
|
-
|
|
26563
|
-
|
|
26564
|
-
|
|
26565
|
-
|
|
26715
|
+
const matches = [];
|
|
26716
|
+
for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
|
|
26717
|
+
if (regex.test(lines[n])) {
|
|
26718
|
+
matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
|
|
26719
|
+
}
|
|
26566
26720
|
}
|
|
26721
|
+
if (matches.length === 0)
|
|
26722
|
+
continue;
|
|
26723
|
+
const rankIndex = offset + i;
|
|
26724
|
+
const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
|
|
26725
|
+
hits.push({
|
|
26726
|
+
...rowToHit(row, score),
|
|
26727
|
+
line: matches[0].line,
|
|
26728
|
+
lineText: matches[0].text,
|
|
26729
|
+
matches
|
|
26730
|
+
});
|
|
26567
26731
|
}
|
|
26568
|
-
if (
|
|
26569
|
-
|
|
26570
|
-
const score = Math.max(0.25, 0.65 - i * 0.05);
|
|
26571
|
-
hits.push({
|
|
26572
|
-
...rowToHit(row, score),
|
|
26573
|
-
line: matches[0].line,
|
|
26574
|
-
lineText: matches[0].text,
|
|
26575
|
-
matches
|
|
26576
|
-
});
|
|
26732
|
+
if (rows.length < pageSize)
|
|
26733
|
+
break;
|
|
26577
26734
|
}
|
|
26578
26735
|
return hits;
|
|
26579
26736
|
}
|
|
@@ -26584,42 +26741,51 @@ function searchFileContent(query, opts = {}, db) {
|
|
|
26584
26741
|
if (!ftsQuery)
|
|
26585
26742
|
return [];
|
|
26586
26743
|
const filters = filterClauses(opts, d);
|
|
26587
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26588
|
-
FROM file_content_fts fts
|
|
26589
|
-
JOIN files f ON f.id = fts.rowid
|
|
26590
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
26591
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
26592
|
-
ORDER BY fts.rank
|
|
26593
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
|
|
26594
26744
|
const tokens = tokenize(query);
|
|
26595
26745
|
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
26746
|
+
const gramFilters = contentGramClauses(shortTokens);
|
|
26596
26747
|
const scored = [];
|
|
26597
|
-
|
|
26598
|
-
|
|
26599
|
-
const
|
|
26600
|
-
|
|
26601
|
-
|
|
26602
|
-
|
|
26603
|
-
|
|
26604
|
-
|
|
26605
|
-
|
|
26606
|
-
if (
|
|
26607
|
-
|
|
26608
|
-
|
|
26748
|
+
const pageSize = Math.max(50, limit * 3);
|
|
26749
|
+
for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
|
|
26750
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26751
|
+
FROM file_content_fts fts
|
|
26752
|
+
JOIN files f ON f.id = fts.rowid
|
|
26753
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
26754
|
+
WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
|
|
26755
|
+
ORDER BY fts.rank
|
|
26756
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
|
|
26757
|
+
if (rows.length === 0)
|
|
26758
|
+
break;
|
|
26759
|
+
for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
|
|
26760
|
+
const row = rows[i];
|
|
26761
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
26762
|
+
let content;
|
|
26763
|
+
try {
|
|
26764
|
+
content = readFileSync4(absPath, "utf-8");
|
|
26765
|
+
} catch {
|
|
26609
26766
|
continue;
|
|
26767
|
+
}
|
|
26768
|
+
if (shortTokens.length > 0) {
|
|
26769
|
+
const lower = content.toLowerCase();
|
|
26770
|
+
if (!shortTokens.every((t) => lower.includes(t)))
|
|
26771
|
+
continue;
|
|
26772
|
+
}
|
|
26773
|
+
const { matches, tier } = findLineMatches(content, query, tokens);
|
|
26774
|
+
if (matches.length === 0)
|
|
26775
|
+
continue;
|
|
26776
|
+
const rankIndex = offset + i;
|
|
26777
|
+
const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
|
|
26778
|
+
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
26779
|
+
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
26780
|
+
scored.push({
|
|
26781
|
+
...rowToHit(row, score),
|
|
26782
|
+
line: matches[0].line,
|
|
26783
|
+
lineText: matches[0].text,
|
|
26784
|
+
matches
|
|
26785
|
+
});
|
|
26610
26786
|
}
|
|
26611
|
-
|
|
26612
|
-
|
|
26613
|
-
continue;
|
|
26614
|
-
const base = Math.max(0.25, 0.55 - i * 0.04);
|
|
26615
|
-
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
26616
|
-
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
26617
|
-
scored.push({
|
|
26618
|
-
...rowToHit(row, score),
|
|
26619
|
-
line: matches[0].line,
|
|
26620
|
-
lineText: matches[0].text,
|
|
26621
|
-
matches
|
|
26622
|
-
});
|
|
26787
|
+
if (rows.length < pageSize)
|
|
26788
|
+
break;
|
|
26623
26789
|
}
|
|
26624
26790
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
26625
26791
|
}
|
|
@@ -26632,7 +26798,7 @@ class FilesProvider {
|
|
|
26632
26798
|
return hasReadyRoot();
|
|
26633
26799
|
}
|
|
26634
26800
|
async search(query, options) {
|
|
26635
|
-
|
|
26801
|
+
scheduleAutoRefreshStaleRoots();
|
|
26636
26802
|
const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
|
|
26637
26803
|
return hits.map((hit) => ({
|
|
26638
26804
|
title: hit.name,
|
|
@@ -26660,7 +26826,7 @@ class ContentProvider {
|
|
|
26660
26826
|
return hasReadyRoot();
|
|
26661
26827
|
}
|
|
26662
26828
|
async search(query, options) {
|
|
26663
|
-
|
|
26829
|
+
scheduleAutoRefreshStaleRoots();
|
|
26664
26830
|
const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
|
|
26665
26831
|
return hits.map((hit) => ({
|
|
26666
26832
|
title: hit.name,
|
|
@@ -26787,7 +26953,7 @@ function rowToSearch(row) {
|
|
|
26787
26953
|
}
|
|
26788
26954
|
function createSearch(data, db) {
|
|
26789
26955
|
const d = db ?? getDb();
|
|
26790
|
-
const id = generateId();
|
|
26956
|
+
const id = data.id ?? generateId();
|
|
26791
26957
|
const now = new Date().toISOString();
|
|
26792
26958
|
d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
|
|
26793
26959
|
VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
|
|
@@ -26869,7 +27035,7 @@ function createResults(results, db) {
|
|
|
26869
27035
|
d.exec("BEGIN");
|
|
26870
27036
|
try {
|
|
26871
27037
|
for (const data of results) {
|
|
26872
|
-
const id = generateId();
|
|
27038
|
+
const id = data.id ?? generateId();
|
|
26873
27039
|
stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
26874
27040
|
created.push({
|
|
26875
27041
|
id,
|
|
@@ -27026,13 +27192,300 @@ function isProviderConfigured(provider) {
|
|
|
27026
27192
|
return !!Bun.env[provider.apiKeyEnv];
|
|
27027
27193
|
}
|
|
27028
27194
|
|
|
27195
|
+
// src/lib/router.ts
// One-line description of each provider, keyed by provider name.
var PROVIDER_DESCRIPTIONS = {
  files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
  content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
  google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
  serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
  exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
  perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
  brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
  bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
  twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
  reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
  youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
  hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
  github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
  arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
};
|
|
27212
|
+
/**
 * Normalizes the requested provider count to an integer from 1 to 5.
 *
 * @param {number} [value] - Requested maximum; fractions are rounded down.
 * @returns {number} The clamped count, or 3 when `value` is not a finite number.
 */
function clampMaxProviders(value) {
  // Number.isFinite is false for undefined, NaN, infinities and non-number types.
  if (value === undefined || !Number.isFinite(value))
    return 3;
  return Math.max(1, Math.min(5, Math.floor(value)));
}
|
|
27217
|
+
/**
 * Normalizes a confidence value to the range 0 to 1.
 *
 * @param {unknown} value - Candidate confidence.
 * @returns {number} The value clamped to [0, 1], or 0.5 when it is not a finite number.
 */
function clampConfidence(value) {
  if (typeof value !== "number" || !Number.isFinite(value))
    return 0.5;
  return Math.max(0, Math.min(1, value));
}
|
|
27220
|
+
/**
 * Filters a candidate provider list down to known, unique names.
 *
 * Entries not present in `PROVIDER_NAMES` and repeated entries are dropped;
 * the first occurrence of each remaining name keeps its position.
 *
 * @param {Iterable<string>} candidates - Requested provider names; a nullish
 *   value is treated as an empty list.
 * @returns {string[]} Allowed provider names without duplicates.
 */
function normalizeCandidates(candidates) {
  const allowed = new Set(PROVIDER_NAMES);
  const seen = new Set();
  const normalized = [];
  for (const candidate of candidates ?? []) {
    if (!allowed.has(candidate) || seen.has(candidate))
      continue;
    seen.add(candidate);
    normalized.push(candidate);
  }
  return normalized;
}
|
|
27232
|
+
/**
 * Adds to a provider's running score, but only for providers that are candidates.
 *
 * @param {Map<string, number>} scores - Score table, mutated in place.
 * @param {Set<string>} candidateSet - Providers eligible for scoring.
 * @param {string} provider - Provider to credit.
 * @param {number} amount - Amount added to the current score (missing scores start at 0).
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (!candidateSet.has(provider))
    return;
  const current = scores.get(provider) ?? 0;
  scores.set(provider, current + amount);
}
|
|
27237
|
+
/**
 * Reports whether at least one pattern matches the query.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns tried in order; stops at the first hit.
 * @returns {boolean} True when any pattern's `test` succeeds.
 */
function hasAny(query, patterns) {
  for (const matcher of patterns) {
    if (matcher.test(query)) {
      return true;
    }
  }
  return false;
}
|
|
27240
|
+
/**
 * Chooses providers for a query using keyword/regex rules only.
 *
 * Every valid candidate starts at a 0.05 baseline. Each rule whose patterns
 * match the trimmed, lowercased query adds its boosts (only to providers that
 * are candidates) and records its reason. When no rule matches, a general
 * fallback boost set is applied instead. Providers are ranked by score, with
 * ties broken by their position in the normalized candidate list.
 *
 * @param {string} query - Raw search query.
 * @param {string[]} candidates - Provider names to choose from.
 * @param {{maxProviders?: number}} [options] - Optional cap on the selection.
 * @returns {{strategy: string, selectedProviders: string[], candidates: string[], reason: string, confidence: number}}
 *   Routing decision; confidence is 0 when there are no usable candidates,
 *   otherwise top score / 6 limited to the range 0.35..0.9.
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const maxProviders = clampMaxProviders(options.maxProviders);
  if (pool.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; the order fixes both the order of the
  // reported reasons and the order in which boosts are summed.
  const rules = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      reason: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      reason: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      reason: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      reason: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      reason: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      reason: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      reason: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      reason: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      reason: "query appears time-sensitive"
    }
  ];
  const fallbackRule = {
    boosts: [["exa", 2.5], ["perplexity", 2], ["brave", 1.5], ["google", 1.5], ["hackernews", 0.75]],
    reason: "general query fallback"
  };

  const candidateSet = new Set(pool);
  const scores = new Map();
  const reasons = [];
  const q = query.trim().toLowerCase();
  for (const name of pool) {
    scores.set(name, 0.05);
  }

  const applyRule = (rule) => {
    for (const [provider, amount] of rule.boosts) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push(rule.reason);
  };

  for (const rule of rules) {
    if (hasAny(q, rule.patterns)) {
      applyRule(rule);
    }
  }
  if (reasons.length === 0) {
    applyRule(fallbackRule);
  }

  // Highest score first; equal scores keep the caller's candidate order.
  const byScoreThenPosition = (a, b) => b[1] - a[1] || pool.indexOf(a[0]) - pool.indexOf(b[0]);
  const ranked = [...scores.entries()].sort(byScoreThenPosition);
  const keep = Math.min(maxProviders, pool.length);
  const selectedProviders = ranked.slice(0, keep).map(([provider]) => provider);

  const topScore = scores.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: pool,
    reason: reasons.join("; "),
    confidence
  };
}
|
|
27330
|
+
/**
 * Builds the JSON Schema describing the router model's expected reply.
 *
 * @param {string[]} candidates - Allowed provider names (used as the enum).
 * @param {number} maxProviders - Upper bound on the number of selected providers.
 * @returns {object} Object schema requiring `selectedProviders`, `reason`
 *   and `confidence`, with no additional properties allowed.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const properties = {
    selectedProviders,
    reason: { type: "string" },
    confidence: { type: "number", minimum: 0, maximum: 1 }
  };
  return {
    type: "object",
    properties,
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
|
|
27347
|
+
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {string[]} candidates - Provider names the model was allowed to pick.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{selectedProviders: string[], reason: string, confidence: number} | null}
 *   The sanitized selection, or null when the reply is not valid JSON, is not
 *   an object with a `selectedProviders` array, or names no allowed provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse can legitimately yield null (raw === "null"); reading a property
  // off it would throw instead of reporting an invalid reply.
  if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.selectedProviders)) {
    return null;
  }
  const candidateSet = new Set(candidates);
  const allowed = parsed.selectedProviders.filter(
    (provider) => typeof provider === "string" && candidateSet.has(provider)
  );
  // De-duplicate before applying the cap so a repeated name neither wastes a
  // slot nor causes the same provider to be selected twice.
  const selectedProviders = [...new Set(allowed)].slice(0, maxProviders);
  if (selectedProviders.length === 0) {
    return null;
  }
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
|
|
27366
|
+
/**
 * Asks the Cerebras chat-completions endpoint to pick providers for a query.
 *
 * When `CEREBRAS_API_KEY` is not set, no request is made: the heuristic
 * result is returned with an `error` note explaining why.
 *
 * @param {string} query - Search query to route.
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {{maxProviders: number, timeoutMs: number, model: string}} options -
 *   Selection cap, request timeout in milliseconds, and model identifier.
 * @returns {Promise<object>} Routing decision with strategy "cerebras", or the
 *   heuristic decision plus `error` when no API key is configured.
 * @throws {Error} On a non-OK HTTP status, an empty reply, or a reply that
 *   does not contain a valid provider selection.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const heuristic = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...heuristic,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Give the model a short description of each provider it may select.
  const providerGuide = candidates.map((name) => {
    return { name, description: PROVIDER_DESCRIPTIONS[name] };
  });

  const signal = AbortSignal.timeout(options.timeoutMs);
  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers: providerGuide
    })
  };
  const payload = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Constrain the reply to the router schema so it can be parsed directly.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, options.maxProviders)
      }
    }
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal,
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }

  const parsed = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!parsed) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...parsed
  };
}
|
|
27428
|
+
/**
 * Routes a query to a subset of providers, preferring the Cerebras router and
 * falling back to the heuristic router when that call throws.
 *
 * @param {string} query - Search query to route.
 * @param {string[]} candidates - Provider names to choose from.
 * @param {{maxProviders?: number, timeoutMs?: number, model?: string}} [options] -
 *   Optional selection cap, router timeout (floored, minimum 250 ms, default
 *   1200 ms) and model (default: `CEREBRAS_MODEL` env var, then "gpt-oss-120b").
 * @returns {Promise<object>} Routing decision; carries an `error` message when
 *   the Cerebras attempt failed and the heuristic result was used instead.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);

  // Never ask for more providers than exist, but keep the cap at least 1.
  const requestedCap = clampMaxProviders(options.maxProviders);
  const maxProviders = Math.min(requestedCap, Math.max(1, pool.length));

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }

  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  const heuristic = () => routeSearchProvidersHeuristic(query, pool, { maxProviders });

  if (pool.length === 0) {
    return heuristic();
  }

  try {
    return await routeWithCerebras(query, pool, { maxProviders, timeoutMs, model });
  } catch (err) {
    const fallback = heuristic();
    const message = err instanceof Error ? err.message : String(err);
    return { ...fallback, error: message };
  }
}
|
|
27445
|
+
|
|
27029
27446
|
// src/lib/search.ts
|
|
27447
|
+
/**
 * Awaits a promise but rejects if it has not settled within `timeoutMs`.
 *
 * The underlying operation is not cancelled when the timeout wins; only the
 * returned promise rejects.
 *
 * @param {Promise<*>} promise2 - Operation to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds; a non-finite or
 *   non-positive value disables the limit.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<*>} The settled value of `promise2`.
 * @throws {Error} "<label> timed out after <timeoutMs>ms" when the limit elapses first.
 */
async function withTimeout(promise2, timeoutMs, label) {
  const limited = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!limited) {
    return promise2;
  }

  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    // Where timers support it, do not let this one keep the process alive.
    timer.unref?.();
  });

  try {
    return await Promise.race([promise2, deadline]);
  } finally {
    if (timer) {
      clearTimeout(timer);
    }
  }
}
|
|
27464
|
+
/**
 * Runs `task` over every item with at most `concurrency` tasks in flight and
 * reports each outcome in the same shape as `Promise.allSettled`.
 *
 * @param {Array<*>} items - Inputs, processed in index order.
 * @param {number} concurrency - Maximum simultaneous tasks. Values below 1 are
 *   raised to 1. A missing or non-finite value (undefined, NaN, Infinity)
 *   means "no limit", so every item is started at once.
 * @param {function(*): (Promise<*>|*)} task - Work to run per item; a throw or
 *   rejection is captured, never propagated.
 * @returns {Promise<Array<{status: "fulfilled", value: *} | {status: "rejected", reason: *}>>}
 *   One entry per item, at the item's original index.
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  if (items.length === 0) {
    return results;
  }

  // A NaN worker count would make Array.from create zero workers, leaving every
  // result slot empty without running a single task. Treat an unusable limit
  // as "unbounded" instead.
  const requested = Math.floor(concurrency);
  const limit = Number.isFinite(requested) ? Math.max(1, requested) : items.length;
  const workerCount = Math.min(limit, items.length);

  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
27030
27482
|
async function unifiedSearch(query, opts = {}) {
|
|
27031
27483
|
const config2 = getConfig();
|
|
27032
27484
|
const startTime = Date.now();
|
|
27033
27485
|
const db = opts.db;
|
|
27034
27486
|
let providerNames = opts.providers ?? [];
|
|
27035
|
-
|
|
27487
|
+
const smartProfile = opts.profile === "smart";
|
|
27488
|
+
if (opts.profile && !smartProfile) {
|
|
27036
27489
|
const profile = getProfileByName(opts.profile, db);
|
|
27037
27490
|
if (profile) {
|
|
27038
27491
|
providerNames = profile.providers;
|
|
@@ -27048,7 +27501,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27048
27501
|
}
|
|
27049
27502
|
const errors4 = [];
|
|
27050
27503
|
const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
|
|
27051
|
-
|
|
27504
|
+
let activeProviders = providerNames.filter((name) => {
|
|
27052
27505
|
try {
|
|
27053
27506
|
if (getProvider(name).isConfigured())
|
|
27054
27507
|
return true;
|
|
@@ -27059,20 +27512,36 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27059
27512
|
});
|
|
27060
27513
|
}
|
|
27061
27514
|
return false;
|
|
27062
|
-
} catch {
|
|
27515
|
+
} catch (err) {
|
|
27516
|
+
if (explicitRequest) {
|
|
27517
|
+
errors4.push({
|
|
27518
|
+
provider: name,
|
|
27519
|
+
error: err instanceof Error ? err.message : "unknown provider"
|
|
27520
|
+
});
|
|
27521
|
+
}
|
|
27063
27522
|
return false;
|
|
27064
27523
|
}
|
|
27065
27524
|
});
|
|
27525
|
+
const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config2.router.enabled;
|
|
27526
|
+
let routing;
|
|
27527
|
+
if (routingRequested && activeProviders.length > 0) {
|
|
27528
|
+
routing = await routeSearchProviders(query, activeProviders, {
|
|
27529
|
+
maxProviders: config2.router.maxProviders,
|
|
27530
|
+
timeoutMs: config2.router.timeoutMs,
|
|
27531
|
+
model: config2.router.model
|
|
27532
|
+
});
|
|
27533
|
+
activeProviders = routing.selectedProviders;
|
|
27534
|
+
}
|
|
27066
27535
|
const searchOptions = {
|
|
27067
27536
|
limit: config2.defaultLimit,
|
|
27068
27537
|
...opts.options
|
|
27069
27538
|
};
|
|
27070
|
-
const results = await
|
|
27539
|
+
const results = await allSettledLimited(activeProviders, config2.maxConcurrent, async (name) => {
|
|
27071
27540
|
const provider = getProvider(name);
|
|
27072
|
-
const rawResults = await provider.search(query, searchOptions);
|
|
27541
|
+
const rawResults = await withTimeout(provider.search(query, searchOptions), config2.providerTimeoutMs, provider.displayName);
|
|
27073
27542
|
updateProviderLastUsed(name, db);
|
|
27074
27543
|
return { name, results: rawResults };
|
|
27075
|
-
})
|
|
27544
|
+
});
|
|
27076
27545
|
const allResults = [];
|
|
27077
27546
|
const searchId = generateId();
|
|
27078
27547
|
for (const result of results) {
|
|
@@ -27126,11 +27595,13 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27126
27595
|
createdAt: new Date().toISOString()
|
|
27127
27596
|
},
|
|
27128
27597
|
results: finalResults,
|
|
27129
|
-
errors: errors4
|
|
27598
|
+
errors: errors4,
|
|
27599
|
+
...routing && { routing }
|
|
27130
27600
|
};
|
|
27131
27601
|
}
|
|
27132
27602
|
const persistable = config2.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
|
|
27133
27603
|
const search = createSearch({
|
|
27604
|
+
id: searchId,
|
|
27134
27605
|
query,
|
|
27135
27606
|
providers: activeProviders,
|
|
27136
27607
|
resultCount: persistable.length,
|
|
@@ -27139,6 +27610,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27139
27610
|
if (persistable.length > 0) {
|
|
27140
27611
|
createResults(persistable.map((r) => ({
|
|
27141
27612
|
searchId: search.id,
|
|
27613
|
+
id: r.id,
|
|
27142
27614
|
title: r.title,
|
|
27143
27615
|
url: r.url,
|
|
27144
27616
|
snippet: r.snippet,
|
|
@@ -27155,7 +27627,8 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27155
27627
|
return {
|
|
27156
27628
|
search: { ...search, resultCount: finalResults.length, duration: duration3 },
|
|
27157
27629
|
results: finalResults,
|
|
27158
|
-
errors: errors4
|
|
27630
|
+
errors: errors4,
|
|
27631
|
+
...routing && { routing }
|
|
27159
27632
|
};
|
|
27160
27633
|
}
|
|
27161
27634
|
async function searchSingleProvider(provider, query, options, db) {
|
|
@@ -27389,7 +27862,7 @@ function findLocal(query, opts = {}, db) {
|
|
|
27389
27862
|
return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
|
|
27390
27863
|
}
|
|
27391
27864
|
if (opts.refresh !== false)
|
|
27392
|
-
|
|
27865
|
+
scheduleAutoRefreshStaleRoots(db);
|
|
27393
27866
|
const queryOpts = {
|
|
27394
27867
|
root: opts.root,
|
|
27395
27868
|
ext: opts.ext,
|
|
@@ -28000,13 +28473,15 @@ function buildServer() {
|
|
|
28000
28473
|
providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
|
|
28001
28474
|
profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
|
|
28002
28475
|
limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
|
|
28003
|
-
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
|
|
28004
|
-
|
|
28476
|
+
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
|
|
28477
|
+
smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
|
|
28478
|
+
}, async ({ query, providers, profile, limit, dedup, smart }) => {
|
|
28005
28479
|
const response = await unifiedSearch(query, {
|
|
28006
28480
|
providers,
|
|
28007
28481
|
profile,
|
|
28008
28482
|
options: limit ? { limit } : undefined,
|
|
28009
|
-
dedup
|
|
28483
|
+
dedup,
|
|
28484
|
+
smart
|
|
28010
28485
|
});
|
|
28011
28486
|
return {
|
|
28012
28487
|
content: [
|
|
@@ -28025,7 +28500,8 @@ function buildServer() {
|
|
|
28025
28500
|
source: r.source,
|
|
28026
28501
|
score: r.score
|
|
28027
28502
|
})),
|
|
28028
|
-
errors: response.errors
|
|
28503
|
+
errors: response.errors,
|
|
28504
|
+
routing: response.routing
|
|
28029
28505
|
}, null, 2)
|
|
28030
28506
|
}
|
|
28031
28507
|
]
|
|
@@ -28280,12 +28756,14 @@ function buildServer() {
|
|
|
28280
28756
|
default_limit: exports_external.number().int().optional(),
|
|
28281
28757
|
dedup: exports_external.boolean().optional(),
|
|
28282
28758
|
max_concurrent: exports_external.number().int().optional(),
|
|
28759
|
+
provider_timeout_ms: exports_external.number().int().optional(),
|
|
28283
28760
|
default_profile: exports_external.string().nullable().optional()
|
|
28284
28761
|
}, async (updates) => {
|
|
28285
28762
|
const config2 = setConfig({
|
|
28286
28763
|
...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
|
|
28287
28764
|
...updates.dedup !== undefined && { dedup: updates.dedup },
|
|
28288
28765
|
...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
|
|
28766
|
+
...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
|
|
28289
28767
|
...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
|
|
28290
28768
|
});
|
|
28291
28769
|
return {
|