@hasna/search 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +602 -128
- package/dist/db/index-migrations.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -0
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/searches.d.ts +1 -0
- package/dist/db/searches.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +602 -126
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +1 -1
- package/dist/lib/local/find.d.ts.map +1 -1
- package/dist/lib/local/indexer.d.ts +6 -0
- package/dist/lib/local/indexer.d.ts.map +1 -1
- package/dist/lib/local/query.d.ts.map +1 -1
- package/dist/lib/router.d.ts +10 -0
- package/dist/lib/router.d.ts.map +1 -0
- package/dist/lib/search.d.ts +1 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/index.js +608 -130
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/server/index.js +611 -131
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/types/index.d.ts +22 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/server/index.js
CHANGED
|
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
|
|
|
11644
11644
|
var require_package = __commonJS((exports, module) => {
|
|
11645
11645
|
module.exports = {
|
|
11646
11646
|
name: "@hasna/search",
|
|
11647
|
-
version: "0.0.10",
|
|
11647
|
+
version: "0.0.11",
|
|
11648
11648
|
description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
|
|
11649
11649
|
type: "module",
|
|
11650
11650
|
main: "dist/index.js",
|
|
@@ -15756,12 +15756,19 @@ var DEFAULT_CONFIG = {
|
|
|
15756
15756
|
defaultLimit: 10,
|
|
15757
15757
|
defaultProviders: [],
|
|
15758
15758
|
defaultProfile: null,
|
|
15759
|
+
router: {
|
|
15760
|
+
enabled: false,
|
|
15761
|
+
model: "gpt-oss-120b",
|
|
15762
|
+
maxProviders: 3,
|
|
15763
|
+
timeoutMs: 1200
|
|
15764
|
+
},
|
|
15759
15765
|
transcriber: {
|
|
15760
15766
|
baseUrl: "http://localhost:19600",
|
|
15761
15767
|
fallbackCli: "microservice-transcriber"
|
|
15762
15768
|
},
|
|
15763
15769
|
dedup: true,
|
|
15764
15770
|
maxConcurrent: 5,
|
|
15771
|
+
providerTimeoutMs: 15000,
|
|
15765
15772
|
indexStaleMinutes: 5,
|
|
15766
15773
|
indexAutoRefresh: true,
|
|
15767
15774
|
recordLocalResults: false
|
|
@@ -16496,6 +16503,31 @@ var migrations = [
|
|
|
16496
16503
|
);
|
|
16497
16504
|
`);
|
|
16498
16505
|
}
|
|
16506
|
+
},
|
|
16507
|
+
{
|
|
16508
|
+
version: 2,
|
|
16509
|
+
description: "Local file index filter indexes",
|
|
16510
|
+
up: (db) => {
|
|
16511
|
+
db.exec(`
|
|
16512
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
|
|
16513
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
|
|
16514
|
+
`);
|
|
16515
|
+
}
|
|
16516
|
+
},
|
|
16517
|
+
{
|
|
16518
|
+
version: 3,
|
|
16519
|
+
description: "Local content short-token filter grams",
|
|
16520
|
+
up: (db) => {
|
|
16521
|
+
db.exec(`
|
|
16522
|
+
CREATE TABLE IF NOT EXISTS file_content_grams (
|
|
16523
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
16524
|
+
gram TEXT NOT NULL,
|
|
16525
|
+
PRIMARY KEY (file_id, gram)
|
|
16526
|
+
);
|
|
16527
|
+
CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
|
|
16528
|
+
ON file_content_grams(gram, file_id);
|
|
16529
|
+
`);
|
|
16530
|
+
}
|
|
16499
16531
|
}
|
|
16500
16532
|
];
|
|
16501
16533
|
function runIndexMigrations(db) {
|
|
@@ -16567,7 +16599,18 @@ function getConfig() {
|
|
|
16567
16599
|
try {
|
|
16568
16600
|
const raw = readFileSync(path, "utf-8");
|
|
16569
16601
|
const parsed = JSON.parse(raw);
|
|
16570
|
-
return {
|
|
16602
|
+
return {
|
|
16603
|
+
...DEFAULT_CONFIG,
|
|
16604
|
+
...parsed,
|
|
16605
|
+
router: {
|
|
16606
|
+
...DEFAULT_CONFIG.router,
|
|
16607
|
+
...parsed.router ?? {}
|
|
16608
|
+
},
|
|
16609
|
+
transcriber: {
|
|
16610
|
+
...DEFAULT_CONFIG.transcriber,
|
|
16611
|
+
...parsed.transcriber ?? {}
|
|
16612
|
+
}
|
|
16613
|
+
};
|
|
16571
16614
|
} catch {
|
|
16572
16615
|
return { ...DEFAULT_CONFIG };
|
|
16573
16616
|
}
|
|
@@ -16985,6 +17028,7 @@ function removeRoot(idOrPath, db) {
|
|
|
16985
17028
|
d.exec("BEGIN");
|
|
16986
17029
|
try {
|
|
16987
17030
|
d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
17031
|
+
d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
16988
17032
|
d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
|
|
16989
17033
|
d.exec("COMMIT");
|
|
16990
17034
|
} catch (err) {
|
|
@@ -16996,6 +17040,21 @@ function removeRoot(idOrPath, db) {
|
|
|
16996
17040
|
function shouldIndexContent(root, file) {
|
|
16997
17041
|
return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
|
|
16998
17042
|
}
|
|
17043
|
+
function contentShortGrams(body) {
|
|
17044
|
+
const grams = new Set;
|
|
17045
|
+
const words = body.toLowerCase().matchAll(/[a-z0-9_$]+/g);
|
|
17046
|
+
for (const match of words) {
|
|
17047
|
+
const word = match[0];
|
|
17048
|
+
for (let i = 0;i < word.length; i++) {
|
|
17049
|
+
grams.add(word[i]);
|
|
17050
|
+
if (i + 1 < word.length)
|
|
17051
|
+
grams.add(word.slice(i, i + 2));
|
|
17052
|
+
}
|
|
17053
|
+
if (grams.size >= 2048)
|
|
17054
|
+
break;
|
|
17055
|
+
}
|
|
17056
|
+
return [...grams];
|
|
17057
|
+
}
|
|
16999
17058
|
function indexRoot(idOrPath, opts = {}, db) {
|
|
17000
17059
|
const d = db ?? getIndexDb();
|
|
17001
17060
|
const root = getRoot(idOrPath, d);
|
|
@@ -17014,6 +17073,8 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
17014
17073
|
const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
|
|
17015
17074
|
const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
|
|
17016
17075
|
const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
|
|
17076
|
+
const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
|
|
17077
|
+
const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
|
|
17017
17078
|
const stats = {
|
|
17018
17079
|
rootId: root.id,
|
|
17019
17080
|
added: 0,
|
|
@@ -17024,38 +17085,57 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
17024
17085
|
skippedDirs: skippedDirs.length,
|
|
17025
17086
|
durationMs: 0
|
|
17026
17087
|
};
|
|
17088
|
+
const seen = new Set;
|
|
17089
|
+
const changes = [];
|
|
17090
|
+
for (const file of scanned) {
|
|
17091
|
+
seen.add(file.relPath);
|
|
17092
|
+
const prev = existing.get(file.relPath);
|
|
17093
|
+
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
17094
|
+
if (prev && !changed && !opts.force)
|
|
17095
|
+
continue;
|
|
17096
|
+
const wantContent = shouldIndexContent(root, file);
|
|
17097
|
+
const absPath = `${root.path}/${file.relPath}`;
|
|
17098
|
+
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
17099
|
+
let body = null;
|
|
17100
|
+
if (wantContent && !isBinary) {
|
|
17101
|
+
try {
|
|
17102
|
+
body = readFileSync3(absPath, "utf-8");
|
|
17103
|
+
} catch {
|
|
17104
|
+
isBinary = true;
|
|
17105
|
+
}
|
|
17106
|
+
}
|
|
17107
|
+
changes.push({
|
|
17108
|
+
file,
|
|
17109
|
+
prev,
|
|
17110
|
+
isBinary,
|
|
17111
|
+
body,
|
|
17112
|
+
grams: body !== null ? contentShortGrams(body) : [],
|
|
17113
|
+
contentIndexed: body !== null ? 1 : 0
|
|
17114
|
+
});
|
|
17115
|
+
}
|
|
17027
17116
|
d.exec("BEGIN");
|
|
17028
17117
|
try {
|
|
17029
|
-
const seen = new Set;
|
|
17030
|
-
for (const file of scanned) {
|
|
17031
|
-
seen.add(file.relPath);
|
|
17032
|
-
const prev = existing.get(file.relPath);
|
|
17033
|
-
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
17034
|
-
if (prev && !changed && !opts.force)
|
|
17035
|
-
continue;
|
|
17036
|
-
const wantContent = shouldIndexContent(root, file);
|
|
17037
|
-
const absPath = `${root.path}/${file.relPath}`;
|
|
17038
|
-
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
17039
|
-
let body = null;
|
|
17040
|
-
if (wantContent && !isBinary) {
|
|
17041
|
-
try {
|
|
17042
|
-
body = readFileSync3(absPath, "utf-8");
|
|
17043
|
-
} catch {
|
|
17044
|
-
isBinary = true;
|
|
17045
|
-
}
|
|
17046
|
-
}
|
|
17047
|
-
const contentIndexed = body !== null ? 1 : 0;
|
|
17118
|
+
for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
|
|
17048
17119
|
if (prev) {
|
|
17049
|
-
if (prev.content_indexed)
|
|
17120
|
+
if (prev.content_indexed) {
|
|
17050
17121
|
deleteContent.run(prev.id);
|
|
17122
|
+
deleteContentGrams.run(prev.id);
|
|
17123
|
+
}
|
|
17051
17124
|
updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
|
|
17052
|
-
if (body !== null)
|
|
17125
|
+
if (body !== null) {
|
|
17053
17126
|
insertContent.run(prev.id, body);
|
|
17127
|
+
for (const gram of grams)
|
|
17128
|
+
insertContentGram.run(prev.id, gram);
|
|
17129
|
+
}
|
|
17054
17130
|
stats.updated++;
|
|
17055
17131
|
} else {
|
|
17056
17132
|
const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
|
|
17057
|
-
if (body !== null)
|
|
17058
|
-
|
|
17133
|
+
if (body !== null) {
|
|
17134
|
+
const fileId = Number(inserted.lastInsertRowid);
|
|
17135
|
+
insertContent.run(fileId, body);
|
|
17136
|
+
for (const gram of grams)
|
|
17137
|
+
insertContentGram.run(fileId, gram);
|
|
17138
|
+
}
|
|
17059
17139
|
stats.added++;
|
|
17060
17140
|
}
|
|
17061
17141
|
if (contentIndexed)
|
|
@@ -17064,8 +17144,10 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
17064
17144
|
for (const [relPath, row] of existing) {
|
|
17065
17145
|
if (seen.has(relPath))
|
|
17066
17146
|
continue;
|
|
17067
|
-
if (row.content_indexed)
|
|
17147
|
+
if (row.content_indexed) {
|
|
17068
17148
|
deleteContent.run(row.id);
|
|
17149
|
+
deleteContentGrams.run(row.id);
|
|
17150
|
+
}
|
|
17069
17151
|
deleteFile.run(row.id);
|
|
17070
17152
|
stats.deleted++;
|
|
17071
17153
|
}
|
|
@@ -17087,6 +17169,9 @@ function indexAllRoots(opts = {}, db) {
|
|
|
17087
17169
|
return listRoots(db).map((root) => indexRoot(root.id, opts, db));
|
|
17088
17170
|
}
|
|
17089
17171
|
var refreshing = new Set;
|
|
17172
|
+
var lastDefaultAutoRefreshCheckAt = 0;
|
|
17173
|
+
var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
|
|
17174
|
+
var defaultRefreshScheduled = false;
|
|
17090
17175
|
function refreshStaleRoots(staleMinutes, db) {
|
|
17091
17176
|
const cutoff = Date.now() - staleMinutes * 60000;
|
|
17092
17177
|
const stats = [];
|
|
@@ -17110,8 +17195,31 @@ function autoRefreshStaleRoots(db) {
|
|
|
17110
17195
|
const config = getConfig();
|
|
17111
17196
|
if (!config.indexAutoRefresh)
|
|
17112
17197
|
return [];
|
|
17198
|
+
if (!db) {
|
|
17199
|
+
const now = Date.now();
|
|
17200
|
+
if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
|
|
17201
|
+
return [];
|
|
17202
|
+
lastDefaultAutoRefreshCheckAt = now;
|
|
17203
|
+
}
|
|
17113
17204
|
return refreshStaleRoots(config.indexStaleMinutes, db);
|
|
17114
17205
|
}
|
|
17206
|
+
function scheduleAutoRefreshStaleRoots(db) {
|
|
17207
|
+
if (db)
|
|
17208
|
+
return autoRefreshStaleRoots(db);
|
|
17209
|
+
const config = getConfig();
|
|
17210
|
+
if (!config.indexAutoRefresh || defaultRefreshScheduled)
|
|
17211
|
+
return [];
|
|
17212
|
+
defaultRefreshScheduled = true;
|
|
17213
|
+
const timer = setTimeout(() => {
|
|
17214
|
+
try {
|
|
17215
|
+
autoRefreshStaleRoots();
|
|
17216
|
+
} catch {} finally {
|
|
17217
|
+
defaultRefreshScheduled = false;
|
|
17218
|
+
}
|
|
17219
|
+
}, 0);
|
|
17220
|
+
timer.unref?.();
|
|
17221
|
+
return [];
|
|
17222
|
+
}
|
|
17115
17223
|
function startBackgroundRefresh() {
|
|
17116
17224
|
const minutes = Math.max(1, getConfig().indexStaleMinutes);
|
|
17117
17225
|
const timer = setInterval(() => {
|
|
@@ -17350,6 +17458,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
|
|
|
17350
17458
|
// src/lib/local/query.ts
|
|
17351
17459
|
var MAX_LINE_LENGTH = 200;
|
|
17352
17460
|
var MAX_MATCHES_PER_FILE = 5;
|
|
17461
|
+
var MAX_PATH_CANDIDATES = 20000;
|
|
17462
|
+
var MAX_CONTENT_CANDIDATES = 50000;
|
|
17463
|
+
var MAX_REGEX_CANDIDATES = 50000;
|
|
17353
17464
|
function tokenize(query) {
|
|
17354
17465
|
return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
|
|
17355
17466
|
}
|
|
@@ -17380,11 +17491,40 @@ function filterClauses(opts, db) {
|
|
|
17380
17491
|
}
|
|
17381
17492
|
if (opts.dir) {
|
|
17382
17493
|
clauses.push("f.dir LIKE ? ESCAPE '\\'");
|
|
17383
|
-
const dir = opts.dir.replace(/^\/|\/$/g, "")
|
|
17494
|
+
const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
|
|
17384
17495
|
params.push(`%${dir}%`);
|
|
17385
17496
|
}
|
|
17386
17497
|
return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
|
|
17387
17498
|
}
|
|
17499
|
+
function escapeLike(value) {
|
|
17500
|
+
return value.replace(/[\\%_]/g, "\\$&");
|
|
17501
|
+
}
|
|
17502
|
+
function shortTokenClauses(tokens) {
|
|
17503
|
+
if (tokens.length === 0)
|
|
17504
|
+
return { sql: "", params: [] };
|
|
17505
|
+
return {
|
|
17506
|
+
sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
|
|
17507
|
+
params: tokens.map((token) => `%${escapeLike(token)}%`)
|
|
17508
|
+
};
|
|
17509
|
+
}
|
|
17510
|
+
function contentGramClauses(tokens) {
|
|
17511
|
+
const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
|
|
17512
|
+
if (gramTokens.length === 0)
|
|
17513
|
+
return { sql: "", params: [] };
|
|
17514
|
+
return {
|
|
17515
|
+
sql: gramTokens.map((_token, index) => ` AND (
|
|
17516
|
+
NOT EXISTS (
|
|
17517
|
+
SELECT 1 FROM file_content_grams cg_any_${index}
|
|
17518
|
+
WHERE cg_any_${index}.file_id = f.id
|
|
17519
|
+
)
|
|
17520
|
+
OR EXISTS (
|
|
17521
|
+
SELECT 1 FROM file_content_grams cg_${index}
|
|
17522
|
+
WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
|
|
17523
|
+
)
|
|
17524
|
+
)`).join(""),
|
|
17525
|
+
params: gramTokens
|
|
17526
|
+
};
|
|
17527
|
+
}
|
|
17388
17528
|
function rowToHit(row, score) {
|
|
17389
17529
|
return {
|
|
17390
17530
|
rootId: row.root_id,
|
|
@@ -17454,6 +17594,8 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
17454
17594
|
return [];
|
|
17455
17595
|
const ftsQuery = buildFtsQuery(query);
|
|
17456
17596
|
const filters = filterClauses(opts, d);
|
|
17597
|
+
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
17598
|
+
const shortFilters = shortTokenClauses(shortTokens);
|
|
17457
17599
|
const candidateLimit = Math.max(200, limit * 10);
|
|
17458
17600
|
let rows;
|
|
17459
17601
|
if (ftsQuery) {
|
|
@@ -17461,16 +17603,16 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
17461
17603
|
FROM files_fts fts
|
|
17462
17604
|
JOIN files f ON f.id = fts.rowid
|
|
17463
17605
|
JOIN index_roots r ON r.id = f.root_id
|
|
17464
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
17606
|
+
WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
|
|
17465
17607
|
ORDER BY bm25(files_fts, 10.0, 1.0)
|
|
17466
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
|
|
17467
|
-
const namePattern = `${query.trim()
|
|
17608
|
+
LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
17609
|
+
const namePattern = `${escapeLike(query.trim())}%`;
|
|
17468
17610
|
const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17469
17611
|
FROM files f
|
|
17470
17612
|
JOIN index_roots r ON r.id = f.root_id
|
|
17471
|
-
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
|
|
17613
|
+
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
|
|
17472
17614
|
ORDER BY length(f.name)
|
|
17473
|
-
LIMIT 100`).all(namePattern, ...filters.params);
|
|
17615
|
+
LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
|
|
17474
17616
|
const seen = new Set(rows.map((row) => row.id));
|
|
17475
17617
|
for (const row of nameRows) {
|
|
17476
17618
|
if (!seen.has(row.id))
|
|
@@ -17478,14 +17620,14 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
17478
17620
|
}
|
|
17479
17621
|
} else {
|
|
17480
17622
|
const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
|
|
17481
|
-
const likeParams = tokens.map((t) => `%${t
|
|
17623
|
+
const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
|
|
17482
17624
|
rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17483
17625
|
FROM files f
|
|
17484
17626
|
JOIN index_roots r ON r.id = f.root_id
|
|
17485
17627
|
WHERE ${likeClauses}${filters.sql}
|
|
17486
|
-
|
|
17628
|
+
ORDER BY length(f.name), length(f.rel_path), f.rel_path
|
|
17629
|
+
LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
17487
17630
|
}
|
|
17488
|
-
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
17489
17631
|
const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
|
|
17490
17632
|
return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
|
|
17491
17633
|
}
|
|
@@ -17523,24 +17665,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
|
|
|
17523
17665
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
|
|
17524
17666
|
}
|
|
17525
17667
|
const filters = filterClauses(opts, d);
|
|
17526
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17527
|
-
FROM files_fts fts
|
|
17528
|
-
JOIN files f ON f.id = fts.rowid
|
|
17529
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
17530
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
17531
|
-
ORDER BY fts.rank
|
|
17532
|
-
LIMIT 5000`).all(ftsQuery, ...filters.params);
|
|
17533
17668
|
const hits = [];
|
|
17534
|
-
|
|
17535
|
-
|
|
17536
|
-
|
|
17537
|
-
|
|
17538
|
-
|
|
17539
|
-
|
|
17540
|
-
|
|
17541
|
-
|
|
17542
|
-
|
|
17543
|
-
if (
|
|
17669
|
+
const pageSize = Math.max(500, limit * 20);
|
|
17670
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
17671
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17672
|
+
FROM files_fts fts
|
|
17673
|
+
JOIN files f ON f.id = fts.rowid
|
|
17674
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
17675
|
+
WHERE files_fts MATCH ?${filters.sql}
|
|
17676
|
+
ORDER BY fts.rank
|
|
17677
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
17678
|
+
if (rows.length === 0)
|
|
17679
|
+
break;
|
|
17680
|
+
for (const row of rows) {
|
|
17681
|
+
if (!regex.test(row.rel_path) && !regex.test(row.name))
|
|
17682
|
+
continue;
|
|
17683
|
+
const depth = row.rel_path.split("/").length - 1;
|
|
17684
|
+
const score = Math.max(0.05, 0.6 - depth * 0.02);
|
|
17685
|
+
const hit = rowToHit(row, score);
|
|
17686
|
+
if (!existsSync2(hit.absPath))
|
|
17687
|
+
continue;
|
|
17688
|
+
hits.push(hit);
|
|
17689
|
+
if (hits.length >= limit)
|
|
17690
|
+
break;
|
|
17691
|
+
}
|
|
17692
|
+
if (rows.length < pageSize)
|
|
17544
17693
|
break;
|
|
17545
17694
|
}
|
|
17546
17695
|
return hits;
|
|
@@ -17554,40 +17703,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
|
|
|
17554
17703
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
|
|
17555
17704
|
}
|
|
17556
17705
|
const filters = filterClauses(opts, d);
|
|
17557
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17558
|
-
FROM file_content_fts fts
|
|
17559
|
-
JOIN files f ON f.id = fts.rowid
|
|
17560
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
17561
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
17562
|
-
ORDER BY fts.rank
|
|
17563
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
|
|
17564
17706
|
const hits = [];
|
|
17565
|
-
|
|
17566
|
-
|
|
17567
|
-
const
|
|
17568
|
-
|
|
17569
|
-
|
|
17570
|
-
|
|
17571
|
-
|
|
17572
|
-
|
|
17573
|
-
|
|
17574
|
-
|
|
17707
|
+
const pageSize = Math.max(200, limit * 10);
|
|
17708
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
17709
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17710
|
+
FROM file_content_fts fts
|
|
17711
|
+
JOIN files f ON f.id = fts.rowid
|
|
17712
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
17713
|
+
WHERE file_content_fts MATCH ?${filters.sql}
|
|
17714
|
+
ORDER BY fts.rank
|
|
17715
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
17716
|
+
if (rows.length === 0)
|
|
17717
|
+
break;
|
|
17718
|
+
for (let i = 0;i < rows.length && hits.length < limit; i++) {
|
|
17719
|
+
const row = rows[i];
|
|
17720
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
17721
|
+
let content;
|
|
17722
|
+
try {
|
|
17723
|
+
content = readFileSync4(absPath, "utf-8");
|
|
17724
|
+
} catch {
|
|
17725
|
+
continue;
|
|
17726
|
+
}
|
|
17727
|
+
const lines = content.split(`
|
|
17575
17728
|
`);
|
|
17576
|
-
|
|
17577
|
-
|
|
17578
|
-
|
|
17579
|
-
|
|
17729
|
+
const matches = [];
|
|
17730
|
+
for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
|
|
17731
|
+
if (regex.test(lines[n])) {
|
|
17732
|
+
matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
|
|
17733
|
+
}
|
|
17580
17734
|
}
|
|
17735
|
+
if (matches.length === 0)
|
|
17736
|
+
continue;
|
|
17737
|
+
const rankIndex = offset + i;
|
|
17738
|
+
const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
|
|
17739
|
+
hits.push({
|
|
17740
|
+
...rowToHit(row, score),
|
|
17741
|
+
line: matches[0].line,
|
|
17742
|
+
lineText: matches[0].text,
|
|
17743
|
+
matches
|
|
17744
|
+
});
|
|
17581
17745
|
}
|
|
17582
|
-
if (
|
|
17583
|
-
|
|
17584
|
-
const score = Math.max(0.25, 0.65 - i * 0.05);
|
|
17585
|
-
hits.push({
|
|
17586
|
-
...rowToHit(row, score),
|
|
17587
|
-
line: matches[0].line,
|
|
17588
|
-
lineText: matches[0].text,
|
|
17589
|
-
matches
|
|
17590
|
-
});
|
|
17746
|
+
if (rows.length < pageSize)
|
|
17747
|
+
break;
|
|
17591
17748
|
}
|
|
17592
17749
|
return hits;
|
|
17593
17750
|
}
|
|
@@ -17598,42 +17755,51 @@ function searchFileContent(query, opts = {}, db) {
|
|
|
17598
17755
|
if (!ftsQuery)
|
|
17599
17756
|
return [];
|
|
17600
17757
|
const filters = filterClauses(opts, d);
|
|
17601
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17602
|
-
FROM file_content_fts fts
|
|
17603
|
-
JOIN files f ON f.id = fts.rowid
|
|
17604
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
17605
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
17606
|
-
ORDER BY fts.rank
|
|
17607
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
|
|
17608
17758
|
const tokens = tokenize(query);
|
|
17609
17759
|
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
17760
|
+
const gramFilters = contentGramClauses(shortTokens);
|
|
17610
17761
|
const scored = [];
|
|
17611
|
-
|
|
17612
|
-
|
|
17613
|
-
const
|
|
17614
|
-
|
|
17615
|
-
|
|
17616
|
-
|
|
17617
|
-
|
|
17618
|
-
|
|
17619
|
-
|
|
17620
|
-
if (
|
|
17621
|
-
|
|
17622
|
-
|
|
17762
|
+
const pageSize = Math.max(50, limit * 3);
|
|
17763
|
+
for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
|
|
17764
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17765
|
+
FROM file_content_fts fts
|
|
17766
|
+
JOIN files f ON f.id = fts.rowid
|
|
17767
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
17768
|
+
WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
|
|
17769
|
+
ORDER BY fts.rank
|
|
17770
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
|
|
17771
|
+
if (rows.length === 0)
|
|
17772
|
+
break;
|
|
17773
|
+
for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
|
|
17774
|
+
const row = rows[i];
|
|
17775
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
17776
|
+
let content;
|
|
17777
|
+
try {
|
|
17778
|
+
content = readFileSync4(absPath, "utf-8");
|
|
17779
|
+
} catch {
|
|
17623
17780
|
continue;
|
|
17781
|
+
}
|
|
17782
|
+
if (shortTokens.length > 0) {
|
|
17783
|
+
const lower = content.toLowerCase();
|
|
17784
|
+
if (!shortTokens.every((t) => lower.includes(t)))
|
|
17785
|
+
continue;
|
|
17786
|
+
}
|
|
17787
|
+
const { matches, tier } = findLineMatches(content, query, tokens);
|
|
17788
|
+
if (matches.length === 0)
|
|
17789
|
+
continue;
|
|
17790
|
+
const rankIndex = offset + i;
|
|
17791
|
+
const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
|
|
17792
|
+
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
17793
|
+
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
17794
|
+
scored.push({
|
|
17795
|
+
...rowToHit(row, score),
|
|
17796
|
+
line: matches[0].line,
|
|
17797
|
+
lineText: matches[0].text,
|
|
17798
|
+
matches
|
|
17799
|
+
});
|
|
17624
17800
|
}
|
|
17625
|
-
|
|
17626
|
-
|
|
17627
|
-
continue;
|
|
17628
|
-
const base = Math.max(0.25, 0.55 - i * 0.04);
|
|
17629
|
-
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
17630
|
-
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
17631
|
-
scored.push({
|
|
17632
|
-
...rowToHit(row, score),
|
|
17633
|
-
line: matches[0].line,
|
|
17634
|
-
lineText: matches[0].text,
|
|
17635
|
-
matches
|
|
17636
|
-
});
|
|
17801
|
+
if (rows.length < pageSize)
|
|
17802
|
+
break;
|
|
17637
17803
|
}
|
|
17638
17804
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
17639
17805
|
}
|
|
@@ -17646,7 +17812,7 @@ class FilesProvider {
|
|
|
17646
17812
|
return hasReadyRoot();
|
|
17647
17813
|
}
|
|
17648
17814
|
async search(query, options) {
|
|
17649
|
-
|
|
17815
|
+
scheduleAutoRefreshStaleRoots();
|
|
17650
17816
|
const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
|
|
17651
17817
|
return hits.map((hit) => ({
|
|
17652
17818
|
title: hit.name,
|
|
@@ -17674,7 +17840,7 @@ class ContentProvider {
|
|
|
17674
17840
|
return hasReadyRoot();
|
|
17675
17841
|
}
|
|
17676
17842
|
async search(query, options) {
|
|
17677
|
-
|
|
17843
|
+
scheduleAutoRefreshStaleRoots();
|
|
17678
17844
|
const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
|
|
17679
17845
|
return hits.map((hit) => ({
|
|
17680
17846
|
title: hit.name,
|
|
@@ -17801,7 +17967,7 @@ function rowToSearch(row) {
|
|
|
17801
17967
|
}
|
|
17802
17968
|
function createSearch(data, db) {
|
|
17803
17969
|
const d = db ?? getDb();
|
|
17804
|
-
const id = generateId();
|
|
17970
|
+
const id = data.id ?? generateId();
|
|
17805
17971
|
const now = new Date().toISOString();
|
|
17806
17972
|
d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
|
|
17807
17973
|
VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
|
|
@@ -17883,7 +18049,7 @@ function createResults(results, db) {
|
|
|
17883
18049
|
d.exec("BEGIN");
|
|
17884
18050
|
try {
|
|
17885
18051
|
for (const data of results) {
|
|
17886
|
-
const id = generateId();
|
|
18052
|
+
const id = data.id ?? generateId();
|
|
17887
18053
|
stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
17888
18054
|
created.push({
|
|
17889
18055
|
id,
|
|
@@ -18040,13 +18206,300 @@ function isProviderConfigured(provider) {
|
|
|
18040
18206
|
return !!Bun.env[provider.apiKeyEnv];
|
|
18041
18207
|
}
|
|
18042
18208
|
|
|
18209
|
+
// src/lib/router.ts
|
|
18210
|
+
var PROVIDER_DESCRIPTIONS = {
|
|
18211
|
+
files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
|
|
18212
|
+
content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
|
|
18213
|
+
google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
|
|
18214
|
+
serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
|
|
18215
|
+
exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
|
|
18216
|
+
perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
|
|
18217
|
+
brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
|
|
18218
|
+
bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
|
|
18219
|
+
twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
|
|
18220
|
+
reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
|
|
18221
|
+
youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
|
|
18222
|
+
hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
|
|
18223
|
+
github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
|
|
18224
|
+
arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
|
|
18225
|
+
};
|
|
18226
|
+
function clampMaxProviders(value) {
|
|
18227
|
+
if (value === undefined || !Number.isFinite(value))
|
|
18228
|
+
return 3;
|
|
18229
|
+
return Math.max(1, Math.min(5, Math.floor(value)));
|
|
18230
|
+
}
|
|
18231
|
+
function clampConfidence(value) {
|
|
18232
|
+
return typeof value === "number" && Number.isFinite(value) ? Math.max(0, Math.min(1, value)) : 0.5;
|
|
18233
|
+
}
|
|
18234
|
+
function normalizeCandidates(candidates) {
|
|
18235
|
+
const allowed = new Set(PROVIDER_NAMES);
|
|
18236
|
+
const seen = new Set;
|
|
18237
|
+
const normalized = [];
|
|
18238
|
+
for (const candidate of candidates) {
|
|
18239
|
+
if (!allowed.has(candidate) || seen.has(candidate))
|
|
18240
|
+
continue;
|
|
18241
|
+
seen.add(candidate);
|
|
18242
|
+
normalized.push(candidate);
|
|
18243
|
+
}
|
|
18244
|
+
return normalized;
|
|
18245
|
+
}
|
|
18246
|
+
function addScore(scores, candidateSet, provider, amount) {
|
|
18247
|
+
if (!candidateSet.has(provider))
|
|
18248
|
+
return;
|
|
18249
|
+
scores.set(provider, (scores.get(provider) ?? 0) + amount);
|
|
18250
|
+
}
|
|
18251
|
+
/**
 * Reports whether at least one pattern matches the query.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns tried in order until one matches.
 * @returns {boolean} True when any pattern matches, false otherwise
 *   (including for an empty pattern list).
 */
function hasAny(query, patterns) {
  const matchesQuery = (pattern) => pattern.test(query);
  return patterns.some(matchesQuery);
}
|
|
18254
|
+
/**
 * Picks search providers for a query using keyword and pattern rules only
 * (no network call).
 *
 * Every candidate starts with a small base score. Each rule whose patterns
 * match the trimmed, lower-cased query boosts a fixed set of providers and
 * records a reason. When no rule matches, a general-purpose fallback boost is
 * applied instead. The highest scoring providers are selected; ties keep the
 * candidates' original order.
 *
 * @param {string} query - The user's search query.
 * @param {string[]} candidates - Provider names that may be selected.
 * @param {{ maxProviders?: number }} [options] - `maxProviders` caps the
 *   selection; it is normalized by clampMaxProviders.
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 *   The routing decision. With no usable candidates the selection is empty
 *   and confidence is 0; otherwise confidence is derived from the top score
 *   and kept within [0.35, 0.9].
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const limit = clampMaxProviders(options.maxProviders);
  if (pool.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; each entry lists the patterns that
  // trigger it, the [provider, boost] pairs it applies, and its reason text.
  const rules = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      reason: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      reason: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      reason: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      reason: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      reason: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      reason: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      reason: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      reason: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      reason: "query appears time-sensitive"
    }
  ];
  // Applied only when none of the rules above matched.
  const fallback = {
    boosts: [["exa", 2.5], ["perplexity", 2], ["brave", 1.5], ["google", 1.5], ["hackernews", 0.75]],
    reason: "general query fallback"
  };

  const poolSet = new Set(pool);
  // Base score so every candidate is rankable even if no rule boosts it.
  const scores = new Map(pool.map((name) => [name, 0.05]));
  const reasons = [];
  const q = query.trim().toLowerCase();

  const applyBoosts = ({ boosts, reason }) => {
    for (const [provider, amount] of boosts) {
      addScore(scores, poolSet, provider, amount);
    }
    reasons.push(reason);
  };

  for (const rule of rules) {
    if (hasAny(q, rule.patterns)) {
      applyBoosts(rule);
    }
  }
  if (reasons.length === 0) {
    applyBoosts(fallback);
  }

  // Position of each candidate in the input order, used to break score ties.
  const rank = new Map(pool.map((name, position) => [name, position]));
  const ranked = [...scores.entries()];
  ranked.sort((left, right) => right[1] - left[1] || rank.get(left[0]) - rank.get(right[0]));
  const selectedProviders = ranked
    .slice(0, Math.min(limit, pool.length))
    .map((entry) => entry[0]);

  const topScore = scores.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: pool,
    reason: reasons.join("; "),
    confidence
  };
}
|
|
18344
|
+
/**
 * Builds the JSON Schema describing a router reply: a non-empty list of
 * provider names drawn from `candidates`, a textual reason, and a confidence
 * between 0 and 1.
 *
 * @param {string[]} candidates - Provider names the reply may contain.
 * @param {number} maxProviders - Upper bound on the number of selected providers.
 * @returns {object} A JSON Schema object that forbids extra properties and
 *   requires all three fields.
 */
function routerSchema(candidates, maxProviders) {
  // Property order is kept stable because the schema is serialized to JSON.
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const reason = { type: "string" };
  const confidence = { type: "number", minimum: 0, maximum: 1 };
  const properties = { selectedProviders, reason, confidence };
  return {
    type: "object",
    properties,
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
|
|
18361
|
+
/**
 * Parses and validates the JSON text returned by the Cerebras router.
 *
 * @param {string} raw - Message content expected to hold a JSON object with
 *   `selectedProviders`, `reason` and `confidence`.
 * @param {string[]} candidates - Provider names that are allowed in the selection.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The sanitized selection, or null when the text is not valid JSON, is not
 *   an object carrying a `selectedProviders` array, or names no allowed
 *   provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse can legitimately yield null (raw === "null"); optional chaining
  // keeps that case on the "invalid reply" path instead of throwing.
  if (!Array.isArray(parsed?.selectedProviders))
    return null;
  const candidateSet = new Set(candidates);
  // De-duplicate before applying the cap so a repeated name neither occupies
  // several slots nor causes the same provider to be queried twice.
  const unique = new Set(parsed.selectedProviders.filter((provider) => typeof provider === "string" && candidateSet.has(provider)));
  const selectedProviders = [...unique].slice(0, maxProviders);
  if (selectedProviders.length === 0)
    return null;
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
|
|
18380
|
+
/**
 * Asks the Cerebras chat-completions API to choose providers for a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made: the heuristic
 * router's result is returned with an explanatory `error` field.
 *
 * @param {string} query - The user's search query.
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options -
 *   Selection cap, request timeout in milliseconds, and model identifier.
 * @returns {Promise<object>} A routing decision with `strategy: "cerebras"`,
 *   the candidates, and the parsed selection (or the heuristic fallback
 *   described above).
 * @throws {Error} When the HTTP response is not ok, carries no message
 *   content, or the content fails validation. Errors raised by `fetch`
 *   itself (including the timeout abort) propagate unchanged.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Give the model a short description of every provider it may pick.
  const providerGuide = candidates.map((name) => {
    const description = PROVIDER_DESCRIPTIONS[name];
    return { name, description };
  });

  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers: providerGuide
    })
  };
  const requestBody = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Strict JSON-schema output keeps the reply machine-parseable.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, options.maxProviders)
      }
    }
  };

  const response = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(requestBody)
  });
  if (!response.ok) {
    throw new Error(`Cerebras router error: ${response.status} ${response.statusText}`);
  }

  const completion = await response.json();
  const content = completion.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }

  const selection = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!selection) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...selection
  };
}
|
|
18442
|
+
/**
 * Chooses which providers should handle a query: tries the Cerebras router
 * first and falls back to the heuristic router when that attempt throws.
 *
 * @param {string} query - The user's search query.
 * @param {string[]} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options] -
 *   `maxProviders` is clamped and never exceeds the number of usable
 *   candidates; `timeoutMs` is floored with a 250 ms minimum and defaults to
 *   1200; `model` defaults to the CEREBRAS_MODEL environment variable, then
 *   to "gpt-oss-120b".
 * @returns {Promise<object>} The routing decision. When the Cerebras attempt
 *   fails, the heuristic decision is returned with the failure message in
 *   `error`.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const requestedCap = clampMaxProviders(options.maxProviders);
  // Never ask for more providers than exist, but keep the cap at least 1.
  const maxProviders = Math.min(requestedCap, Math.max(1, pool.length));

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }
  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  const routeHeuristically = () => routeSearchProvidersHeuristic(query, pool, { maxProviders });
  if (pool.length === 0) {
    return routeHeuristically();
  }

  try {
    return await routeWithCerebras(query, pool, { maxProviders, timeoutMs, model });
  } catch (err) {
    // Routing must not fail the search: degrade to heuristics and report why.
    const fallback = routeHeuristically();
    const message = err instanceof Error ? err.message : String(err);
    return { ...fallback, error: message };
  }
}
|
|
18459
|
+
|
|
18043
18460
|
// src/lib/search.ts
|
|
18461
|
+
/**
 * Settles with `promise`, unless it takes longer than `timeoutMs`, in which
 * case the returned promise rejects with a timeout error. The underlying
 * operation is not cancelled; only the wait is abandoned.
 *
 * @param {Promise<T>} promise - The operation to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds. A non-finite, zero
 *   or negative value disables the limit.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The outcome of `promise`, or a rejection with
 *   "<label> timed out after <timeoutMs>ms".
 * @template T
 */
async function withTimeout(promise, timeoutMs, label) {
  const hasDeadline = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!hasDeadline) {
    return promise;
  }

  let timer;
  const deadline = new Promise((_resolve, reject) => {
    const expire = () => reject(new Error(`${label} timed out after ${timeoutMs}ms`));
    timer = setTimeout(expire, timeoutMs);
    // Where supported, do not let the pending timer keep the process alive.
    timer.unref?.();
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer, whichever side of the race settled first.
    if (timer) {
      clearTimeout(timer);
    }
  }
}
|
|
18478
|
+
/**
 * Runs `task` for every item with at most `concurrency` tasks in flight and
 * collects every outcome, in the style of Promise.allSettled.
 *
 * @param {T[]} items - Inputs to process.
 * @param {number} concurrency - Maximum number of simultaneous tasks. The
 *   value is rounded down; anything below 1 or not a number (NaN, undefined)
 *   is treated as 1.
 * @param {(item: T) => R | Promise<R>} task - Work to perform per item.
 * @returns {Promise<Array<{ status: "fulfilled", value: R } | { status: "rejected", reason: unknown }>>}
 *   One settled entry per item, at the same index as its input. Never rejects
 *   because of a failing task.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index. The claim
  // happens synchronously, so two workers never take the same item.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }
  // Math.max(1, NaN) is NaN, which would start zero workers and leave
  // `results` full of holes; map NaN to the same floor as other bad values.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
18044
18496
|
async function unifiedSearch(query, opts = {}) {
|
|
18045
18497
|
const config = getConfig();
|
|
18046
18498
|
const startTime = Date.now();
|
|
18047
18499
|
const db = opts.db;
|
|
18048
18500
|
let providerNames = opts.providers ?? [];
|
|
18049
|
-
|
|
18501
|
+
const smartProfile = opts.profile === "smart";
|
|
18502
|
+
if (opts.profile && !smartProfile) {
|
|
18050
18503
|
const profile = getProfileByName(opts.profile, db);
|
|
18051
18504
|
if (profile) {
|
|
18052
18505
|
providerNames = profile.providers;
|
|
@@ -18062,7 +18515,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18062
18515
|
}
|
|
18063
18516
|
const errors2 = [];
|
|
18064
18517
|
const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
|
|
18065
|
-
|
|
18518
|
+
let activeProviders = providerNames.filter((name) => {
|
|
18066
18519
|
try {
|
|
18067
18520
|
if (getProvider(name).isConfigured())
|
|
18068
18521
|
return true;
|
|
@@ -18073,20 +18526,36 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18073
18526
|
});
|
|
18074
18527
|
}
|
|
18075
18528
|
return false;
|
|
18076
|
-
} catch {
|
|
18529
|
+
} catch (err) {
|
|
18530
|
+
if (explicitRequest) {
|
|
18531
|
+
errors2.push({
|
|
18532
|
+
provider: name,
|
|
18533
|
+
error: err instanceof Error ? err.message : "unknown provider"
|
|
18534
|
+
});
|
|
18535
|
+
}
|
|
18077
18536
|
return false;
|
|
18078
18537
|
}
|
|
18079
18538
|
});
|
|
18539
|
+
const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
|
|
18540
|
+
let routing;
|
|
18541
|
+
if (routingRequested && activeProviders.length > 0) {
|
|
18542
|
+
routing = await routeSearchProviders(query, activeProviders, {
|
|
18543
|
+
maxProviders: config.router.maxProviders,
|
|
18544
|
+
timeoutMs: config.router.timeoutMs,
|
|
18545
|
+
model: config.router.model
|
|
18546
|
+
});
|
|
18547
|
+
activeProviders = routing.selectedProviders;
|
|
18548
|
+
}
|
|
18080
18549
|
const searchOptions = {
|
|
18081
18550
|
limit: config.defaultLimit,
|
|
18082
18551
|
...opts.options
|
|
18083
18552
|
};
|
|
18084
|
-
const results = await
|
|
18553
|
+
const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
|
|
18085
18554
|
const provider = getProvider(name);
|
|
18086
|
-
const rawResults = await provider.search(query, searchOptions);
|
|
18555
|
+
const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
|
|
18087
18556
|
updateProviderLastUsed(name, db);
|
|
18088
18557
|
return { name, results: rawResults };
|
|
18089
|
-
})
|
|
18558
|
+
});
|
|
18090
18559
|
const allResults = [];
|
|
18091
18560
|
const searchId = generateId();
|
|
18092
18561
|
for (const result of results) {
|
|
@@ -18140,11 +18609,13 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18140
18609
|
createdAt: new Date().toISOString()
|
|
18141
18610
|
},
|
|
18142
18611
|
results: finalResults,
|
|
18143
|
-
errors: errors2
|
|
18612
|
+
errors: errors2,
|
|
18613
|
+
...routing && { routing }
|
|
18144
18614
|
};
|
|
18145
18615
|
}
|
|
18146
18616
|
const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
|
|
18147
18617
|
const search = createSearch({
|
|
18618
|
+
id: searchId,
|
|
18148
18619
|
query,
|
|
18149
18620
|
providers: activeProviders,
|
|
18150
18621
|
resultCount: persistable.length,
|
|
@@ -18153,6 +18624,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18153
18624
|
if (persistable.length > 0) {
|
|
18154
18625
|
createResults(persistable.map((r) => ({
|
|
18155
18626
|
searchId: search.id,
|
|
18627
|
+
id: r.id,
|
|
18156
18628
|
title: r.title,
|
|
18157
18629
|
url: r.url,
|
|
18158
18630
|
snippet: r.snippet,
|
|
@@ -18169,7 +18641,8 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18169
18641
|
return {
|
|
18170
18642
|
search: { ...search, resultCount: finalResults.length, duration },
|
|
18171
18643
|
results: finalResults,
|
|
18172
|
-
errors: errors2
|
|
18644
|
+
errors: errors2,
|
|
18645
|
+
...routing && { routing }
|
|
18173
18646
|
};
|
|
18174
18647
|
}
|
|
18175
18648
|
async function searchSingleProvider(provider, query, options, db) {
|
|
@@ -18403,7 +18876,7 @@ function findLocal(query, opts = {}, db) {
|
|
|
18403
18876
|
return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
|
|
18404
18877
|
}
|
|
18405
18878
|
if (opts.refresh !== false)
|
|
18406
|
-
|
|
18879
|
+
scheduleAutoRefreshStaleRoots(db);
|
|
18407
18880
|
const queryOpts = {
|
|
18408
18881
|
root: opts.root,
|
|
18409
18882
|
ext: opts.ext,
|
|
@@ -28643,13 +29116,15 @@ function buildServer() {
|
|
|
28643
29116
|
providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
|
|
28644
29117
|
profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
|
|
28645
29118
|
limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
|
|
28646
|
-
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
|
|
28647
|
-
|
|
29119
|
+
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
|
|
29120
|
+
smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
|
|
29121
|
+
}, async ({ query, providers, profile, limit, dedup, smart }) => {
|
|
28648
29122
|
const response = await unifiedSearch(query, {
|
|
28649
29123
|
providers,
|
|
28650
29124
|
profile,
|
|
28651
29125
|
options: limit ? { limit } : undefined,
|
|
28652
|
-
dedup
|
|
29126
|
+
dedup,
|
|
29127
|
+
smart
|
|
28653
29128
|
});
|
|
28654
29129
|
return {
|
|
28655
29130
|
content: [
|
|
@@ -28668,7 +29143,8 @@ function buildServer() {
|
|
|
28668
29143
|
source: r.source,
|
|
28669
29144
|
score: r.score
|
|
28670
29145
|
})),
|
|
28671
|
-
errors: response.errors
|
|
29146
|
+
errors: response.errors,
|
|
29147
|
+
routing: response.routing
|
|
28672
29148
|
}, null, 2)
|
|
28673
29149
|
}
|
|
28674
29150
|
]
|
|
@@ -28923,12 +29399,14 @@ function buildServer() {
|
|
|
28923
29399
|
default_limit: exports_external.number().int().optional(),
|
|
28924
29400
|
dedup: exports_external.boolean().optional(),
|
|
28925
29401
|
max_concurrent: exports_external.number().int().optional(),
|
|
29402
|
+
provider_timeout_ms: exports_external.number().int().optional(),
|
|
28926
29403
|
default_profile: exports_external.string().nullable().optional()
|
|
28927
29404
|
}, async (updates) => {
|
|
28928
29405
|
const config2 = setConfig({
|
|
28929
29406
|
...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
|
|
28930
29407
|
...updates.dedup !== undefined && { dedup: updates.dedup },
|
|
28931
29408
|
...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
|
|
29409
|
+
...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
|
|
28932
29410
|
...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
|
|
28933
29411
|
});
|
|
28934
29412
|
return {
|
|
@@ -29071,10 +29549,12 @@ function startServer(port) {
|
|
|
29071
29549
|
const providers = url.searchParams.get("providers")?.split(",");
|
|
29072
29550
|
const profile = url.searchParams.get("profile") ?? undefined;
|
|
29073
29551
|
const limit = url.searchParams.get("limit") ? parseInt(url.searchParams.get("limit")) : undefined;
|
|
29552
|
+
const smart = url.searchParams.get("smart") === "1" || url.searchParams.get("smart") === "true";
|
|
29074
29553
|
const response = await unifiedSearch(q, {
|
|
29075
29554
|
providers,
|
|
29076
29555
|
profile,
|
|
29077
|
-
options: limit ? { limit } : undefined
|
|
29556
|
+
options: limit ? { limit } : undefined,
|
|
29557
|
+
smart
|
|
29078
29558
|
});
|
|
29079
29559
|
return json(response);
|
|
29080
29560
|
}
|