@hasna/search 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +602 -128
- package/dist/db/index-migrations.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -0
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/searches.d.ts +1 -0
- package/dist/db/searches.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +602 -126
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +1 -1
- package/dist/lib/local/find.d.ts.map +1 -1
- package/dist/lib/local/indexer.d.ts +11 -0
- package/dist/lib/local/indexer.d.ts.map +1 -1
- package/dist/lib/local/query.d.ts.map +1 -1
- package/dist/lib/router.d.ts +10 -0
- package/dist/lib/router.d.ts.map +1 -0
- package/dist/lib/search.d.ts +1 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/index.js +621 -130
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/server/index.js +624 -139
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/types/index.d.ts +22 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/mcp/index.js
CHANGED
|
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
|
|
|
11644
11644
|
var require_package = __commonJS((exports, module) => {
|
|
11645
11645
|
module.exports = {
|
|
11646
11646
|
name: "@hasna/search",
|
|
11647
|
-
version: "0.0.9",
|
|
11647
|
+
version: "0.0.11",
|
|
11648
11648
|
description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
|
|
11649
11649
|
type: "module",
|
|
11650
11650
|
main: "dist/index.js",
|
|
@@ -24742,12 +24742,19 @@ var DEFAULT_CONFIG = {
|
|
|
24742
24742
|
defaultLimit: 10,
|
|
24743
24743
|
defaultProviders: [],
|
|
24744
24744
|
defaultProfile: null,
|
|
24745
|
+
router: {
|
|
24746
|
+
enabled: false,
|
|
24747
|
+
model: "gpt-oss-120b",
|
|
24748
|
+
maxProviders: 3,
|
|
24749
|
+
timeoutMs: 1200
|
|
24750
|
+
},
|
|
24745
24751
|
transcriber: {
|
|
24746
24752
|
baseUrl: "http://localhost:19600",
|
|
24747
24753
|
fallbackCli: "microservice-transcriber"
|
|
24748
24754
|
},
|
|
24749
24755
|
dedup: true,
|
|
24750
24756
|
maxConcurrent: 5,
|
|
24757
|
+
providerTimeoutMs: 15000,
|
|
24751
24758
|
indexStaleMinutes: 5,
|
|
24752
24759
|
indexAutoRefresh: true,
|
|
24753
24760
|
recordLocalResults: false
|
|
@@ -25482,6 +25489,31 @@ var migrations = [
|
|
|
25482
25489
|
);
|
|
25483
25490
|
`);
|
|
25484
25491
|
}
|
|
25492
|
+
},
|
|
25493
|
+
{
|
|
25494
|
+
version: 2,
|
|
25495
|
+
description: "Local file index filter indexes",
|
|
25496
|
+
up: (db) => {
|
|
25497
|
+
db.exec(`
|
|
25498
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
|
|
25499
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
|
|
25500
|
+
`);
|
|
25501
|
+
}
|
|
25502
|
+
},
|
|
25503
|
+
{
|
|
25504
|
+
version: 3,
|
|
25505
|
+
description: "Local content short-token filter grams",
|
|
25506
|
+
up: (db) => {
|
|
25507
|
+
db.exec(`
|
|
25508
|
+
CREATE TABLE IF NOT EXISTS file_content_grams (
|
|
25509
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
25510
|
+
gram TEXT NOT NULL,
|
|
25511
|
+
PRIMARY KEY (file_id, gram)
|
|
25512
|
+
);
|
|
25513
|
+
CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
|
|
25514
|
+
ON file_content_grams(gram, file_id);
|
|
25515
|
+
`);
|
|
25516
|
+
}
|
|
25485
25517
|
}
|
|
25486
25518
|
];
|
|
25487
25519
|
function runIndexMigrations(db) {
|
|
@@ -25553,7 +25585,18 @@ function getConfig() {
|
|
|
25553
25585
|
try {
|
|
25554
25586
|
const raw = readFileSync(path, "utf-8");
|
|
25555
25587
|
const parsed = JSON.parse(raw);
|
|
25556
|
-
return {
|
|
25588
|
+
return {
|
|
25589
|
+
...DEFAULT_CONFIG,
|
|
25590
|
+
...parsed,
|
|
25591
|
+
router: {
|
|
25592
|
+
...DEFAULT_CONFIG.router,
|
|
25593
|
+
...parsed.router ?? {}
|
|
25594
|
+
},
|
|
25595
|
+
transcriber: {
|
|
25596
|
+
...DEFAULT_CONFIG.transcriber,
|
|
25597
|
+
...parsed.transcriber ?? {}
|
|
25598
|
+
}
|
|
25599
|
+
};
|
|
25557
25600
|
} catch {
|
|
25558
25601
|
return { ...DEFAULT_CONFIG };
|
|
25559
25602
|
}
|
|
@@ -25971,6 +26014,7 @@ function removeRoot(idOrPath, db) {
|
|
|
25971
26014
|
d.exec("BEGIN");
|
|
25972
26015
|
try {
|
|
25973
26016
|
d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
26017
|
+
d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
25974
26018
|
d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
|
|
25975
26019
|
d.exec("COMMIT");
|
|
25976
26020
|
} catch (err) {
|
|
@@ -25982,6 +26026,21 @@ function removeRoot(idOrPath, db) {
|
|
|
25982
26026
|
function shouldIndexContent(root, file) {
|
|
25983
26027
|
return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
|
|
25984
26028
|
}
|
|
26029
|
+
function contentShortGrams(body) {
|
|
26030
|
+
const grams = new Set;
|
|
26031
|
+
const words = body.toLowerCase().matchAll(/[a-z0-9_$]+/g);
|
|
26032
|
+
for (const match of words) {
|
|
26033
|
+
const word = match[0];
|
|
26034
|
+
for (let i = 0;i < word.length; i++) {
|
|
26035
|
+
grams.add(word[i]);
|
|
26036
|
+
if (i + 1 < word.length)
|
|
26037
|
+
grams.add(word.slice(i, i + 2));
|
|
26038
|
+
}
|
|
26039
|
+
if (grams.size >= 2048)
|
|
26040
|
+
break;
|
|
26041
|
+
}
|
|
26042
|
+
return [...grams];
|
|
26043
|
+
}
|
|
25985
26044
|
function indexRoot(idOrPath, opts = {}, db) {
|
|
25986
26045
|
const d = db ?? getIndexDb();
|
|
25987
26046
|
const root = getRoot(idOrPath, d);
|
|
@@ -26000,6 +26059,8 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
26000
26059
|
const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
|
|
26001
26060
|
const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
|
|
26002
26061
|
const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
|
|
26062
|
+
const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
|
|
26063
|
+
const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
|
|
26003
26064
|
const stats = {
|
|
26004
26065
|
rootId: root.id,
|
|
26005
26066
|
added: 0,
|
|
@@ -26010,38 +26071,57 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
26010
26071
|
skippedDirs: skippedDirs.length,
|
|
26011
26072
|
durationMs: 0
|
|
26012
26073
|
};
|
|
26074
|
+
const seen = new Set;
|
|
26075
|
+
const changes = [];
|
|
26076
|
+
for (const file of scanned) {
|
|
26077
|
+
seen.add(file.relPath);
|
|
26078
|
+
const prev = existing.get(file.relPath);
|
|
26079
|
+
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
26080
|
+
if (prev && !changed && !opts.force)
|
|
26081
|
+
continue;
|
|
26082
|
+
const wantContent = shouldIndexContent(root, file);
|
|
26083
|
+
const absPath = `${root.path}/${file.relPath}`;
|
|
26084
|
+
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
26085
|
+
let body = null;
|
|
26086
|
+
if (wantContent && !isBinary) {
|
|
26087
|
+
try {
|
|
26088
|
+
body = readFileSync3(absPath, "utf-8");
|
|
26089
|
+
} catch {
|
|
26090
|
+
isBinary = true;
|
|
26091
|
+
}
|
|
26092
|
+
}
|
|
26093
|
+
changes.push({
|
|
26094
|
+
file,
|
|
26095
|
+
prev,
|
|
26096
|
+
isBinary,
|
|
26097
|
+
body,
|
|
26098
|
+
grams: body !== null ? contentShortGrams(body) : [],
|
|
26099
|
+
contentIndexed: body !== null ? 1 : 0
|
|
26100
|
+
});
|
|
26101
|
+
}
|
|
26013
26102
|
d.exec("BEGIN");
|
|
26014
26103
|
try {
|
|
26015
|
-
const seen = new Set;
|
|
26016
|
-
for (const file of scanned) {
|
|
26017
|
-
seen.add(file.relPath);
|
|
26018
|
-
const prev = existing.get(file.relPath);
|
|
26019
|
-
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
26020
|
-
if (prev && !changed && !opts.force)
|
|
26021
|
-
continue;
|
|
26022
|
-
const wantContent = shouldIndexContent(root, file);
|
|
26023
|
-
const absPath = `${root.path}/${file.relPath}`;
|
|
26024
|
-
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
26025
|
-
let body = null;
|
|
26026
|
-
if (wantContent && !isBinary) {
|
|
26027
|
-
try {
|
|
26028
|
-
body = readFileSync3(absPath, "utf-8");
|
|
26029
|
-
} catch {
|
|
26030
|
-
isBinary = true;
|
|
26031
|
-
}
|
|
26032
|
-
}
|
|
26033
|
-
const contentIndexed = body !== null ? 1 : 0;
|
|
26104
|
+
for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
|
|
26034
26105
|
if (prev) {
|
|
26035
|
-
if (prev.content_indexed)
|
|
26106
|
+
if (prev.content_indexed) {
|
|
26036
26107
|
deleteContent.run(prev.id);
|
|
26108
|
+
deleteContentGrams.run(prev.id);
|
|
26109
|
+
}
|
|
26037
26110
|
updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
|
|
26038
|
-
if (body !== null)
|
|
26111
|
+
if (body !== null) {
|
|
26039
26112
|
insertContent.run(prev.id, body);
|
|
26113
|
+
for (const gram of grams)
|
|
26114
|
+
insertContentGram.run(prev.id, gram);
|
|
26115
|
+
}
|
|
26040
26116
|
stats.updated++;
|
|
26041
26117
|
} else {
|
|
26042
26118
|
const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
|
|
26043
|
-
if (body !== null)
|
|
26044
|
-
|
|
26119
|
+
if (body !== null) {
|
|
26120
|
+
const fileId = Number(inserted.lastInsertRowid);
|
|
26121
|
+
insertContent.run(fileId, body);
|
|
26122
|
+
for (const gram of grams)
|
|
26123
|
+
insertContentGram.run(fileId, gram);
|
|
26124
|
+
}
|
|
26045
26125
|
stats.added++;
|
|
26046
26126
|
}
|
|
26047
26127
|
if (contentIndexed)
|
|
@@ -26050,8 +26130,10 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
26050
26130
|
for (const [relPath, row] of existing) {
|
|
26051
26131
|
if (seen.has(relPath))
|
|
26052
26132
|
continue;
|
|
26053
|
-
if (row.content_indexed)
|
|
26133
|
+
if (row.content_indexed) {
|
|
26054
26134
|
deleteContent.run(row.id);
|
|
26135
|
+
deleteContentGrams.run(row.id);
|
|
26136
|
+
}
|
|
26055
26137
|
deleteFile.run(row.id);
|
|
26056
26138
|
stats.deleted++;
|
|
26057
26139
|
}
|
|
@@ -26073,6 +26155,9 @@ function indexAllRoots(opts = {}, db) {
|
|
|
26073
26155
|
return listRoots(db).map((root) => indexRoot(root.id, opts, db));
|
|
26074
26156
|
}
|
|
26075
26157
|
var refreshing = new Set;
|
|
26158
|
+
var lastDefaultAutoRefreshCheckAt = 0;
|
|
26159
|
+
var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
|
|
26160
|
+
var defaultRefreshScheduled = false;
|
|
26076
26161
|
function refreshStaleRoots(staleMinutes, db) {
|
|
26077
26162
|
const cutoff = Date.now() - staleMinutes * 60000;
|
|
26078
26163
|
const stats = [];
|
|
@@ -26096,8 +26181,43 @@ function autoRefreshStaleRoots(db) {
|
|
|
26096
26181
|
const config2 = getConfig();
|
|
26097
26182
|
if (!config2.indexAutoRefresh)
|
|
26098
26183
|
return [];
|
|
26184
|
+
if (!db) {
|
|
26185
|
+
const now = Date.now();
|
|
26186
|
+
if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
|
|
26187
|
+
return [];
|
|
26188
|
+
lastDefaultAutoRefreshCheckAt = now;
|
|
26189
|
+
}
|
|
26099
26190
|
return refreshStaleRoots(config2.indexStaleMinutes, db);
|
|
26100
26191
|
}
|
|
26192
|
+
function scheduleAutoRefreshStaleRoots(db) {
|
|
26193
|
+
if (db)
|
|
26194
|
+
return autoRefreshStaleRoots(db);
|
|
26195
|
+
const config2 = getConfig();
|
|
26196
|
+
if (!config2.indexAutoRefresh || defaultRefreshScheduled)
|
|
26197
|
+
return [];
|
|
26198
|
+
defaultRefreshScheduled = true;
|
|
26199
|
+
const timer = setTimeout(() => {
|
|
26200
|
+
try {
|
|
26201
|
+
autoRefreshStaleRoots();
|
|
26202
|
+
} catch {} finally {
|
|
26203
|
+
defaultRefreshScheduled = false;
|
|
26204
|
+
}
|
|
26205
|
+
}, 0);
|
|
26206
|
+
timer.unref?.();
|
|
26207
|
+
return [];
|
|
26208
|
+
}
|
|
26209
|
+
function startBackgroundRefresh() {
|
|
26210
|
+
const minutes = Math.max(1, getConfig().indexStaleMinutes);
|
|
26211
|
+
const timer = setInterval(() => {
|
|
26212
|
+
try {
|
|
26213
|
+
autoRefreshStaleRoots();
|
|
26214
|
+
} catch (err) {
|
|
26215
|
+
console.error("Index refresh failed:", err);
|
|
26216
|
+
}
|
|
26217
|
+
}, minutes * 60000);
|
|
26218
|
+
timer.unref?.();
|
|
26219
|
+
return timer;
|
|
26220
|
+
}
|
|
26101
26221
|
|
|
26102
26222
|
// src/lib/local/query.ts
|
|
26103
26223
|
import { existsSync as existsSync2, readFileSync as readFileSync4 } from "fs";
|
|
@@ -26324,6 +26444,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
|
|
|
26324
26444
|
// src/lib/local/query.ts
|
|
26325
26445
|
var MAX_LINE_LENGTH = 200;
|
|
26326
26446
|
var MAX_MATCHES_PER_FILE = 5;
|
|
26447
|
+
var MAX_PATH_CANDIDATES = 20000;
|
|
26448
|
+
var MAX_CONTENT_CANDIDATES = 50000;
|
|
26449
|
+
var MAX_REGEX_CANDIDATES = 50000;
|
|
26327
26450
|
function tokenize(query) {
|
|
26328
26451
|
return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
|
|
26329
26452
|
}
|
|
@@ -26354,11 +26477,40 @@ function filterClauses(opts, db) {
|
|
|
26354
26477
|
}
|
|
26355
26478
|
if (opts.dir) {
|
|
26356
26479
|
clauses.push("f.dir LIKE ? ESCAPE '\\'");
|
|
26357
|
-
const dir = opts.dir.replace(/^\/|\/$/g, "")
|
|
26480
|
+
const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
|
|
26358
26481
|
params.push(`%${dir}%`);
|
|
26359
26482
|
}
|
|
26360
26483
|
return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
|
|
26361
26484
|
}
|
|
26485
|
+
function escapeLike(value) {
|
|
26486
|
+
return value.replace(/[\\%_]/g, "\\$&");
|
|
26487
|
+
}
|
|
26488
|
+
function shortTokenClauses(tokens) {
|
|
26489
|
+
if (tokens.length === 0)
|
|
26490
|
+
return { sql: "", params: [] };
|
|
26491
|
+
return {
|
|
26492
|
+
sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
|
|
26493
|
+
params: tokens.map((token) => `%${escapeLike(token)}%`)
|
|
26494
|
+
};
|
|
26495
|
+
}
|
|
26496
|
+
function contentGramClauses(tokens) {
|
|
26497
|
+
const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
|
|
26498
|
+
if (gramTokens.length === 0)
|
|
26499
|
+
return { sql: "", params: [] };
|
|
26500
|
+
return {
|
|
26501
|
+
sql: gramTokens.map((_token, index) => ` AND (
|
|
26502
|
+
NOT EXISTS (
|
|
26503
|
+
SELECT 1 FROM file_content_grams cg_any_${index}
|
|
26504
|
+
WHERE cg_any_${index}.file_id = f.id
|
|
26505
|
+
)
|
|
26506
|
+
OR EXISTS (
|
|
26507
|
+
SELECT 1 FROM file_content_grams cg_${index}
|
|
26508
|
+
WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
|
|
26509
|
+
)
|
|
26510
|
+
)`).join(""),
|
|
26511
|
+
params: gramTokens
|
|
26512
|
+
};
|
|
26513
|
+
}
|
|
26362
26514
|
function rowToHit(row, score) {
|
|
26363
26515
|
return {
|
|
26364
26516
|
rootId: row.root_id,
|
|
@@ -26428,6 +26580,8 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
26428
26580
|
return [];
|
|
26429
26581
|
const ftsQuery = buildFtsQuery(query);
|
|
26430
26582
|
const filters = filterClauses(opts, d);
|
|
26583
|
+
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
26584
|
+
const shortFilters = shortTokenClauses(shortTokens);
|
|
26431
26585
|
const candidateLimit = Math.max(200, limit * 10);
|
|
26432
26586
|
let rows;
|
|
26433
26587
|
if (ftsQuery) {
|
|
@@ -26435,16 +26589,16 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
26435
26589
|
FROM files_fts fts
|
|
26436
26590
|
JOIN files f ON f.id = fts.rowid
|
|
26437
26591
|
JOIN index_roots r ON r.id = f.root_id
|
|
26438
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
26592
|
+
WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
|
|
26439
26593
|
ORDER BY bm25(files_fts, 10.0, 1.0)
|
|
26440
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
|
|
26441
|
-
const namePattern = `${query.trim()
|
|
26594
|
+
LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
26595
|
+
const namePattern = `${escapeLike(query.trim())}%`;
|
|
26442
26596
|
const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26443
26597
|
FROM files f
|
|
26444
26598
|
JOIN index_roots r ON r.id = f.root_id
|
|
26445
|
-
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
|
|
26599
|
+
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
|
|
26446
26600
|
ORDER BY length(f.name)
|
|
26447
|
-
LIMIT 100`).all(namePattern, ...filters.params);
|
|
26601
|
+
LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
|
|
26448
26602
|
const seen = new Set(rows.map((row) => row.id));
|
|
26449
26603
|
for (const row of nameRows) {
|
|
26450
26604
|
if (!seen.has(row.id))
|
|
@@ -26452,14 +26606,14 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
26452
26606
|
}
|
|
26453
26607
|
} else {
|
|
26454
26608
|
const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
|
|
26455
|
-
const likeParams = tokens.map((t) => `%${t
|
|
26609
|
+
const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
|
|
26456
26610
|
rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26457
26611
|
FROM files f
|
|
26458
26612
|
JOIN index_roots r ON r.id = f.root_id
|
|
26459
26613
|
WHERE ${likeClauses}${filters.sql}
|
|
26460
|
-
|
|
26614
|
+
ORDER BY length(f.name), length(f.rel_path), f.rel_path
|
|
26615
|
+
LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
26461
26616
|
}
|
|
26462
|
-
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
26463
26617
|
const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
|
|
26464
26618
|
return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
|
|
26465
26619
|
}
|
|
@@ -26497,24 +26651,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
|
|
|
26497
26651
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
|
|
26498
26652
|
}
|
|
26499
26653
|
const filters = filterClauses(opts, d);
|
|
26500
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26501
|
-
FROM files_fts fts
|
|
26502
|
-
JOIN files f ON f.id = fts.rowid
|
|
26503
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
26504
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
26505
|
-
ORDER BY fts.rank
|
|
26506
|
-
LIMIT 5000`).all(ftsQuery, ...filters.params);
|
|
26507
26654
|
const hits = [];
|
|
26508
|
-
|
|
26509
|
-
|
|
26510
|
-
|
|
26511
|
-
|
|
26512
|
-
|
|
26513
|
-
|
|
26514
|
-
|
|
26515
|
-
|
|
26516
|
-
|
|
26517
|
-
if (
|
|
26655
|
+
const pageSize = Math.max(500, limit * 20);
|
|
26656
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
26657
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26658
|
+
FROM files_fts fts
|
|
26659
|
+
JOIN files f ON f.id = fts.rowid
|
|
26660
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
26661
|
+
WHERE files_fts MATCH ?${filters.sql}
|
|
26662
|
+
ORDER BY fts.rank
|
|
26663
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
26664
|
+
if (rows.length === 0)
|
|
26665
|
+
break;
|
|
26666
|
+
for (const row of rows) {
|
|
26667
|
+
if (!regex.test(row.rel_path) && !regex.test(row.name))
|
|
26668
|
+
continue;
|
|
26669
|
+
const depth = row.rel_path.split("/").length - 1;
|
|
26670
|
+
const score = Math.max(0.05, 0.6 - depth * 0.02);
|
|
26671
|
+
const hit = rowToHit(row, score);
|
|
26672
|
+
if (!existsSync2(hit.absPath))
|
|
26673
|
+
continue;
|
|
26674
|
+
hits.push(hit);
|
|
26675
|
+
if (hits.length >= limit)
|
|
26676
|
+
break;
|
|
26677
|
+
}
|
|
26678
|
+
if (rows.length < pageSize)
|
|
26518
26679
|
break;
|
|
26519
26680
|
}
|
|
26520
26681
|
return hits;
|
|
@@ -26528,40 +26689,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
|
|
|
26528
26689
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
|
|
26529
26690
|
}
|
|
26530
26691
|
const filters = filterClauses(opts, d);
|
|
26531
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26532
|
-
FROM file_content_fts fts
|
|
26533
|
-
JOIN files f ON f.id = fts.rowid
|
|
26534
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
26535
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
26536
|
-
ORDER BY fts.rank
|
|
26537
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
|
|
26538
26692
|
const hits = [];
|
|
26539
|
-
|
|
26540
|
-
|
|
26541
|
-
const
|
|
26542
|
-
|
|
26543
|
-
|
|
26544
|
-
|
|
26545
|
-
|
|
26546
|
-
|
|
26547
|
-
|
|
26548
|
-
|
|
26693
|
+
const pageSize = Math.max(200, limit * 10);
|
|
26694
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
26695
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26696
|
+
FROM file_content_fts fts
|
|
26697
|
+
JOIN files f ON f.id = fts.rowid
|
|
26698
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
26699
|
+
WHERE file_content_fts MATCH ?${filters.sql}
|
|
26700
|
+
ORDER BY fts.rank
|
|
26701
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
26702
|
+
if (rows.length === 0)
|
|
26703
|
+
break;
|
|
26704
|
+
for (let i = 0;i < rows.length && hits.length < limit; i++) {
|
|
26705
|
+
const row = rows[i];
|
|
26706
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
26707
|
+
let content;
|
|
26708
|
+
try {
|
|
26709
|
+
content = readFileSync4(absPath, "utf-8");
|
|
26710
|
+
} catch {
|
|
26711
|
+
continue;
|
|
26712
|
+
}
|
|
26713
|
+
const lines = content.split(`
|
|
26549
26714
|
`);
|
|
26550
|
-
|
|
26551
|
-
|
|
26552
|
-
|
|
26553
|
-
|
|
26715
|
+
const matches = [];
|
|
26716
|
+
for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
|
|
26717
|
+
if (regex.test(lines[n])) {
|
|
26718
|
+
matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
|
|
26719
|
+
}
|
|
26554
26720
|
}
|
|
26721
|
+
if (matches.length === 0)
|
|
26722
|
+
continue;
|
|
26723
|
+
const rankIndex = offset + i;
|
|
26724
|
+
const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
|
|
26725
|
+
hits.push({
|
|
26726
|
+
...rowToHit(row, score),
|
|
26727
|
+
line: matches[0].line,
|
|
26728
|
+
lineText: matches[0].text,
|
|
26729
|
+
matches
|
|
26730
|
+
});
|
|
26555
26731
|
}
|
|
26556
|
-
if (
|
|
26557
|
-
|
|
26558
|
-
const score = Math.max(0.25, 0.65 - i * 0.05);
|
|
26559
|
-
hits.push({
|
|
26560
|
-
...rowToHit(row, score),
|
|
26561
|
-
line: matches[0].line,
|
|
26562
|
-
lineText: matches[0].text,
|
|
26563
|
-
matches
|
|
26564
|
-
});
|
|
26732
|
+
if (rows.length < pageSize)
|
|
26733
|
+
break;
|
|
26565
26734
|
}
|
|
26566
26735
|
return hits;
|
|
26567
26736
|
}
|
|
@@ -26572,42 +26741,51 @@ function searchFileContent(query, opts = {}, db) {
|
|
|
26572
26741
|
if (!ftsQuery)
|
|
26573
26742
|
return [];
|
|
26574
26743
|
const filters = filterClauses(opts, d);
|
|
26575
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26576
|
-
FROM file_content_fts fts
|
|
26577
|
-
JOIN files f ON f.id = fts.rowid
|
|
26578
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
26579
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
26580
|
-
ORDER BY fts.rank
|
|
26581
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
|
|
26582
26744
|
const tokens = tokenize(query);
|
|
26583
26745
|
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
26746
|
+
const gramFilters = contentGramClauses(shortTokens);
|
|
26584
26747
|
const scored = [];
|
|
26585
|
-
|
|
26586
|
-
|
|
26587
|
-
const
|
|
26588
|
-
|
|
26589
|
-
|
|
26590
|
-
|
|
26591
|
-
|
|
26592
|
-
|
|
26593
|
-
|
|
26594
|
-
if (
|
|
26595
|
-
|
|
26596
|
-
|
|
26748
|
+
const pageSize = Math.max(50, limit * 3);
|
|
26749
|
+
for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
|
|
26750
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
26751
|
+
FROM file_content_fts fts
|
|
26752
|
+
JOIN files f ON f.id = fts.rowid
|
|
26753
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
26754
|
+
WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
|
|
26755
|
+
ORDER BY fts.rank
|
|
26756
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
|
|
26757
|
+
if (rows.length === 0)
|
|
26758
|
+
break;
|
|
26759
|
+
for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
|
|
26760
|
+
const row = rows[i];
|
|
26761
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
26762
|
+
let content;
|
|
26763
|
+
try {
|
|
26764
|
+
content = readFileSync4(absPath, "utf-8");
|
|
26765
|
+
} catch {
|
|
26766
|
+
continue;
|
|
26767
|
+
}
|
|
26768
|
+
if (shortTokens.length > 0) {
|
|
26769
|
+
const lower = content.toLowerCase();
|
|
26770
|
+
if (!shortTokens.every((t) => lower.includes(t)))
|
|
26771
|
+
continue;
|
|
26772
|
+
}
|
|
26773
|
+
const { matches, tier } = findLineMatches(content, query, tokens);
|
|
26774
|
+
if (matches.length === 0)
|
|
26597
26775
|
continue;
|
|
26776
|
+
const rankIndex = offset + i;
|
|
26777
|
+
const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
|
|
26778
|
+
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
26779
|
+
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
26780
|
+
scored.push({
|
|
26781
|
+
...rowToHit(row, score),
|
|
26782
|
+
line: matches[0].line,
|
|
26783
|
+
lineText: matches[0].text,
|
|
26784
|
+
matches
|
|
26785
|
+
});
|
|
26598
26786
|
}
|
|
26599
|
-
|
|
26600
|
-
|
|
26601
|
-
continue;
|
|
26602
|
-
const base = Math.max(0.25, 0.55 - i * 0.04);
|
|
26603
|
-
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
26604
|
-
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
26605
|
-
scored.push({
|
|
26606
|
-
...rowToHit(row, score),
|
|
26607
|
-
line: matches[0].line,
|
|
26608
|
-
lineText: matches[0].text,
|
|
26609
|
-
matches
|
|
26610
|
-
});
|
|
26787
|
+
if (rows.length < pageSize)
|
|
26788
|
+
break;
|
|
26611
26789
|
}
|
|
26612
26790
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
26613
26791
|
}
|
|
@@ -26620,7 +26798,7 @@ class FilesProvider {
|
|
|
26620
26798
|
return hasReadyRoot();
|
|
26621
26799
|
}
|
|
26622
26800
|
async search(query, options) {
|
|
26623
|
-
|
|
26801
|
+
scheduleAutoRefreshStaleRoots();
|
|
26624
26802
|
const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
|
|
26625
26803
|
return hits.map((hit) => ({
|
|
26626
26804
|
title: hit.name,
|
|
@@ -26648,7 +26826,7 @@ class ContentProvider {
|
|
|
26648
26826
|
return hasReadyRoot();
|
|
26649
26827
|
}
|
|
26650
26828
|
async search(query, options) {
|
|
26651
|
-
|
|
26829
|
+
scheduleAutoRefreshStaleRoots();
|
|
26652
26830
|
const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
|
|
26653
26831
|
return hits.map((hit) => ({
|
|
26654
26832
|
title: hit.name,
|
|
@@ -26775,7 +26953,7 @@ function rowToSearch(row) {
|
|
|
26775
26953
|
}
|
|
26776
26954
|
function createSearch(data, db) {
|
|
26777
26955
|
const d = db ?? getDb();
|
|
26778
|
-
const id = generateId();
|
|
26956
|
+
const id = data.id ?? generateId();
|
|
26779
26957
|
const now = new Date().toISOString();
|
|
26780
26958
|
d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
|
|
26781
26959
|
VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
|
|
@@ -26857,7 +27035,7 @@ function createResults(results, db) {
|
|
|
26857
27035
|
d.exec("BEGIN");
|
|
26858
27036
|
try {
|
|
26859
27037
|
for (const data of results) {
|
|
26860
|
-
const id = generateId();
|
|
27038
|
+
const id = data.id ?? generateId();
|
|
26861
27039
|
stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
26862
27040
|
created.push({
|
|
26863
27041
|
id,
|
|
@@ -27014,13 +27192,300 @@ function isProviderConfigured(provider) {
|
|
|
27014
27192
|
return !!Bun.env[provider.apiKeyEnv];
|
|
27015
27193
|
}
|
|
27016
27194
|
|
|
27195
|
+
// src/lib/router.ts
|
|
27196
|
+
var PROVIDER_DESCRIPTIONS = {
|
|
27197
|
+
files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
|
|
27198
|
+
content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
|
|
27199
|
+
google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
|
|
27200
|
+
serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
|
|
27201
|
+
exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
|
|
27202
|
+
perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
|
|
27203
|
+
brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
|
|
27204
|
+
bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
|
|
27205
|
+
twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
|
|
27206
|
+
reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
|
|
27207
|
+
youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
|
|
27208
|
+
hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
|
|
27209
|
+
github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
|
|
27210
|
+
arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
|
|
27211
|
+
};
|
|
27212
|
+
function clampMaxProviders(value) {
|
|
27213
|
+
if (value === undefined || !Number.isFinite(value))
|
|
27214
|
+
return 3;
|
|
27215
|
+
return Math.max(1, Math.min(5, Math.floor(value)));
|
|
27216
|
+
}
|
|
27217
|
+
function clampConfidence(value) {
|
|
27218
|
+
return typeof value === "number" && Number.isFinite(value) ? Math.max(0, Math.min(1, value)) : 0.5;
|
|
27219
|
+
}
|
|
27220
|
+
/**
 * Filters a candidate list down to names listed in PROVIDER_NAMES, dropping
 * repeats.
 *
 * @param {Iterable<string>} candidates - Provider names to consider.
 * @returns {string[]} The recognized names, each once, in the order of their
 *   first appearance in `candidates`.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  // A Set keeps insertion order, so first occurrences define the output order.
  const unique = new Set();
  for (const name of candidates) {
    if (known.has(name)) {
      unique.add(name);
    }
  }
  return [...unique];
}
|
|
27232
|
+
/**
 * Adds `amount` to a provider's running score, but only for providers that
 * are in the candidate set. Mutates `scores` in place.
 *
 * @param {Map<string, number>} scores - Accumulated score per provider.
 * @param {Set<string>} candidateSet - Providers eligible for scoring.
 * @param {string} provider - Provider to boost.
 * @param {number} amount - Score increment.
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    // Providers without an entry yet start from zero.
    const current = scores.get(provider) ?? 0;
    scores.set(provider, current + amount);
  }
}
|
|
27237
|
+
/**
 * Reports whether at least one pattern matches the query.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns tried in order until one matches.
 * @returns {boolean} True on the first match, false when none match.
 */
function hasAny(query, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(query)) {
      return true;
    }
  }
  return false;
}
|
|
27240
|
+
/**
 * Picks search providers for a query using keyword/regex rules only.
 *
 * Every candidate starts with a small base score. Each rule whose patterns
 * match the trimmed, lower-cased query boosts specific providers and records
 * a reason. When no rule matches, a general-purpose boost set is applied.
 * Providers are ranked by score, with ties broken by candidate order.
 *
 * @param {string} query - The user's search query.
 * @param {string[]} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number }} [options] - Routing limits.
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 *   The routing decision. Confidence is 0 when there are no candidates,
 *   otherwise the top score divided by 6, limited to [0.35, 0.9].
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const available = normalizeCandidates(candidates);
  const limit = clampMaxProviders(options.maxProviders);
  if (available.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; the order fixes the order of reasons.
  const rules = [
    {
      reason: "query looks local-file oriented",
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]]
    },
    {
      reason: "query contains code/content lookup signals",
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]]
    },
    {
      reason: "query asks for scholarly or research material",
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]]
    },
    {
      reason: "query asks for code or repository material",
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]]
    },
    {
      reason: "query asks for video material",
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]]
    },
    {
      reason: "query asks for community discussion",
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]]
    },
    {
      reason: "query asks for Hacker News style discussion",
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]]
    },
    {
      reason: "query asks for social posts",
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]]
    },
    {
      reason: "query appears time-sensitive",
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]]
    }
  ];
  // Applied only when none of the rules above matched.
  const fallbackBoosts = [
    ["exa", 2.5],
    ["perplexity", 2],
    ["brave", 1.5],
    ["google", 1.5],
    ["hackernews", 0.75]
  ];

  const candidateSet = new Set(available);
  const scores = new Map(available.map((name) => [name, 0.05]));
  const reasons = [];
  const text = query.trim().toLowerCase();

  const applyBoosts = (boosts) => {
    for (const [provider, amount] of boosts) {
      addScore(scores, candidateSet, provider, amount);
    }
  };

  for (const rule of rules) {
    if (!hasAny(text, rule.patterns)) {
      continue;
    }
    applyBoosts(rule.boosts);
    reasons.push(rule.reason);
  }
  if (reasons.length === 0) {
    applyBoosts(fallbackBoosts);
    reasons.push("general query fallback");
  }

  // Position in the candidate list, used to break score ties.
  const position = new Map(available.map((name, index) => [name, index]));
  const ranked = [...scores.entries()];
  ranked.sort((left, right) => {
    const byScore = right[1] - left[1];
    return byScore || position.get(left[0]) - position.get(right[0]);
  });
  const keep = Math.min(limit, available.length);
  const selectedProviders = ranked.slice(0, keep).map((entry) => entry[0]);

  const topScore = scores.get(selectedProviders[0]) ?? 0;
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: available,
    reason: reasons.join("; "),
    confidence: Math.max(0.35, Math.min(0.9, topScore / 6))
  };
}
|
|
27330
|
+
/**
 * Builds the JSON Schema describing the router model's expected reply.
 *
 * @param {string[]} candidates - Provider names the model may choose from;
 *   used as the enum of allowed array items.
 * @param {number} maxProviders - Upper bound on the number of selections.
 * @returns {object} An object schema requiring `selectedProviders`, `reason`
 *   and `confidence`, with no additional properties allowed.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const properties = {
    selectedProviders,
    reason: { type: "string" },
    confidence: { type: "number", minimum: 0, maximum: 1 }
  };
  return {
    type: "object",
    properties,
    // All three properties are mandatory, in declaration order.
    required: Object.keys(properties),
    additionalProperties: false
  };
}
|
|
27347
|
+
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {string[]} candidates - Provider names that are valid selections.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The sanitized selection, or null when `raw` is not JSON, is not an object
 *   with a `selectedProviders` array, or names no valid candidate.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse also succeeds for `null` and primitives; reading a property of
  // null would throw, so reject anything that is not an object up front.
  if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.selectedProviders)) {
    return null;
  }
  const candidateSet = new Set(candidates);
  const valid = parsed.selectedProviders.filter(
    (provider) => typeof provider === "string" && candidateSet.has(provider)
  );
  // Drop repeated names (first occurrence wins) before applying the limit so
  // the same provider is never selected twice.
  const selectedProviders = [...new Set(valid)].slice(0, maxProviders);
  if (selectedProviders.length === 0) {
    return null;
  }
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
|
|
27366
|
+
/**
 * Asks the Cerebras chat-completions API to choose providers for a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made: the heuristic router's
 * result is returned with an `error` note explaining the fallback.
 *
 * @param {string} query - The user's search query.
 * @param {string[]} candidates - Provider names the model may select.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options
 *   Selection limit, request timeout and model name.
 * @returns {Promise<object>} A routing decision with strategy "cerebras", or
 *   the heuristic decision plus `error` when no API key is configured.
 * @throws {Error} When the HTTP response is not ok, carries no message
 *   content, or the content fails validation. The fetch call itself may also
 *   reject (e.g. when the timeout signal aborts it).
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Give the model a short description of every provider it may pick.
  const providers = candidates.map((name) => {
    return { name, description: PROVIDER_DESCRIPTIONS[name] };
  });
  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers
    })
  };
  const payload = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Constrain the reply to the router schema so it can be parsed as JSON.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, options.maxProviders)
      }
    }
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }

  const selection = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!selection) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return { strategy: "cerebras", candidates, ...selection };
}
|
|
27428
|
+
/**
 * Chooses which providers should handle a query, preferring the Cerebras
 * router and falling back to heuristic routing when it fails.
 *
 * @param {string} query - The user's search query.
 * @param {string[]} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options]
 *   Optional limits. `timeoutMs` defaults to 1200 and is floored with a
 *   minimum of 250. `model` defaults to the CEREBRAS_MODEL environment
 *   variable, then to "gpt-oss-120b".
 * @returns {Promise<object>} The routing decision. When the Cerebras call
 *   throws, the heuristic decision is returned with the failure message in
 *   `error`.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const available = normalizeCandidates(candidates);

  // Never ask for more providers than exist, but keep the limit at least 1.
  const requested = clampMaxProviders(options.maxProviders);
  const maxProviders = Math.min(requested, Math.max(1, available.length));

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }

  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  if (available.length === 0) {
    return routeSearchProvidersHeuristic(query, available, { maxProviders });
  }

  try {
    return await routeWithCerebras(query, available, { maxProviders, timeoutMs, model });
  } catch (err) {
    const heuristic = routeSearchProvidersHeuristic(query, available, { maxProviders });
    const message = err instanceof Error ? err.message : String(err);
    return { ...heuristic, error: message };
  }
}
|
|
27445
|
+
|
|
27017
27446
|
// src/lib/search.ts
|
|
27447
|
+
/**
 * Settles with `promise2`'s outcome unless it takes longer than `timeoutMs`.
 *
 * The timeout only rejects the returned promise; nothing here cancels the
 * work behind `promise2`.
 *
 * @param {Promise<T>} promise2 - The operation to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds. A non-finite or
 *   non-positive value disables the timeout entirely.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The fulfilled value of `promise2`.
 * @throws {Error} `"<label> timed out after <n>ms"` when the limit elapses
 *   first; otherwise whatever `promise2` rejects with.
 * @template T
 */
async function withTimeout(promise2, timeoutMs, label) {
  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0)
    return promise2;
  // Timer delays are stored as signed 32-bit integers. Larger values overflow
  // and make the timer fire almost immediately, so cap the delay instead.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const delay = Math.min(timeoutMs, MAX_TIMER_DELAY_MS);
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} timed out after ${delay}ms`)), delay);
    // Do not let a pending timeout keep the process alive on its own.
    timer.unref?.();
  });
  try {
    return await Promise.race([promise2, deadline]);
  } finally {
    // Always release the timer, whichever side settled first.
    clearTimeout(timer);
  }
}
|
|
27464
|
+
/**
 * Runs `task` over every item with at most `concurrency` tasks in flight and
 * reports each outcome in the same shape as `Promise.allSettled`.
 *
 * @param {T[]} items - Inputs to process.
 * @param {number} concurrency - Maximum number of simultaneous tasks. It is
 *   rounded down; values below 1 and values that are not numbers (NaN,
 *   undefined) are treated as 1.
 * @param {(item: T) => Promise<R> | R} task - Work to perform for one item.
 * @returns {Promise<Array<{ status: "fulfilled", value: R } | { status: "rejected", reason: unknown }>>}
 *   One entry per item, at the item's original index. Never rejects because
 *   of a failing task.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }
  // Math.max(1, NaN) is NaN, which would spawn zero workers and leave every
  // result slot empty, so an unusable limit falls back to a single worker.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
27018
27482
|
async function unifiedSearch(query, opts = {}) {
|
|
27019
27483
|
const config2 = getConfig();
|
|
27020
27484
|
const startTime = Date.now();
|
|
27021
27485
|
const db = opts.db;
|
|
27022
27486
|
let providerNames = opts.providers ?? [];
|
|
27023
|
-
|
|
27487
|
+
const smartProfile = opts.profile === "smart";
|
|
27488
|
+
if (opts.profile && !smartProfile) {
|
|
27024
27489
|
const profile = getProfileByName(opts.profile, db);
|
|
27025
27490
|
if (profile) {
|
|
27026
27491
|
providerNames = profile.providers;
|
|
@@ -27036,7 +27501,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27036
27501
|
}
|
|
27037
27502
|
const errors4 = [];
|
|
27038
27503
|
const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
|
|
27039
|
-
|
|
27504
|
+
let activeProviders = providerNames.filter((name) => {
|
|
27040
27505
|
try {
|
|
27041
27506
|
if (getProvider(name).isConfigured())
|
|
27042
27507
|
return true;
|
|
@@ -27047,20 +27512,36 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27047
27512
|
});
|
|
27048
27513
|
}
|
|
27049
27514
|
return false;
|
|
27050
|
-
} catch {
|
|
27515
|
+
} catch (err) {
|
|
27516
|
+
if (explicitRequest) {
|
|
27517
|
+
errors4.push({
|
|
27518
|
+
provider: name,
|
|
27519
|
+
error: err instanceof Error ? err.message : "unknown provider"
|
|
27520
|
+
});
|
|
27521
|
+
}
|
|
27051
27522
|
return false;
|
|
27052
27523
|
}
|
|
27053
27524
|
});
|
|
27525
|
+
const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config2.router.enabled;
|
|
27526
|
+
let routing;
|
|
27527
|
+
if (routingRequested && activeProviders.length > 0) {
|
|
27528
|
+
routing = await routeSearchProviders(query, activeProviders, {
|
|
27529
|
+
maxProviders: config2.router.maxProviders,
|
|
27530
|
+
timeoutMs: config2.router.timeoutMs,
|
|
27531
|
+
model: config2.router.model
|
|
27532
|
+
});
|
|
27533
|
+
activeProviders = routing.selectedProviders;
|
|
27534
|
+
}
|
|
27054
27535
|
const searchOptions = {
|
|
27055
27536
|
limit: config2.defaultLimit,
|
|
27056
27537
|
...opts.options
|
|
27057
27538
|
};
|
|
27058
|
-
const results = await
|
|
27539
|
+
const results = await allSettledLimited(activeProviders, config2.maxConcurrent, async (name) => {
|
|
27059
27540
|
const provider = getProvider(name);
|
|
27060
|
-
const rawResults = await provider.search(query, searchOptions);
|
|
27541
|
+
const rawResults = await withTimeout(provider.search(query, searchOptions), config2.providerTimeoutMs, provider.displayName);
|
|
27061
27542
|
updateProviderLastUsed(name, db);
|
|
27062
27543
|
return { name, results: rawResults };
|
|
27063
|
-
})
|
|
27544
|
+
});
|
|
27064
27545
|
const allResults = [];
|
|
27065
27546
|
const searchId = generateId();
|
|
27066
27547
|
for (const result of results) {
|
|
@@ -27114,11 +27595,13 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27114
27595
|
createdAt: new Date().toISOString()
|
|
27115
27596
|
},
|
|
27116
27597
|
results: finalResults,
|
|
27117
|
-
errors: errors4
|
|
27598
|
+
errors: errors4,
|
|
27599
|
+
...routing && { routing }
|
|
27118
27600
|
};
|
|
27119
27601
|
}
|
|
27120
27602
|
const persistable = config2.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
|
|
27121
27603
|
const search = createSearch({
|
|
27604
|
+
id: searchId,
|
|
27122
27605
|
query,
|
|
27123
27606
|
providers: activeProviders,
|
|
27124
27607
|
resultCount: persistable.length,
|
|
@@ -27127,6 +27610,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27127
27610
|
if (persistable.length > 0) {
|
|
27128
27611
|
createResults(persistable.map((r) => ({
|
|
27129
27612
|
searchId: search.id,
|
|
27613
|
+
id: r.id,
|
|
27130
27614
|
title: r.title,
|
|
27131
27615
|
url: r.url,
|
|
27132
27616
|
snippet: r.snippet,
|
|
@@ -27143,7 +27627,8 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
27143
27627
|
return {
|
|
27144
27628
|
search: { ...search, resultCount: finalResults.length, duration: duration3 },
|
|
27145
27629
|
results: finalResults,
|
|
27146
|
-
errors: errors4
|
|
27630
|
+
errors: errors4,
|
|
27631
|
+
...routing && { routing }
|
|
27147
27632
|
};
|
|
27148
27633
|
}
|
|
27149
27634
|
async function searchSingleProvider(provider, query, options, db) {
|
|
@@ -27377,7 +27862,7 @@ function findLocal(query, opts = {}, db) {
|
|
|
27377
27862
|
return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
|
|
27378
27863
|
}
|
|
27379
27864
|
if (opts.refresh !== false)
|
|
27380
|
-
|
|
27865
|
+
scheduleAutoRefreshStaleRoots(db);
|
|
27381
27866
|
const queryOpts = {
|
|
27382
27867
|
root: opts.root,
|
|
27383
27868
|
ext: opts.ext,
|
|
@@ -27988,13 +28473,15 @@ function buildServer() {
|
|
|
27988
28473
|
providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
|
|
27989
28474
|
profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
|
|
27990
28475
|
limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
|
|
27991
|
-
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
|
|
27992
|
-
|
|
28476
|
+
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
|
|
28477
|
+
smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
|
|
28478
|
+
}, async ({ query, providers, profile, limit, dedup, smart }) => {
|
|
27993
28479
|
const response = await unifiedSearch(query, {
|
|
27994
28480
|
providers,
|
|
27995
28481
|
profile,
|
|
27996
28482
|
options: limit ? { limit } : undefined,
|
|
27997
|
-
dedup
|
|
28483
|
+
dedup,
|
|
28484
|
+
smart
|
|
27998
28485
|
});
|
|
27999
28486
|
return {
|
|
28000
28487
|
content: [
|
|
@@ -28013,7 +28500,8 @@ function buildServer() {
|
|
|
28013
28500
|
source: r.source,
|
|
28014
28501
|
score: r.score
|
|
28015
28502
|
})),
|
|
28016
|
-
errors: response.errors
|
|
28503
|
+
errors: response.errors,
|
|
28504
|
+
routing: response.routing
|
|
28017
28505
|
}, null, 2)
|
|
28018
28506
|
}
|
|
28019
28507
|
]
|
|
@@ -28268,12 +28756,14 @@ function buildServer() {
|
|
|
28268
28756
|
default_limit: exports_external.number().int().optional(),
|
|
28269
28757
|
dedup: exports_external.boolean().optional(),
|
|
28270
28758
|
max_concurrent: exports_external.number().int().optional(),
|
|
28759
|
+
provider_timeout_ms: exports_external.number().int().optional(),
|
|
28271
28760
|
default_profile: exports_external.string().nullable().optional()
|
|
28272
28761
|
}, async (updates) => {
|
|
28273
28762
|
const config2 = setConfig({
|
|
28274
28763
|
...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
|
|
28275
28764
|
...updates.dedup !== undefined && { dedup: updates.dedup },
|
|
28276
28765
|
...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
|
|
28766
|
+
...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
|
|
28277
28767
|
...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
|
|
28278
28768
|
});
|
|
28279
28769
|
return {
|
|
@@ -29537,6 +30027,7 @@ if (handleCliFlags(argv)) {
|
|
|
29537
30027
|
process.exit(0);
|
|
29538
30028
|
}
|
|
29539
30029
|
async function main() {
|
|
30030
|
+
startBackgroundRefresh();
|
|
29540
30031
|
if (isHttpMode(argv)) {
|
|
29541
30032
|
startMcpHttpServer({ port: resolveMcpHttpPort(argv) });
|
|
29542
30033
|
return;
|