@hasna/search 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +602 -128
- package/dist/db/index-migrations.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -0
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/searches.d.ts +1 -0
- package/dist/db/searches.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +602 -126
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +1 -1
- package/dist/lib/local/find.d.ts.map +1 -1
- package/dist/lib/local/indexer.d.ts +11 -0
- package/dist/lib/local/indexer.d.ts.map +1 -1
- package/dist/lib/local/query.d.ts.map +1 -1
- package/dist/lib/router.d.ts +10 -0
- package/dist/lib/router.d.ts.map +1 -0
- package/dist/lib/search.d.ts +1 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/index.js +621 -130
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/server/index.js +624 -139
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/types/index.d.ts +22 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -8927,12 +8927,19 @@ var DEFAULT_CONFIG = {
|
|
|
8927
8927
|
defaultLimit: 10,
|
|
8928
8928
|
defaultProviders: [],
|
|
8929
8929
|
defaultProfile: null,
|
|
8930
|
+
router: {
|
|
8931
|
+
enabled: false,
|
|
8932
|
+
model: "gpt-oss-120b",
|
|
8933
|
+
maxProviders: 3,
|
|
8934
|
+
timeoutMs: 1200
|
|
8935
|
+
},
|
|
8930
8936
|
transcriber: {
|
|
8931
8937
|
baseUrl: "http://localhost:19600",
|
|
8932
8938
|
fallbackCli: "microservice-transcriber"
|
|
8933
8939
|
},
|
|
8934
8940
|
dedup: true,
|
|
8935
8941
|
maxConcurrent: 5,
|
|
8942
|
+
providerTimeoutMs: 15000,
|
|
8936
8943
|
indexStaleMinutes: 5,
|
|
8937
8944
|
indexAutoRefresh: true,
|
|
8938
8945
|
recordLocalResults: false
|
|
@@ -9673,7 +9680,7 @@ function rowToSearch(row) {
|
|
|
9673
9680
|
}
|
|
9674
9681
|
function createSearch(data, db) {
|
|
9675
9682
|
const d = db ?? getDb();
|
|
9676
|
-
const id = generateId();
|
|
9683
|
+
const id = data.id ?? generateId();
|
|
9677
9684
|
const now = new Date().toISOString();
|
|
9678
9685
|
d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
|
|
9679
9686
|
VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
|
|
@@ -9746,7 +9753,7 @@ function rowToResult(row) {
|
|
|
9746
9753
|
}
|
|
9747
9754
|
function createResult(data, db) {
|
|
9748
9755
|
const d = db ?? getDb();
|
|
9749
|
-
const id = generateId();
|
|
9756
|
+
const id = data.id ?? generateId();
|
|
9750
9757
|
const now = new Date().toISOString();
|
|
9751
9758
|
d.prepare(`INSERT INTO search_results (id, search_id, title, url, snippet, source, provider, rank, score, published_at, thumbnail, metadata, created_at)
|
|
9752
9759
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`).run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
@@ -9775,7 +9782,7 @@ function createResults(results, db) {
|
|
|
9775
9782
|
d.exec("BEGIN");
|
|
9776
9783
|
try {
|
|
9777
9784
|
for (const data of results) {
|
|
9778
|
-
const id = generateId();
|
|
9785
|
+
const id = data.id ?? generateId();
|
|
9779
9786
|
stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
9780
9787
|
created.push({
|
|
9781
9788
|
id,
|
|
@@ -9956,6 +9963,31 @@ var migrations2 = [
|
|
|
9956
9963
|
);
|
|
9957
9964
|
`);
|
|
9958
9965
|
}
|
|
9966
|
+
},
|
|
9967
|
+
{
|
|
9968
|
+
version: 2,
|
|
9969
|
+
description: "Local file index filter indexes",
|
|
9970
|
+
up: (db) => {
|
|
9971
|
+
db.exec(`
|
|
9972
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
|
|
9973
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
|
|
9974
|
+
`);
|
|
9975
|
+
}
|
|
9976
|
+
},
|
|
9977
|
+
{
|
|
9978
|
+
version: 3,
|
|
9979
|
+
description: "Local content short-token filter grams",
|
|
9980
|
+
up: (db) => {
|
|
9981
|
+
db.exec(`
|
|
9982
|
+
CREATE TABLE IF NOT EXISTS file_content_grams (
|
|
9983
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
9984
|
+
gram TEXT NOT NULL,
|
|
9985
|
+
PRIMARY KEY (file_id, gram)
|
|
9986
|
+
);
|
|
9987
|
+
CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
|
|
9988
|
+
ON file_content_grams(gram, file_id);
|
|
9989
|
+
`);
|
|
9990
|
+
}
|
|
9959
9991
|
}
|
|
9960
9992
|
];
|
|
9961
9993
|
function runIndexMigrations(db) {
|
|
@@ -10036,7 +10068,18 @@ function getConfig() {
|
|
|
10036
10068
|
try {
|
|
10037
10069
|
const raw = readFileSync2(path, "utf-8");
|
|
10038
10070
|
const parsed = JSON.parse(raw);
|
|
10039
|
-
return {
|
|
10071
|
+
return {
|
|
10072
|
+
...DEFAULT_CONFIG,
|
|
10073
|
+
...parsed,
|
|
10074
|
+
router: {
|
|
10075
|
+
...DEFAULT_CONFIG.router,
|
|
10076
|
+
...parsed.router ?? {}
|
|
10077
|
+
},
|
|
10078
|
+
transcriber: {
|
|
10079
|
+
...DEFAULT_CONFIG.transcriber,
|
|
10080
|
+
...parsed.transcriber ?? {}
|
|
10081
|
+
}
|
|
10082
|
+
};
|
|
10040
10083
|
} catch {
|
|
10041
10084
|
return { ...DEFAULT_CONFIG };
|
|
10042
10085
|
}
|
|
@@ -10468,6 +10511,7 @@ function removeRoot(idOrPath, db) {
|
|
|
10468
10511
|
d.exec("BEGIN");
|
|
10469
10512
|
try {
|
|
10470
10513
|
d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
10514
|
+
d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
10471
10515
|
d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
|
|
10472
10516
|
d.exec("COMMIT");
|
|
10473
10517
|
} catch (err) {
|
|
@@ -10479,6 +10523,21 @@ function removeRoot(idOrPath, db) {
|
|
|
10479
10523
|
/**
 * Decide whether a scanned file's contents should be read into the content index.
 *
 * All of the following must hold: content indexing is enabled on the root, the
 * file is non-empty and no larger than the root's `maxFileSize`, its extension
 * is not a known binary extension, and its name is not content-excluded.
 *
 * @param {{contentIndexing: *, maxFileSize: number}} root - Index root settings.
 * @param {{size: number, ext: string, name: string}} file - Scanned file entry.
 * @returns {*} Truthy when the file's content should be indexed, falsy otherwise.
 */
function shouldIndexContent(root, file) {
  return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
}
|
|
10526
|
+
/**
 * Collect the distinct 1- and 2-character grams found inside the words of `body`.
 *
 * The text is lowercased and split into runs of `[a-z0-9_$]`. Every single
 * character and every adjacent character pair within a run is recorded once,
 * in first-seen order. Characters outside that alphabet never produce grams.
 *
 * @param {string} body - File content to scan.
 * @returns {string[]} Distinct grams in insertion order.
 */
function contentShortGrams(body) {
  // The alphabet [a-z0-9_$] has 38 symbols, so there are at most 38 unigrams
  // plus 38 * 38 bigrams. Once every one of them has been seen the rest of
  // the text cannot add anything, so scanning can stop. (A larger cap could
  // never be reached and would make the early exit dead code.)
  const MAX_DISTINCT_GRAMS = 38 + 38 * 38;
  const grams = new Set();
  for (const [word] of body.toLowerCase().matchAll(/[a-z0-9_$]+/g)) {
    for (let i = 0; i < word.length; i++) {
      grams.add(word[i]);
      if (i + 1 < word.length)
        grams.add(word.slice(i, i + 2));
    }
    if (grams.size >= MAX_DISTINCT_GRAMS)
      break;
  }
  return [...grams];
}
|
|
10482
10541
|
function indexRoot(idOrPath, opts = {}, db) {
|
|
10483
10542
|
const d = db ?? getIndexDb();
|
|
10484
10543
|
const root = getRoot(idOrPath, d);
|
|
@@ -10497,6 +10556,8 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
10497
10556
|
const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
|
|
10498
10557
|
const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
|
|
10499
10558
|
const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
|
|
10559
|
+
const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
|
|
10560
|
+
const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
|
|
10500
10561
|
const stats = {
|
|
10501
10562
|
rootId: root.id,
|
|
10502
10563
|
added: 0,
|
|
@@ -10507,38 +10568,57 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
10507
10568
|
skippedDirs: skippedDirs.length,
|
|
10508
10569
|
durationMs: 0
|
|
10509
10570
|
};
|
|
10571
|
+
const seen = new Set;
|
|
10572
|
+
const changes = [];
|
|
10573
|
+
for (const file of scanned) {
|
|
10574
|
+
seen.add(file.relPath);
|
|
10575
|
+
const prev = existing.get(file.relPath);
|
|
10576
|
+
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
10577
|
+
if (prev && !changed && !opts.force)
|
|
10578
|
+
continue;
|
|
10579
|
+
const wantContent = shouldIndexContent(root, file);
|
|
10580
|
+
const absPath = `${root.path}/${file.relPath}`;
|
|
10581
|
+
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
10582
|
+
let body = null;
|
|
10583
|
+
if (wantContent && !isBinary) {
|
|
10584
|
+
try {
|
|
10585
|
+
body = readFileSync4(absPath, "utf-8");
|
|
10586
|
+
} catch {
|
|
10587
|
+
isBinary = true;
|
|
10588
|
+
}
|
|
10589
|
+
}
|
|
10590
|
+
changes.push({
|
|
10591
|
+
file,
|
|
10592
|
+
prev,
|
|
10593
|
+
isBinary,
|
|
10594
|
+
body,
|
|
10595
|
+
grams: body !== null ? contentShortGrams(body) : [],
|
|
10596
|
+
contentIndexed: body !== null ? 1 : 0
|
|
10597
|
+
});
|
|
10598
|
+
}
|
|
10510
10599
|
d.exec("BEGIN");
|
|
10511
10600
|
try {
|
|
10512
|
-
const
|
|
10513
|
-
for (const file of scanned) {
|
|
10514
|
-
seen.add(file.relPath);
|
|
10515
|
-
const prev = existing.get(file.relPath);
|
|
10516
|
-
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
10517
|
-
if (prev && !changed && !opts.force)
|
|
10518
|
-
continue;
|
|
10519
|
-
const wantContent = shouldIndexContent(root, file);
|
|
10520
|
-
const absPath = `${root.path}/${file.relPath}`;
|
|
10521
|
-
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
10522
|
-
let body = null;
|
|
10523
|
-
if (wantContent && !isBinary) {
|
|
10524
|
-
try {
|
|
10525
|
-
body = readFileSync4(absPath, "utf-8");
|
|
10526
|
-
} catch {
|
|
10527
|
-
isBinary = true;
|
|
10528
|
-
}
|
|
10529
|
-
}
|
|
10530
|
-
const contentIndexed = body !== null ? 1 : 0;
|
|
10601
|
+
for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
|
|
10531
10602
|
if (prev) {
|
|
10532
|
-
if (prev.content_indexed)
|
|
10603
|
+
if (prev.content_indexed) {
|
|
10533
10604
|
deleteContent.run(prev.id);
|
|
10605
|
+
deleteContentGrams.run(prev.id);
|
|
10606
|
+
}
|
|
10534
10607
|
updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
|
|
10535
|
-
if (body !== null)
|
|
10608
|
+
if (body !== null) {
|
|
10536
10609
|
insertContent.run(prev.id, body);
|
|
10610
|
+
for (const gram of grams)
|
|
10611
|
+
insertContentGram.run(prev.id, gram);
|
|
10612
|
+
}
|
|
10537
10613
|
stats.updated++;
|
|
10538
10614
|
} else {
|
|
10539
10615
|
const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
|
|
10540
|
-
if (body !== null)
|
|
10541
|
-
|
|
10616
|
+
if (body !== null) {
|
|
10617
|
+
const fileId = Number(inserted.lastInsertRowid);
|
|
10618
|
+
insertContent.run(fileId, body);
|
|
10619
|
+
for (const gram of grams)
|
|
10620
|
+
insertContentGram.run(fileId, gram);
|
|
10621
|
+
}
|
|
10542
10622
|
stats.added++;
|
|
10543
10623
|
}
|
|
10544
10624
|
if (contentIndexed)
|
|
@@ -10547,8 +10627,10 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
10547
10627
|
for (const [relPath, row] of existing) {
|
|
10548
10628
|
if (seen.has(relPath))
|
|
10549
10629
|
continue;
|
|
10550
|
-
if (row.content_indexed)
|
|
10630
|
+
if (row.content_indexed) {
|
|
10551
10631
|
deleteContent.run(row.id);
|
|
10632
|
+
deleteContentGrams.run(row.id);
|
|
10633
|
+
}
|
|
10552
10634
|
deleteFile.run(row.id);
|
|
10553
10635
|
stats.deleted++;
|
|
10554
10636
|
}
|
|
@@ -10570,6 +10652,9 @@ function indexAllRoots(opts = {}, db) {
|
|
|
10570
10652
|
return listRoots(db).map((root) => indexRoot(root.id, opts, db));
|
|
10571
10653
|
}
|
|
10572
10654
|
var refreshing = new Set;
|
|
10655
|
+
// Date.now() value of the last auto-refresh check made without an explicit db
// handle; autoRefreshStaleRoots uses it to throttle such checks.
var lastDefaultAutoRefreshCheckAt = 0;
// Minimum spacing, in milliseconds, between two throttled auto-refresh checks.
var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
// True while scheduleAutoRefreshStaleRoots has a deferred refresh pending.
var defaultRefreshScheduled = false;
|
|
10573
10658
|
function refreshStaleRoots(staleMinutes, db) {
|
|
10574
10659
|
const cutoff = Date.now() - staleMinutes * 60000;
|
|
10575
10660
|
const stats = [];
|
|
@@ -10593,8 +10678,31 @@ function autoRefreshStaleRoots(db) {
|
|
|
10593
10678
|
const config = getConfig();
|
|
10594
10679
|
if (!config.indexAutoRefresh)
|
|
10595
10680
|
return [];
|
|
10681
|
+
if (!db) {
|
|
10682
|
+
const now = Date.now();
|
|
10683
|
+
if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
|
|
10684
|
+
return [];
|
|
10685
|
+
lastDefaultAutoRefreshCheckAt = now;
|
|
10686
|
+
}
|
|
10596
10687
|
return refreshStaleRoots(config.indexStaleMinutes, db);
|
|
10597
10688
|
}
|
|
10689
|
+
/**
 * Trigger a stale-root refresh without blocking the caller when possible.
 *
 * With an explicit `db` the refresh runs synchronously and its result is
 * returned. Without one, a single refresh is deferred with `setTimeout(…, 0)`
 * and an empty array is returned immediately; while that deferred refresh is
 * pending, further calls are no-ops.
 *
 * @param {*} [db] - Optional database handle forwarded to autoRefreshStaleRoots.
 * @returns {Array} Refresh stats for the synchronous path, otherwise `[]`.
 */
function scheduleAutoRefreshStaleRoots(db) {
  if (db)
    return autoRefreshStaleRoots(db);
  const config = getConfig();
  if (!config.indexAutoRefresh || defaultRefreshScheduled)
    return [];
  defaultRefreshScheduled = true;
  const timer = setTimeout(() => {
    try {
      autoRefreshStaleRoots();
    } catch {
      // Best-effort background refresh: a failure here must not surface to
      // the search that scheduled it.
    } finally {
      // Always clear the flag so a later call can schedule again.
      defaultRefreshScheduled = false;
    }
  }, 0);
  // Where timers support it, do not let the pending refresh keep the process alive.
  timer.unref?.();
  return [];
}
|
|
10598
10706
|
|
|
10599
10707
|
// src/db/providers.ts
|
|
10600
10708
|
function rowToProvider(row) {
|
|
@@ -11574,6 +11682,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
|
|
|
11574
11682
|
// src/lib/local/query.ts
|
|
11575
11683
|
var MAX_LINE_LENGTH = 200;
|
|
11576
11684
|
var MAX_MATCHES_PER_FILE = 5;
|
|
11685
|
+
// Upper bound on the SQL LIMIT used when fetching path-search candidates.
var MAX_PATH_CANDIDATES = 20000;
// Upper bound on the paging offset scanned by the content search.
var MAX_CONTENT_CANDIDATES = 50000;
// Upper bound on the paging offset scanned by the regex path/content searches.
var MAX_REGEX_CANDIDATES = 50000;
|
|
11577
11688
|
/**
 * Split a query into whitespace-separated tokens.
 *
 * ASCII control characters other than tab, line feed and carriage return are
 * removed first (so they join their neighbours rather than split them), then
 * the text is split on runs of whitespace and empty pieces are dropped.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
function tokenize(query) {
  return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
}
|
|
@@ -11604,11 +11715,40 @@ function filterClauses(opts, db) {
|
|
|
11604
11715
|
}
|
|
11605
11716
|
if (opts.dir) {
|
|
11606
11717
|
clauses.push("f.dir LIKE ? ESCAPE '\\'");
|
|
11607
|
-
const dir = opts.dir.replace(/^\/|\/$/g, "")
|
|
11718
|
+
const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
|
|
11608
11719
|
params.push(`%${dir}%`);
|
|
11609
11720
|
}
|
|
11610
11721
|
return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
|
|
11611
11722
|
}
|
|
11723
|
+
/**
 * Escape a string for literal use inside a SQL `LIKE` pattern whose escape
 * character is a backslash (`ESCAPE '\'`).
 *
 * Each backslash, percent sign and underscore is prefixed with a backslash so
 * it matches itself instead of acting as an escape or wildcard.
 *
 * @param {string} value - Text to embed in a LIKE pattern.
 * @returns {string} The escaped text.
 */
function escapeLike(value) {
  // "$&" in the replacement re-inserts the matched character after the backslash.
  return value.replace(/[\\%_]/g, "\\$&");
}
|
|
11726
|
+
/**
 * Build SQL predicates requiring every given token to occur in `f.rel_path`.
 *
 * Produces one `f.rel_path LIKE ? ESCAPE '\'` clause per token, joined with
 * AND and prefixed with ` AND ` so it can be appended to an existing WHERE
 * clause. The matching parameters are the tokens LIKE-escaped and wrapped in
 * `%…%`.
 *
 * @param {string[]} tokens - Tokens that must all appear in the path.
 * @returns {{sql: string, params: string[]}} SQL fragment and bound values;
 *   an empty fragment and no params when `tokens` is empty.
 */
function shortTokenClauses(tokens) {
  if (tokens.length === 0)
    return { sql: "", params: [] };
  return {
    sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
    params: tokens.map((token) => `%${escapeLike(token)}%`)
  };
}
|
|
11734
|
+
/**
 * Build SQL predicates that pre-filter content candidates by short tokens
 * using the `file_content_grams` table.
 *
 * Only tokens made of one or two characters from `[a-z0-9_$]` are used; all
 * others are ignored. For each used token the generated predicate accepts a
 * file when either
 *   - the file has no rows in `file_content_grams` at all, or
 *   - the file has a row whose `gram` equals the token.
 *
 * @param {string[]} tokens - Candidate short tokens.
 * @returns {{sql: string, params: string[]}} SQL fragment (each predicate
 *   prefixed with ` AND `) and one bound value per predicate; empty when no
 *   token qualifies.
 */
function contentGramClauses(tokens) {
  const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
  if (gramTokens.length === 0)
    return { sql: "", params: [] };
  return {
    // Table aliases carry the token index so several predicates can coexist
    // in the same statement.
    sql: gramTokens.map((_token, index) => ` AND (
      NOT EXISTS (
        SELECT 1 FROM file_content_grams cg_any_${index}
        WHERE cg_any_${index}.file_id = f.id
      )
      OR EXISTS (
        SELECT 1 FROM file_content_grams cg_${index}
        WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
      )
    )`).join(""),
    params: gramTokens
  };
}
|
|
11612
11752
|
function rowToHit(row, score) {
|
|
11613
11753
|
return {
|
|
11614
11754
|
rootId: row.root_id,
|
|
@@ -11678,6 +11818,8 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
11678
11818
|
return [];
|
|
11679
11819
|
const ftsQuery = buildFtsQuery(query);
|
|
11680
11820
|
const filters = filterClauses(opts, d);
|
|
11821
|
+
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
11822
|
+
const shortFilters = shortTokenClauses(shortTokens);
|
|
11681
11823
|
const candidateLimit = Math.max(200, limit * 10);
|
|
11682
11824
|
let rows;
|
|
11683
11825
|
if (ftsQuery) {
|
|
@@ -11685,16 +11827,16 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
11685
11827
|
FROM files_fts fts
|
|
11686
11828
|
JOIN files f ON f.id = fts.rowid
|
|
11687
11829
|
JOIN index_roots r ON r.id = f.root_id
|
|
11688
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
11830
|
+
WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
|
|
11689
11831
|
ORDER BY bm25(files_fts, 10.0, 1.0)
|
|
11690
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
|
|
11691
|
-
const namePattern = `${query.trim()
|
|
11832
|
+
LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
11833
|
+
const namePattern = `${escapeLike(query.trim())}%`;
|
|
11692
11834
|
const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11693
11835
|
FROM files f
|
|
11694
11836
|
JOIN index_roots r ON r.id = f.root_id
|
|
11695
|
-
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
|
|
11837
|
+
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
|
|
11696
11838
|
ORDER BY length(f.name)
|
|
11697
|
-
LIMIT 100`).all(namePattern, ...filters.params);
|
|
11839
|
+
LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
|
|
11698
11840
|
const seen = new Set(rows.map((row) => row.id));
|
|
11699
11841
|
for (const row of nameRows) {
|
|
11700
11842
|
if (!seen.has(row.id))
|
|
@@ -11702,14 +11844,14 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
11702
11844
|
}
|
|
11703
11845
|
} else {
|
|
11704
11846
|
const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
|
|
11705
|
-
const likeParams = tokens.map((t) => `%${t
|
|
11847
|
+
const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
|
|
11706
11848
|
rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11707
11849
|
FROM files f
|
|
11708
11850
|
JOIN index_roots r ON r.id = f.root_id
|
|
11709
11851
|
WHERE ${likeClauses}${filters.sql}
|
|
11710
|
-
|
|
11852
|
+
ORDER BY length(f.name), length(f.rel_path), f.rel_path
|
|
11853
|
+
LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
11711
11854
|
}
|
|
11712
|
-
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
11713
11855
|
const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
|
|
11714
11856
|
return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync3(hit.absPath)).slice(0, limit);
|
|
11715
11857
|
}
|
|
@@ -11747,24 +11889,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
|
|
|
11747
11889
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
|
|
11748
11890
|
}
|
|
11749
11891
|
const filters = filterClauses(opts, d);
|
|
11750
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11751
|
-
FROM files_fts fts
|
|
11752
|
-
JOIN files f ON f.id = fts.rowid
|
|
11753
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
11754
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
11755
|
-
ORDER BY fts.rank
|
|
11756
|
-
LIMIT 5000`).all(ftsQuery, ...filters.params);
|
|
11757
11892
|
const hits = [];
|
|
11758
|
-
|
|
11759
|
-
|
|
11760
|
-
|
|
11761
|
-
|
|
11762
|
-
|
|
11763
|
-
|
|
11764
|
-
|
|
11765
|
-
|
|
11766
|
-
|
|
11767
|
-
if (
|
|
11893
|
+
const pageSize = Math.max(500, limit * 20);
|
|
11894
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
11895
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11896
|
+
FROM files_fts fts
|
|
11897
|
+
JOIN files f ON f.id = fts.rowid
|
|
11898
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
11899
|
+
WHERE files_fts MATCH ?${filters.sql}
|
|
11900
|
+
ORDER BY fts.rank
|
|
11901
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
11902
|
+
if (rows.length === 0)
|
|
11903
|
+
break;
|
|
11904
|
+
for (const row of rows) {
|
|
11905
|
+
if (!regex.test(row.rel_path) && !regex.test(row.name))
|
|
11906
|
+
continue;
|
|
11907
|
+
const depth = row.rel_path.split("/").length - 1;
|
|
11908
|
+
const score = Math.max(0.05, 0.6 - depth * 0.02);
|
|
11909
|
+
const hit = rowToHit(row, score);
|
|
11910
|
+
if (!existsSync3(hit.absPath))
|
|
11911
|
+
continue;
|
|
11912
|
+
hits.push(hit);
|
|
11913
|
+
if (hits.length >= limit)
|
|
11914
|
+
break;
|
|
11915
|
+
}
|
|
11916
|
+
if (rows.length < pageSize)
|
|
11768
11917
|
break;
|
|
11769
11918
|
}
|
|
11770
11919
|
return hits;
|
|
@@ -11778,40 +11927,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
|
|
|
11778
11927
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
|
|
11779
11928
|
}
|
|
11780
11929
|
const filters = filterClauses(opts, d);
|
|
11781
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11782
|
-
FROM file_content_fts fts
|
|
11783
|
-
JOIN files f ON f.id = fts.rowid
|
|
11784
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
11785
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
11786
|
-
ORDER BY fts.rank
|
|
11787
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
|
|
11788
11930
|
const hits = [];
|
|
11789
|
-
|
|
11790
|
-
|
|
11791
|
-
const
|
|
11792
|
-
|
|
11793
|
-
|
|
11794
|
-
|
|
11795
|
-
|
|
11796
|
-
|
|
11797
|
-
|
|
11798
|
-
|
|
11931
|
+
const pageSize = Math.max(200, limit * 10);
|
|
11932
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
11933
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11934
|
+
FROM file_content_fts fts
|
|
11935
|
+
JOIN files f ON f.id = fts.rowid
|
|
11936
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
11937
|
+
WHERE file_content_fts MATCH ?${filters.sql}
|
|
11938
|
+
ORDER BY fts.rank
|
|
11939
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
11940
|
+
if (rows.length === 0)
|
|
11941
|
+
break;
|
|
11942
|
+
for (let i = 0;i < rows.length && hits.length < limit; i++) {
|
|
11943
|
+
const row = rows[i];
|
|
11944
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
11945
|
+
let content;
|
|
11946
|
+
try {
|
|
11947
|
+
content = readFileSync5(absPath, "utf-8");
|
|
11948
|
+
} catch {
|
|
11949
|
+
continue;
|
|
11950
|
+
}
|
|
11951
|
+
const lines = content.split(`
|
|
11799
11952
|
`);
|
|
11800
|
-
|
|
11801
|
-
|
|
11802
|
-
|
|
11803
|
-
|
|
11953
|
+
const matches = [];
|
|
11954
|
+
for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
|
|
11955
|
+
if (regex.test(lines[n])) {
|
|
11956
|
+
matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
|
|
11957
|
+
}
|
|
11804
11958
|
}
|
|
11959
|
+
if (matches.length === 0)
|
|
11960
|
+
continue;
|
|
11961
|
+
const rankIndex = offset + i;
|
|
11962
|
+
const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
|
|
11963
|
+
hits.push({
|
|
11964
|
+
...rowToHit(row, score),
|
|
11965
|
+
line: matches[0].line,
|
|
11966
|
+
lineText: matches[0].text,
|
|
11967
|
+
matches
|
|
11968
|
+
});
|
|
11805
11969
|
}
|
|
11806
|
-
if (
|
|
11807
|
-
|
|
11808
|
-
const score = Math.max(0.25, 0.65 - i * 0.05);
|
|
11809
|
-
hits.push({
|
|
11810
|
-
...rowToHit(row, score),
|
|
11811
|
-
line: matches[0].line,
|
|
11812
|
-
lineText: matches[0].text,
|
|
11813
|
-
matches
|
|
11814
|
-
});
|
|
11970
|
+
if (rows.length < pageSize)
|
|
11971
|
+
break;
|
|
11815
11972
|
}
|
|
11816
11973
|
return hits;
|
|
11817
11974
|
}
|
|
@@ -11822,42 +11979,51 @@ function searchFileContent(query, opts = {}, db) {
|
|
|
11822
11979
|
if (!ftsQuery)
|
|
11823
11980
|
return [];
|
|
11824
11981
|
const filters = filterClauses(opts, d);
|
|
11825
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11826
|
-
FROM file_content_fts fts
|
|
11827
|
-
JOIN files f ON f.id = fts.rowid
|
|
11828
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
11829
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
11830
|
-
ORDER BY fts.rank
|
|
11831
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
|
|
11832
11982
|
const tokens = tokenize(query);
|
|
11833
11983
|
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
11984
|
+
const gramFilters = contentGramClauses(shortTokens);
|
|
11834
11985
|
const scored = [];
|
|
11835
|
-
|
|
11836
|
-
|
|
11837
|
-
const
|
|
11838
|
-
|
|
11839
|
-
|
|
11840
|
-
|
|
11841
|
-
|
|
11842
|
-
|
|
11843
|
-
|
|
11844
|
-
if (
|
|
11845
|
-
|
|
11846
|
-
|
|
11986
|
+
const pageSize = Math.max(50, limit * 3);
|
|
11987
|
+
for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
|
|
11988
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
11989
|
+
FROM file_content_fts fts
|
|
11990
|
+
JOIN files f ON f.id = fts.rowid
|
|
11991
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
11992
|
+
WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
|
|
11993
|
+
ORDER BY fts.rank
|
|
11994
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
|
|
11995
|
+
if (rows.length === 0)
|
|
11996
|
+
break;
|
|
11997
|
+
for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
|
|
11998
|
+
const row = rows[i];
|
|
11999
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
12000
|
+
let content;
|
|
12001
|
+
try {
|
|
12002
|
+
content = readFileSync5(absPath, "utf-8");
|
|
12003
|
+
} catch {
|
|
12004
|
+
continue;
|
|
12005
|
+
}
|
|
12006
|
+
if (shortTokens.length > 0) {
|
|
12007
|
+
const lower = content.toLowerCase();
|
|
12008
|
+
if (!shortTokens.every((t) => lower.includes(t)))
|
|
12009
|
+
continue;
|
|
12010
|
+
}
|
|
12011
|
+
const { matches, tier } = findLineMatches(content, query, tokens);
|
|
12012
|
+
if (matches.length === 0)
|
|
11847
12013
|
continue;
|
|
12014
|
+
const rankIndex = offset + i;
|
|
12015
|
+
const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
|
|
12016
|
+
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
12017
|
+
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
12018
|
+
scored.push({
|
|
12019
|
+
...rowToHit(row, score),
|
|
12020
|
+
line: matches[0].line,
|
|
12021
|
+
lineText: matches[0].text,
|
|
12022
|
+
matches
|
|
12023
|
+
});
|
|
11848
12024
|
}
|
|
11849
|
-
|
|
11850
|
-
|
|
11851
|
-
continue;
|
|
11852
|
-
const base = Math.max(0.25, 0.55 - i * 0.04);
|
|
11853
|
-
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
11854
|
-
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
11855
|
-
scored.push({
|
|
11856
|
-
...rowToHit(row, score),
|
|
11857
|
-
line: matches[0].line,
|
|
11858
|
-
lineText: matches[0].text,
|
|
11859
|
-
matches
|
|
11860
|
-
});
|
|
12025
|
+
if (rows.length < pageSize)
|
|
12026
|
+
break;
|
|
11861
12027
|
}
|
|
11862
12028
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
11863
12029
|
}
|
|
@@ -11870,7 +12036,7 @@ class FilesProvider {
|
|
|
11870
12036
|
return hasReadyRoot();
|
|
11871
12037
|
}
|
|
11872
12038
|
async search(query, options) {
|
|
11873
|
-
|
|
12039
|
+
scheduleAutoRefreshStaleRoots();
|
|
11874
12040
|
const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
|
|
11875
12041
|
return hits.map((hit) => ({
|
|
11876
12042
|
title: hit.name,
|
|
@@ -11898,7 +12064,7 @@ class ContentProvider {
|
|
|
11898
12064
|
return hasReadyRoot();
|
|
11899
12065
|
}
|
|
11900
12066
|
async search(query, options) {
|
|
11901
|
-
|
|
12067
|
+
scheduleAutoRefreshStaleRoots();
|
|
11902
12068
|
const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
|
|
11903
12069
|
return hits.map((hit) => ({
|
|
11904
12070
|
title: hit.name,
|
|
@@ -12010,13 +12176,300 @@ function deduplicateResults(results) {
|
|
|
12010
12176
|
return deduped;
|
|
12011
12177
|
}
|
|
12012
12178
|
|
|
12179
|
+
// src/lib/router.ts
|
|
12180
|
+
// One-line description of what each search provider is best suited for,
// keyed by provider name.
var PROVIDER_DESCRIPTIONS = {
  files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
  content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
  google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
  serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
  exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
  perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
  brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
  bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
  twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
  reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
  youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
  hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
  github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
  arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
};
|
|
12196
|
+
/**
 * Normalize a requested provider count to an integer between 1 and 5.
 *
 * @param {*} value - Requested maximum number of providers.
 * @returns {number} 3 when `value` is not a finite number (including
 *   `undefined`, `null`, `NaN`, ±Infinity and non-number types); otherwise
 *   `value` rounded down and clamped to the range 1..5.
 */
function clampMaxProviders(value) {
  // Number.isFinite is false for every non-number, so no separate
  // undefined check is needed.
  if (!Number.isFinite(value))
    return 3;
  return Math.max(1, Math.min(5, Math.floor(value)));
}
|
|
12201
|
+
/**
 * Normalize a confidence value to the range 0..1.
 *
 * @param {*} value - Reported confidence.
 * @returns {number} `value` clamped to [0, 1] when it is a finite number,
 *   otherwise 0.5.
 */
function clampConfidence(value) {
  if (typeof value !== "number" || !Number.isFinite(value))
    return 0.5;
  return Math.max(0, Math.min(1, value));
}
|
|
12204
|
+
/**
 * Filter a candidate list down to known provider names without duplicates.
 *
 * Entries not present in `PROVIDER_NAMES` are dropped, and only the first
 * occurrence of each remaining name is kept, preserving input order.
 *
 * @param {Iterable<string>} candidates - Provider names to consider.
 * @returns {string[]} Known, de-duplicated provider names in first-seen order.
 */
function normalizeCandidates(candidates) {
  const allowed = new Set(PROVIDER_NAMES);
  const seen = new Set();
  const normalized = [];
  for (const candidate of candidates) {
    if (!allowed.has(candidate) || seen.has(candidate))
      continue;
    seen.add(candidate);
    normalized.push(candidate);
  }
  return normalized;
}
|
|
12216
|
+
/**
 * Add `amount` to a provider's running score, but only for providers that are
 * in the candidate set.
 *
 * @param {Map<string, number>} scores - Running score per provider; mutated in place.
 * @param {Set<string>} candidateSet - Providers eligible to receive score.
 * @param {string} provider - Provider to credit.
 * @param {number} amount - Score to add.
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (!candidateSet.has(provider))
    return;
  // A provider with no entry yet starts from 0.
  scores.set(provider, (scores.get(provider) ?? 0) + amount);
}
|
|
12221
|
+
/**
 * Report whether at least one of the given regular expressions matches `query`.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns to try, in order.
 * @returns {boolean} True as soon as one pattern matches; false if none do
 *   (including when `patterns` is empty).
 */
function hasAny(query, patterns) {
  return patterns.some((pattern) => {
    // test() resumes from lastIndex on global/sticky patterns; reset it so a
    // pattern reused across calls always scans from the start.
    pattern.lastIndex = 0;
    return pattern.test(query);
  });
}
|
|
12224
|
+
/**
 * Chooses search providers for a query using keyword/regex heuristics only.
 *
 * Every recognized candidate starts with a small base score. Each rule whose
 * patterns match the lower-cased, trimmed query adds fixed boosts to specific
 * providers and records a reason. When no rule matches, a general-purpose
 * boost set is applied instead. Providers are then ranked by score, with ties
 * broken by their position in the normalized candidate list.
 *
 * @param {string} query - Raw user query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number }} [options] - Optional cap on the number of
 *   selected providers (passed through clampMaxProviders).
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 *   Routing decision. Confidence is 0 when there are no usable candidates,
 *   otherwise the top score divided by 6, limited to the range 0.35..0.9.
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);
  const maxProviders = clampMaxProviders(options.maxProviders);
  if (normalized.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; the order determines the order of the
  // joined reason string, so it must not be rearranged.
  const rules = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      reason: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      reason: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      reason: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      reason: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      reason: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      reason: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      reason: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      reason: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      reason: "query appears time-sensitive"
    }
  ];
  // Applied only when none of the rules above matched.
  const fallbackBoosts = [
    ["exa", 2.5],
    ["perplexity", 2],
    ["brave", 1.5],
    ["google", 1.5],
    ["hackernews", 0.75]
  ];

  const candidateSet = new Set(normalized);
  const scores = new Map();
  const reasons = [];
  const q = query.trim().toLowerCase();

  // Base score so every candidate is rankable even without a matching rule.
  for (const name of normalized) {
    scores.set(name, 0.05);
  }

  for (const rule of rules) {
    if (!hasAny(q, rule.patterns)) {
      continue;
    }
    for (const [provider, amount] of rule.boosts) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push(rule.reason);
  }

  if (reasons.length === 0) {
    for (const [provider, amount] of fallbackBoosts) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push("general query fallback");
  }

  // Position of each candidate in the normalized list, used as tie-breaker.
  const rank = new Map(normalized.map((name, position) => [name, position]));
  const ranked = [...scores.entries()].sort(
    ([nameA, scoreA], [nameB, scoreB]) => scoreB - scoreA || rank.get(nameA) - rank.get(nameB)
  );
  const keep = Math.min(maxProviders, normalized.length);
  const selectedProviders = ranked.slice(0, keep).map(([provider]) => provider);

  const topScore = scores.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));

  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: normalized,
    reason: reasons.join("; "),
    confidence
  };
}
|
|
12314
|
+
/**
 * Builds the JSON Schema describing the router model's expected reply.
 *
 * @param {string[]} candidates - Provider names allowed in the selection
 *   (used as the `enum` of each array item).
 * @param {number} maxProviders - Upper bound on the number of selected items.
 * @returns {object} Schema for an object with required `selectedProviders`,
 *   `reason` and `confidence` properties and no additional properties.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const properties = {
    selectedProviders,
    reason: { type: "string" },
    confidence: { type: "number", minimum: 0, maximum: 1 }
  };
  return {
    type: "object",
    properties,
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
|
|
12331
|
+
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {Iterable<string>} candidates - Provider names the model may select.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The validated selection, or null when `raw` is not valid JSON, is not an
 *   object carrying a `selectedProviders` array, or names no allowed provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse happily returns null or a primitive (e.g. for "null" or "42");
  // reading a property off null would throw, so reject non-objects up front.
  if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.selectedProviders)) {
    return null;
  }
  const candidateSet = new Set(candidates);
  // Collect into a Set so a provider repeated by the model is kept once and
  // does not consume several of the maxProviders slots.
  const unique = new Set();
  for (const provider of parsed.selectedProviders) {
    if (typeof provider === "string" && candidateSet.has(provider)) {
      unique.add(provider);
    }
  }
  const selectedProviders = [...unique].slice(0, maxProviders);
  if (selectedProviders.length === 0) {
    return null;
  }
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
|
|
12350
|
+
/**
 * Asks the Cerebras chat-completions API to pick providers for a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made and the heuristic
 * router's result is returned with an explanatory `error` field.
 *
 * @param {string} query - Raw user query.
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options
 *   Selection cap, request timeout in milliseconds, and model identifier.
 * @returns {Promise<object>} Routing decision with `strategy: "cerebras"`, or
 *   the heuristic decision plus `error` when no API key is configured.
 * @throws {Error} When the HTTP response is not ok, the reply has no message
 *   content, or the content fails validation. Errors raised by `fetch`
 *   itself (including the timeout abort) are not caught here.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Give the model a short description of each available provider.
  const providerGuide = candidates.map((name) => {
    const description = PROVIDER_DESCRIPTIONS[name];
    return { name, description };
  });

  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers: providerGuide
    })
  };
  // Structured output: the reply is constrained to the router schema.
  const responseFormat = {
    type: "json_schema",
    json_schema: {
      name: "search_router",
      strict: true,
      schema: routerSchema(candidates, options.maxProviders)
    }
  };
  const payload = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    response_format: responseFormat
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }

  const parsed = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!parsed) {
    throw new Error("Cerebras router returned invalid provider selection");
  }

  return {
    strategy: "cerebras",
    candidates,
    ...parsed
  };
}
|
|
12412
|
+
/**
 * Routes a query to a subset of providers, preferring the Cerebras model
 * router and falling back to the heuristic router when it throws.
 *
 * @param {string} query - Raw user query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options]
 *   Selection cap, router timeout in milliseconds (floored, minimum 250,
 *   default 1200), and model name (default: CEREBRAS_MODEL env var, then
 *   "gpt-oss-120b").
 * @returns {Promise<object>} Routing decision. When the model router throws,
 *   the heuristic decision is returned with the error message in `error`.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);

  // Never ask for more providers than exist, but keep the cap at least 1.
  const providerCeiling = Math.max(1, normalized.length);
  const maxProviders = Math.min(clampMaxProviders(options.maxProviders), providerCeiling);

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }

  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  if (normalized.length === 0) {
    return routeSearchProvidersHeuristic(query, normalized, { maxProviders });
  }

  try {
    const routed = await routeWithCerebras(query, normalized, { maxProviders, timeoutMs, model });
    return routed;
  } catch (err) {
    const fallback = routeSearchProvidersHeuristic(query, normalized, { maxProviders });
    const message = err instanceof Error ? err.message : String(err);
    return { ...fallback, error: message };
  }
}
|
|
12429
|
+
|
|
12013
12430
|
// src/lib/search.ts
|
|
12431
|
+
/**
 * Awaits `promise`, rejecting if it has not settled within `timeoutMs`.
 *
 * The underlying operation is not cancelled on timeout; only the wait ends.
 *
 * @param {Promise<T>} promise - Operation to wait for.
 * @param {number} timeoutMs - Deadline in milliseconds. A non-finite or
 *   non-positive value disables the deadline.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The settled value of `promise`.
 * @throws {Error} `"<label> timed out after <timeoutMs>ms"` when the deadline
 *   elapses first; otherwise whatever `promise` rejects with.
 * @template T
 */
async function withTimeout(promise, timeoutMs, label) {
  const hasDeadline = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!hasDeadline) {
    return promise;
  }

  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    // Where timers expose unref(), avoid keeping the process alive for it.
    timer.unref?.();
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer, whichever side of the race won.
    clearTimeout(timer);
  }
}
|
|
12448
|
+
/**
 * Runs `task` over `items` with at most `concurrency` calls in flight and
 * reports each outcome in the same shape as `Promise.allSettled`.
 *
 * @param {T[]} items - Inputs to process.
 * @param {number} concurrency - Maximum simultaneous tasks. Floored and raised
 *   to at least 1; a value that is not a number (NaN, undefined) is treated
 *   as 1 so that every item is still processed.
 * @param {(item: T) => Promise<R> | R} task - Work to perform per item.
 * @returns {Promise<Array<{ status: "fulfilled", value: R } | { status: "rejected", reason: unknown }>>}
 *   One entry per item, at the item's original index. Never rejects because
 *   of a failing task.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;

  // Each worker repeatedly claims the next unprocessed index until none are
  // left. Claiming (next++) happens synchronously, so no index is taken twice.
  const worker = async () => {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  };

  // Math.max(1, NaN) is NaN, which would spawn zero workers and leave every
  // result slot empty. Fall back to a single worker for unusable limits.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
12014
12466
|
async function unifiedSearch(query, opts = {}) {
|
|
12015
12467
|
const config = getConfig();
|
|
12016
12468
|
const startTime = Date.now();
|
|
12017
12469
|
const db = opts.db;
|
|
12018
12470
|
let providerNames = opts.providers ?? [];
|
|
12019
|
-
|
|
12471
|
+
const smartProfile = opts.profile === "smart";
|
|
12472
|
+
if (opts.profile && !smartProfile) {
|
|
12020
12473
|
const profile = getProfileByName(opts.profile, db);
|
|
12021
12474
|
if (profile) {
|
|
12022
12475
|
providerNames = profile.providers;
|
|
@@ -12032,7 +12485,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
12032
12485
|
}
|
|
12033
12486
|
const errors2 = [];
|
|
12034
12487
|
const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
|
|
12035
|
-
|
|
12488
|
+
let activeProviders = providerNames.filter((name) => {
|
|
12036
12489
|
try {
|
|
12037
12490
|
if (getProvider2(name).isConfigured())
|
|
12038
12491
|
return true;
|
|
@@ -12043,20 +12496,36 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
12043
12496
|
});
|
|
12044
12497
|
}
|
|
12045
12498
|
return false;
|
|
12046
|
-
} catch {
|
|
12499
|
+
} catch (err) {
|
|
12500
|
+
if (explicitRequest) {
|
|
12501
|
+
errors2.push({
|
|
12502
|
+
provider: name,
|
|
12503
|
+
error: err instanceof Error ? err.message : "unknown provider"
|
|
12504
|
+
});
|
|
12505
|
+
}
|
|
12047
12506
|
return false;
|
|
12048
12507
|
}
|
|
12049
12508
|
});
|
|
12509
|
+
const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
|
|
12510
|
+
let routing;
|
|
12511
|
+
if (routingRequested && activeProviders.length > 0) {
|
|
12512
|
+
routing = await routeSearchProviders(query, activeProviders, {
|
|
12513
|
+
maxProviders: config.router.maxProviders,
|
|
12514
|
+
timeoutMs: config.router.timeoutMs,
|
|
12515
|
+
model: config.router.model
|
|
12516
|
+
});
|
|
12517
|
+
activeProviders = routing.selectedProviders;
|
|
12518
|
+
}
|
|
12050
12519
|
const searchOptions = {
|
|
12051
12520
|
limit: config.defaultLimit,
|
|
12052
12521
|
...opts.options
|
|
12053
12522
|
};
|
|
12054
|
-
const results = await
|
|
12523
|
+
const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
|
|
12055
12524
|
const provider = getProvider2(name);
|
|
12056
|
-
const rawResults = await provider.search(query, searchOptions);
|
|
12525
|
+
const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
|
|
12057
12526
|
updateProviderLastUsed(name, db);
|
|
12058
12527
|
return { name, results: rawResults };
|
|
12059
|
-
})
|
|
12528
|
+
});
|
|
12060
12529
|
const allResults = [];
|
|
12061
12530
|
const searchId = generateId();
|
|
12062
12531
|
for (const result of results) {
|
|
@@ -12110,11 +12579,13 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
12110
12579
|
createdAt: new Date().toISOString()
|
|
12111
12580
|
},
|
|
12112
12581
|
results: finalResults,
|
|
12113
|
-
errors: errors2
|
|
12582
|
+
errors: errors2,
|
|
12583
|
+
...routing && { routing }
|
|
12114
12584
|
};
|
|
12115
12585
|
}
|
|
12116
12586
|
const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
|
|
12117
12587
|
const search = createSearch({
|
|
12588
|
+
id: searchId,
|
|
12118
12589
|
query,
|
|
12119
12590
|
providers: activeProviders,
|
|
12120
12591
|
resultCount: persistable.length,
|
|
@@ -12123,6 +12594,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
12123
12594
|
if (persistable.length > 0) {
|
|
12124
12595
|
createResults(persistable.map((r) => ({
|
|
12125
12596
|
searchId: search.id,
|
|
12597
|
+
id: r.id,
|
|
12126
12598
|
title: r.title,
|
|
12127
12599
|
url: r.url,
|
|
12128
12600
|
snippet: r.snippet,
|
|
@@ -12139,7 +12611,8 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
12139
12611
|
return {
|
|
12140
12612
|
search: { ...search, resultCount: finalResults.length, duration },
|
|
12141
12613
|
results: finalResults,
|
|
12142
|
-
errors: errors2
|
|
12614
|
+
errors: errors2,
|
|
12615
|
+
...routing && { routing }
|
|
12143
12616
|
};
|
|
12144
12617
|
}
|
|
12145
12618
|
async function searchSingleProvider(provider, query, options, db) {
|
|
@@ -12233,7 +12706,7 @@ function findLocal(query, opts = {}, db) {
|
|
|
12233
12706
|
return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
|
|
12234
12707
|
}
|
|
12235
12708
|
if (opts.refresh !== false)
|
|
12236
|
-
|
|
12709
|
+
scheduleAutoRefreshStaleRoots(db);
|
|
12237
12710
|
const queryOpts = {
|
|
12238
12711
|
root: opts.root,
|
|
12239
12712
|
ext: opts.ext,
|
|
@@ -12299,7 +12772,10 @@ export {
|
|
|
12299
12772
|
searchResultsFts,
|
|
12300
12773
|
searchFilePaths,
|
|
12301
12774
|
searchFileContent,
|
|
12775
|
+
scheduleAutoRefreshStaleRoots,
|
|
12302
12776
|
runStorageMigrations,
|
|
12777
|
+
routeSearchProvidersHeuristic,
|
|
12778
|
+
routeSearchProviders,
|
|
12303
12779
|
resetConfig,
|
|
12304
12780
|
removeRoot,
|
|
12305
12781
|
refreshStaleRoots,
|