@hasna/search 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +602 -128
- package/dist/db/index-migrations.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -0
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/searches.d.ts +1 -0
- package/dist/db/searches.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +602 -126
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +1 -1
- package/dist/lib/local/find.d.ts.map +1 -1
- package/dist/lib/local/indexer.d.ts +11 -0
- package/dist/lib/local/indexer.d.ts.map +1 -1
- package/dist/lib/local/query.d.ts.map +1 -1
- package/dist/lib/router.d.ts +10 -0
- package/dist/lib/router.d.ts.map +1 -0
- package/dist/lib/search.d.ts +1 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/index.js +621 -130
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/server/index.js +624 -139
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/types/index.d.ts +22 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -7411,7 +7411,7 @@ var init_pg_migrate = __esm(() => {
|
|
|
7411
7411
|
var require_package = __commonJS((exports, module) => {
|
|
7412
7412
|
module.exports = {
|
|
7413
7413
|
name: "@hasna/search",
|
|
7414
|
-
version: "0.0.9",
|
|
7414
|
+
version: "0.0.11",
|
|
7415
7415
|
description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
|
|
7416
7416
|
type: "module",
|
|
7417
7417
|
main: "dist/index.js",
|
|
@@ -7980,6 +7980,31 @@ var migrations2 = [
|
|
|
7980
7980
|
);
|
|
7981
7981
|
`);
|
|
7982
7982
|
}
|
|
7983
|
+
},
|
|
7984
|
+
{
|
|
7985
|
+
version: 2,
|
|
7986
|
+
description: "Local file index filter indexes",
|
|
7987
|
+
up: (db) => {
|
|
7988
|
+
db.exec(`
|
|
7989
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
|
|
7990
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
|
|
7991
|
+
`);
|
|
7992
|
+
}
|
|
7993
|
+
},
|
|
7994
|
+
{
|
|
7995
|
+
version: 3,
|
|
7996
|
+
description: "Local content short-token filter grams",
|
|
7997
|
+
up: (db) => {
|
|
7998
|
+
db.exec(`
|
|
7999
|
+
CREATE TABLE IF NOT EXISTS file_content_grams (
|
|
8000
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
8001
|
+
gram TEXT NOT NULL,
|
|
8002
|
+
PRIMARY KEY (file_id, gram)
|
|
8003
|
+
);
|
|
8004
|
+
CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
|
|
8005
|
+
ON file_content_grams(gram, file_id);
|
|
8006
|
+
`);
|
|
8007
|
+
}
|
|
7983
8008
|
}
|
|
7984
8009
|
];
|
|
7985
8010
|
function runIndexMigrations(db) {
|
|
@@ -12047,12 +12072,19 @@ var DEFAULT_CONFIG = {
|
|
|
12047
12072
|
defaultLimit: 10,
|
|
12048
12073
|
defaultProviders: [],
|
|
12049
12074
|
defaultProfile: null,
|
|
12075
|
+
router: {
|
|
12076
|
+
enabled: false,
|
|
12077
|
+
model: "gpt-oss-120b",
|
|
12078
|
+
maxProviders: 3,
|
|
12079
|
+
timeoutMs: 1200
|
|
12080
|
+
},
|
|
12050
12081
|
transcriber: {
|
|
12051
12082
|
baseUrl: "http://localhost:19600",
|
|
12052
12083
|
fallbackCli: "microservice-transcriber"
|
|
12053
12084
|
},
|
|
12054
12085
|
dedup: true,
|
|
12055
12086
|
maxConcurrent: 5,
|
|
12087
|
+
providerTimeoutMs: 15000,
|
|
12056
12088
|
indexStaleMinutes: 5,
|
|
12057
12089
|
indexAutoRefresh: true,
|
|
12058
12090
|
recordLocalResults: false
|
|
@@ -12083,7 +12115,18 @@ function getConfig() {
|
|
|
12083
12115
|
try {
|
|
12084
12116
|
const raw = readFileSync2(path, "utf-8");
|
|
12085
12117
|
const parsed = JSON.parse(raw);
|
|
12086
|
-
return {
|
|
12118
|
+
return {
|
|
12119
|
+
...DEFAULT_CONFIG,
|
|
12120
|
+
...parsed,
|
|
12121
|
+
router: {
|
|
12122
|
+
...DEFAULT_CONFIG.router,
|
|
12123
|
+
...parsed.router ?? {}
|
|
12124
|
+
},
|
|
12125
|
+
transcriber: {
|
|
12126
|
+
...DEFAULT_CONFIG.transcriber,
|
|
12127
|
+
...parsed.transcriber ?? {}
|
|
12128
|
+
}
|
|
12129
|
+
};
|
|
12087
12130
|
} catch {
|
|
12088
12131
|
return { ...DEFAULT_CONFIG };
|
|
12089
12132
|
}
|
|
@@ -12511,6 +12554,7 @@ function removeRoot(idOrPath, db) {
|
|
|
12511
12554
|
d.exec("BEGIN");
|
|
12512
12555
|
try {
|
|
12513
12556
|
d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
12557
|
+
d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
12514
12558
|
d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
|
|
12515
12559
|
d.exec("COMMIT");
|
|
12516
12560
|
} catch (err) {
|
|
@@ -12522,6 +12566,21 @@ function removeRoot(idOrPath, db) {
|
|
|
12522
12566
|
/**
 * Decide whether a scanned file's contents should be read into the content index.
 *
 * All of the following must hold: the root has content indexing switched on,
 * the file is non-empty and no larger than the root's `maxFileSize`, its
 * extension is not flagged by `hasBinaryExtension`, and its name is not
 * flagged by `isContentExcluded`.
 *
 * @param {{contentIndexing: *, maxFileSize: number}} root - Index root settings.
 * @param {{size: number, ext: string, name: string}} file - Scanned file entry.
 * @returns {*} The falsy `root.contentIndexing` value when indexing is off,
 *   otherwise a boolean.
 */
function shouldIndexContent(root, file) {
  // Hand back the flag itself when it is falsy, exactly like an `&&` chain would.
  if (!root.contentIndexing) {
    return root.contentIndexing;
  }
  const { size, ext, name } = file;
  if (!(size > 0)) {
    return false;
  }
  if (!(size <= root.maxFileSize)) {
    return false;
  }
  if (hasBinaryExtension(ext)) {
    return false;
  }
  return !isContentExcluded(name);
}
|
|
12569
|
+
/**
 * Collect the distinct 1- and 2-character grams found inside the "words" of a
 * file body, for storage in `file_content_grams`.
 *
 * The body is lower-cased and split into runs of `[a-z0-9_$]`; every single
 * character and every adjacent character pair inside a run becomes a gram.
 * Grams never span two runs. The result keeps first-seen order.
 *
 * @param {string} body - File contents.
 * @param {number} [maxGrams=2048] - Upper bound on the number of grams
 *   returned. The alphabet has 38 characters, so at most 38 + 38 * 38 = 1482
 *   distinct grams can exist and the default never truncates.
 * @returns {string[]} Distinct grams, at most `maxGrams` of them.
 */
function contentShortGrams(body, maxGrams = 2048) {
  const grams = new Set();
  if (!(maxGrams > 0)) {
    return [];
  }
  for (const [word] of body.toLowerCase().matchAll(/[a-z0-9_$]+/g)) {
    for (let i = 0; i < word.length; i++) {
      grams.add(word[i]);
      // Enforce the cap per insertion so a single long word cannot overshoot it.
      if (grams.size >= maxGrams) {
        return [...grams];
      }
      if (i + 1 < word.length) {
        grams.add(word.slice(i, i + 2));
        if (grams.size >= maxGrams) {
          return [...grams];
        }
      }
    }
  }
  return [...grams];
}
|
|
12525
12584
|
function indexRoot(idOrPath, opts = {}, db) {
|
|
12526
12585
|
const d = db ?? getIndexDb();
|
|
12527
12586
|
const root = getRoot(idOrPath, d);
|
|
@@ -12540,6 +12599,8 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
12540
12599
|
const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
|
|
12541
12600
|
const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
|
|
12542
12601
|
const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
|
|
12602
|
+
const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
|
|
12603
|
+
const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
|
|
12543
12604
|
const stats = {
|
|
12544
12605
|
rootId: root.id,
|
|
12545
12606
|
added: 0,
|
|
@@ -12550,38 +12611,57 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
12550
12611
|
skippedDirs: skippedDirs.length,
|
|
12551
12612
|
durationMs: 0
|
|
12552
12613
|
};
|
|
12614
|
+
const seen = new Set;
|
|
12615
|
+
const changes = [];
|
|
12616
|
+
for (const file of scanned) {
|
|
12617
|
+
seen.add(file.relPath);
|
|
12618
|
+
const prev = existing.get(file.relPath);
|
|
12619
|
+
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
12620
|
+
if (prev && !changed && !opts.force)
|
|
12621
|
+
continue;
|
|
12622
|
+
const wantContent = shouldIndexContent(root, file);
|
|
12623
|
+
const absPath = `${root.path}/${file.relPath}`;
|
|
12624
|
+
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
12625
|
+
let body = null;
|
|
12626
|
+
if (wantContent && !isBinary) {
|
|
12627
|
+
try {
|
|
12628
|
+
body = readFileSync4(absPath, "utf-8");
|
|
12629
|
+
} catch {
|
|
12630
|
+
isBinary = true;
|
|
12631
|
+
}
|
|
12632
|
+
}
|
|
12633
|
+
changes.push({
|
|
12634
|
+
file,
|
|
12635
|
+
prev,
|
|
12636
|
+
isBinary,
|
|
12637
|
+
body,
|
|
12638
|
+
grams: body !== null ? contentShortGrams(body) : [],
|
|
12639
|
+
contentIndexed: body !== null ? 1 : 0
|
|
12640
|
+
});
|
|
12641
|
+
}
|
|
12553
12642
|
d.exec("BEGIN");
|
|
12554
12643
|
try {
|
|
12555
|
-
const seen = new Set;
|
|
12556
|
-
for (const file of scanned) {
|
|
12557
|
-
seen.add(file.relPath);
|
|
12558
|
-
const prev = existing.get(file.relPath);
|
|
12559
|
-
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
12560
|
-
if (prev && !changed && !opts.force)
|
|
12561
|
-
continue;
|
|
12562
|
-
const wantContent = shouldIndexContent(root, file);
|
|
12563
|
-
const absPath = `${root.path}/${file.relPath}`;
|
|
12564
|
-
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
12565
|
-
let body = null;
|
|
12566
|
-
if (wantContent && !isBinary) {
|
|
12567
|
-
try {
|
|
12568
|
-
body = readFileSync4(absPath, "utf-8");
|
|
12569
|
-
} catch {
|
|
12570
|
-
isBinary = true;
|
|
12571
|
-
}
|
|
12572
|
-
}
|
|
12573
|
-
const contentIndexed = body !== null ? 1 : 0;
|
|
12644
|
+
for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
|
|
12574
12645
|
if (prev) {
|
|
12575
|
-
if (prev.content_indexed)
|
|
12646
|
+
if (prev.content_indexed) {
|
|
12576
12647
|
deleteContent.run(prev.id);
|
|
12648
|
+
deleteContentGrams.run(prev.id);
|
|
12649
|
+
}
|
|
12577
12650
|
updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
|
|
12578
|
-
if (body !== null)
|
|
12651
|
+
if (body !== null) {
|
|
12579
12652
|
insertContent.run(prev.id, body);
|
|
12653
|
+
for (const gram of grams)
|
|
12654
|
+
insertContentGram.run(prev.id, gram);
|
|
12655
|
+
}
|
|
12580
12656
|
stats.updated++;
|
|
12581
12657
|
} else {
|
|
12582
12658
|
const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
|
|
12583
|
-
if (body !== null)
|
|
12584
|
-
|
|
12659
|
+
if (body !== null) {
|
|
12660
|
+
const fileId = Number(inserted.lastInsertRowid);
|
|
12661
|
+
insertContent.run(fileId, body);
|
|
12662
|
+
for (const gram of grams)
|
|
12663
|
+
insertContentGram.run(fileId, gram);
|
|
12664
|
+
}
|
|
12585
12665
|
stats.added++;
|
|
12586
12666
|
}
|
|
12587
12667
|
if (contentIndexed)
|
|
@@ -12590,8 +12670,10 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
12590
12670
|
for (const [relPath, row] of existing) {
|
|
12591
12671
|
if (seen.has(relPath))
|
|
12592
12672
|
continue;
|
|
12593
|
-
if (row.content_indexed)
|
|
12673
|
+
if (row.content_indexed) {
|
|
12594
12674
|
deleteContent.run(row.id);
|
|
12675
|
+
deleteContentGrams.run(row.id);
|
|
12676
|
+
}
|
|
12595
12677
|
deleteFile.run(row.id);
|
|
12596
12678
|
stats.deleted++;
|
|
12597
12679
|
}
|
|
@@ -12613,6 +12695,9 @@ function indexAllRoots(opts = {}, db) {
|
|
|
12613
12695
|
return listRoots(db).map((root) => indexRoot(root.id, opts, db));
|
|
12614
12696
|
}
|
|
12615
12697
|
var refreshing = new Set;
|
|
12698
|
+
var lastDefaultAutoRefreshCheckAt = 0;
|
|
12699
|
+
var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
|
|
12700
|
+
var defaultRefreshScheduled = false;
|
|
12616
12701
|
function refreshStaleRoots(staleMinutes, db) {
|
|
12617
12702
|
const cutoff = Date.now() - staleMinutes * 60000;
|
|
12618
12703
|
const stats = [];
|
|
@@ -12636,8 +12721,31 @@ function autoRefreshStaleRoots(db) {
|
|
|
12636
12721
|
const config = getConfig();
|
|
12637
12722
|
if (!config.indexAutoRefresh)
|
|
12638
12723
|
return [];
|
|
12724
|
+
if (!db) {
|
|
12725
|
+
const now = Date.now();
|
|
12726
|
+
if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
|
|
12727
|
+
return [];
|
|
12728
|
+
lastDefaultAutoRefreshCheckAt = now;
|
|
12729
|
+
}
|
|
12639
12730
|
return refreshStaleRoots(config.indexStaleMinutes, db);
|
|
12640
12731
|
}
|
|
12732
|
+
/**
 * Trigger a stale-root refresh without blocking the caller on the default
 * index database.
 *
 * When an explicit `db` is supplied the refresh runs synchronously through
 * `autoRefreshStaleRoots(db)` and its result is returned. Otherwise, if
 * auto-refresh is enabled in the config and no deferred refresh is already
 * pending, one is queued on a zero-delay timer and `[]` is returned at once.
 *
 * @param {*} [db] - Optional database handle, forwarded to `autoRefreshStaleRoots`.
 * @returns {Array} Result of the synchronous refresh when `db` is given, else `[]`.
 */
function scheduleAutoRefreshStaleRoots(db) {
  if (db) {
    return autoRefreshStaleRoots(db);
  }
  const { indexAutoRefresh } = getConfig();
  if (defaultRefreshScheduled || !indexAutoRefresh) {
    return [];
  }
  defaultRefreshScheduled = true;
  const runDeferredRefresh = () => {
    try {
      autoRefreshStaleRoots();
    } catch {
      // Errors from the deferred refresh are intentionally discarded here.
    } finally {
      // Re-arm so a later call can queue another refresh.
      defaultRefreshScheduled = false;
    }
  };
  const handle = setTimeout(runDeferredRefresh, 0);
  // Call unref when the timer handle provides it.
  handle.unref?.();
  return [];
}
|
|
12641
12749
|
|
|
12642
12750
|
// src/lib/local/query.ts
|
|
12643
12751
|
import { existsSync as existsSync3, readFileSync as readFileSync5 } from "fs";
|
|
@@ -12864,6 +12972,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
|
|
|
12864
12972
|
// src/lib/local/query.ts
|
|
12865
12973
|
var MAX_LINE_LENGTH = 200;
|
|
12866
12974
|
var MAX_MATCHES_PER_FILE = 5;
|
|
12975
|
+
var MAX_PATH_CANDIDATES = 20000;
|
|
12976
|
+
var MAX_CONTENT_CANDIDATES = 50000;
|
|
12977
|
+
var MAX_REGEX_CANDIDATES = 50000;
|
|
12867
12978
|
/**
 * Split a raw query into whitespace-separated tokens.
 *
 * The characters U+0000-U+0008, U+000B, U+000C, U+000E-U+001F and U+007F are
 * removed first; tab, line feed and carriage return are kept so they still
 * act as separators. Empty pieces are dropped.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Non-empty tokens in their original order.
 */
function tokenize(query) {
  const withoutControlChars = query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
  const pieces = withoutControlChars.split(/\s+/);
  return pieces.filter((piece) => piece.length > 0);
}
|
|
@@ -12894,11 +13005,40 @@ function filterClauses(opts, db) {
|
|
|
12894
13005
|
}
|
|
12895
13006
|
if (opts.dir) {
|
|
12896
13007
|
clauses.push("f.dir LIKE ? ESCAPE '\\'");
|
|
12897
|
-
const dir = opts.dir.replace(/^\/|\/$/g, "")
|
|
13008
|
+
const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
|
|
12898
13009
|
params.push(`%${dir}%`);
|
|
12899
13010
|
}
|
|
12900
13011
|
return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
|
|
12901
13012
|
}
|
|
13013
|
+
/**
 * Escape a string for use inside a SQL `LIKE ... ESCAPE '\'` pattern.
 *
 * A backslash is inserted before every backslash, `%` and `_`.
 *
 * @param {string} value - Literal text to embed in a LIKE pattern.
 * @returns {string} The text with each special character backslash-escaped.
 */
function escapeLike(value) {
  const prefixWithBackslash = (special) => `\\${special}`;
  return value.replace(/[\\%_]/g, prefixWithBackslash);
}
|
|
13016
|
+
/**
 * Build the extra SQL `AND` clauses that require every short token to appear
 * somewhere in `f.rel_path`.
 *
 * Each token contributes one `LIKE ? ESCAPE '\'` clause and one bound
 * `%token%` parameter, with the token passed through `escapeLike`.
 *
 * @param {string[]} tokens - Short tokens to require.
 * @returns {{sql: string, params: string[]}} SQL fragment (empty string when
 *   there are no tokens) and its parameters, in matching order.
 */
function shortTokenClauses(tokens) {
  const conditions = [];
  const params = [];
  for (const token of tokens) {
    conditions.push("f.rel_path LIKE ? ESCAPE '\\'");
    params.push(`%${escapeLike(token)}%`);
  }
  if (conditions.length === 0) {
    return { sql: "", params };
  }
  return { sql: ` AND ${conditions.join(" AND ")}`, params };
}
|
|
13024
|
+
/**
 * Build SQL clauses that pre-filter content candidates using the
 * `file_content_grams` table.
 *
 * Only tokens made of one or two `[a-z0-9_$]` characters take part. For each
 * such token the file must either have no gram rows at all or have a row
 * whose `gram` equals the token.
 *
 * @param {string[]} tokens - Candidate short tokens.
 * @returns {{sql: string, params: string[]}} SQL fragment (empty string when
 *   no token qualifies) and the qualifying tokens as bound parameters.
 */
function contentGramClauses(tokens) {
  const shortGramPattern = /^[a-z0-9_$]{1,2}$/;
  const params = [];
  let sql = "";
  for (const token of tokens) {
    if (!shortGramPattern.test(token)) {
      continue;
    }
    // Position among the qualifying tokens; keeps the subquery aliases unique.
    const index = params.length;
    sql += ` AND (
NOT EXISTS (
SELECT 1 FROM file_content_grams cg_any_${index}
WHERE cg_any_${index}.file_id = f.id
)
OR EXISTS (
SELECT 1 FROM file_content_grams cg_${index}
WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
)
)`;
    params.push(token);
  }
  return { sql, params };
}
|
|
12902
13042
|
function rowToHit(row, score) {
|
|
12903
13043
|
return {
|
|
12904
13044
|
rootId: row.root_id,
|
|
@@ -12968,6 +13108,8 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
12968
13108
|
return [];
|
|
12969
13109
|
const ftsQuery = buildFtsQuery(query);
|
|
12970
13110
|
const filters = filterClauses(opts, d);
|
|
13111
|
+
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
13112
|
+
const shortFilters = shortTokenClauses(shortTokens);
|
|
12971
13113
|
const candidateLimit = Math.max(200, limit * 10);
|
|
12972
13114
|
let rows;
|
|
12973
13115
|
if (ftsQuery) {
|
|
@@ -12975,16 +13117,16 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
12975
13117
|
FROM files_fts fts
|
|
12976
13118
|
JOIN files f ON f.id = fts.rowid
|
|
12977
13119
|
JOIN index_roots r ON r.id = f.root_id
|
|
12978
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
13120
|
+
WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
|
|
12979
13121
|
ORDER BY bm25(files_fts, 10.0, 1.0)
|
|
12980
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
|
|
12981
|
-
const namePattern = `${query.trim()
|
|
13122
|
+
LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
13123
|
+
const namePattern = `${escapeLike(query.trim())}%`;
|
|
12982
13124
|
const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
12983
13125
|
FROM files f
|
|
12984
13126
|
JOIN index_roots r ON r.id = f.root_id
|
|
12985
|
-
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
|
|
13127
|
+
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
|
|
12986
13128
|
ORDER BY length(f.name)
|
|
12987
|
-
LIMIT 100`).all(namePattern, ...filters.params);
|
|
13129
|
+
LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
|
|
12988
13130
|
const seen = new Set(rows.map((row) => row.id));
|
|
12989
13131
|
for (const row of nameRows) {
|
|
12990
13132
|
if (!seen.has(row.id))
|
|
@@ -12992,14 +13134,14 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
12992
13134
|
}
|
|
12993
13135
|
} else {
|
|
12994
13136
|
const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
|
|
12995
|
-
const likeParams = tokens.map((t) => `%${t
|
|
13137
|
+
const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
|
|
12996
13138
|
rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
12997
13139
|
FROM files f
|
|
12998
13140
|
JOIN index_roots r ON r.id = f.root_id
|
|
12999
13141
|
WHERE ${likeClauses}${filters.sql}
|
|
13000
|
-
|
|
13142
|
+
ORDER BY length(f.name), length(f.rel_path), f.rel_path
|
|
13143
|
+
LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
13001
13144
|
}
|
|
13002
|
-
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
13003
13145
|
const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
|
|
13004
13146
|
return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync3(hit.absPath)).slice(0, limit);
|
|
13005
13147
|
}
|
|
@@ -13037,24 +13179,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
|
|
|
13037
13179
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
|
|
13038
13180
|
}
|
|
13039
13181
|
const filters = filterClauses(opts, d);
|
|
13040
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
13041
|
-
FROM files_fts fts
|
|
13042
|
-
JOIN files f ON f.id = fts.rowid
|
|
13043
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
13044
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
13045
|
-
ORDER BY fts.rank
|
|
13046
|
-
LIMIT 5000`).all(ftsQuery, ...filters.params);
|
|
13047
13182
|
const hits = [];
|
|
13048
|
-
|
|
13049
|
-
|
|
13050
|
-
|
|
13051
|
-
|
|
13052
|
-
|
|
13053
|
-
|
|
13054
|
-
|
|
13055
|
-
|
|
13056
|
-
|
|
13057
|
-
if (
|
|
13183
|
+
const pageSize = Math.max(500, limit * 20);
|
|
13184
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
13185
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
13186
|
+
FROM files_fts fts
|
|
13187
|
+
JOIN files f ON f.id = fts.rowid
|
|
13188
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
13189
|
+
WHERE files_fts MATCH ?${filters.sql}
|
|
13190
|
+
ORDER BY fts.rank
|
|
13191
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
13192
|
+
if (rows.length === 0)
|
|
13193
|
+
break;
|
|
13194
|
+
for (const row of rows) {
|
|
13195
|
+
if (!regex.test(row.rel_path) && !regex.test(row.name))
|
|
13196
|
+
continue;
|
|
13197
|
+
const depth = row.rel_path.split("/").length - 1;
|
|
13198
|
+
const score = Math.max(0.05, 0.6 - depth * 0.02);
|
|
13199
|
+
const hit = rowToHit(row, score);
|
|
13200
|
+
if (!existsSync3(hit.absPath))
|
|
13201
|
+
continue;
|
|
13202
|
+
hits.push(hit);
|
|
13203
|
+
if (hits.length >= limit)
|
|
13204
|
+
break;
|
|
13205
|
+
}
|
|
13206
|
+
if (rows.length < pageSize)
|
|
13058
13207
|
break;
|
|
13059
13208
|
}
|
|
13060
13209
|
return hits;
|
|
@@ -13068,40 +13217,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
|
|
|
13068
13217
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
|
|
13069
13218
|
}
|
|
13070
13219
|
const filters = filterClauses(opts, d);
|
|
13071
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
13072
|
-
FROM file_content_fts fts
|
|
13073
|
-
JOIN files f ON f.id = fts.rowid
|
|
13074
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
13075
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
13076
|
-
ORDER BY fts.rank
|
|
13077
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
|
|
13078
13220
|
const hits = [];
|
|
13079
|
-
|
|
13080
|
-
|
|
13081
|
-
const
|
|
13082
|
-
|
|
13083
|
-
|
|
13084
|
-
|
|
13085
|
-
|
|
13086
|
-
|
|
13087
|
-
|
|
13088
|
-
|
|
13221
|
+
const pageSize = Math.max(200, limit * 10);
|
|
13222
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
13223
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
13224
|
+
FROM file_content_fts fts
|
|
13225
|
+
JOIN files f ON f.id = fts.rowid
|
|
13226
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
13227
|
+
WHERE file_content_fts MATCH ?${filters.sql}
|
|
13228
|
+
ORDER BY fts.rank
|
|
13229
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
13230
|
+
if (rows.length === 0)
|
|
13231
|
+
break;
|
|
13232
|
+
for (let i = 0;i < rows.length && hits.length < limit; i++) {
|
|
13233
|
+
const row = rows[i];
|
|
13234
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
13235
|
+
let content;
|
|
13236
|
+
try {
|
|
13237
|
+
content = readFileSync5(absPath, "utf-8");
|
|
13238
|
+
} catch {
|
|
13239
|
+
continue;
|
|
13240
|
+
}
|
|
13241
|
+
const lines = content.split(`
|
|
13089
13242
|
`);
|
|
13090
|
-
|
|
13091
|
-
|
|
13092
|
-
|
|
13093
|
-
|
|
13243
|
+
const matches = [];
|
|
13244
|
+
for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
|
|
13245
|
+
if (regex.test(lines[n])) {
|
|
13246
|
+
matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
|
|
13247
|
+
}
|
|
13094
13248
|
}
|
|
13249
|
+
if (matches.length === 0)
|
|
13250
|
+
continue;
|
|
13251
|
+
const rankIndex = offset + i;
|
|
13252
|
+
const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
|
|
13253
|
+
hits.push({
|
|
13254
|
+
...rowToHit(row, score),
|
|
13255
|
+
line: matches[0].line,
|
|
13256
|
+
lineText: matches[0].text,
|
|
13257
|
+
matches
|
|
13258
|
+
});
|
|
13095
13259
|
}
|
|
13096
|
-
if (
|
|
13097
|
-
|
|
13098
|
-
const score = Math.max(0.25, 0.65 - i * 0.05);
|
|
13099
|
-
hits.push({
|
|
13100
|
-
...rowToHit(row, score),
|
|
13101
|
-
line: matches[0].line,
|
|
13102
|
-
lineText: matches[0].text,
|
|
13103
|
-
matches
|
|
13104
|
-
});
|
|
13260
|
+
if (rows.length < pageSize)
|
|
13261
|
+
break;
|
|
13105
13262
|
}
|
|
13106
13263
|
return hits;
|
|
13107
13264
|
}
|
|
@@ -13112,42 +13269,51 @@ function searchFileContent(query, opts = {}, db) {
|
|
|
13112
13269
|
if (!ftsQuery)
|
|
13113
13270
|
return [];
|
|
13114
13271
|
const filters = filterClauses(opts, d);
|
|
13115
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
13116
|
-
FROM file_content_fts fts
|
|
13117
|
-
JOIN files f ON f.id = fts.rowid
|
|
13118
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
13119
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
13120
|
-
ORDER BY fts.rank
|
|
13121
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
|
|
13122
13272
|
const tokens = tokenize(query);
|
|
13123
13273
|
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
13274
|
+
const gramFilters = contentGramClauses(shortTokens);
|
|
13124
13275
|
const scored = [];
|
|
13125
|
-
|
|
13126
|
-
|
|
13127
|
-
const
|
|
13128
|
-
|
|
13129
|
-
|
|
13130
|
-
|
|
13131
|
-
|
|
13132
|
-
|
|
13133
|
-
|
|
13134
|
-
if (
|
|
13135
|
-
|
|
13136
|
-
|
|
13276
|
+
const pageSize = Math.max(50, limit * 3);
|
|
13277
|
+
for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
|
|
13278
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
13279
|
+
FROM file_content_fts fts
|
|
13280
|
+
JOIN files f ON f.id = fts.rowid
|
|
13281
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
13282
|
+
WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
|
|
13283
|
+
ORDER BY fts.rank
|
|
13284
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
|
|
13285
|
+
if (rows.length === 0)
|
|
13286
|
+
break;
|
|
13287
|
+
for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
|
|
13288
|
+
const row = rows[i];
|
|
13289
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
13290
|
+
let content;
|
|
13291
|
+
try {
|
|
13292
|
+
content = readFileSync5(absPath, "utf-8");
|
|
13293
|
+
} catch {
|
|
13294
|
+
continue;
|
|
13295
|
+
}
|
|
13296
|
+
if (shortTokens.length > 0) {
|
|
13297
|
+
const lower = content.toLowerCase();
|
|
13298
|
+
if (!shortTokens.every((t) => lower.includes(t)))
|
|
13299
|
+
continue;
|
|
13300
|
+
}
|
|
13301
|
+
const { matches, tier } = findLineMatches(content, query, tokens);
|
|
13302
|
+
if (matches.length === 0)
|
|
13137
13303
|
continue;
|
|
13304
|
+
const rankIndex = offset + i;
|
|
13305
|
+
const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
|
|
13306
|
+
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
13307
|
+
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
13308
|
+
scored.push({
|
|
13309
|
+
...rowToHit(row, score),
|
|
13310
|
+
line: matches[0].line,
|
|
13311
|
+
lineText: matches[0].text,
|
|
13312
|
+
matches
|
|
13313
|
+
});
|
|
13138
13314
|
}
|
|
13139
|
-
|
|
13140
|
-
|
|
13141
|
-
continue;
|
|
13142
|
-
const base = Math.max(0.25, 0.55 - i * 0.04);
|
|
13143
|
-
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
13144
|
-
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
13145
|
-
scored.push({
|
|
13146
|
-
...rowToHit(row, score),
|
|
13147
|
-
line: matches[0].line,
|
|
13148
|
-
lineText: matches[0].text,
|
|
13149
|
-
matches
|
|
13150
|
-
});
|
|
13315
|
+
if (rows.length < pageSize)
|
|
13316
|
+
break;
|
|
13151
13317
|
}
|
|
13152
13318
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
13153
13319
|
}
|
|
@@ -13164,7 +13330,7 @@ function findLocal(query, opts = {}, db) {
|
|
|
13164
13330
|
return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
|
|
13165
13331
|
}
|
|
13166
13332
|
if (opts.refresh !== false)
|
|
13167
|
-
|
|
13333
|
+
scheduleAutoRefreshStaleRoots(db);
|
|
13168
13334
|
const queryOpts = {
|
|
13169
13335
|
root: opts.root,
|
|
13170
13336
|
ext: opts.ext,
|
|
@@ -14029,7 +14195,7 @@ class FilesProvider {
|
|
|
14029
14195
|
return hasReadyRoot();
|
|
14030
14196
|
}
|
|
14031
14197
|
async search(query, options) {
|
|
14032
|
-
|
|
14198
|
+
scheduleAutoRefreshStaleRoots();
|
|
14033
14199
|
const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
|
|
14034
14200
|
return hits.map((hit) => ({
|
|
14035
14201
|
title: hit.name,
|
|
@@ -14057,7 +14223,7 @@ class ContentProvider {
|
|
|
14057
14223
|
return hasReadyRoot();
|
|
14058
14224
|
}
|
|
14059
14225
|
async search(query, options) {
|
|
14060
|
-
|
|
14226
|
+
scheduleAutoRefreshStaleRoots();
|
|
14061
14227
|
const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
|
|
14062
14228
|
return hits.map((hit) => ({
|
|
14063
14229
|
title: hit.name,
|
|
@@ -14184,7 +14350,7 @@ function rowToSearch(row) {
|
|
|
14184
14350
|
}
|
|
14185
14351
|
function createSearch(data, db) {
|
|
14186
14352
|
const d = db ?? getDb();
|
|
14187
|
-
const id = generateId();
|
|
14353
|
+
const id = data.id ?? generateId();
|
|
14188
14354
|
const now = new Date().toISOString();
|
|
14189
14355
|
d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
|
|
14190
14356
|
VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
|
|
@@ -14266,7 +14432,7 @@ function createResults(results, db) {
|
|
|
14266
14432
|
d.exec("BEGIN");
|
|
14267
14433
|
try {
|
|
14268
14434
|
for (const data of results) {
|
|
14269
|
-
const id = generateId();
|
|
14435
|
+
const id = data.id ?? generateId();
|
|
14270
14436
|
stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
14271
14437
|
created.push({
|
|
14272
14438
|
id,
|
|
@@ -14408,13 +14574,300 @@ function isProviderConfigured(provider) {
|
|
|
14408
14574
|
return !!Bun.env[provider.apiKeyEnv];
|
|
14409
14575
|
}
|
|
14410
14576
|
|
|
14577
|
+
// src/lib/router.ts
|
|
14578
|
+
var PROVIDER_DESCRIPTIONS = {
|
|
14579
|
+
files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
|
|
14580
|
+
content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
|
|
14581
|
+
google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
|
|
14582
|
+
serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
|
|
14583
|
+
exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
|
|
14584
|
+
perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
|
|
14585
|
+
brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
|
|
14586
|
+
bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
|
|
14587
|
+
twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
|
|
14588
|
+
reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
|
|
14589
|
+
youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
|
|
14590
|
+
hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
|
|
14591
|
+
github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
|
|
14592
|
+
arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
|
|
14593
|
+
};
|
|
14594
|
+
/**
 * Normalize a requested provider count to a whole number between 1 and 5.
 *
 * @param {*} value - Requested maximum; anything that is not a finite number
 *   (including `undefined`) yields 3.
 * @returns {number} `value` rounded down and limited to the range 1..5, or 3.
 */
function clampMaxProviders(value) {
  // Number.isFinite is false for undefined and for every non-number.
  if (!Number.isFinite(value)) {
    return 3;
  }
  const whole = Math.floor(value);
  if (whole < 1) {
    return 1;
  }
  if (whole > 5) {
    return 5;
  }
  return whole;
}
|
|
14599
|
+
/**
 * Normalize a confidence value to the range 0..1.
 *
 * @param {*} value - Reported confidence.
 * @returns {number} `value` limited to 0..1 when it is a finite number,
 *   otherwise 0.5.
 */
function clampConfidence(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0.5;
  }
  const cappedAtOne = Math.min(1, value);
  return Math.max(0, cappedAtOne);
}
|
|
14602
|
+
/**
 * Filter a candidate list down to recognized provider names, removing duplicates.
 *
 * A candidate is kept only when it appears in `PROVIDER_NAMES`; the first
 * occurrence of each name determines its position in the result.
 *
 * @param {Iterable<string>} candidates - Provider names to normalize.
 * @returns {string[]} Recognized, de-duplicated names in first-seen order.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  // A Set iterates in insertion order, which also handles the de-duplication.
  const unique = new Set();
  for (const name of candidates) {
    if (known.has(name)) {
      unique.add(name);
    }
  }
  return [...unique];
}
|
|
14614
|
+
/**
 * Adds `amount` to a provider's running score, but only for providers that
 * are part of the current candidate set; other providers are left untouched.
 *
 * @param {Map<string, number>} scores - Running score per provider (mutated).
 * @param {Set<string>} candidateSet - Providers eligible for scoring.
 * @param {string} provider - Provider whose score should change.
 * @param {number} amount - Value added to the provider's current score.
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    // A provider without a recorded score starts from zero.
    const previous = scores.get(provider) ?? 0;
    scores.set(provider, previous + amount);
  }
}
|
|
14619
|
+
/**
 * Reports whether at least one regular expression matches the query.
 * Evaluation stops at the first pattern that matches.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Expressions tried in order.
 * @returns {boolean} True when any pattern matches, false otherwise.
 */
function hasAny(query, patterns) {
  for (const expression of patterns) {
    if (expression.test(query)) {
      return true;
    }
  }
  return false;
}
|
|
14622
|
+
/**
 * Picks search providers for a query using keyword and pattern heuristics.
 *
 * Every valid candidate starts with a small base score. Each rule whose
 * patterns match the lower-cased query boosts specific providers and
 * records a reason. When no rule matches, a general-purpose boost set is
 * applied instead. Providers are ranked by score, with ties resolved by
 * their position in the normalized candidate list.
 *
 * @param {string} query - Raw user query.
 * @param {string[]} candidates - Provider names that may be selected.
 * @param {{ maxProviders?: number }} [options] - Optional cap on the number
 *   of providers returned (passed through clampMaxProviders).
 * @returns {{ strategy: "heuristic", selectedProviders: string[],
 *   candidates: string[], reason: string, confidence: number }} Routing
 *   decision; `confidence` is 0 when there are no valid candidates and
 *   otherwise lies between 0.35 and 0.9.
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const providerCap = clampMaxProviders(options.maxProviders);
  if (pool.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; that order fixes both the order of
  // the reported reasons and the order in which boosts are summed.
  const signalRules = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      note: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      note: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      note: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      note: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      note: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      note: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      note: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      note: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      note: "query appears time-sensitive"
    }
  ];
  // Applied only when none of the rules above fired.
  const fallbackBoosts = [
    ["exa", 2.5],
    ["perplexity", 2],
    ["brave", 1.5],
    ["google", 1.5],
    ["hackernews", 0.75]
  ];

  const eligible = new Set(pool);
  const tally = new Map(pool.map((name) => [name, 0.05]));
  const notes = [];
  const lowered = query.trim().toLowerCase();

  const applyBoosts = (boosts) => {
    for (const [provider, weight] of boosts) {
      addScore(tally, eligible, provider, weight);
    }
  };

  for (const rule of signalRules) {
    if (!hasAny(lowered, rule.patterns)) {
      continue;
    }
    applyBoosts(rule.boosts);
    notes.push(rule.note);
  }
  if (notes.length === 0) {
    applyBoosts(fallbackBoosts);
    notes.push("general query fallback");
  }

  // Highest score first; equal scores keep candidate-list order.
  const byScoreThenPosition = (left, right) =>
    right[1] - left[1] || pool.indexOf(left[0]) - pool.indexOf(right[0]);
  const ranked = [...tally.entries()].sort(byScoreThenPosition);
  const keep = Math.min(providerCap, pool.length);
  const selectedProviders = ranked.slice(0, keep).map(([provider]) => provider);

  const bestScore = tally.get(selectedProviders[0]) ?? 0;
  const scaled = Math.min(0.9, bestScore / 6);
  const confidence = Math.max(0.35, scaled);

  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: pool,
    reason: notes.join("; "),
    confidence
  };
}
|
|
14712
|
+
/**
 * Builds the JSON Schema describing the router model's expected reply.
 *
 * @param {string[]} candidates - Provider names the model may choose from;
 *   used as the `enum` of each selected item.
 * @param {number} maxProviders - Upper bound on the number of selections.
 * @returns {object} Schema for an object with `selectedProviders`, `reason`
 *   and `confidence`, all required, with no additional properties allowed.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const reason = { type: "string" };
  const confidence = { type: "number", minimum: 0, maximum: 1 };

  return {
    type: "object",
    properties: { selectedProviders, reason, confidence },
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
|
|
14729
|
+
/**
 * Parses and validates the router model's JSON reply.
 *
 * The reply is untrusted: it may not be JSON, may be a JSON value other
 * than an object (including `null`), may name providers that are not
 * candidates, or may repeat a provider. All of those are tolerated here.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {Iterable<string>} candidates - Provider names that are allowed.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string,
 *   confidence: number } | null} The cleaned selection, or null when the
 *   reply is unusable or contains no allowed provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse legitimately returns null for the text "null"; reading a
  // property from it would throw instead of signalling an invalid reply.
  if (parsed === null || typeof parsed !== "object")
    return null;
  if (!Array.isArray(parsed.selectedProviders))
    return null;
  const candidateSet = new Set(candidates);
  const allowed = parsed.selectedProviders.filter((provider) => typeof provider === "string" && candidateSet.has(provider));
  // De-duplicate before truncating so repeats neither consume slots nor
  // cause the same provider to be queried twice.
  const selectedProviders = [...new Set(allowed)].slice(0, maxProviders);
  if (selectedProviders.length === 0)
    return null;
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
|
|
14748
|
+
/**
 * Asks the Cerebras chat-completions API to choose providers for a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made: the heuristic
 * router's decision is returned with an explanatory `error` field.
 *
 * @param {string} query - User query to route.
 * @param {string[]} candidates - Provider names the model may select.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options
 *   - Selection cap, request timeout in milliseconds, and model identifier.
 * @returns {Promise<object>} A routing decision with strategy "cerebras",
 *   or the heuristic decision plus `error` when no API key is configured.
 * @throws {Error} When the HTTP response is not OK, the reply has no
 *   message content, or the reply does not yield a valid selection. Errors
 *   raised by `fetch` itself (including the timeout abort) also propagate.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  const { maxProviders, timeoutMs, model } = options;

  // Give the model a short description of every provider it may pick.
  const providerGuide = candidates.map((name) => {
    return { name, description: PROVIDER_DESCRIPTIONS[name] };
  });

  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders,
      providers: providerGuide
    })
  };
  // Constrain the reply to the router schema so it can be parsed directly.
  const responseFormat = {
    type: "json_schema",
    json_schema: {
      name: "search_router",
      strict: true,
      schema: routerSchema(candidates, maxProviders)
    }
  };
  const requestBody = JSON.stringify({
    model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    response_format: responseFormat
  });

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: requestBody
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }

  const selection = parseCerebrasRouting(content, candidates, maxProviders);
  if (!selection) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...selection
  };
}
|
|
14810
|
+
/**
 * Chooses providers for a query, preferring the Cerebras router and
 * falling back to the heuristic router when that attempt throws.
 *
 * @param {string} query - User query to route.
 * @param {string[]} candidates - Provider names that may be selected.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }}
 *   [options] - Optional selection cap, router timeout (a usable value is
 *   floored and raised to at least 250 ms; otherwise 1200 ms is used) and
 *   model (defaults to CEREBRAS_MODEL, then "gpt-oss-120b").
 * @returns {Promise<object>} The routing decision. When the Cerebras call
 *   throws, the heuristic decision is returned with the failure message in
 *   `error`.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);

  // Never ask for more providers than exist, but keep the cap at 1 or more.
  const requestedCap = clampMaxProviders(options.maxProviders);
  const maxProviders = Math.min(requestedCap, Math.max(1, pool.length));

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }

  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  if (pool.length === 0) {
    return routeSearchProvidersHeuristic(query, pool, { maxProviders });
  }

  try {
    return await routeWithCerebras(query, pool, { maxProviders, timeoutMs, model });
  } catch (failure) {
    const fallback = routeSearchProvidersHeuristic(query, pool, { maxProviders });
    const error = failure instanceof Error ? failure.message : String(failure);
    return { ...fallback, error };
  }
}
|
|
14827
|
+
|
|
14411
14828
|
// src/lib/search.ts
|
|
14829
|
+
/**
 * Awaits a promise but gives up once a deadline has passed.
 *
 * @param {Promise<unknown>} promise - Work to wait for.
 * @param {number} timeoutMs - Deadline in milliseconds. A value that is not
 *   a finite number greater than zero disables the deadline.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<unknown>} The settled value of `promise`.
 * @throws {Error} `"<label> timed out after <timeoutMs>ms"` when the
 *   deadline fires first; otherwise whatever `promise` rejects with.
 */
async function withTimeout(promise, timeoutMs, label) {
  const hasDeadline = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!hasDeadline) {
    return promise;
  }

  let handle;
  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(`${label} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    // Where the timer exposes unref(), a pending deadline should not by
    // itself keep the process alive.
    handle.unref?.();
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer, whichever side of the race won.
    if (handle) {
      clearTimeout(handle);
    }
  }
}
|
|
14846
|
+
/**
 * Runs `task` over every item with bounded parallelism and reports each
 * outcome in the same shape as Promise.allSettled, in input order.
 *
 * @param {Array<unknown>} items - Inputs to process.
 * @param {number} concurrency - Maximum number of tasks in flight. Values
 *   below 1 are raised to 1. A value that floors to NaN (for example an
 *   unset setting) is treated as "no limit" rather than silently running
 *   nothing.
 * @param {(item: unknown) => unknown} task - Work to perform per item; may
 *   return a promise.
 * @returns {Promise<Array<{ status: "fulfilled", value: unknown } |
 *   { status: "rejected", reason: unknown }>>} One entry per item. This
 *   promise does not reject because of a failing task.
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;

  // Each worker repeatedly claims the next unprocessed index. Claiming is
  // synchronous, so two workers can never take the same item.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }

  // Math.floor(undefined) is NaN, and NaN would propagate through
  // Math.max/Math.min into Array.from({ length: NaN }), which creates zero
  // workers and leaves every result slot empty.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? items.length : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
14412
14864
|
async function unifiedSearch(query, opts = {}) {
|
|
14413
14865
|
const config = getConfig();
|
|
14414
14866
|
const startTime = Date.now();
|
|
14415
14867
|
const db = opts.db;
|
|
14416
14868
|
let providerNames = opts.providers ?? [];
|
|
14417
|
-
|
|
14869
|
+
const smartProfile = opts.profile === "smart";
|
|
14870
|
+
if (opts.profile && !smartProfile) {
|
|
14418
14871
|
const profile = getProfileByName(opts.profile, db);
|
|
14419
14872
|
if (profile) {
|
|
14420
14873
|
providerNames = profile.providers;
|
|
@@ -14430,7 +14883,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
14430
14883
|
}
|
|
14431
14884
|
const errors2 = [];
|
|
14432
14885
|
const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
|
|
14433
|
-
|
|
14886
|
+
let activeProviders = providerNames.filter((name) => {
|
|
14434
14887
|
try {
|
|
14435
14888
|
if (getProvider(name).isConfigured())
|
|
14436
14889
|
return true;
|
|
@@ -14441,20 +14894,36 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
14441
14894
|
});
|
|
14442
14895
|
}
|
|
14443
14896
|
return false;
|
|
14444
|
-
} catch {
|
|
14897
|
+
} catch (err) {
|
|
14898
|
+
if (explicitRequest) {
|
|
14899
|
+
errors2.push({
|
|
14900
|
+
provider: name,
|
|
14901
|
+
error: err instanceof Error ? err.message : "unknown provider"
|
|
14902
|
+
});
|
|
14903
|
+
}
|
|
14445
14904
|
return false;
|
|
14446
14905
|
}
|
|
14447
14906
|
});
|
|
14907
|
+
const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
|
|
14908
|
+
let routing;
|
|
14909
|
+
if (routingRequested && activeProviders.length > 0) {
|
|
14910
|
+
routing = await routeSearchProviders(query, activeProviders, {
|
|
14911
|
+
maxProviders: config.router.maxProviders,
|
|
14912
|
+
timeoutMs: config.router.timeoutMs,
|
|
14913
|
+
model: config.router.model
|
|
14914
|
+
});
|
|
14915
|
+
activeProviders = routing.selectedProviders;
|
|
14916
|
+
}
|
|
14448
14917
|
const searchOptions = {
|
|
14449
14918
|
limit: config.defaultLimit,
|
|
14450
14919
|
...opts.options
|
|
14451
14920
|
};
|
|
14452
|
-
const results = await
|
|
14921
|
+
const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
|
|
14453
14922
|
const provider = getProvider(name);
|
|
14454
|
-
const rawResults = await provider.search(query, searchOptions);
|
|
14923
|
+
const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
|
|
14455
14924
|
updateProviderLastUsed(name, db);
|
|
14456
14925
|
return { name, results: rawResults };
|
|
14457
|
-
})
|
|
14926
|
+
});
|
|
14458
14927
|
const allResults = [];
|
|
14459
14928
|
const searchId = generateId();
|
|
14460
14929
|
for (const result of results) {
|
|
@@ -14508,11 +14977,13 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
14508
14977
|
createdAt: new Date().toISOString()
|
|
14509
14978
|
},
|
|
14510
14979
|
results: finalResults,
|
|
14511
|
-
errors: errors2
|
|
14980
|
+
errors: errors2,
|
|
14981
|
+
...routing && { routing }
|
|
14512
14982
|
};
|
|
14513
14983
|
}
|
|
14514
14984
|
const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
|
|
14515
14985
|
const search = createSearch({
|
|
14986
|
+
id: searchId,
|
|
14516
14987
|
query,
|
|
14517
14988
|
providers: activeProviders,
|
|
14518
14989
|
resultCount: persistable.length,
|
|
@@ -14521,6 +14992,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
14521
14992
|
if (persistable.length > 0) {
|
|
14522
14993
|
createResults(persistable.map((r) => ({
|
|
14523
14994
|
searchId: search.id,
|
|
14995
|
+
id: r.id,
|
|
14524
14996
|
title: r.title,
|
|
14525
14997
|
url: r.url,
|
|
14526
14998
|
snippet: r.snippet,
|
|
@@ -14537,7 +15009,8 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
14537
15009
|
return {
|
|
14538
15010
|
search: { ...search, resultCount: finalResults.length, duration },
|
|
14539
15011
|
results: finalResults,
|
|
14540
|
-
errors: errors2
|
|
15012
|
+
errors: errors2,
|
|
15013
|
+
...routing && { routing }
|
|
14541
15014
|
};
|
|
14542
15015
|
}
|
|
14543
15016
|
async function searchSingleProvider(provider, query, options, db) {
|
|
@@ -14814,7 +15287,7 @@ var program2 = new Command;
|
|
|
14814
15287
|
program2.name("search").version(pkg.version).description("Unified search \u2014 local file index + 12 web providers, one interface");
|
|
14815
15288
|
registerStorageCommands(program2);
|
|
14816
15289
|
registerLocalCommands(program2);
|
|
14817
|
-
program2.command("query").alias("q").argument("<query...>", "Search query").option("-p, --providers <providers>", "Comma-separated providers").option("--profile <name>", "Use a search profile").option("-l, --limit <n>", "Max results per provider", "10").option("-f, --format <format>", "Output format: table, json", "table").option("--no-dedup", "Disable deduplication").action(async (queryParts, opts) => {
|
|
15290
|
+
program2.command("query").alias("q").argument("<query...>", "Search query").option("-p, --providers <providers>", "Comma-separated providers").option("--profile <name>", "Use a search profile").option("-l, --limit <n>", "Max results per provider", "10").option("-f, --format <format>", "Output format: table, json", "table").option("--smart", "Route the query to the best configured providers with the smart router").option("--no-dedup", "Disable deduplication").action(async (queryParts, opts) => {
|
|
14818
15291
|
const query = queryParts.join(" ");
|
|
14819
15292
|
const providers = opts.providers ? opts.providers.split(",") : undefined;
|
|
14820
15293
|
try {
|
|
@@ -14822,7 +15295,8 @@ program2.command("query").alias("q").argument("<query...>", "Search query").opti
|
|
|
14822
15295
|
providers,
|
|
14823
15296
|
profile: opts.profile,
|
|
14824
15297
|
options: { limit: parseInt(opts.limit) },
|
|
14825
|
-
dedup: opts.dedup
|
|
15298
|
+
dedup: opts.dedup,
|
|
15299
|
+
smart: opts.smart
|
|
14826
15300
|
});
|
|
14827
15301
|
if (opts.format === "json") {
|
|
14828
15302
|
console.log(JSON.stringify(response, null, 2));
|