@hasna/search 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +602 -128
- package/dist/db/index-migrations.d.ts.map +1 -1
- package/dist/db/results.d.ts +2 -0
- package/dist/db/results.d.ts.map +1 -1
- package/dist/db/searches.d.ts +1 -0
- package/dist/db/searches.d.ts.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +602 -126
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +1 -1
- package/dist/lib/local/find.d.ts.map +1 -1
- package/dist/lib/local/indexer.d.ts +11 -0
- package/dist/lib/local/indexer.d.ts.map +1 -1
- package/dist/lib/local/query.d.ts.map +1 -1
- package/dist/lib/router.d.ts +10 -0
- package/dist/lib/router.d.ts.map +1 -0
- package/dist/lib/search.d.ts +1 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/index.js +621 -130
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/server/index.js +624 -139
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/types/index.d.ts +22 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/server/index.js
CHANGED
|
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
|
|
|
11644
11644
|
var require_package = __commonJS((exports, module) => {
|
|
11645
11645
|
module.exports = {
|
|
11646
11646
|
name: "@hasna/search",
|
|
11647
|
-
version: "0.0.9",
|
|
11647
|
+
version: "0.0.11",
|
|
11648
11648
|
description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
|
|
11649
11649
|
type: "module",
|
|
11650
11650
|
main: "dist/index.js",
|
|
@@ -15756,12 +15756,19 @@ var DEFAULT_CONFIG = {
|
|
|
15756
15756
|
defaultLimit: 10,
|
|
15757
15757
|
defaultProviders: [],
|
|
15758
15758
|
defaultProfile: null,
|
|
15759
|
+
router: {
|
|
15760
|
+
enabled: false,
|
|
15761
|
+
model: "gpt-oss-120b",
|
|
15762
|
+
maxProviders: 3,
|
|
15763
|
+
timeoutMs: 1200
|
|
15764
|
+
},
|
|
15759
15765
|
transcriber: {
|
|
15760
15766
|
baseUrl: "http://localhost:19600",
|
|
15761
15767
|
fallbackCli: "microservice-transcriber"
|
|
15762
15768
|
},
|
|
15763
15769
|
dedup: true,
|
|
15764
15770
|
maxConcurrent: 5,
|
|
15771
|
+
providerTimeoutMs: 15000,
|
|
15765
15772
|
indexStaleMinutes: 5,
|
|
15766
15773
|
indexAutoRefresh: true,
|
|
15767
15774
|
recordLocalResults: false
|
|
@@ -16496,6 +16503,31 @@ var migrations = [
|
|
|
16496
16503
|
);
|
|
16497
16504
|
`);
|
|
16498
16505
|
}
|
|
16506
|
+
},
|
|
16507
|
+
{
|
|
16508
|
+
version: 2,
|
|
16509
|
+
description: "Local file index filter indexes",
|
|
16510
|
+
up: (db) => {
|
|
16511
|
+
db.exec(`
|
|
16512
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
|
|
16513
|
+
CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
|
|
16514
|
+
`);
|
|
16515
|
+
}
|
|
16516
|
+
},
|
|
16517
|
+
{
|
|
16518
|
+
version: 3,
|
|
16519
|
+
description: "Local content short-token filter grams",
|
|
16520
|
+
up: (db) => {
|
|
16521
|
+
db.exec(`
|
|
16522
|
+
CREATE TABLE IF NOT EXISTS file_content_grams (
|
|
16523
|
+
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
|
|
16524
|
+
gram TEXT NOT NULL,
|
|
16525
|
+
PRIMARY KEY (file_id, gram)
|
|
16526
|
+
);
|
|
16527
|
+
CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
|
|
16528
|
+
ON file_content_grams(gram, file_id);
|
|
16529
|
+
`);
|
|
16530
|
+
}
|
|
16499
16531
|
}
|
|
16500
16532
|
];
|
|
16501
16533
|
function runIndexMigrations(db) {
|
|
@@ -16567,7 +16599,18 @@ function getConfig() {
|
|
|
16567
16599
|
try {
|
|
16568
16600
|
const raw = readFileSync(path, "utf-8");
|
|
16569
16601
|
const parsed = JSON.parse(raw);
|
|
16570
|
-
return {
|
|
16602
|
+
return {
|
|
16603
|
+
...DEFAULT_CONFIG,
|
|
16604
|
+
...parsed,
|
|
16605
|
+
router: {
|
|
16606
|
+
...DEFAULT_CONFIG.router,
|
|
16607
|
+
...parsed.router ?? {}
|
|
16608
|
+
},
|
|
16609
|
+
transcriber: {
|
|
16610
|
+
...DEFAULT_CONFIG.transcriber,
|
|
16611
|
+
...parsed.transcriber ?? {}
|
|
16612
|
+
}
|
|
16613
|
+
};
|
|
16571
16614
|
} catch {
|
|
16572
16615
|
return { ...DEFAULT_CONFIG };
|
|
16573
16616
|
}
|
|
@@ -16985,6 +17028,7 @@ function removeRoot(idOrPath, db) {
|
|
|
16985
17028
|
d.exec("BEGIN");
|
|
16986
17029
|
try {
|
|
16987
17030
|
d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
17031
|
+
d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
|
|
16988
17032
|
d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
|
|
16989
17033
|
d.exec("COMMIT");
|
|
16990
17034
|
} catch (err) {
|
|
@@ -16996,6 +17040,21 @@ function removeRoot(idOrPath, db) {
|
|
|
16996
17040
|
function shouldIndexContent(root, file) {
|
|
16997
17041
|
return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
|
|
16998
17042
|
}
|
|
17043
|
+
/**
 * Collect the distinct 1- and 2-character grams found inside the word runs of
 * a file body.
 *
 * The body is lower-cased and split into runs of `[a-z0-9_$]`. Every single
 * character of a run, and every adjacent pair inside a run, becomes a gram;
 * pairs never span two runs. Grams are returned in first-seen order.
 *
 * The alphabet has 38 symbols, so there are at most 38 + 38 * 38 = 1482
 * distinct grams. The default cap of 2048 therefore never truncates; pass a
 * smaller `maxGrams` to bound the result.
 *
 * @param {string} body - File text to scan.
 * @param {number} [maxGrams=2048] - Upper bound on the number of grams
 *   returned. Values that are not greater than zero yield an empty array.
 * @returns {string[]} Distinct grams, at most `maxGrams` of them.
 */
function contentShortGrams(body, maxGrams = 2048) {
  const grams = new Set();
  // Also rejects NaN, which would otherwise disable the cap silently.
  if (!(maxGrams > 0)) {
    return [];
  }
  for (const [word] of body.toLowerCase().matchAll(/[a-z0-9_$]+/g)) {
    for (let i = 0; i < word.length; i++) {
      grams.add(word[i]);
      if (grams.size >= maxGrams) {
        return [...grams];
      }
      if (i + 1 < word.length) {
        grams.add(word.slice(i, i + 2));
        // Checked after every insertion so the cap is never overshot.
        if (grams.size >= maxGrams) {
          return [...grams];
        }
      }
    }
  }
  return [...grams];
}
|
|
16999
17058
|
function indexRoot(idOrPath, opts = {}, db) {
|
|
17000
17059
|
const d = db ?? getIndexDb();
|
|
17001
17060
|
const root = getRoot(idOrPath, d);
|
|
@@ -17014,6 +17073,8 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
17014
17073
|
const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
|
|
17015
17074
|
const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
|
|
17016
17075
|
const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
|
|
17076
|
+
const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
|
|
17077
|
+
const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
|
|
17017
17078
|
const stats = {
|
|
17018
17079
|
rootId: root.id,
|
|
17019
17080
|
added: 0,
|
|
@@ -17024,38 +17085,57 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
17024
17085
|
skippedDirs: skippedDirs.length,
|
|
17025
17086
|
durationMs: 0
|
|
17026
17087
|
};
|
|
17088
|
+
const seen = new Set;
|
|
17089
|
+
const changes = [];
|
|
17090
|
+
for (const file of scanned) {
|
|
17091
|
+
seen.add(file.relPath);
|
|
17092
|
+
const prev = existing.get(file.relPath);
|
|
17093
|
+
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
17094
|
+
if (prev && !changed && !opts.force)
|
|
17095
|
+
continue;
|
|
17096
|
+
const wantContent = shouldIndexContent(root, file);
|
|
17097
|
+
const absPath = `${root.path}/${file.relPath}`;
|
|
17098
|
+
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
17099
|
+
let body = null;
|
|
17100
|
+
if (wantContent && !isBinary) {
|
|
17101
|
+
try {
|
|
17102
|
+
body = readFileSync3(absPath, "utf-8");
|
|
17103
|
+
} catch {
|
|
17104
|
+
isBinary = true;
|
|
17105
|
+
}
|
|
17106
|
+
}
|
|
17107
|
+
changes.push({
|
|
17108
|
+
file,
|
|
17109
|
+
prev,
|
|
17110
|
+
isBinary,
|
|
17111
|
+
body,
|
|
17112
|
+
grams: body !== null ? contentShortGrams(body) : [],
|
|
17113
|
+
contentIndexed: body !== null ? 1 : 0
|
|
17114
|
+
});
|
|
17115
|
+
}
|
|
17027
17116
|
d.exec("BEGIN");
|
|
17028
17117
|
try {
|
|
17029
|
-
const
|
|
17030
|
-
for (const file of scanned) {
|
|
17031
|
-
seen.add(file.relPath);
|
|
17032
|
-
const prev = existing.get(file.relPath);
|
|
17033
|
-
const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
|
|
17034
|
-
if (prev && !changed && !opts.force)
|
|
17035
|
-
continue;
|
|
17036
|
-
const wantContent = shouldIndexContent(root, file);
|
|
17037
|
-
const absPath = `${root.path}/${file.relPath}`;
|
|
17038
|
-
let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
|
|
17039
|
-
let body = null;
|
|
17040
|
-
if (wantContent && !isBinary) {
|
|
17041
|
-
try {
|
|
17042
|
-
body = readFileSync3(absPath, "utf-8");
|
|
17043
|
-
} catch {
|
|
17044
|
-
isBinary = true;
|
|
17045
|
-
}
|
|
17046
|
-
}
|
|
17047
|
-
const contentIndexed = body !== null ? 1 : 0;
|
|
17118
|
+
for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
|
|
17048
17119
|
if (prev) {
|
|
17049
|
-
if (prev.content_indexed)
|
|
17120
|
+
if (prev.content_indexed) {
|
|
17050
17121
|
deleteContent.run(prev.id);
|
|
17122
|
+
deleteContentGrams.run(prev.id);
|
|
17123
|
+
}
|
|
17051
17124
|
updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
|
|
17052
|
-
if (body !== null)
|
|
17125
|
+
if (body !== null) {
|
|
17053
17126
|
insertContent.run(prev.id, body);
|
|
17127
|
+
for (const gram of grams)
|
|
17128
|
+
insertContentGram.run(prev.id, gram);
|
|
17129
|
+
}
|
|
17054
17130
|
stats.updated++;
|
|
17055
17131
|
} else {
|
|
17056
17132
|
const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
|
|
17057
|
-
if (body !== null)
|
|
17058
|
-
|
|
17133
|
+
if (body !== null) {
|
|
17134
|
+
const fileId = Number(inserted.lastInsertRowid);
|
|
17135
|
+
insertContent.run(fileId, body);
|
|
17136
|
+
for (const gram of grams)
|
|
17137
|
+
insertContentGram.run(fileId, gram);
|
|
17138
|
+
}
|
|
17059
17139
|
stats.added++;
|
|
17060
17140
|
}
|
|
17061
17141
|
if (contentIndexed)
|
|
@@ -17064,8 +17144,10 @@ function indexRoot(idOrPath, opts = {}, db) {
|
|
|
17064
17144
|
for (const [relPath, row] of existing) {
|
|
17065
17145
|
if (seen.has(relPath))
|
|
17066
17146
|
continue;
|
|
17067
|
-
if (row.content_indexed)
|
|
17147
|
+
if (row.content_indexed) {
|
|
17068
17148
|
deleteContent.run(row.id);
|
|
17149
|
+
deleteContentGrams.run(row.id);
|
|
17150
|
+
}
|
|
17069
17151
|
deleteFile.run(row.id);
|
|
17070
17152
|
stats.deleted++;
|
|
17071
17153
|
}
|
|
@@ -17087,6 +17169,9 @@ function indexAllRoots(opts = {}, db) {
|
|
|
17087
17169
|
return listRoots(db).map((root) => indexRoot(root.id, opts, db));
|
|
17088
17170
|
}
|
|
17089
17171
|
var refreshing = new Set;
|
|
17172
|
+
var lastDefaultAutoRefreshCheckAt = 0;
|
|
17173
|
+
var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
|
|
17174
|
+
var defaultRefreshScheduled = false;
|
|
17090
17175
|
function refreshStaleRoots(staleMinutes, db) {
|
|
17091
17176
|
const cutoff = Date.now() - staleMinutes * 60000;
|
|
17092
17177
|
const stats = [];
|
|
@@ -17110,8 +17195,43 @@ function autoRefreshStaleRoots(db) {
|
|
|
17110
17195
|
const config = getConfig();
|
|
17111
17196
|
if (!config.indexAutoRefresh)
|
|
17112
17197
|
return [];
|
|
17198
|
+
if (!db) {
|
|
17199
|
+
const now = Date.now();
|
|
17200
|
+
if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
|
|
17201
|
+
return [];
|
|
17202
|
+
lastDefaultAutoRefreshCheckAt = now;
|
|
17203
|
+
}
|
|
17113
17204
|
return refreshStaleRoots(config.indexStaleMinutes, db);
|
|
17114
17205
|
}
|
|
17206
|
+
/**
 * Trigger a stale-root refresh without blocking the caller on the default
 * database.
 *
 * When an explicit `db` is supplied the refresh runs synchronously and its
 * result is returned. Otherwise a single refresh is queued on a zero-delay
 * timer and an empty array is returned immediately; further calls are no-ops
 * until the queued run has finished.
 *
 * @param {object} [db] - Optional index database handle, passed through to
 *   `autoRefreshStaleRoots`.
 * @returns {Array} The refresh stats when `db` is given, otherwise `[]`.
 */
function scheduleAutoRefreshStaleRoots(db) {
  if (db) {
    return autoRefreshStaleRoots(db);
  }
  const config = getConfig();
  if (!config.indexAutoRefresh || defaultRefreshScheduled) {
    return [];
  }
  defaultRefreshScheduled = true;
  const timer = setTimeout(() => {
    try {
      autoRefreshStaleRoots();
    } catch (err) {
      // The refresh stays best-effort, but the failure is reported the same
      // way startBackgroundRefresh reports it instead of being dropped.
      console.error("Index refresh failed:", err);
    } finally {
      // Always release the guard so a later search can schedule again.
      defaultRefreshScheduled = false;
    }
  }, 0);
  // unref is optional-called because not every timer handle provides it.
  timer.unref?.();
  return [];
}
|
|
17223
|
+
/**
 * Start a repeating timer that refreshes stale index roots.
 *
 * The period is the configured `indexStaleMinutes`, with a floor of one
 * minute. A value that cannot be read as a number falls back to that floor,
 * and the delay is capped at the largest value a timer accepts. Without these
 * guards a `NaN` or overflowing delay is coerced by the runtime to 1 ms.
 *
 * Failures inside a tick are logged and do not stop the timer.
 *
 * @returns {object} The interval handle, so the caller can clear it.
 */
function startBackgroundRefresh() {
  // Largest delay (in ms) a timer accepts before it is reset to 1 ms.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const configured = Number(getConfig().indexStaleMinutes);
  const minutes = Number.isNaN(configured) ? 1 : Math.max(1, configured);
  const intervalMs = Math.min(minutes * 60000, MAX_TIMER_DELAY_MS);
  const timer = setInterval(() => {
    try {
      autoRefreshStaleRoots();
    } catch (err) {
      console.error("Index refresh failed:", err);
    }
  }, intervalMs);
  // unref is optional-called because not every timer handle provides it.
  timer.unref?.();
  return timer;
}
|
|
17115
17235
|
|
|
17116
17236
|
// src/lib/local/query.ts
|
|
17117
17237
|
import { existsSync as existsSync2, readFileSync as readFileSync4 } from "fs";
|
|
@@ -17338,6 +17458,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
|
|
|
17338
17458
|
// src/lib/local/query.ts
|
|
17339
17459
|
var MAX_LINE_LENGTH = 200;
|
|
17340
17460
|
var MAX_MATCHES_PER_FILE = 5;
|
|
17461
|
+
var MAX_PATH_CANDIDATES = 20000;
|
|
17462
|
+
var MAX_CONTENT_CANDIDATES = 50000;
|
|
17463
|
+
var MAX_REGEX_CANDIDATES = 50000;
|
|
17341
17464
|
function tokenize(query) {
|
|
17342
17465
|
return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
|
|
17343
17466
|
}
|
|
@@ -17368,11 +17491,40 @@ function filterClauses(opts, db) {
|
|
|
17368
17491
|
}
|
|
17369
17492
|
if (opts.dir) {
|
|
17370
17493
|
clauses.push("f.dir LIKE ? ESCAPE '\\'");
|
|
17371
|
-
const dir = opts.dir.replace(/^\/|\/$/g, "")
|
|
17494
|
+
const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
|
|
17372
17495
|
params.push(`%${dir}%`);
|
|
17373
17496
|
}
|
|
17374
17497
|
return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
|
|
17375
17498
|
}
|
|
17499
|
+
/**
 * Escape a string for literal use inside a SQL `LIKE` pattern whose escape
 * character is the backslash.
 *
 * @param {string} value - Raw text to match literally.
 * @returns {string} `value` with a backslash placed before every `\`, `%`
 *   and `_`.
 */
function escapeLike(value) {
  const likeMetaChars = /[\\%_]/g;
  return value.replace(likeMetaChars, (ch) => `\\${ch}`);
}
|
|
17502
|
+
/**
 * Build the SQL fragment and bound parameters that require every given token
 * to occur somewhere in `f.rel_path`.
 *
 * @param {string[]} tokens - Tokens to require; each is LIKE-escaped and
 *   wrapped in `%`.
 * @returns {{sql: string, params: string[]}} A fragment starting with
 *   ` AND `, or an empty fragment when there are no tokens.
 */
function shortTokenClauses(tokens) {
  const conditions = [];
  const params = [];
  for (const token of tokens) {
    conditions.push("f.rel_path LIKE ? ESCAPE '\\'");
    params.push(`%${escapeLike(token)}%`);
  }
  if (conditions.length === 0) {
    return { sql: "", params: [] };
  }
  return { sql: ` AND ${conditions.join(" AND ")}`, params };
}
|
|
17510
|
+
/**
 * Build a SQL pre-filter that narrows content candidates using the
 * `file_content_grams` table.
 *
 * Only tokens made of one or two characters from `[a-z0-9_$]` take part;
 * anything else is ignored here. Repeated tokens are collapsed so each gram
 * is checked once, which leaves the result set unchanged.
 *
 * For each gram a file passes when it either has that gram recorded or has
 * no gram rows at all. The second case keeps files without gram data in the
 * candidate set rather than dropping them.
 * NOTE(review): presumably those are files indexed before the gram table
 * existed — confirm.
 *
 * @param {string[]} tokens - Short query tokens; the character class only
 *   accepts lower-case letters.
 * @returns {{sql: string, params: string[]}} A fragment of ` AND (...)`
 *   groups with one bound parameter per distinct gram, or an empty fragment.
 */
function contentGramClauses(tokens) {
  const gramTokens = [
    ...new Set(tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token))),
  ];
  if (gramTokens.length === 0) {
    return { sql: "", params: [] };
  }
  // The index suffix gives every subquery its own table alias.
  const sql = gramTokens
    .map(
      (_token, index) => ` AND (
      NOT EXISTS (
        SELECT 1 FROM file_content_grams cg_any_${index}
        WHERE cg_any_${index}.file_id = f.id
      )
      OR EXISTS (
        SELECT 1 FROM file_content_grams cg_${index}
        WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
      )
    )`,
    )
    .join("");
  return { sql, params: gramTokens };
}
|
|
17376
17528
|
function rowToHit(row, score) {
|
|
17377
17529
|
return {
|
|
17378
17530
|
rootId: row.root_id,
|
|
@@ -17442,6 +17594,8 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
17442
17594
|
return [];
|
|
17443
17595
|
const ftsQuery = buildFtsQuery(query);
|
|
17444
17596
|
const filters = filterClauses(opts, d);
|
|
17597
|
+
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
17598
|
+
const shortFilters = shortTokenClauses(shortTokens);
|
|
17445
17599
|
const candidateLimit = Math.max(200, limit * 10);
|
|
17446
17600
|
let rows;
|
|
17447
17601
|
if (ftsQuery) {
|
|
@@ -17449,16 +17603,16 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
17449
17603
|
FROM files_fts fts
|
|
17450
17604
|
JOIN files f ON f.id = fts.rowid
|
|
17451
17605
|
JOIN index_roots r ON r.id = f.root_id
|
|
17452
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
17606
|
+
WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
|
|
17453
17607
|
ORDER BY bm25(files_fts, 10.0, 1.0)
|
|
17454
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
|
|
17455
|
-
const namePattern = `${query.trim()
|
|
17608
|
+
LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
17609
|
+
const namePattern = `${escapeLike(query.trim())}%`;
|
|
17456
17610
|
const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17457
17611
|
FROM files f
|
|
17458
17612
|
JOIN index_roots r ON r.id = f.root_id
|
|
17459
|
-
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
|
|
17613
|
+
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
|
|
17460
17614
|
ORDER BY length(f.name)
|
|
17461
|
-
LIMIT 100`).all(namePattern, ...filters.params);
|
|
17615
|
+
LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
|
|
17462
17616
|
const seen = new Set(rows.map((row) => row.id));
|
|
17463
17617
|
for (const row of nameRows) {
|
|
17464
17618
|
if (!seen.has(row.id))
|
|
@@ -17466,14 +17620,14 @@ function searchFilePaths(query, opts = {}, db) {
|
|
|
17466
17620
|
}
|
|
17467
17621
|
} else {
|
|
17468
17622
|
const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
|
|
17469
|
-
const likeParams = tokens.map((t) => `%${t
|
|
17623
|
+
const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
|
|
17470
17624
|
rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17471
17625
|
FROM files f
|
|
17472
17626
|
JOIN index_roots r ON r.id = f.root_id
|
|
17473
17627
|
WHERE ${likeClauses}${filters.sql}
|
|
17474
|
-
|
|
17628
|
+
ORDER BY length(f.name), length(f.rel_path), f.rel_path
|
|
17629
|
+
LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
|
|
17475
17630
|
}
|
|
17476
|
-
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
17477
17631
|
const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
|
|
17478
17632
|
return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
|
|
17479
17633
|
}
|
|
@@ -17511,24 +17665,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
|
|
|
17511
17665
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
|
|
17512
17666
|
}
|
|
17513
17667
|
const filters = filterClauses(opts, d);
|
|
17514
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17515
|
-
FROM files_fts fts
|
|
17516
|
-
JOIN files f ON f.id = fts.rowid
|
|
17517
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
17518
|
-
WHERE files_fts MATCH ?${filters.sql}
|
|
17519
|
-
ORDER BY fts.rank
|
|
17520
|
-
LIMIT 5000`).all(ftsQuery, ...filters.params);
|
|
17521
17668
|
const hits = [];
|
|
17522
|
-
|
|
17523
|
-
|
|
17524
|
-
|
|
17525
|
-
|
|
17526
|
-
|
|
17527
|
-
|
|
17528
|
-
|
|
17529
|
-
|
|
17530
|
-
|
|
17531
|
-
if (
|
|
17669
|
+
const pageSize = Math.max(500, limit * 20);
|
|
17670
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
17671
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17672
|
+
FROM files_fts fts
|
|
17673
|
+
JOIN files f ON f.id = fts.rowid
|
|
17674
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
17675
|
+
WHERE files_fts MATCH ?${filters.sql}
|
|
17676
|
+
ORDER BY fts.rank
|
|
17677
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
17678
|
+
if (rows.length === 0)
|
|
17679
|
+
break;
|
|
17680
|
+
for (const row of rows) {
|
|
17681
|
+
if (!regex.test(row.rel_path) && !regex.test(row.name))
|
|
17682
|
+
continue;
|
|
17683
|
+
const depth = row.rel_path.split("/").length - 1;
|
|
17684
|
+
const score = Math.max(0.05, 0.6 - depth * 0.02);
|
|
17685
|
+
const hit = rowToHit(row, score);
|
|
17686
|
+
if (!existsSync2(hit.absPath))
|
|
17687
|
+
continue;
|
|
17688
|
+
hits.push(hit);
|
|
17689
|
+
if (hits.length >= limit)
|
|
17690
|
+
break;
|
|
17691
|
+
}
|
|
17692
|
+
if (rows.length < pageSize)
|
|
17532
17693
|
break;
|
|
17533
17694
|
}
|
|
17534
17695
|
return hits;
|
|
@@ -17542,40 +17703,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
|
|
|
17542
17703
|
throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
|
|
17543
17704
|
}
|
|
17544
17705
|
const filters = filterClauses(opts, d);
|
|
17545
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17546
|
-
FROM file_content_fts fts
|
|
17547
|
-
JOIN files f ON f.id = fts.rowid
|
|
17548
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
17549
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
17550
|
-
ORDER BY fts.rank
|
|
17551
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
|
|
17552
17706
|
const hits = [];
|
|
17553
|
-
|
|
17554
|
-
|
|
17555
|
-
const
|
|
17556
|
-
|
|
17557
|
-
|
|
17558
|
-
|
|
17559
|
-
|
|
17560
|
-
|
|
17561
|
-
|
|
17562
|
-
|
|
17707
|
+
const pageSize = Math.max(200, limit * 10);
|
|
17708
|
+
for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
|
|
17709
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17710
|
+
FROM file_content_fts fts
|
|
17711
|
+
JOIN files f ON f.id = fts.rowid
|
|
17712
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
17713
|
+
WHERE file_content_fts MATCH ?${filters.sql}
|
|
17714
|
+
ORDER BY fts.rank
|
|
17715
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
|
|
17716
|
+
if (rows.length === 0)
|
|
17717
|
+
break;
|
|
17718
|
+
for (let i = 0;i < rows.length && hits.length < limit; i++) {
|
|
17719
|
+
const row = rows[i];
|
|
17720
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
17721
|
+
let content;
|
|
17722
|
+
try {
|
|
17723
|
+
content = readFileSync4(absPath, "utf-8");
|
|
17724
|
+
} catch {
|
|
17725
|
+
continue;
|
|
17726
|
+
}
|
|
17727
|
+
const lines = content.split(`
|
|
17563
17728
|
`);
|
|
17564
|
-
|
|
17565
|
-
|
|
17566
|
-
|
|
17567
|
-
|
|
17729
|
+
const matches = [];
|
|
17730
|
+
for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
|
|
17731
|
+
if (regex.test(lines[n])) {
|
|
17732
|
+
matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
|
|
17733
|
+
}
|
|
17568
17734
|
}
|
|
17735
|
+
if (matches.length === 0)
|
|
17736
|
+
continue;
|
|
17737
|
+
const rankIndex = offset + i;
|
|
17738
|
+
const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
|
|
17739
|
+
hits.push({
|
|
17740
|
+
...rowToHit(row, score),
|
|
17741
|
+
line: matches[0].line,
|
|
17742
|
+
lineText: matches[0].text,
|
|
17743
|
+
matches
|
|
17744
|
+
});
|
|
17569
17745
|
}
|
|
17570
|
-
if (
|
|
17571
|
-
|
|
17572
|
-
const score = Math.max(0.25, 0.65 - i * 0.05);
|
|
17573
|
-
hits.push({
|
|
17574
|
-
...rowToHit(row, score),
|
|
17575
|
-
line: matches[0].line,
|
|
17576
|
-
lineText: matches[0].text,
|
|
17577
|
-
matches
|
|
17578
|
-
});
|
|
17746
|
+
if (rows.length < pageSize)
|
|
17747
|
+
break;
|
|
17579
17748
|
}
|
|
17580
17749
|
return hits;
|
|
17581
17750
|
}
|
|
@@ -17586,42 +17755,51 @@ function searchFileContent(query, opts = {}, db) {
|
|
|
17586
17755
|
if (!ftsQuery)
|
|
17587
17756
|
return [];
|
|
17588
17757
|
const filters = filterClauses(opts, d);
|
|
17589
|
-
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17590
|
-
FROM file_content_fts fts
|
|
17591
|
-
JOIN files f ON f.id = fts.rowid
|
|
17592
|
-
JOIN index_roots r ON r.id = f.root_id
|
|
17593
|
-
WHERE file_content_fts MATCH ?${filters.sql}
|
|
17594
|
-
ORDER BY fts.rank
|
|
17595
|
-
LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
|
|
17596
17758
|
const tokens = tokenize(query);
|
|
17597
17759
|
const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
|
|
17760
|
+
const gramFilters = contentGramClauses(shortTokens);
|
|
17598
17761
|
const scored = [];
|
|
17599
|
-
|
|
17600
|
-
|
|
17601
|
-
const
|
|
17602
|
-
|
|
17603
|
-
|
|
17604
|
-
|
|
17605
|
-
|
|
17606
|
-
|
|
17607
|
-
|
|
17608
|
-
if (
|
|
17609
|
-
|
|
17610
|
-
|
|
17762
|
+
const pageSize = Math.max(50, limit * 3);
|
|
17763
|
+
for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
|
|
17764
|
+
const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
|
|
17765
|
+
FROM file_content_fts fts
|
|
17766
|
+
JOIN files f ON f.id = fts.rowid
|
|
17767
|
+
JOIN index_roots r ON r.id = f.root_id
|
|
17768
|
+
WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
|
|
17769
|
+
ORDER BY fts.rank
|
|
17770
|
+
LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
|
|
17771
|
+
if (rows.length === 0)
|
|
17772
|
+
break;
|
|
17773
|
+
for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
|
|
17774
|
+
const row = rows[i];
|
|
17775
|
+
const absPath = `${row.root_path}/${row.rel_path}`;
|
|
17776
|
+
let content;
|
|
17777
|
+
try {
|
|
17778
|
+
content = readFileSync4(absPath, "utf-8");
|
|
17779
|
+
} catch {
|
|
17611
17780
|
continue;
|
|
17781
|
+
}
|
|
17782
|
+
if (shortTokens.length > 0) {
|
|
17783
|
+
const lower = content.toLowerCase();
|
|
17784
|
+
if (!shortTokens.every((t) => lower.includes(t)))
|
|
17785
|
+
continue;
|
|
17786
|
+
}
|
|
17787
|
+
const { matches, tier } = findLineMatches(content, query, tokens);
|
|
17788
|
+
if (matches.length === 0)
|
|
17789
|
+
continue;
|
|
17790
|
+
const rankIndex = offset + i;
|
|
17791
|
+
const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
|
|
17792
|
+
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
17793
|
+
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
17794
|
+
scored.push({
|
|
17795
|
+
...rowToHit(row, score),
|
|
17796
|
+
line: matches[0].line,
|
|
17797
|
+
lineText: matches[0].text,
|
|
17798
|
+
matches
|
|
17799
|
+
});
|
|
17612
17800
|
}
|
|
17613
|
-
|
|
17614
|
-
|
|
17615
|
-
continue;
|
|
17616
|
-
const base = Math.max(0.25, 0.55 - i * 0.04);
|
|
17617
|
-
const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
|
|
17618
|
-
const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
|
|
17619
|
-
scored.push({
|
|
17620
|
-
...rowToHit(row, score),
|
|
17621
|
-
line: matches[0].line,
|
|
17622
|
-
lineText: matches[0].text,
|
|
17623
|
-
matches
|
|
17624
|
-
});
|
|
17801
|
+
if (rows.length < pageSize)
|
|
17802
|
+
break;
|
|
17625
17803
|
}
|
|
17626
17804
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
17627
17805
|
}
|
|
@@ -17634,7 +17812,7 @@ class FilesProvider {
|
|
|
17634
17812
|
return hasReadyRoot();
|
|
17635
17813
|
}
|
|
17636
17814
|
async search(query, options) {
|
|
17637
|
-
|
|
17815
|
+
scheduleAutoRefreshStaleRoots();
|
|
17638
17816
|
const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
|
|
17639
17817
|
return hits.map((hit) => ({
|
|
17640
17818
|
title: hit.name,
|
|
@@ -17662,7 +17840,7 @@ class ContentProvider {
|
|
|
17662
17840
|
return hasReadyRoot();
|
|
17663
17841
|
}
|
|
17664
17842
|
async search(query, options) {
|
|
17665
|
-
|
|
17843
|
+
scheduleAutoRefreshStaleRoots();
|
|
17666
17844
|
const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
|
|
17667
17845
|
return hits.map((hit) => ({
|
|
17668
17846
|
title: hit.name,
|
|
@@ -17789,7 +17967,7 @@ function rowToSearch(row) {
|
|
|
17789
17967
|
}
|
|
17790
17968
|
function createSearch(data, db) {
|
|
17791
17969
|
const d = db ?? getDb();
|
|
17792
|
-
const id = generateId();
|
|
17970
|
+
const id = data.id ?? generateId();
|
|
17793
17971
|
const now = new Date().toISOString();
|
|
17794
17972
|
d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
|
|
17795
17973
|
VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
|
|
@@ -17871,7 +18049,7 @@ function createResults(results, db) {
|
|
|
17871
18049
|
d.exec("BEGIN");
|
|
17872
18050
|
try {
|
|
17873
18051
|
for (const data of results) {
|
|
17874
|
-
const id = generateId();
|
|
18052
|
+
const id = data.id ?? generateId();
|
|
17875
18053
|
stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
|
|
17876
18054
|
created.push({
|
|
17877
18055
|
id,
|
|
@@ -18028,13 +18206,300 @@ function isProviderConfigured(provider) {
|
|
|
18028
18206
|
return !!Bun.env[provider.apiKeyEnv];
|
|
18029
18207
|
}
|
|
18030
18208
|
|
|
18209
|
+
// src/lib/router.ts
|
|
18210
|
+
// One-line description of what each search provider is best at, keyed by
// provider name.
var PROVIDER_DESCRIPTIONS = {
  // Local index
  "files": "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
  "content": "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",

  // General web
  "google": "General web search through SerpAPI. Best for broad web coverage and current public pages.",
  "serpapi": "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
  "exa": "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
  "perplexity": "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
  "brave": "General independent web search. Best for current web, news-like, product, and navigational queries.",
  "bing": "General web search. Best for current web and Microsoft/Bing-indexed pages.",

  // Communities, video and specialist sources
  "twitter": "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
  "reddit": "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
  "youtube": "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
  "hackernews": "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
  "github": "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
  "arxiv": "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
};
|
|
18226
|
+
/**
 * Normalise a requested provider count to a whole number between 1 and 5.
 *
 * @param {number} [value] - Requested maximum; fractions are rounded down.
 * @returns {number} The clamped count, or 3 when `value` is missing or is
 *   not a finite number.
 */
function clampMaxProviders(value) {
  // Number.isFinite is false for undefined, so no separate check is needed.
  if (!Number.isFinite(value)) {
    return 3;
  }
  const whole = Math.floor(value);
  if (whole < 1) {
    return 1;
  }
  return whole > 5 ? 5 : whole;
}
|
|
18231
|
+
/**
 * Normalise a confidence value to the range 0 to 1.
 *
 * @param {*} value - Candidate confidence.
 * @returns {number} `value` limited to the range 0 to 1, or 0.5 when it is
 *   not a finite number.
 */
function clampConfidence(value) {
  const usable = typeof value === "number" && Number.isFinite(value);
  if (!usable) {
    return 0.5;
  }
  return Math.min(1, Math.max(0, value));
}
|
|
18234
|
+
/**
 * Reduce a candidate list to known provider names, without repeats.
 *
 * @param {Iterable<string>} candidates - Requested provider names.
 * @returns {string[]} The names that appear in `PROVIDER_NAMES`, each once,
 *   in the order they first occur.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  // A Set keeps insertion order, so it de-duplicates and orders in one step.
  const kept = new Set();
  for (const name of candidates) {
    if (known.has(name)) {
      kept.add(name);
    }
  }
  return [...kept];
}
|
|
18246
|
+
/**
 * Adds `amount` to a provider's running score, but only for providers that
 * are part of the candidate set; other providers are left untouched.
 *
 * @param {Map<string, number>} scores - Running score per provider (mutated).
 * @param {Set<string>} candidateSet - Providers eligible for scoring.
 * @param {string} provider - Provider to boost.
 * @param {number} amount - Score to add.
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    const current = scores.get(provider);
    // A provider without an entry yet starts from zero.
    scores.set(provider, (current ?? 0) + amount);
  }
}
|
|
18251
|
+
/**
 * Reports whether at least one pattern matches the query.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns to try, in order.
 * @returns {boolean} True as soon as any pattern matches.
 */
function hasAny(query, patterns) {
  return patterns.some((pattern) => {
    // `test` on a global or sticky pattern resumes from `lastIndex`, so a
    // pattern reused across calls would otherwise give alternating answers.
    if (pattern.global || pattern.sticky) {
      pattern.lastIndex = 0;
    }
    return pattern.test(query);
  });
}
|
|
18254
|
+
/**
 * Picks search providers for a query using keyword/regex heuristics only.
 *
 * Every candidate starts at a small baseline score. Each rule whose patterns
 * match the lower-cased, trimmed query boosts its providers (boosts only land
 * on providers that are candidates) and records a reason. When no boost
 * landed on any candidate, the general-purpose fallback weights are applied.
 * That covers both "no rule matched" and "rules matched but none of their
 * providers is available", so the selection never silently degenerates to
 * plain candidate order.
 *
 * @param {string} query - User search query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number }} [options] - `maxProviders` is clamped by
 *   `clampMaxProviders`.
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 *   Providers ordered by descending score (ties keep candidate order), the
 *   normalized candidate list, the matched reasons joined with "; ", and a
 *   confidence of top score / 6 limited to 0.35..0.9 (0 when there are no
 *   candidates).
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);
  const maxProviders = clampMaxProviders(options.maxProviders);
  if (normalized.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }
  const baseline = 0.05;
  // Rules are evaluated in order; a query may trigger several of them.
  const rules = [
    {
      reason: "query looks local-file oriented",
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]]
    },
    {
      reason: "query contains code/content lookup signals",
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]]
    },
    {
      reason: "query asks for scholarly or research material",
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]]
    },
    {
      reason: "query asks for code or repository material",
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]]
    },
    {
      reason: "query asks for video material",
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]]
    },
    {
      reason: "query asks for community discussion",
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]]
    },
    {
      reason: "query asks for Hacker News style discussion",
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]]
    },
    {
      reason: "query asks for social posts",
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]]
    },
    {
      // NOTE(review): the year literals are hard-coded and will go stale;
      // consider deriving them from the current date.
      reason: "query appears time-sensitive",
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]]
    }
  ];
  const generalFallback = [
    ["exa", 2.5],
    ["perplexity", 2],
    ["brave", 1.5],
    ["google", 1.5],
    ["hackernews", 0.75]
  ];
  const candidateSet = new Set(normalized);
  const scores = new Map(normalized.map((name) => [name, baseline]));
  const reasons = [];
  const q = query.trim().toLowerCase();
  for (const rule of rules) {
    if (!hasAny(q, rule.patterns)) {
      continue;
    }
    for (const [provider, amount] of rule.boosts) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push(rule.reason);
  }
  // All boosts are positive, so any score above baseline means a rule
  // actually favoured an available provider.
  const anyBoosted = [...scores.values()].some((score) => score > baseline);
  if (!anyBoosted) {
    for (const [provider, amount] of generalFallback) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push("general query fallback");
  }
  // Precompute candidate positions so the tie-break is O(1) per comparison.
  const rank = new Map(normalized.map((name, index) => [name, index]));
  const selectedProviders = [...scores.entries()]
    .sort((a, b) => b[1] - a[1] || rank.get(a[0]) - rank.get(b[0]))
    .slice(0, Math.min(maxProviders, normalized.length))
    .map(([provider]) => provider);
  const topScore = scores.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: normalized,
    reason: reasons.join("; "),
    confidence
  };
}
|
|
18344
|
+
/**
 * Builds the JSON Schema describing the router model's expected reply.
 *
 * @param {string[]} candidates - Provider names the model may choose from;
 *   used as-is for the `enum` of the selected items.
 * @param {number} maxProviders - Upper bound on the number of selections.
 * @returns {object} Object schema requiring `selectedProviders` (1 to
 *   `maxProviders` candidate names), `reason` (string) and `confidence`
 *   (number in 0..1), with no additional properties.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const properties = {
    selectedProviders,
    reason: { type: "string" },
    confidence: { type: "number", minimum: 0, maximum: 1 }
  };
  return {
    type: "object",
    properties,
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
|
|
18361
|
+
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {Iterable<string>} candidates - Provider names that may be selected.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The sanitized selection, or null when `raw` is not valid JSON, is not an
 *   object with a `selectedProviders` array, or names no known candidate.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse can legitimately yield null or a primitive ("null", "42");
  // treat those as an invalid reply instead of dereferencing them.
  if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.selectedProviders))
    return null;
  const candidateSet = new Set(candidates);
  // De-duplicate first so a repeated name neither wastes a slot nor causes
  // the same provider to be queried twice.
  const selectedProviders = [...new Set(parsed.selectedProviders)]
    .filter((provider) => typeof provider === "string" && candidateSet.has(provider))
    .slice(0, maxProviders);
  if (selectedProviders.length === 0)
    return null;
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
|
|
18380
|
+
/**
 * Asks the Cerebras chat-completions API to choose providers for a query.
 *
 * When `CEREBRAS_API_KEY` is not set, no request is made and the heuristic
 * result is returned with an explanatory `error` field.
 *
 * @param {string} query - User search query.
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options
 *   Selection limit, request timeout in milliseconds, and model id.
 * @returns {Promise<object>} A routing result with `strategy: "cerebras"`, or
 *   the heuristic result plus `error` when no API key is configured.
 * @throws {Error} When the HTTP response is not OK, carries no message
 *   content, or the content is not a valid provider selection.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }
  // Give the model each candidate together with its human-readable blurb.
  const providerGuide = candidates.map((name) => {
    const description = PROVIDER_DESCRIPTIONS[name];
    return { name, description };
  });
  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      temperature: 0,
      messages: [
        systemMessage,
        {
          role: "user",
          content: JSON.stringify({
            query,
            maxProviders: options.maxProviders,
            providers: providerGuide
          })
        }
      ],
      response_format: {
        type: "json_schema",
        json_schema: {
          name: "search_router",
          strict: true,
          schema: routerSchema(candidates, options.maxProviders)
        }
      }
    })
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }
  const selection = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!selection) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...selection
  };
}
|
|
18442
|
+
/**
 * Chooses which providers to query, preferring the Cerebras router and
 * falling back to the heuristic router when it fails.
 *
 * @param {string} query - User search query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options]
 *   `maxProviders` is clamped and capped at the candidate count; `timeoutMs`
 *   defaults to 1200 and is raised to at least 250; `model` defaults to
 *   `CEREBRAS_MODEL` from the environment, then "gpt-oss-120b".
 * @returns {Promise<object>} The routing result. When the Cerebras call
 *   throws, the heuristic result is returned with the error message in `error`.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);
  const requestedMax = clampMaxProviders(options.maxProviders);
  const maxProviders = Math.min(requestedMax, Math.max(1, normalized.length));
  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }
  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";
  const heuristic = () => routeSearchProvidersHeuristic(query, normalized, { maxProviders });
  // Nothing to route: skip the remote call entirely.
  if (normalized.length === 0) {
    return heuristic();
  }
  try {
    return await routeWithCerebras(query, normalized, { maxProviders, timeoutMs, model });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return {
      ...heuristic(),
      error: message
    };
  }
}
|
|
18459
|
+
|
|
18031
18460
|
// src/lib/search.ts
|
|
18461
|
+
/**
 * Races a promise against a timer and rejects if the timer wins.
 *
 * The underlying operation is not cancelled when the timeout fires; only the
 * returned promise rejects.
 *
 * @param {Promise<T>} promise - Operation to wait for.
 * @param {number} timeoutMs - Timeout in milliseconds. Non-finite or
 *   non-positive values disable the timeout.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The settled value of `promise`.
 * @throws {Error} `"<label> timed out after <timeoutMs>ms"` when the timer
 *   fires first; otherwise whatever `promise` rejects with.
 * @template T
 */
async function withTimeout(promise, timeoutMs, label) {
  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0)
    return promise;
  // setTimeout stores the delay as a signed 32-bit integer; in Node.js a
  // larger delay is replaced by 1ms, which would make a very generous
  // timeout fire almost immediately. Clamp to the largest supported delay.
  const delay = Math.min(timeoutMs, 2147483647);
  let timer;
  try {
    return await Promise.race([
      promise,
      new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error(`${label} timed out after ${timeoutMs}ms`)), delay);
        // Do not let a pending timeout keep the process alive on its own.
        timer.unref?.();
      })
    ]);
  } finally {
    if (timer)
      clearTimeout(timer);
  }
}
|
|
18478
|
+
/**
 * Runs `task` over every item with a cap on concurrent executions and
 * reports each outcome in the shape used by `Promise.allSettled`.
 *
 * @param {T[]} items - Inputs, processed in index order.
 * @param {number} concurrency - Maximum tasks in flight. Values below 1 are
 *   treated as 1; a value that is not a number runs all items concurrently.
 * @param {(item: T) => Promise<R> | R} task - Work to perform per item.
 * @returns {Promise<Array<{ status: "fulfilled", value: R } | { status: "rejected", reason: unknown }>>}
 *   One entry per item, at the same index as its input. Never rejects because
 *   of a failing task.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }
  const requested = Math.floor(concurrency);
  // A missing or non-numeric limit would otherwise yield NaN workers: nothing
  // would run and callers would receive an array of holes. Fall back to
  // unbounded concurrency in that case.
  const limit = Number.isNaN(requested) ? items.length : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
18032
18496
|
async function unifiedSearch(query, opts = {}) {
|
|
18033
18497
|
const config = getConfig();
|
|
18034
18498
|
const startTime = Date.now();
|
|
18035
18499
|
const db = opts.db;
|
|
18036
18500
|
let providerNames = opts.providers ?? [];
|
|
18037
|
-
|
|
18501
|
+
const smartProfile = opts.profile === "smart";
|
|
18502
|
+
if (opts.profile && !smartProfile) {
|
|
18038
18503
|
const profile = getProfileByName(opts.profile, db);
|
|
18039
18504
|
if (profile) {
|
|
18040
18505
|
providerNames = profile.providers;
|
|
@@ -18050,7 +18515,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18050
18515
|
}
|
|
18051
18516
|
const errors2 = [];
|
|
18052
18517
|
const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
|
|
18053
|
-
|
|
18518
|
+
let activeProviders = providerNames.filter((name) => {
|
|
18054
18519
|
try {
|
|
18055
18520
|
if (getProvider(name).isConfigured())
|
|
18056
18521
|
return true;
|
|
@@ -18061,20 +18526,36 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18061
18526
|
});
|
|
18062
18527
|
}
|
|
18063
18528
|
return false;
|
|
18064
|
-
} catch {
|
|
18529
|
+
} catch (err) {
|
|
18530
|
+
if (explicitRequest) {
|
|
18531
|
+
errors2.push({
|
|
18532
|
+
provider: name,
|
|
18533
|
+
error: err instanceof Error ? err.message : "unknown provider"
|
|
18534
|
+
});
|
|
18535
|
+
}
|
|
18065
18536
|
return false;
|
|
18066
18537
|
}
|
|
18067
18538
|
});
|
|
18539
|
+
const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
|
|
18540
|
+
let routing;
|
|
18541
|
+
if (routingRequested && activeProviders.length > 0) {
|
|
18542
|
+
routing = await routeSearchProviders(query, activeProviders, {
|
|
18543
|
+
maxProviders: config.router.maxProviders,
|
|
18544
|
+
timeoutMs: config.router.timeoutMs,
|
|
18545
|
+
model: config.router.model
|
|
18546
|
+
});
|
|
18547
|
+
activeProviders = routing.selectedProviders;
|
|
18548
|
+
}
|
|
18068
18549
|
const searchOptions = {
|
|
18069
18550
|
limit: config.defaultLimit,
|
|
18070
18551
|
...opts.options
|
|
18071
18552
|
};
|
|
18072
|
-
const results = await
|
|
18553
|
+
const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
|
|
18073
18554
|
const provider = getProvider(name);
|
|
18074
|
-
const rawResults = await provider.search(query, searchOptions);
|
|
18555
|
+
const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
|
|
18075
18556
|
updateProviderLastUsed(name, db);
|
|
18076
18557
|
return { name, results: rawResults };
|
|
18077
|
-
})
|
|
18558
|
+
});
|
|
18078
18559
|
const allResults = [];
|
|
18079
18560
|
const searchId = generateId();
|
|
18080
18561
|
for (const result of results) {
|
|
@@ -18128,11 +18609,13 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18128
18609
|
createdAt: new Date().toISOString()
|
|
18129
18610
|
},
|
|
18130
18611
|
results: finalResults,
|
|
18131
|
-
errors: errors2
|
|
18612
|
+
errors: errors2,
|
|
18613
|
+
...routing && { routing }
|
|
18132
18614
|
};
|
|
18133
18615
|
}
|
|
18134
18616
|
const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
|
|
18135
18617
|
const search = createSearch({
|
|
18618
|
+
id: searchId,
|
|
18136
18619
|
query,
|
|
18137
18620
|
providers: activeProviders,
|
|
18138
18621
|
resultCount: persistable.length,
|
|
@@ -18141,6 +18624,7 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18141
18624
|
if (persistable.length > 0) {
|
|
18142
18625
|
createResults(persistable.map((r) => ({
|
|
18143
18626
|
searchId: search.id,
|
|
18627
|
+
id: r.id,
|
|
18144
18628
|
title: r.title,
|
|
18145
18629
|
url: r.url,
|
|
18146
18630
|
snippet: r.snippet,
|
|
@@ -18157,7 +18641,8 @@ async function unifiedSearch(query, opts = {}) {
|
|
|
18157
18641
|
return {
|
|
18158
18642
|
search: { ...search, resultCount: finalResults.length, duration },
|
|
18159
18643
|
results: finalResults,
|
|
18160
|
-
errors: errors2
|
|
18644
|
+
errors: errors2,
|
|
18645
|
+
...routing && { routing }
|
|
18161
18646
|
};
|
|
18162
18647
|
}
|
|
18163
18648
|
async function searchSingleProvider(provider, query, options, db) {
|
|
@@ -18391,7 +18876,7 @@ function findLocal(query, opts = {}, db) {
|
|
|
18391
18876
|
return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
|
|
18392
18877
|
}
|
|
18393
18878
|
if (opts.refresh !== false)
|
|
18394
|
-
|
|
18879
|
+
scheduleAutoRefreshStaleRoots(db);
|
|
18395
18880
|
const queryOpts = {
|
|
18396
18881
|
root: opts.root,
|
|
18397
18882
|
ext: opts.ext,
|
|
@@ -28631,13 +29116,15 @@ function buildServer() {
|
|
|
28631
29116
|
providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
|
|
28632
29117
|
profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
|
|
28633
29118
|
limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
|
|
28634
|
-
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
|
|
28635
|
-
|
|
29119
|
+
dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
|
|
29120
|
+
smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
|
|
29121
|
+
}, async ({ query, providers, profile, limit, dedup, smart }) => {
|
|
28636
29122
|
const response = await unifiedSearch(query, {
|
|
28637
29123
|
providers,
|
|
28638
29124
|
profile,
|
|
28639
29125
|
options: limit ? { limit } : undefined,
|
|
28640
|
-
dedup
|
|
29126
|
+
dedup,
|
|
29127
|
+
smart
|
|
28641
29128
|
});
|
|
28642
29129
|
return {
|
|
28643
29130
|
content: [
|
|
@@ -28656,7 +29143,8 @@ function buildServer() {
|
|
|
28656
29143
|
source: r.source,
|
|
28657
29144
|
score: r.score
|
|
28658
29145
|
})),
|
|
28659
|
-
errors: response.errors
|
|
29146
|
+
errors: response.errors,
|
|
29147
|
+
routing: response.routing
|
|
28660
29148
|
}, null, 2)
|
|
28661
29149
|
}
|
|
28662
29150
|
]
|
|
@@ -28911,12 +29399,14 @@ function buildServer() {
|
|
|
28911
29399
|
default_limit: exports_external.number().int().optional(),
|
|
28912
29400
|
dedup: exports_external.boolean().optional(),
|
|
28913
29401
|
max_concurrent: exports_external.number().int().optional(),
|
|
29402
|
+
provider_timeout_ms: exports_external.number().int().optional(),
|
|
28914
29403
|
default_profile: exports_external.string().nullable().optional()
|
|
28915
29404
|
}, async (updates) => {
|
|
28916
29405
|
const config2 = setConfig({
|
|
28917
29406
|
...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
|
|
28918
29407
|
...updates.dedup !== undefined && { dedup: updates.dedup },
|
|
28919
29408
|
...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
|
|
29409
|
+
...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
|
|
28920
29410
|
...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
|
|
28921
29411
|
});
|
|
28922
29412
|
return {
|
|
@@ -29059,10 +29549,12 @@ function startServer(port) {
|
|
|
29059
29549
|
const providers = url.searchParams.get("providers")?.split(",");
|
|
29060
29550
|
const profile = url.searchParams.get("profile") ?? undefined;
|
|
29061
29551
|
const limit = url.searchParams.get("limit") ? parseInt(url.searchParams.get("limit")) : undefined;
|
|
29552
|
+
const smart = url.searchParams.get("smart") === "1" || url.searchParams.get("smart") === "true";
|
|
29062
29553
|
const response = await unifiedSearch(q, {
|
|
29063
29554
|
providers,
|
|
29064
29555
|
profile,
|
|
29065
|
-
options: limit ? { limit } : undefined
|
|
29556
|
+
options: limit ? { limit } : undefined,
|
|
29557
|
+
smart
|
|
29066
29558
|
});
|
|
29067
29559
|
return json(response);
|
|
29068
29560
|
}
|
|
@@ -29284,14 +29776,7 @@ function startServer(port) {
|
|
|
29284
29776
|
}
|
|
29285
29777
|
}
|
|
29286
29778
|
});
|
|
29287
|
-
|
|
29288
|
-
setInterval(() => {
|
|
29289
|
-
try {
|
|
29290
|
-
autoRefreshStaleRoots();
|
|
29291
|
-
} catch (err2) {
|
|
29292
|
-
console.error("Index refresh failed:", err2);
|
|
29293
|
-
}
|
|
29294
|
-
}, refreshMinutes * 60000).unref?.();
|
|
29779
|
+
startBackgroundRefresh();
|
|
29295
29780
|
console.log(`open-search server running at http://localhost:${port}`);
|
|
29296
29781
|
}
|
|
29297
29782
|
|