@hasna/search 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mcp/index.js CHANGED
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
11644
11644
  var require_package = __commonJS((exports, module) => {
11645
11645
  module.exports = {
11646
11646
  name: "@hasna/search",
11647
- version: "0.0.9",
11647
+ version: "0.0.11",
11648
11648
  description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
11649
11649
  type: "module",
11650
11650
  main: "dist/index.js",
@@ -24742,12 +24742,19 @@ var DEFAULT_CONFIG = {
24742
24742
  defaultLimit: 10,
24743
24743
  defaultProviders: [],
24744
24744
  defaultProfile: null,
24745
+ router: {
24746
+ enabled: false,
24747
+ model: "gpt-oss-120b",
24748
+ maxProviders: 3,
24749
+ timeoutMs: 1200
24750
+ },
24745
24751
  transcriber: {
24746
24752
  baseUrl: "http://localhost:19600",
24747
24753
  fallbackCli: "microservice-transcriber"
24748
24754
  },
24749
24755
  dedup: true,
24750
24756
  maxConcurrent: 5,
24757
+ providerTimeoutMs: 15000,
24751
24758
  indexStaleMinutes: 5,
24752
24759
  indexAutoRefresh: true,
24753
24760
  recordLocalResults: false
@@ -25482,6 +25489,31 @@ var migrations = [
25482
25489
  );
25483
25490
  `);
25484
25491
  }
25492
+ },
25493
+ {
25494
+ version: 2,
25495
+ description: "Local file index filter indexes",
25496
+ up: (db) => {
25497
+ db.exec(`
25498
+ CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
25499
+ CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
25500
+ `);
25501
+ }
25502
+ },
25503
+ {
25504
+ version: 3,
25505
+ description: "Local content short-token filter grams",
25506
+ up: (db) => {
25507
+ db.exec(`
25508
+ CREATE TABLE IF NOT EXISTS file_content_grams (
25509
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
25510
+ gram TEXT NOT NULL,
25511
+ PRIMARY KEY (file_id, gram)
25512
+ );
25513
+ CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
25514
+ ON file_content_grams(gram, file_id);
25515
+ `);
25516
+ }
25485
25517
  }
25486
25518
  ];
25487
25519
  function runIndexMigrations(db) {
@@ -25553,7 +25585,18 @@ function getConfig() {
25553
25585
  try {
25554
25586
  const raw = readFileSync(path, "utf-8");
25555
25587
  const parsed = JSON.parse(raw);
25556
- return { ...DEFAULT_CONFIG, ...parsed };
25588
+ return {
25589
+ ...DEFAULT_CONFIG,
25590
+ ...parsed,
25591
+ router: {
25592
+ ...DEFAULT_CONFIG.router,
25593
+ ...parsed.router ?? {}
25594
+ },
25595
+ transcriber: {
25596
+ ...DEFAULT_CONFIG.transcriber,
25597
+ ...parsed.transcriber ?? {}
25598
+ }
25599
+ };
25557
25600
  } catch {
25558
25601
  return { ...DEFAULT_CONFIG };
25559
25602
  }
@@ -25971,6 +26014,7 @@ function removeRoot(idOrPath, db) {
25971
26014
  d.exec("BEGIN");
25972
26015
  try {
25973
26016
  d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
26017
+ d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
25974
26018
  d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
25975
26019
  d.exec("COMMIT");
25976
26020
  } catch (err) {
@@ -25982,6 +26026,21 @@ function removeRoot(idOrPath, db) {
25982
26026
  function shouldIndexContent(root, file) {
25983
26027
  return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
25984
26028
  }
26029
+ function contentShortGrams(body) {
26030
+ const grams = new Set;
26031
+ const words = body.toLowerCase().matchAll(/[a-z0-9_$]+/g);
26032
+ for (const match of words) {
26033
+ const word = match[0];
26034
+ for (let i = 0;i < word.length; i++) {
26035
+ grams.add(word[i]);
26036
+ if (i + 1 < word.length)
26037
+ grams.add(word.slice(i, i + 2));
26038
+ }
26039
+ if (grams.size >= 2048)
26040
+ break;
26041
+ }
26042
+ return [...grams];
26043
+ }
25985
26044
  function indexRoot(idOrPath, opts = {}, db) {
25986
26045
  const d = db ?? getIndexDb();
25987
26046
  const root = getRoot(idOrPath, d);
@@ -26000,6 +26059,8 @@ function indexRoot(idOrPath, opts = {}, db) {
26000
26059
  const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
26001
26060
  const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
26002
26061
  const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
26062
+ const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
26063
+ const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
26003
26064
  const stats = {
26004
26065
  rootId: root.id,
26005
26066
  added: 0,
@@ -26010,38 +26071,57 @@ function indexRoot(idOrPath, opts = {}, db) {
26010
26071
  skippedDirs: skippedDirs.length,
26011
26072
  durationMs: 0
26012
26073
  };
26074
+ const seen = new Set;
26075
+ const changes = [];
26076
+ for (const file of scanned) {
26077
+ seen.add(file.relPath);
26078
+ const prev = existing.get(file.relPath);
26079
+ const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
26080
+ if (prev && !changed && !opts.force)
26081
+ continue;
26082
+ const wantContent = shouldIndexContent(root, file);
26083
+ const absPath = `${root.path}/${file.relPath}`;
26084
+ let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
26085
+ let body = null;
26086
+ if (wantContent && !isBinary) {
26087
+ try {
26088
+ body = readFileSync3(absPath, "utf-8");
26089
+ } catch {
26090
+ isBinary = true;
26091
+ }
26092
+ }
26093
+ changes.push({
26094
+ file,
26095
+ prev,
26096
+ isBinary,
26097
+ body,
26098
+ grams: body !== null ? contentShortGrams(body) : [],
26099
+ contentIndexed: body !== null ? 1 : 0
26100
+ });
26101
+ }
26013
26102
  d.exec("BEGIN");
26014
26103
  try {
26015
- const seen = new Set;
26016
- for (const file of scanned) {
26017
- seen.add(file.relPath);
26018
- const prev = existing.get(file.relPath);
26019
- const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
26020
- if (prev && !changed && !opts.force)
26021
- continue;
26022
- const wantContent = shouldIndexContent(root, file);
26023
- const absPath = `${root.path}/${file.relPath}`;
26024
- let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
26025
- let body = null;
26026
- if (wantContent && !isBinary) {
26027
- try {
26028
- body = readFileSync3(absPath, "utf-8");
26029
- } catch {
26030
- isBinary = true;
26031
- }
26032
- }
26033
- const contentIndexed = body !== null ? 1 : 0;
26104
+ for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
26034
26105
  if (prev) {
26035
- if (prev.content_indexed)
26106
+ if (prev.content_indexed) {
26036
26107
  deleteContent.run(prev.id);
26108
+ deleteContentGrams.run(prev.id);
26109
+ }
26037
26110
  updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
26038
- if (body !== null)
26111
+ if (body !== null) {
26039
26112
  insertContent.run(prev.id, body);
26113
+ for (const gram of grams)
26114
+ insertContentGram.run(prev.id, gram);
26115
+ }
26040
26116
  stats.updated++;
26041
26117
  } else {
26042
26118
  const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
26043
- if (body !== null)
26044
- insertContent.run(Number(inserted.lastInsertRowid), body);
26119
+ if (body !== null) {
26120
+ const fileId = Number(inserted.lastInsertRowid);
26121
+ insertContent.run(fileId, body);
26122
+ for (const gram of grams)
26123
+ insertContentGram.run(fileId, gram);
26124
+ }
26045
26125
  stats.added++;
26046
26126
  }
26047
26127
  if (contentIndexed)
@@ -26050,8 +26130,10 @@ function indexRoot(idOrPath, opts = {}, db) {
26050
26130
  for (const [relPath, row] of existing) {
26051
26131
  if (seen.has(relPath))
26052
26132
  continue;
26053
- if (row.content_indexed)
26133
+ if (row.content_indexed) {
26054
26134
  deleteContent.run(row.id);
26135
+ deleteContentGrams.run(row.id);
26136
+ }
26055
26137
  deleteFile.run(row.id);
26056
26138
  stats.deleted++;
26057
26139
  }
@@ -26073,6 +26155,9 @@ function indexAllRoots(opts = {}, db) {
26073
26155
  return listRoots(db).map((root) => indexRoot(root.id, opts, db));
26074
26156
  }
26075
26157
  var refreshing = new Set;
26158
+ var lastDefaultAutoRefreshCheckAt = 0;
26159
+ var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
26160
+ var defaultRefreshScheduled = false;
26076
26161
  function refreshStaleRoots(staleMinutes, db) {
26077
26162
  const cutoff = Date.now() - staleMinutes * 60000;
26078
26163
  const stats = [];
@@ -26096,8 +26181,43 @@ function autoRefreshStaleRoots(db) {
26096
26181
  const config2 = getConfig();
26097
26182
  if (!config2.indexAutoRefresh)
26098
26183
  return [];
26184
+ if (!db) {
26185
+ const now = Date.now();
26186
+ if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
26187
+ return [];
26188
+ lastDefaultAutoRefreshCheckAt = now;
26189
+ }
26099
26190
  return refreshStaleRoots(config2.indexStaleMinutes, db);
26100
26191
  }
26192
+ function scheduleAutoRefreshStaleRoots(db) {
26193
+ if (db)
26194
+ return autoRefreshStaleRoots(db);
26195
+ const config2 = getConfig();
26196
+ if (!config2.indexAutoRefresh || defaultRefreshScheduled)
26197
+ return [];
26198
+ defaultRefreshScheduled = true;
26199
+ const timer = setTimeout(() => {
26200
+ try {
26201
+ autoRefreshStaleRoots();
26202
+ } catch {} finally {
26203
+ defaultRefreshScheduled = false;
26204
+ }
26205
+ }, 0);
26206
+ timer.unref?.();
26207
+ return [];
26208
+ }
26209
+ function startBackgroundRefresh() {
26210
+ const minutes = Math.max(1, getConfig().indexStaleMinutes);
26211
+ const timer = setInterval(() => {
26212
+ try {
26213
+ autoRefreshStaleRoots();
26214
+ } catch (err) {
26215
+ console.error("Index refresh failed:", err);
26216
+ }
26217
+ }, minutes * 60000);
26218
+ timer.unref?.();
26219
+ return timer;
26220
+ }
26101
26221
 
26102
26222
  // src/lib/local/query.ts
26103
26223
  import { existsSync as existsSync2, readFileSync as readFileSync4 } from "fs";
@@ -26324,6 +26444,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
26324
26444
  // src/lib/local/query.ts
26325
26445
  var MAX_LINE_LENGTH = 200;
26326
26446
  var MAX_MATCHES_PER_FILE = 5;
26447
+ var MAX_PATH_CANDIDATES = 20000;
26448
+ var MAX_CONTENT_CANDIDATES = 50000;
26449
+ var MAX_REGEX_CANDIDATES = 50000;
26327
26450
  function tokenize(query) {
26328
26451
  return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
26329
26452
  }
@@ -26354,11 +26477,40 @@ function filterClauses(opts, db) {
26354
26477
  }
26355
26478
  if (opts.dir) {
26356
26479
  clauses.push("f.dir LIKE ? ESCAPE '\\'");
26357
- const dir = opts.dir.replace(/^\/|\/$/g, "").replace(/[\\%_]/g, "\\$&");
26480
+ const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
26358
26481
  params.push(`%${dir}%`);
26359
26482
  }
26360
26483
  return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
26361
26484
  }
26485
+ function escapeLike(value) {
26486
+ return value.replace(/[\\%_]/g, "\\$&");
26487
+ }
26488
+ function shortTokenClauses(tokens) {
26489
+ if (tokens.length === 0)
26490
+ return { sql: "", params: [] };
26491
+ return {
26492
+ sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
26493
+ params: tokens.map((token) => `%${escapeLike(token)}%`)
26494
+ };
26495
+ }
26496
+ function contentGramClauses(tokens) {
26497
+ const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
26498
+ if (gramTokens.length === 0)
26499
+ return { sql: "", params: [] };
26500
+ return {
26501
+ sql: gramTokens.map((_token, index) => ` AND (
26502
+ NOT EXISTS (
26503
+ SELECT 1 FROM file_content_grams cg_any_${index}
26504
+ WHERE cg_any_${index}.file_id = f.id
26505
+ )
26506
+ OR EXISTS (
26507
+ SELECT 1 FROM file_content_grams cg_${index}
26508
+ WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
26509
+ )
26510
+ )`).join(""),
26511
+ params: gramTokens
26512
+ };
26513
+ }
26362
26514
  function rowToHit(row, score) {
26363
26515
  return {
26364
26516
  rootId: row.root_id,
@@ -26428,6 +26580,8 @@ function searchFilePaths(query, opts = {}, db) {
26428
26580
  return [];
26429
26581
  const ftsQuery = buildFtsQuery(query);
26430
26582
  const filters = filterClauses(opts, d);
26583
+ const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
26584
+ const shortFilters = shortTokenClauses(shortTokens);
26431
26585
  const candidateLimit = Math.max(200, limit * 10);
26432
26586
  let rows;
26433
26587
  if (ftsQuery) {
@@ -26435,16 +26589,16 @@ function searchFilePaths(query, opts = {}, db) {
26435
26589
  FROM files_fts fts
26436
26590
  JOIN files f ON f.id = fts.rowid
26437
26591
  JOIN index_roots r ON r.id = f.root_id
26438
- WHERE files_fts MATCH ?${filters.sql}
26592
+ WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
26439
26593
  ORDER BY bm25(files_fts, 10.0, 1.0)
26440
- LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
26441
- const namePattern = `${query.trim().replace(/[\\%_]/g, "\\$&")}%`;
26594
+ LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
26595
+ const namePattern = `${escapeLike(query.trim())}%`;
26442
26596
  const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26443
26597
  FROM files f
26444
26598
  JOIN index_roots r ON r.id = f.root_id
26445
- WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
26599
+ WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
26446
26600
  ORDER BY length(f.name)
26447
- LIMIT 100`).all(namePattern, ...filters.params);
26601
+ LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
26448
26602
  const seen = new Set(rows.map((row) => row.id));
26449
26603
  for (const row of nameRows) {
26450
26604
  if (!seen.has(row.id))
@@ -26452,14 +26606,14 @@ function searchFilePaths(query, opts = {}, db) {
26452
26606
  }
26453
26607
  } else {
26454
26608
  const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
26455
- const likeParams = tokens.map((t) => `%${t.replace(/[\\%_]/g, "\\$&")}%`);
26609
+ const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
26456
26610
  rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26457
26611
  FROM files f
26458
26612
  JOIN index_roots r ON r.id = f.root_id
26459
26613
  WHERE ${likeClauses}${filters.sql}
26460
- LIMIT ?`).all(...likeParams, ...filters.params, candidateLimit);
26614
+ ORDER BY length(f.name), length(f.rel_path), f.rel_path
26615
+ LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
26461
26616
  }
26462
- const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
26463
26617
  const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
26464
26618
  return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
26465
26619
  }
@@ -26497,24 +26651,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
26497
26651
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
26498
26652
  }
26499
26653
  const filters = filterClauses(opts, d);
26500
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26501
- FROM files_fts fts
26502
- JOIN files f ON f.id = fts.rowid
26503
- JOIN index_roots r ON r.id = f.root_id
26504
- WHERE files_fts MATCH ?${filters.sql}
26505
- ORDER BY fts.rank
26506
- LIMIT 5000`).all(ftsQuery, ...filters.params);
26507
26654
  const hits = [];
26508
- for (const row of rows) {
26509
- if (!regex.test(row.rel_path) && !regex.test(row.name))
26510
- continue;
26511
- const depth = row.rel_path.split("/").length - 1;
26512
- const score = Math.max(0.05, 0.6 - depth * 0.02);
26513
- const hit = rowToHit(row, score);
26514
- if (!existsSync2(hit.absPath))
26515
- continue;
26516
- hits.push(hit);
26517
- if (hits.length >= limit)
26655
+ const pageSize = Math.max(500, limit * 20);
26656
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
26657
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26658
+ FROM files_fts fts
26659
+ JOIN files f ON f.id = fts.rowid
26660
+ JOIN index_roots r ON r.id = f.root_id
26661
+ WHERE files_fts MATCH ?${filters.sql}
26662
+ ORDER BY fts.rank
26663
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
26664
+ if (rows.length === 0)
26665
+ break;
26666
+ for (const row of rows) {
26667
+ if (!regex.test(row.rel_path) && !regex.test(row.name))
26668
+ continue;
26669
+ const depth = row.rel_path.split("/").length - 1;
26670
+ const score = Math.max(0.05, 0.6 - depth * 0.02);
26671
+ const hit = rowToHit(row, score);
26672
+ if (!existsSync2(hit.absPath))
26673
+ continue;
26674
+ hits.push(hit);
26675
+ if (hits.length >= limit)
26676
+ break;
26677
+ }
26678
+ if (rows.length < pageSize)
26518
26679
  break;
26519
26680
  }
26520
26681
  return hits;
@@ -26528,40 +26689,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
26528
26689
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
26529
26690
  }
26530
26691
  const filters = filterClauses(opts, d);
26531
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26532
- FROM file_content_fts fts
26533
- JOIN files f ON f.id = fts.rowid
26534
- JOIN index_roots r ON r.id = f.root_id
26535
- WHERE file_content_fts MATCH ?${filters.sql}
26536
- ORDER BY fts.rank
26537
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
26538
26692
  const hits = [];
26539
- for (let i = 0;i < rows.length && hits.length < limit; i++) {
26540
- const row = rows[i];
26541
- const absPath = `${row.root_path}/${row.rel_path}`;
26542
- let content;
26543
- try {
26544
- content = readFileSync4(absPath, "utf-8");
26545
- } catch {
26546
- continue;
26547
- }
26548
- const lines = content.split(`
26693
+ const pageSize = Math.max(200, limit * 10);
26694
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
26695
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26696
+ FROM file_content_fts fts
26697
+ JOIN files f ON f.id = fts.rowid
26698
+ JOIN index_roots r ON r.id = f.root_id
26699
+ WHERE file_content_fts MATCH ?${filters.sql}
26700
+ ORDER BY fts.rank
26701
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
26702
+ if (rows.length === 0)
26703
+ break;
26704
+ for (let i = 0;i < rows.length && hits.length < limit; i++) {
26705
+ const row = rows[i];
26706
+ const absPath = `${row.root_path}/${row.rel_path}`;
26707
+ let content;
26708
+ try {
26709
+ content = readFileSync4(absPath, "utf-8");
26710
+ } catch {
26711
+ continue;
26712
+ }
26713
+ const lines = content.split(`
26549
26714
  `);
26550
- const matches = [];
26551
- for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
26552
- if (regex.test(lines[n])) {
26553
- matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
26715
+ const matches = [];
26716
+ for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
26717
+ if (regex.test(lines[n])) {
26718
+ matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
26719
+ }
26554
26720
  }
26721
+ if (matches.length === 0)
26722
+ continue;
26723
+ const rankIndex = offset + i;
26724
+ const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
26725
+ hits.push({
26726
+ ...rowToHit(row, score),
26727
+ line: matches[0].line,
26728
+ lineText: matches[0].text,
26729
+ matches
26730
+ });
26555
26731
  }
26556
- if (matches.length === 0)
26557
- continue;
26558
- const score = Math.max(0.25, 0.65 - i * 0.05);
26559
- hits.push({
26560
- ...rowToHit(row, score),
26561
- line: matches[0].line,
26562
- lineText: matches[0].text,
26563
- matches
26564
- });
26732
+ if (rows.length < pageSize)
26733
+ break;
26565
26734
  }
26566
26735
  return hits;
26567
26736
  }
@@ -26572,42 +26741,51 @@ function searchFileContent(query, opts = {}, db) {
26572
26741
  if (!ftsQuery)
26573
26742
  return [];
26574
26743
  const filters = filterClauses(opts, d);
26575
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26576
- FROM file_content_fts fts
26577
- JOIN files f ON f.id = fts.rowid
26578
- JOIN index_roots r ON r.id = f.root_id
26579
- WHERE file_content_fts MATCH ?${filters.sql}
26580
- ORDER BY fts.rank
26581
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
26582
26744
  const tokens = tokenize(query);
26583
26745
  const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
26746
+ const gramFilters = contentGramClauses(shortTokens);
26584
26747
  const scored = [];
26585
- for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
26586
- const row = rows[i];
26587
- const absPath = `${row.root_path}/${row.rel_path}`;
26588
- let content;
26589
- try {
26590
- content = readFileSync4(absPath, "utf-8");
26591
- } catch {
26592
- continue;
26593
- }
26594
- if (shortTokens.length > 0) {
26595
- const lower = content.toLowerCase();
26596
- if (!shortTokens.every((t) => lower.includes(t)))
26748
+ const pageSize = Math.max(50, limit * 3);
26749
+ for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
26750
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
26751
+ FROM file_content_fts fts
26752
+ JOIN files f ON f.id = fts.rowid
26753
+ JOIN index_roots r ON r.id = f.root_id
26754
+ WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
26755
+ ORDER BY fts.rank
26756
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
26757
+ if (rows.length === 0)
26758
+ break;
26759
+ for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
26760
+ const row = rows[i];
26761
+ const absPath = `${row.root_path}/${row.rel_path}`;
26762
+ let content;
26763
+ try {
26764
+ content = readFileSync4(absPath, "utf-8");
26765
+ } catch {
26766
+ continue;
26767
+ }
26768
+ if (shortTokens.length > 0) {
26769
+ const lower = content.toLowerCase();
26770
+ if (!shortTokens.every((t) => lower.includes(t)))
26771
+ continue;
26772
+ }
26773
+ const { matches, tier } = findLineMatches(content, query, tokens);
26774
+ if (matches.length === 0)
26597
26775
  continue;
26776
+ const rankIndex = offset + i;
26777
+ const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
26778
+ const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
26779
+ const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
26780
+ scored.push({
26781
+ ...rowToHit(row, score),
26782
+ line: matches[0].line,
26783
+ lineText: matches[0].text,
26784
+ matches
26785
+ });
26598
26786
  }
26599
- const { matches, tier } = findLineMatches(content, query, tokens);
26600
- if (matches.length === 0)
26601
- continue;
26602
- const base = Math.max(0.25, 0.55 - i * 0.04);
26603
- const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
26604
- const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
26605
- scored.push({
26606
- ...rowToHit(row, score),
26607
- line: matches[0].line,
26608
- lineText: matches[0].text,
26609
- matches
26610
- });
26787
+ if (rows.length < pageSize)
26788
+ break;
26611
26789
  }
26612
26790
  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
26613
26791
  }
@@ -26620,7 +26798,7 @@ class FilesProvider {
26620
26798
  return hasReadyRoot();
26621
26799
  }
26622
26800
  async search(query, options) {
26623
- autoRefreshStaleRoots();
26801
+ scheduleAutoRefreshStaleRoots();
26624
26802
  const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
26625
26803
  return hits.map((hit) => ({
26626
26804
  title: hit.name,
@@ -26648,7 +26826,7 @@ class ContentProvider {
26648
26826
  return hasReadyRoot();
26649
26827
  }
26650
26828
  async search(query, options) {
26651
- autoRefreshStaleRoots();
26829
+ scheduleAutoRefreshStaleRoots();
26652
26830
  const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
26653
26831
  return hits.map((hit) => ({
26654
26832
  title: hit.name,
@@ -26775,7 +26953,7 @@ function rowToSearch(row) {
26775
26953
  }
26776
26954
  function createSearch(data, db) {
26777
26955
  const d = db ?? getDb();
26778
- const id = generateId();
26956
+ const id = data.id ?? generateId();
26779
26957
  const now = new Date().toISOString();
26780
26958
  d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
26781
26959
  VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
@@ -26857,7 +27035,7 @@ function createResults(results, db) {
26857
27035
  d.exec("BEGIN");
26858
27036
  try {
26859
27037
  for (const data of results) {
26860
- const id = generateId();
27038
+ const id = data.id ?? generateId();
26861
27039
  stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
26862
27040
  created.push({
26863
27041
  id,
@@ -27014,13 +27192,300 @@ function isProviderConfigured(provider) {
27014
27192
  return !!Bun.env[provider.apiKeyEnv];
27015
27193
  }
27016
27194
 
27195
+ // src/lib/router.ts
27196
/**
 * Short human-readable description of each search provider, keyed by provider
 * name. The Cerebras router sends these descriptions to the model as the
 * "provider guide". The table is frozen so the guide text cannot be changed
 * by accident at runtime.
 */
var PROVIDER_DESCRIPTIONS = Object.freeze({
  files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
  content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
  google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
  serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
  exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
  perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
  brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
  bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
  twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
  reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
  youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
  hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
  github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
  arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
});
27212
/**
 * Normalizes a requested provider count to a whole number in the range 1..5.
 *
 * @param {unknown} value - Requested maximum number of providers.
 * @returns {number} 3 when `value` is missing or not a finite number,
 *   otherwise `value` rounded down and limited to 1..5.
 */
function clampMaxProviders(value) {
  const usable = value !== undefined && Number.isFinite(value);
  if (!usable) {
    return 3;
  }
  const whole = Math.floor(value);
  if (whole < 1) {
    return 1;
  }
  return whole > 5 ? 5 : whole;
}
27217
/**
 * Limits a confidence value to the range 0..1.
 *
 * @param {unknown} value - Confidence reported by a router.
 * @returns {number} 0.5 when `value` is not a finite number, otherwise
 *   `value` limited to 0..1.
 */
function clampConfidence(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0.5;
  }
  const capped = Math.min(1, value);
  return Math.max(0, capped);
}
27220
/**
 * Filters a candidate list down to known provider names, dropping duplicates
 * while keeping first-seen order.
 *
 * @param {Iterable<string>} candidates - Provider names to consider.
 * @returns {string[]} Unique candidates that appear in PROVIDER_NAMES.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  // A Set keeps insertion order and ignores repeats, which gives
  // "first occurrence wins" de-duplication.
  const unique = new Set();
  for (const name of candidates) {
    if (known.has(name)) {
      unique.add(name);
    }
  }
  return [...unique];
}
27232
/**
 * Adds `amount` to a provider's running score, but only when that provider is
 * one of the routable candidates.
 *
 * @param {Map<string, number>} scores - Running score per provider (mutated).
 * @param {Set<string>} candidateSet - Providers eligible for scoring.
 * @param {string} provider - Provider to boost.
 * @param {number} amount - Score increment.
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    const previous = scores.get(provider) ?? 0;
    scores.set(provider, previous + amount);
  }
}
27237
/**
 * Reports whether at least one of the given regular expressions matches.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns to try in order.
 * @returns {boolean} True as soon as one pattern matches, else false.
 */
function hasAny(query, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(query)) {
      return true;
    }
  }
  return false;
}
27240
/**
 * Picks search providers for a query using keyword and regex signals only
 * (no network call).
 *
 * Every candidate starts with a small baseline score. Each matching signal
 * boosts the providers it relates to and records a reason. When no signal
 * matches, a general-purpose fallback boost is applied instead. The highest
 * scoring providers are returned, with ties kept in candidate order.
 *
 * @param {string} query - Raw user query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number }} [options] - Optional cap on the number of
 *   selected providers (clamped by clampMaxProviders).
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const maxProviders = clampMaxProviders(options.maxProviders);
  if (pool.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Signals are evaluated top to bottom; the order determines the order of
  // the reasons in the returned explanation.
  const signals = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      reason: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      reason: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      reason: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      reason: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      reason: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      reason: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      reason: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      reason: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      reason: "query appears time-sensitive"
    }
  ];
  const fallback = {
    boosts: [["exa", 2.5], ["perplexity", 2], ["brave", 1.5], ["google", 1.5], ["hackernews", 0.75]],
    reason: "general query fallback"
  };

  const candidateSet = new Set(pool);
  const scores = new Map();
  const reasons = [];
  const q = query.trim().toLowerCase();
  for (const name of pool) {
    scores.set(name, 0.05);
  }

  const applyBoosts = (boosts) => {
    for (const [provider, amount] of boosts) {
      addScore(scores, candidateSet, provider, amount);
    }
  };
  for (const signal of signals) {
    if (!hasAny(q, signal.patterns)) {
      continue;
    }
    applyBoosts(signal.boosts);
    reasons.push(signal.reason);
  }
  if (reasons.length === 0) {
    applyBoosts(fallback.boosts);
    reasons.push(fallback.reason);
  }

  // Rank by score (descending); equal scores keep the candidate order.
  const position = new Map(pool.map((name, index) => [name, index]));
  const ranked = [...scores.entries()];
  ranked.sort((a, b) => b[1] - a[1] || position.get(a[0]) - position.get(b[0]));
  const keep = Math.min(maxProviders, pool.length);
  const selectedProviders = ranked.slice(0, keep).map(([provider]) => provider);

  const topScore = scores.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: pool,
    reason: reasons.join("; "),
    confidence
  };
}
27330
/**
 * Builds the JSON Schema that constrains the router model's structured reply.
 *
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {number} maxProviders - Upper bound on the number of selections.
 * @returns {object} Schema for `{ selectedProviders, reason, confidence }`
 *   with all three fields required and no extra properties allowed.
 */
function routerSchema(candidates, maxProviders) {
  const providerList = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const properties = {
    selectedProviders: providerList,
    reason: { type: "string" },
    confidence: { type: "number", minimum: 0, maximum: 1 }
  };
  return {
    type: "object",
    properties,
    // Every declared property is required, in declaration order.
    required: Object.keys(properties),
    additionalProperties: false
  };
}
27347
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {string[]} candidates - Provider names that were offered to the model.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The validated selection, or null when `raw` is not valid JSON, is not an
 *   object with a `selectedProviders` array, or names no offered provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse can legitimately return null or a primitive (e.g. for "null"),
  // so guard before reading properties rather than throwing a TypeError.
  if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.selectedProviders)) {
    return null;
  }
  const candidateSet = new Set(candidates);
  // De-duplicate before applying the cap so a repeated name neither uses up a
  // slot nor causes the same provider to be searched twice.
  const unique = new Set();
  for (const provider of parsed.selectedProviders) {
    if (typeof provider === "string" && candidateSet.has(provider)) {
      unique.add(provider);
    }
  }
  const selectedProviders = [...unique].slice(0, maxProviders);
  if (selectedProviders.length === 0) {
    return null;
  }
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
27366
/**
 * Asks the Cerebras chat-completions API to choose providers for a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made and the heuristic
 * result is returned with an explanatory `error` field.
 *
 * @param {string} query - Raw user query.
 * @param {string[]} candidates - Provider names offered to the model.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options
 *   Selection cap, request timeout in milliseconds, and model identifier.
 * @returns {Promise<object>} Routing decision with `strategy: "cerebras"`, or
 *   the heuristic decision when no API key is configured.
 * @throws {Error} On a non-OK HTTP status, an empty completion, or a reply
 *   that does not contain a valid provider selection. The request itself may
 *   also reject (for example when the timeout signal aborts it).
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const heuristic = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...heuristic,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Give the model a name + description for each provider it may pick.
  const providerGuide = candidates.map((name) => {
    return { name, description: PROVIDER_DESCRIPTIONS[name] };
  });
  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers: providerGuide
    })
  };
  const payload = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Structured output: constrain the reply to the router schema.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, options.maxProviders)
      }
    }
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }
  const parsed = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!parsed) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return { strategy: "cerebras", candidates, ...parsed };
}
27428
/**
 * Routes a query to providers, preferring the Cerebras router and falling
 * back to heuristics when that call fails.
 *
 * @param {string} query - Raw user query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options]
 *   Selection cap, router timeout in milliseconds (minimum 250, default 1200),
 *   and model (defaults to CEREBRAS_MODEL, then "gpt-oss-120b").
 * @returns {Promise<object>} Routing decision. When the Cerebras call throws,
 *   the heuristic decision is returned with the failure message in `error`.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const requestedCap = clampMaxProviders(options.maxProviders);
  // Never ask for more providers than exist, but keep the cap at least 1.
  const maxProviders = Math.min(requestedCap, Math.max(1, pool.length));

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }
  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  if (pool.length === 0) {
    return routeSearchProvidersHeuristic(query, pool, { maxProviders });
  }
  try {
    return await routeWithCerebras(query, pool, { maxProviders, timeoutMs, model });
  } catch (err) {
    const fallback = routeSearchProvidersHeuristic(query, pool, { maxProviders });
    const message = err instanceof Error ? err.message : String(err);
    return { ...fallback, error: message };
  }
}
27445
+
27017
27446
  // src/lib/search.ts
27447
/**
 * Awaits `promise2`, rejecting if it has not settled within `timeoutMs`.
 *
 * The underlying operation is not cancelled on timeout; only the wait ends.
 *
 * @param {Promise<T>} promise2 - Operation to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds. A non-finite or
 *   non-positive value disables the limit.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The settled value of `promise2`.
 * @throws {Error} `"<label> timed out after <timeoutMs>ms"` when the limit is hit.
 * @template T
 */
async function withTimeout(promise2, timeoutMs, label) {
  const limited = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!limited) {
    return promise2;
  }
  let handle;
  try {
    const deadline = new Promise((_resolve, reject) => {
      handle = setTimeout(() => {
        reject(new Error(`${label} timed out after ${timeoutMs}ms`));
      }, timeoutMs);
      // Where the runtime supports it, unref the timer.
      handle.unref?.();
    });
    return await Promise.race([promise2, deadline]);
  } finally {
    // Always release the timer, whichever side of the race settled first.
    if (handle) {
      clearTimeout(handle);
    }
  }
}
27464
/**
 * Runs `task` over `items` with at most `concurrency` calls in flight and
 * reports each outcome in the same shape as Promise.allSettled.
 *
 * @param {T[]} items - Inputs to process.
 * @param {number} concurrency - Maximum simultaneous tasks. Values below 1
 *   are raised to 1; a value that is not a number (NaN/undefined) also falls
 *   back to 1, so every item is still processed.
 * @param {(item: T) => Promise<R> | R} task - Work to run for each item.
 * @returns {Promise<Array<{ status: "fulfilled", value: R } | { status: "rejected", reason: unknown }>>}
 *   One entry per item, in input order. Never rejects because of a task failure.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index. Claiming is
  // synchronous (no await between the check and the increment), so two
  // workers never take the same item.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }
  // Math.max(1, NaN) is NaN, which would start zero workers and leave
  // `results` full of holes; treat an unusable limit as sequential instead.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
27018
27482
  async function unifiedSearch(query, opts = {}) {
27019
27483
  const config2 = getConfig();
27020
27484
  const startTime = Date.now();
27021
27485
  const db = opts.db;
27022
27486
  let providerNames = opts.providers ?? [];
27023
- if (opts.profile) {
27487
+ const smartProfile = opts.profile === "smart";
27488
+ if (opts.profile && !smartProfile) {
27024
27489
  const profile = getProfileByName(opts.profile, db);
27025
27490
  if (profile) {
27026
27491
  providerNames = profile.providers;
@@ -27036,7 +27501,7 @@ async function unifiedSearch(query, opts = {}) {
27036
27501
  }
27037
27502
  const errors4 = [];
27038
27503
  const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
27039
- const activeProviders = providerNames.filter((name) => {
27504
+ let activeProviders = providerNames.filter((name) => {
27040
27505
  try {
27041
27506
  if (getProvider(name).isConfigured())
27042
27507
  return true;
@@ -27047,20 +27512,36 @@ async function unifiedSearch(query, opts = {}) {
27047
27512
  });
27048
27513
  }
27049
27514
  return false;
27050
- } catch {
27515
+ } catch (err) {
27516
+ if (explicitRequest) {
27517
+ errors4.push({
27518
+ provider: name,
27519
+ error: err instanceof Error ? err.message : "unknown provider"
27520
+ });
27521
+ }
27051
27522
  return false;
27052
27523
  }
27053
27524
  });
27525
+ const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config2.router.enabled;
27526
+ let routing;
27527
+ if (routingRequested && activeProviders.length > 0) {
27528
+ routing = await routeSearchProviders(query, activeProviders, {
27529
+ maxProviders: config2.router.maxProviders,
27530
+ timeoutMs: config2.router.timeoutMs,
27531
+ model: config2.router.model
27532
+ });
27533
+ activeProviders = routing.selectedProviders;
27534
+ }
27054
27535
  const searchOptions = {
27055
27536
  limit: config2.defaultLimit,
27056
27537
  ...opts.options
27057
27538
  };
27058
- const results = await Promise.allSettled(activeProviders.map(async (name) => {
27539
+ const results = await allSettledLimited(activeProviders, config2.maxConcurrent, async (name) => {
27059
27540
  const provider = getProvider(name);
27060
- const rawResults = await provider.search(query, searchOptions);
27541
+ const rawResults = await withTimeout(provider.search(query, searchOptions), config2.providerTimeoutMs, provider.displayName);
27061
27542
  updateProviderLastUsed(name, db);
27062
27543
  return { name, results: rawResults };
27063
- }));
27544
+ });
27064
27545
  const allResults = [];
27065
27546
  const searchId = generateId();
27066
27547
  for (const result of results) {
@@ -27114,11 +27595,13 @@ async function unifiedSearch(query, opts = {}) {
27114
27595
  createdAt: new Date().toISOString()
27115
27596
  },
27116
27597
  results: finalResults,
27117
- errors: errors4
27598
+ errors: errors4,
27599
+ ...routing && { routing }
27118
27600
  };
27119
27601
  }
27120
27602
  const persistable = config2.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
27121
27603
  const search = createSearch({
27604
+ id: searchId,
27122
27605
  query,
27123
27606
  providers: activeProviders,
27124
27607
  resultCount: persistable.length,
@@ -27127,6 +27610,7 @@ async function unifiedSearch(query, opts = {}) {
27127
27610
  if (persistable.length > 0) {
27128
27611
  createResults(persistable.map((r) => ({
27129
27612
  searchId: search.id,
27613
+ id: r.id,
27130
27614
  title: r.title,
27131
27615
  url: r.url,
27132
27616
  snippet: r.snippet,
@@ -27143,7 +27627,8 @@ async function unifiedSearch(query, opts = {}) {
27143
27627
  return {
27144
27628
  search: { ...search, resultCount: finalResults.length, duration: duration3 },
27145
27629
  results: finalResults,
27146
- errors: errors4
27630
+ errors: errors4,
27631
+ ...routing && { routing }
27147
27632
  };
27148
27633
  }
27149
27634
  async function searchSingleProvider(provider, query, options, db) {
@@ -27377,7 +27862,7 @@ function findLocal(query, opts = {}, db) {
27377
27862
  return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
27378
27863
  }
27379
27864
  if (opts.refresh !== false)
27380
- autoRefreshStaleRoots(db);
27865
+ scheduleAutoRefreshStaleRoots(db);
27381
27866
  const queryOpts = {
27382
27867
  root: opts.root,
27383
27868
  ext: opts.ext,
@@ -27988,13 +28473,15 @@ function buildServer() {
27988
28473
  providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
27989
28474
  profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
27990
28475
  limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
27991
- dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
27992
- }, async ({ query, providers, profile, limit, dedup }) => {
28476
+ dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
28477
+ smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
28478
+ }, async ({ query, providers, profile, limit, dedup, smart }) => {
27993
28479
  const response = await unifiedSearch(query, {
27994
28480
  providers,
27995
28481
  profile,
27996
28482
  options: limit ? { limit } : undefined,
27997
- dedup
28483
+ dedup,
28484
+ smart
27998
28485
  });
27999
28486
  return {
28000
28487
  content: [
@@ -28013,7 +28500,8 @@ function buildServer() {
28013
28500
  source: r.source,
28014
28501
  score: r.score
28015
28502
  })),
28016
- errors: response.errors
28503
+ errors: response.errors,
28504
+ routing: response.routing
28017
28505
  }, null, 2)
28018
28506
  }
28019
28507
  ]
@@ -28268,12 +28756,14 @@ function buildServer() {
28268
28756
  default_limit: exports_external.number().int().optional(),
28269
28757
  dedup: exports_external.boolean().optional(),
28270
28758
  max_concurrent: exports_external.number().int().optional(),
28759
+ provider_timeout_ms: exports_external.number().int().optional(),
28271
28760
  default_profile: exports_external.string().nullable().optional()
28272
28761
  }, async (updates) => {
28273
28762
  const config2 = setConfig({
28274
28763
  ...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
28275
28764
  ...updates.dedup !== undefined && { dedup: updates.dedup },
28276
28765
  ...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
28766
+ ...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
28277
28767
  ...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
28278
28768
  });
28279
28769
  return {
@@ -29537,6 +30027,7 @@ if (handleCliFlags(argv)) {
29537
30027
  process.exit(0);
29538
30028
  }
29539
30029
  async function main() {
30030
+ startBackgroundRefresh();
29540
30031
  if (isHttpMode(argv)) {
29541
30032
  startMcpHttpServer({ port: resolveMcpHttpPort(argv) });
29542
30033
  return;