@hasna/search 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
11644
11644
  var require_package = __commonJS((exports, module) => {
11645
11645
  module.exports = {
11646
11646
  name: "@hasna/search",
11647
- version: "0.0.10",
11647
+ version: "0.0.11",
11648
11648
  description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
11649
11649
  type: "module",
11650
11650
  main: "dist/index.js",
@@ -15756,12 +15756,19 @@ var DEFAULT_CONFIG = {
15756
15756
  defaultLimit: 10,
15757
15757
  defaultProviders: [],
15758
15758
  defaultProfile: null,
15759
+ router: {
15760
+ enabled: false,
15761
+ model: "gpt-oss-120b",
15762
+ maxProviders: 3,
15763
+ timeoutMs: 1200
15764
+ },
15759
15765
  transcriber: {
15760
15766
  baseUrl: "http://localhost:19600",
15761
15767
  fallbackCli: "microservice-transcriber"
15762
15768
  },
15763
15769
  dedup: true,
15764
15770
  maxConcurrent: 5,
15771
+ providerTimeoutMs: 15000,
15765
15772
  indexStaleMinutes: 5,
15766
15773
  indexAutoRefresh: true,
15767
15774
  recordLocalResults: false
@@ -16496,6 +16503,31 @@ var migrations = [
16496
16503
  );
16497
16504
  `);
16498
16505
  }
16506
+ },
16507
+ {
16508
+ version: 2,
16509
+ description: "Local file index filter indexes",
16510
+ up: (db) => {
16511
+ db.exec(`
16512
+ CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
16513
+ CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
16514
+ `);
16515
+ }
16516
+ },
16517
+ {
16518
+ version: 3,
16519
+ description: "Local content short-token filter grams",
16520
+ up: (db) => {
16521
+ db.exec(`
16522
+ CREATE TABLE IF NOT EXISTS file_content_grams (
16523
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
16524
+ gram TEXT NOT NULL,
16525
+ PRIMARY KEY (file_id, gram)
16526
+ );
16527
+ CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
16528
+ ON file_content_grams(gram, file_id);
16529
+ `);
16530
+ }
16499
16531
  }
16500
16532
  ];
16501
16533
  function runIndexMigrations(db) {
@@ -16567,7 +16599,18 @@ function getConfig() {
16567
16599
  try {
16568
16600
  const raw = readFileSync(path, "utf-8");
16569
16601
  const parsed = JSON.parse(raw);
16570
- return { ...DEFAULT_CONFIG, ...parsed };
16602
+ return {
16603
+ ...DEFAULT_CONFIG,
16604
+ ...parsed,
16605
+ router: {
16606
+ ...DEFAULT_CONFIG.router,
16607
+ ...parsed.router ?? {}
16608
+ },
16609
+ transcriber: {
16610
+ ...DEFAULT_CONFIG.transcriber,
16611
+ ...parsed.transcriber ?? {}
16612
+ }
16613
+ };
16571
16614
  } catch {
16572
16615
  return { ...DEFAULT_CONFIG };
16573
16616
  }
@@ -16985,6 +17028,7 @@ function removeRoot(idOrPath, db) {
16985
17028
  d.exec("BEGIN");
16986
17029
  try {
16987
17030
  d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
17031
+ d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
16988
17032
  d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
16989
17033
  d.exec("COMMIT");
16990
17034
  } catch (err) {
@@ -16996,6 +17040,21 @@ function removeRoot(idOrPath, db) {
16996
17040
  function shouldIndexContent(root, file) {
16997
17041
  return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
16998
17042
  }
17043
+ function contentShortGrams(body) {
17044
+ const grams = new Set;
17045
+ const words = body.toLowerCase().matchAll(/[a-z0-9_$]+/g);
17046
+ for (const match of words) {
17047
+ const word = match[0];
17048
+ for (let i = 0;i < word.length; i++) {
17049
+ grams.add(word[i]);
17050
+ if (i + 1 < word.length)
17051
+ grams.add(word.slice(i, i + 2));
17052
+ }
17053
+ if (grams.size >= 2048)
17054
+ break;
17055
+ }
17056
+ return [...grams];
17057
+ }
16999
17058
  function indexRoot(idOrPath, opts = {}, db) {
17000
17059
  const d = db ?? getIndexDb();
17001
17060
  const root = getRoot(idOrPath, d);
@@ -17014,6 +17073,8 @@ function indexRoot(idOrPath, opts = {}, db) {
17014
17073
  const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
17015
17074
  const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
17016
17075
  const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
17076
+ const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
17077
+ const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
17017
17078
  const stats = {
17018
17079
  rootId: root.id,
17019
17080
  added: 0,
@@ -17024,38 +17085,57 @@ function indexRoot(idOrPath, opts = {}, db) {
17024
17085
  skippedDirs: skippedDirs.length,
17025
17086
  durationMs: 0
17026
17087
  };
17088
+ const seen = new Set;
17089
+ const changes = [];
17090
+ for (const file of scanned) {
17091
+ seen.add(file.relPath);
17092
+ const prev = existing.get(file.relPath);
17093
+ const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
17094
+ if (prev && !changed && !opts.force)
17095
+ continue;
17096
+ const wantContent = shouldIndexContent(root, file);
17097
+ const absPath = `${root.path}/${file.relPath}`;
17098
+ let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
17099
+ let body = null;
17100
+ if (wantContent && !isBinary) {
17101
+ try {
17102
+ body = readFileSync3(absPath, "utf-8");
17103
+ } catch {
17104
+ isBinary = true;
17105
+ }
17106
+ }
17107
+ changes.push({
17108
+ file,
17109
+ prev,
17110
+ isBinary,
17111
+ body,
17112
+ grams: body !== null ? contentShortGrams(body) : [],
17113
+ contentIndexed: body !== null ? 1 : 0
17114
+ });
17115
+ }
17027
17116
  d.exec("BEGIN");
17028
17117
  try {
17029
- const seen = new Set;
17030
- for (const file of scanned) {
17031
- seen.add(file.relPath);
17032
- const prev = existing.get(file.relPath);
17033
- const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
17034
- if (prev && !changed && !opts.force)
17035
- continue;
17036
- const wantContent = shouldIndexContent(root, file);
17037
- const absPath = `${root.path}/${file.relPath}`;
17038
- let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
17039
- let body = null;
17040
- if (wantContent && !isBinary) {
17041
- try {
17042
- body = readFileSync3(absPath, "utf-8");
17043
- } catch {
17044
- isBinary = true;
17045
- }
17046
- }
17047
- const contentIndexed = body !== null ? 1 : 0;
17118
+ for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
17048
17119
  if (prev) {
17049
- if (prev.content_indexed)
17120
+ if (prev.content_indexed) {
17050
17121
  deleteContent.run(prev.id);
17122
+ deleteContentGrams.run(prev.id);
17123
+ }
17051
17124
  updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
17052
- if (body !== null)
17125
+ if (body !== null) {
17053
17126
  insertContent.run(prev.id, body);
17127
+ for (const gram of grams)
17128
+ insertContentGram.run(prev.id, gram);
17129
+ }
17054
17130
  stats.updated++;
17055
17131
  } else {
17056
17132
  const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
17057
- if (body !== null)
17058
- insertContent.run(Number(inserted.lastInsertRowid), body);
17133
+ if (body !== null) {
17134
+ const fileId = Number(inserted.lastInsertRowid);
17135
+ insertContent.run(fileId, body);
17136
+ for (const gram of grams)
17137
+ insertContentGram.run(fileId, gram);
17138
+ }
17059
17139
  stats.added++;
17060
17140
  }
17061
17141
  if (contentIndexed)
@@ -17064,8 +17144,10 @@ function indexRoot(idOrPath, opts = {}, db) {
17064
17144
  for (const [relPath, row] of existing) {
17065
17145
  if (seen.has(relPath))
17066
17146
  continue;
17067
- if (row.content_indexed)
17147
+ if (row.content_indexed) {
17068
17148
  deleteContent.run(row.id);
17149
+ deleteContentGrams.run(row.id);
17150
+ }
17069
17151
  deleteFile.run(row.id);
17070
17152
  stats.deleted++;
17071
17153
  }
@@ -17087,6 +17169,9 @@ function indexAllRoots(opts = {}, db) {
17087
17169
  return listRoots(db).map((root) => indexRoot(root.id, opts, db));
17088
17170
  }
17089
17171
  var refreshing = new Set;
17172
+ var lastDefaultAutoRefreshCheckAt = 0;
17173
+ var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
17174
+ var defaultRefreshScheduled = false;
17090
17175
  function refreshStaleRoots(staleMinutes, db) {
17091
17176
  const cutoff = Date.now() - staleMinutes * 60000;
17092
17177
  const stats = [];
@@ -17110,8 +17195,31 @@ function autoRefreshStaleRoots(db) {
17110
17195
  const config = getConfig();
17111
17196
  if (!config.indexAutoRefresh)
17112
17197
  return [];
17198
+ if (!db) {
17199
+ const now = Date.now();
17200
+ if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
17201
+ return [];
17202
+ lastDefaultAutoRefreshCheckAt = now;
17203
+ }
17113
17204
  return refreshStaleRoots(config.indexStaleMinutes, db);
17114
17205
  }
17206
+ function scheduleAutoRefreshStaleRoots(db) {
17207
+ if (db)
17208
+ return autoRefreshStaleRoots(db);
17209
+ const config = getConfig();
17210
+ if (!config.indexAutoRefresh || defaultRefreshScheduled)
17211
+ return [];
17212
+ defaultRefreshScheduled = true;
17213
+ const timer = setTimeout(() => {
17214
+ try {
17215
+ autoRefreshStaleRoots();
17216
+ } catch {} finally {
17217
+ defaultRefreshScheduled = false;
17218
+ }
17219
+ }, 0);
17220
+ timer.unref?.();
17221
+ return [];
17222
+ }
17115
17223
  function startBackgroundRefresh() {
17116
17224
  const minutes = Math.max(1, getConfig().indexStaleMinutes);
17117
17225
  const timer = setInterval(() => {
@@ -17350,6 +17458,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
17350
17458
  // src/lib/local/query.ts
17351
17459
  var MAX_LINE_LENGTH = 200;
17352
17460
  var MAX_MATCHES_PER_FILE = 5;
17461
+ var MAX_PATH_CANDIDATES = 20000;
17462
+ var MAX_CONTENT_CANDIDATES = 50000;
17463
+ var MAX_REGEX_CANDIDATES = 50000;
17353
17464
  function tokenize(query) {
17354
17465
  return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
17355
17466
  }
@@ -17380,11 +17491,40 @@ function filterClauses(opts, db) {
17380
17491
  }
17381
17492
  if (opts.dir) {
17382
17493
  clauses.push("f.dir LIKE ? ESCAPE '\\'");
17383
- const dir = opts.dir.replace(/^\/|\/$/g, "").replace(/[\\%_]/g, "\\$&");
17494
+ const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
17384
17495
  params.push(`%${dir}%`);
17385
17496
  }
17386
17497
  return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
17387
17498
  }
17499
+ function escapeLike(value) {
17500
+ return value.replace(/[\\%_]/g, "\\$&");
17501
+ }
17502
+ function shortTokenClauses(tokens) {
17503
+ if (tokens.length === 0)
17504
+ return { sql: "", params: [] };
17505
+ return {
17506
+ sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
17507
+ params: tokens.map((token) => `%${escapeLike(token)}%`)
17508
+ };
17509
+ }
17510
+ function contentGramClauses(tokens) {
17511
+ const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
17512
+ if (gramTokens.length === 0)
17513
+ return { sql: "", params: [] };
17514
+ return {
17515
+ sql: gramTokens.map((_token, index) => ` AND (
17516
+ NOT EXISTS (
17517
+ SELECT 1 FROM file_content_grams cg_any_${index}
17518
+ WHERE cg_any_${index}.file_id = f.id
17519
+ )
17520
+ OR EXISTS (
17521
+ SELECT 1 FROM file_content_grams cg_${index}
17522
+ WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
17523
+ )
17524
+ )`).join(""),
17525
+ params: gramTokens
17526
+ };
17527
+ }
17388
17528
  function rowToHit(row, score) {
17389
17529
  return {
17390
17530
  rootId: row.root_id,
@@ -17454,6 +17594,8 @@ function searchFilePaths(query, opts = {}, db) {
17454
17594
  return [];
17455
17595
  const ftsQuery = buildFtsQuery(query);
17456
17596
  const filters = filterClauses(opts, d);
17597
+ const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
17598
+ const shortFilters = shortTokenClauses(shortTokens);
17457
17599
  const candidateLimit = Math.max(200, limit * 10);
17458
17600
  let rows;
17459
17601
  if (ftsQuery) {
@@ -17461,16 +17603,16 @@ function searchFilePaths(query, opts = {}, db) {
17461
17603
  FROM files_fts fts
17462
17604
  JOIN files f ON f.id = fts.rowid
17463
17605
  JOIN index_roots r ON r.id = f.root_id
17464
- WHERE files_fts MATCH ?${filters.sql}
17606
+ WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
17465
17607
  ORDER BY bm25(files_fts, 10.0, 1.0)
17466
- LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
17467
- const namePattern = `${query.trim().replace(/[\\%_]/g, "\\$&")}%`;
17608
+ LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
17609
+ const namePattern = `${escapeLike(query.trim())}%`;
17468
17610
  const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17469
17611
  FROM files f
17470
17612
  JOIN index_roots r ON r.id = f.root_id
17471
- WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
17613
+ WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
17472
17614
  ORDER BY length(f.name)
17473
- LIMIT 100`).all(namePattern, ...filters.params);
17615
+ LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
17474
17616
  const seen = new Set(rows.map((row) => row.id));
17475
17617
  for (const row of nameRows) {
17476
17618
  if (!seen.has(row.id))
@@ -17478,14 +17620,14 @@ function searchFilePaths(query, opts = {}, db) {
17478
17620
  }
17479
17621
  } else {
17480
17622
  const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
17481
- const likeParams = tokens.map((t) => `%${t.replace(/[\\%_]/g, "\\$&")}%`);
17623
+ const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
17482
17624
  rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17483
17625
  FROM files f
17484
17626
  JOIN index_roots r ON r.id = f.root_id
17485
17627
  WHERE ${likeClauses}${filters.sql}
17486
- LIMIT ?`).all(...likeParams, ...filters.params, candidateLimit);
17628
+ ORDER BY length(f.name), length(f.rel_path), f.rel_path
17629
+ LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
17487
17630
  }
17488
- const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
17489
17631
  const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
17490
17632
  return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
17491
17633
  }
@@ -17523,24 +17665,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
17523
17665
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
17524
17666
  }
17525
17667
  const filters = filterClauses(opts, d);
17526
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17527
- FROM files_fts fts
17528
- JOIN files f ON f.id = fts.rowid
17529
- JOIN index_roots r ON r.id = f.root_id
17530
- WHERE files_fts MATCH ?${filters.sql}
17531
- ORDER BY fts.rank
17532
- LIMIT 5000`).all(ftsQuery, ...filters.params);
17533
17668
  const hits = [];
17534
- for (const row of rows) {
17535
- if (!regex.test(row.rel_path) && !regex.test(row.name))
17536
- continue;
17537
- const depth = row.rel_path.split("/").length - 1;
17538
- const score = Math.max(0.05, 0.6 - depth * 0.02);
17539
- const hit = rowToHit(row, score);
17540
- if (!existsSync2(hit.absPath))
17541
- continue;
17542
- hits.push(hit);
17543
- if (hits.length >= limit)
17669
+ const pageSize = Math.max(500, limit * 20);
17670
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
17671
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17672
+ FROM files_fts fts
17673
+ JOIN files f ON f.id = fts.rowid
17674
+ JOIN index_roots r ON r.id = f.root_id
17675
+ WHERE files_fts MATCH ?${filters.sql}
17676
+ ORDER BY fts.rank
17677
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
17678
+ if (rows.length === 0)
17679
+ break;
17680
+ for (const row of rows) {
17681
+ if (!regex.test(row.rel_path) && !regex.test(row.name))
17682
+ continue;
17683
+ const depth = row.rel_path.split("/").length - 1;
17684
+ const score = Math.max(0.05, 0.6 - depth * 0.02);
17685
+ const hit = rowToHit(row, score);
17686
+ if (!existsSync2(hit.absPath))
17687
+ continue;
17688
+ hits.push(hit);
17689
+ if (hits.length >= limit)
17690
+ break;
17691
+ }
17692
+ if (rows.length < pageSize)
17544
17693
  break;
17545
17694
  }
17546
17695
  return hits;
@@ -17554,40 +17703,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
17554
17703
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
17555
17704
  }
17556
17705
  const filters = filterClauses(opts, d);
17557
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17558
- FROM file_content_fts fts
17559
- JOIN files f ON f.id = fts.rowid
17560
- JOIN index_roots r ON r.id = f.root_id
17561
- WHERE file_content_fts MATCH ?${filters.sql}
17562
- ORDER BY fts.rank
17563
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
17564
17706
  const hits = [];
17565
- for (let i = 0;i < rows.length && hits.length < limit; i++) {
17566
- const row = rows[i];
17567
- const absPath = `${row.root_path}/${row.rel_path}`;
17568
- let content;
17569
- try {
17570
- content = readFileSync4(absPath, "utf-8");
17571
- } catch {
17572
- continue;
17573
- }
17574
- const lines = content.split(`
17707
+ const pageSize = Math.max(200, limit * 10);
17708
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
17709
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17710
+ FROM file_content_fts fts
17711
+ JOIN files f ON f.id = fts.rowid
17712
+ JOIN index_roots r ON r.id = f.root_id
17713
+ WHERE file_content_fts MATCH ?${filters.sql}
17714
+ ORDER BY fts.rank
17715
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
17716
+ if (rows.length === 0)
17717
+ break;
17718
+ for (let i = 0;i < rows.length && hits.length < limit; i++) {
17719
+ const row = rows[i];
17720
+ const absPath = `${row.root_path}/${row.rel_path}`;
17721
+ let content;
17722
+ try {
17723
+ content = readFileSync4(absPath, "utf-8");
17724
+ } catch {
17725
+ continue;
17726
+ }
17727
+ const lines = content.split(`
17575
17728
  `);
17576
- const matches = [];
17577
- for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
17578
- if (regex.test(lines[n])) {
17579
- matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
17729
+ const matches = [];
17730
+ for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
17731
+ if (regex.test(lines[n])) {
17732
+ matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
17733
+ }
17580
17734
  }
17735
+ if (matches.length === 0)
17736
+ continue;
17737
+ const rankIndex = offset + i;
17738
+ const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
17739
+ hits.push({
17740
+ ...rowToHit(row, score),
17741
+ line: matches[0].line,
17742
+ lineText: matches[0].text,
17743
+ matches
17744
+ });
17581
17745
  }
17582
- if (matches.length === 0)
17583
- continue;
17584
- const score = Math.max(0.25, 0.65 - i * 0.05);
17585
- hits.push({
17586
- ...rowToHit(row, score),
17587
- line: matches[0].line,
17588
- lineText: matches[0].text,
17589
- matches
17590
- });
17746
+ if (rows.length < pageSize)
17747
+ break;
17591
17748
  }
17592
17749
  return hits;
17593
17750
  }
@@ -17598,42 +17755,51 @@ function searchFileContent(query, opts = {}, db) {
17598
17755
  if (!ftsQuery)
17599
17756
  return [];
17600
17757
  const filters = filterClauses(opts, d);
17601
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17602
- FROM file_content_fts fts
17603
- JOIN files f ON f.id = fts.rowid
17604
- JOIN index_roots r ON r.id = f.root_id
17605
- WHERE file_content_fts MATCH ?${filters.sql}
17606
- ORDER BY fts.rank
17607
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
17608
17758
  const tokens = tokenize(query);
17609
17759
  const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
17760
+ const gramFilters = contentGramClauses(shortTokens);
17610
17761
  const scored = [];
17611
- for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
17612
- const row = rows[i];
17613
- const absPath = `${row.root_path}/${row.rel_path}`;
17614
- let content;
17615
- try {
17616
- content = readFileSync4(absPath, "utf-8");
17617
- } catch {
17618
- continue;
17619
- }
17620
- if (shortTokens.length > 0) {
17621
- const lower = content.toLowerCase();
17622
- if (!shortTokens.every((t) => lower.includes(t)))
17762
+ const pageSize = Math.max(50, limit * 3);
17763
+ for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
17764
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17765
+ FROM file_content_fts fts
17766
+ JOIN files f ON f.id = fts.rowid
17767
+ JOIN index_roots r ON r.id = f.root_id
17768
+ WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
17769
+ ORDER BY fts.rank
17770
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
17771
+ if (rows.length === 0)
17772
+ break;
17773
+ for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
17774
+ const row = rows[i];
17775
+ const absPath = `${row.root_path}/${row.rel_path}`;
17776
+ let content;
17777
+ try {
17778
+ content = readFileSync4(absPath, "utf-8");
17779
+ } catch {
17623
17780
  continue;
17781
+ }
17782
+ if (shortTokens.length > 0) {
17783
+ const lower = content.toLowerCase();
17784
+ if (!shortTokens.every((t) => lower.includes(t)))
17785
+ continue;
17786
+ }
17787
+ const { matches, tier } = findLineMatches(content, query, tokens);
17788
+ if (matches.length === 0)
17789
+ continue;
17790
+ const rankIndex = offset + i;
17791
+ const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
17792
+ const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
17793
+ const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
17794
+ scored.push({
17795
+ ...rowToHit(row, score),
17796
+ line: matches[0].line,
17797
+ lineText: matches[0].text,
17798
+ matches
17799
+ });
17624
17800
  }
17625
- const { matches, tier } = findLineMatches(content, query, tokens);
17626
- if (matches.length === 0)
17627
- continue;
17628
- const base = Math.max(0.25, 0.55 - i * 0.04);
17629
- const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
17630
- const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
17631
- scored.push({
17632
- ...rowToHit(row, score),
17633
- line: matches[0].line,
17634
- lineText: matches[0].text,
17635
- matches
17636
- });
17801
+ if (rows.length < pageSize)
17802
+ break;
17637
17803
  }
17638
17804
  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
17639
17805
  }
@@ -17646,7 +17812,7 @@ class FilesProvider {
17646
17812
  return hasReadyRoot();
17647
17813
  }
17648
17814
  async search(query, options) {
17649
- autoRefreshStaleRoots();
17815
+ scheduleAutoRefreshStaleRoots();
17650
17816
  const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
17651
17817
  return hits.map((hit) => ({
17652
17818
  title: hit.name,
@@ -17674,7 +17840,7 @@ class ContentProvider {
17674
17840
  return hasReadyRoot();
17675
17841
  }
17676
17842
  async search(query, options) {
17677
- autoRefreshStaleRoots();
17843
+ scheduleAutoRefreshStaleRoots();
17678
17844
  const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
17679
17845
  return hits.map((hit) => ({
17680
17846
  title: hit.name,
@@ -17801,7 +17967,7 @@ function rowToSearch(row) {
17801
17967
  }
17802
17968
  function createSearch(data, db) {
17803
17969
  const d = db ?? getDb();
17804
- const id = generateId();
17970
+ const id = data.id ?? generateId();
17805
17971
  const now = new Date().toISOString();
17806
17972
  d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
17807
17973
  VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
@@ -17883,7 +18049,7 @@ function createResults(results, db) {
17883
18049
  d.exec("BEGIN");
17884
18050
  try {
17885
18051
  for (const data of results) {
17886
- const id = generateId();
18052
+ const id = data.id ?? generateId();
17887
18053
  stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
17888
18054
  created.push({
17889
18055
  id,
@@ -18040,13 +18206,300 @@ function isProviderConfigured(provider) {
18040
18206
  return !!Bun.env[provider.apiKeyEnv];
18041
18207
  }
18042
18208
 
18209
+ // src/lib/router.ts
18210
/**
 * One-line description of every routable search provider, keyed by provider
 * name. The text is embedded in the routing prompt so the model can decide
 * which providers fit a query. Frozen so the shared table cannot be altered
 * by accident at runtime.
 */
var PROVIDER_DESCRIPTIONS = Object.freeze({
  files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
  content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
  google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
  serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
  exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
  perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
  brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
  bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
  twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
  reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
  youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
  hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
  github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
  arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
});
18226
/**
 * Normalizes the requested provider count.
 *
 * @param {number} [value] - Requested maximum number of providers.
 * @returns {number} 3 when `value` is not a finite number; otherwise the
 *   value rounded down and limited to the range 1..5.
 */
function clampMaxProviders(value) {
  // Number.isFinite is false for undefined, NaN, +/-Infinity and non-numbers.
  if (!Number.isFinite(value)) {
    return 3;
  }
  const whole = Math.floor(value);
  return Math.min(5, Math.max(1, whole));
}
18231
/**
 * Coerces a confidence value into the range 0..1.
 *
 * @param {unknown} value - Confidence as reported by the router.
 * @returns {number} 0.5 when `value` is not a finite number; otherwise the
 *   value limited to the range 0..1.
 */
function clampConfidence(value) {
  // Number.isFinite already rejects every non-number input.
  if (!Number.isFinite(value)) {
    return 0.5;
  }
  const capped = Math.min(1, value);
  return Math.max(0, capped);
}
18234
/**
 * Filters a candidate list down to known provider names, dropping repeats.
 *
 * @param {Iterable<string>} candidates - Provider names to consider.
 * @returns {string[]} Names that appear in PROVIDER_NAMES, in first-seen
 *   order, each at most once.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  const emitted = new Set();
  return [...candidates].filter((name) => {
    if (!known.has(name) || emitted.has(name)) {
      return false;
    }
    emitted.add(name);
    return true;
  });
}
18246
/**
 * Adds `amount` to a provider's running score, but only when the provider is
 * one of the current candidates.
 *
 * @param {Map<string, number>} scores - Running score per provider (mutated).
 * @param {Set<string>} candidateSet - Providers eligible for routing.
 * @param {string} provider - Provider to boost.
 * @param {number} amount - Score to add.
 * @returns {void}
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    const current = scores.get(provider) ?? 0;
    scores.set(provider, current + amount);
  }
}
18251
/**
 * Reports whether at least one pattern matches the query.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns tried in order.
 * @returns {boolean} true on the first match, false when none match.
 */
function hasAny(query, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(query)) {
      return true;
    }
  }
  return false;
}
18254
+ function routeSearchProvidersHeuristic(query, candidates, options = {}) {
18255
+ const normalized = normalizeCandidates(candidates);
18256
+ const maxProviders = clampMaxProviders(options.maxProviders);
18257
+ if (normalized.length === 0) {
18258
+ return {
18259
+ strategy: "heuristic",
18260
+ selectedProviders: [],
18261
+ candidates: [],
18262
+ reason: "No configured providers were available to route.",
18263
+ confidence: 0
18264
+ };
18265
+ }
18266
+ const candidateSet = new Set(normalized);
18267
+ const scores = new Map;
18268
+ const reasons = [];
18269
+ const q = query.trim().toLowerCase();
18270
+ for (const candidate of normalized)
18271
+ scores.set(candidate, 0.05);
18272
+ if (hasAny(q, [
18273
+ /\b(file|filename|path|folder|directory|repo|workspace)\b/,
18274
+ /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
18275
+ ])) {
18276
+ addScore(scores, candidateSet, "files", 5);
18277
+ addScore(scores, candidateSet, "content", 3);
18278
+ reasons.push("query looks local-file oriented");
18279
+ }
18280
+ if (hasAny(q, [
18281
+ /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
18282
+ /[A-Za-z_$][\w$]*\([^)]*\)/,
18283
+ /[A-Za-z_$][\w$]*::[A-Za-z_$]/
18284
+ ])) {
18285
+ addScore(scores, candidateSet, "content", 5);
18286
+ addScore(scores, candidateSet, "files", 2);
18287
+ addScore(scores, candidateSet, "github", 1.5);
18288
+ reasons.push("query contains code/content lookup signals");
18289
+ }
18290
+ if (hasAny(q, [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/])) {
18291
+ addScore(scores, candidateSet, "arxiv", 5);
18292
+ addScore(scores, candidateSet, "exa", 3);
18293
+ addScore(scores, candidateSet, "perplexity", 2);
18294
+ reasons.push("query asks for scholarly or research material");
18295
+ }
18296
+ if (hasAny(q, [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/])) {
18297
+ addScore(scores, candidateSet, "github", 5);
18298
+ addScore(scores, candidateSet, "exa", 2);
18299
+ reasons.push("query asks for code or repository material");
18300
+ }
18301
+ if (hasAny(q, [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/])) {
18302
+ addScore(scores, candidateSet, "youtube", 5);
18303
+ reasons.push("query asks for video material");
18304
+ }
18305
+ if (hasAny(q, [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/])) {
18306
+ addScore(scores, candidateSet, "reddit", 5);
18307
+ addScore(scores, candidateSet, "hackernews", 1.5);
18308
+ reasons.push("query asks for community discussion");
18309
+ }
18310
+ if (hasAny(q, [/\b(hacker news|hn|show hn|launch|startup)\b/])) {
18311
+ addScore(scores, candidateSet, "hackernews", 5);
18312
+ reasons.push("query asks for Hacker News style discussion");
18313
+ }
18314
+ if (hasAny(q, [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/])) {
18315
+ addScore(scores, candidateSet, "twitter", 5);
18316
+ reasons.push("query asks for social posts");
18317
+ }
18318
+ if (hasAny(q, [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/])) {
18319
+ addScore(scores, candidateSet, "brave", 3);
18320
+ addScore(scores, candidateSet, "bing", 2.5);
18321
+ addScore(scores, candidateSet, "google", 2.5);
18322
+ addScore(scores, candidateSet, "serpapi", 2);
18323
+ reasons.push("query appears time-sensitive");
18324
+ }
18325
+ if (reasons.length === 0) {
18326
+ addScore(scores, candidateSet, "exa", 2.5);
18327
+ addScore(scores, candidateSet, "perplexity", 2);
18328
+ addScore(scores, candidateSet, "brave", 1.5);
18329
+ addScore(scores, candidateSet, "google", 1.5);
18330
+ addScore(scores, candidateSet, "hackernews", 0.75);
18331
+ reasons.push("general query fallback");
18332
+ }
18333
+ const selectedProviders = [...scores.entries()].sort((a, b) => b[1] - a[1] || normalized.indexOf(a[0]) - normalized.indexOf(b[0])).slice(0, Math.min(maxProviders, normalized.length)).map(([provider]) => provider);
18334
+ const topScore = scores.get(selectedProviders[0]) ?? 0;
18335
+ const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
18336
+ return {
18337
+ strategy: "heuristic",
18338
+ selectedProviders,
18339
+ candidates: normalized,
18340
+ reason: reasons.join("; "),
18341
+ confidence
18342
+ };
18343
+ }
18344
/**
 * Builds the JSON schema the routing model's reply must conform to.
 *
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {number} maxProviders - Upper bound on how many providers may be selected.
 * @returns {object} Schema for an object with `selectedProviders`, `reason`
 *   and `confidence`, all required, with no additional properties.
 */
function routerSchema(candidates, maxProviders) {
  const providerList = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const properties = {
    selectedProviders: providerList,
    reason: { type: "string" },
    confidence: { type: "number", minimum: 0, maximum: 1 }
  };
  return {
    type: "object",
    properties,
    // Every declared property is mandatory, in declaration order.
    required: Object.keys(properties),
    additionalProperties: false
  };
}
18361
/**
 * Parses and validates the routing model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {Iterable<string>} candidates - Provider names that may be selected.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The cleaned selection, or null when `raw` is not JSON, is not an object
 *   with a `selectedProviders` array, or names no usable provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse can legitimately yield null or a primitive ("null", "42");
  // reading a property off null would throw instead of reporting "invalid".
  if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.selectedProviders)) {
    return null;
  }
  const candidateSet = new Set(candidates);
  // De-duplicate before truncating so a repeated name neither consumes a slot
  // nor causes the same provider to be searched twice.
  const selectedProviders = [...new Set(parsed.selectedProviders)]
    .filter((provider) => typeof provider === "string" && candidateSet.has(provider))
    .slice(0, maxProviders);
  if (selectedProviders.length === 0) {
    return null;
  }
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
18380
/**
 * Asks the Cerebras chat-completions API which providers suit a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made and the heuristic
 * router's decision is returned with an explanatory `error` field.
 *
 * @param {string} query - Search query to route.
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options -
 *   Selection limit, request timeout and model identifier.
 * @returns {Promise<object>} Routing decision with `strategy`, `candidates`,
 *   `selectedProviders`, `reason` and `confidence`.
 * @throws {Error} When the HTTP response is not ok, carries no message
 *   content, or the content is not a valid provider selection.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Give the model a short description of each provider it may pick.
  const providerGuide = [];
  for (const name of candidates) {
    providerGuide.push({ name, description: PROVIDER_DESCRIPTIONS[name] });
  }

  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers: providerGuide
    })
  };
  const payload = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Constrain the reply to the router schema.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, options.maxProviders)
      }
    }
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }
  const selection = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!selection) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...selection
  };
}
18442
/**
 * Routes a query to a subset of providers, preferring the Cerebras router and
 * falling back to the heuristic router when it fails.
 *
 * @param {string} query - Search query to route.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options] -
 *   Optional limits. `timeoutMs` defaults to 1200 and is raised to at least
 *   250; `model` defaults to CEREBRAS_MODEL or "gpt-oss-120b".
 * @returns {Promise<object>} Routing decision; contains an `error` message
 *   when the Cerebras call threw and the heuristic result was used instead.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);
  // Never ask for more providers than exist, but keep the limit at 1 or more.
  const available = Math.max(1, normalized.length);
  const maxProviders = Math.min(clampMaxProviders(options.maxProviders), available);

  const requestedTimeout = options.timeoutMs;
  let timeoutMs = 1200;
  if (requestedTimeout && Number.isFinite(requestedTimeout)) {
    timeoutMs = Math.max(250, Math.floor(requestedTimeout));
  }
  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  const heuristic = () => routeSearchProvidersHeuristic(query, normalized, { maxProviders });
  if (normalized.length === 0) {
    return heuristic();
  }
  try {
    return await routeWithCerebras(query, normalized, { maxProviders, timeoutMs, model });
  } catch (err) {
    const fallback = heuristic();
    return {
      ...fallback,
      error: err instanceof Error ? err.message : String(err)
    };
  }
}
18459
+
18043
18460
  // src/lib/search.ts
18461
/**
 * Settles with `promise`, or rejects once `timeoutMs` elapses, whichever
 * happens first.
 *
 * @param {Promise<unknown>} promise - Work to wait for.
 * @param {number} timeoutMs - Deadline in milliseconds; a non-finite or
 *   non-positive value disables the deadline.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<unknown>} The outcome of `promise`.
 * @throws {Error} "<label> timed out after <timeoutMs>ms" when the deadline
 *   fires first.
 */
async function withTimeout(promise, timeoutMs, label) {
  const hasDeadline = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!hasDeadline) {
    return promise;
  }
  let handle;
  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(`${label} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    // Where timers support unref, do not let this one hold the process open.
    handle.unref?.();
  });
  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always cancel the timer, whichever side won the race.
    clearTimeout(handle);
  }
}
18478
/**
 * Runs `task` over `items` with at most `concurrency` calls in flight and
 * reports every outcome in the same shape as Promise.allSettled.
 *
 * @param {unknown[]} items - Inputs, processed in index order.
 * @param {number} concurrency - Maximum simultaneous tasks. Values below 1
 *   are treated as 1, and a value that is not a number (NaN, undefined) also
 *   falls back to 1 so that every item is still processed.
 * @param {(item: unknown) => unknown} task - Work to run per item; it may
 *   return a promise or throw.
 * @returns {Promise<Array<{ status: "fulfilled", value: unknown } | { status: "rejected", reason: unknown }>>}
 *   One settled record per item, at the item's index. Never rejects because
 *   of a failing task.
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;

  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }

  // Math.max(1, NaN) is NaN, which would start zero workers and leave
  // `results` full of holes; fall back to a single worker instead.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
18044
18496
  async function unifiedSearch(query, opts = {}) {
18045
18497
  const config = getConfig();
18046
18498
  const startTime = Date.now();
18047
18499
  const db = opts.db;
18048
18500
  let providerNames = opts.providers ?? [];
18049
- if (opts.profile) {
18501
+ const smartProfile = opts.profile === "smart";
18502
+ if (opts.profile && !smartProfile) {
18050
18503
  const profile = getProfileByName(opts.profile, db);
18051
18504
  if (profile) {
18052
18505
  providerNames = profile.providers;
@@ -18062,7 +18515,7 @@ async function unifiedSearch(query, opts = {}) {
18062
18515
  }
18063
18516
  const errors2 = [];
18064
18517
  const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
18065
- const activeProviders = providerNames.filter((name) => {
18518
+ let activeProviders = providerNames.filter((name) => {
18066
18519
  try {
18067
18520
  if (getProvider(name).isConfigured())
18068
18521
  return true;
@@ -18073,20 +18526,36 @@ async function unifiedSearch(query, opts = {}) {
18073
18526
  });
18074
18527
  }
18075
18528
  return false;
18076
- } catch {
18529
+ } catch (err) {
18530
+ if (explicitRequest) {
18531
+ errors2.push({
18532
+ provider: name,
18533
+ error: err instanceof Error ? err.message : "unknown provider"
18534
+ });
18535
+ }
18077
18536
  return false;
18078
18537
  }
18079
18538
  });
18539
+ const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
18540
+ let routing;
18541
+ if (routingRequested && activeProviders.length > 0) {
18542
+ routing = await routeSearchProviders(query, activeProviders, {
18543
+ maxProviders: config.router.maxProviders,
18544
+ timeoutMs: config.router.timeoutMs,
18545
+ model: config.router.model
18546
+ });
18547
+ activeProviders = routing.selectedProviders;
18548
+ }
18080
18549
  const searchOptions = {
18081
18550
  limit: config.defaultLimit,
18082
18551
  ...opts.options
18083
18552
  };
18084
- const results = await Promise.allSettled(activeProviders.map(async (name) => {
18553
+ const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
18085
18554
  const provider = getProvider(name);
18086
- const rawResults = await provider.search(query, searchOptions);
18555
+ const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
18087
18556
  updateProviderLastUsed(name, db);
18088
18557
  return { name, results: rawResults };
18089
- }));
18558
+ });
18090
18559
  const allResults = [];
18091
18560
  const searchId = generateId();
18092
18561
  for (const result of results) {
@@ -18140,11 +18609,13 @@ async function unifiedSearch(query, opts = {}) {
18140
18609
  createdAt: new Date().toISOString()
18141
18610
  },
18142
18611
  results: finalResults,
18143
- errors: errors2
18612
+ errors: errors2,
18613
+ ...routing && { routing }
18144
18614
  };
18145
18615
  }
18146
18616
  const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
18147
18617
  const search = createSearch({
18618
+ id: searchId,
18148
18619
  query,
18149
18620
  providers: activeProviders,
18150
18621
  resultCount: persistable.length,
@@ -18153,6 +18624,7 @@ async function unifiedSearch(query, opts = {}) {
18153
18624
  if (persistable.length > 0) {
18154
18625
  createResults(persistable.map((r) => ({
18155
18626
  searchId: search.id,
18627
+ id: r.id,
18156
18628
  title: r.title,
18157
18629
  url: r.url,
18158
18630
  snippet: r.snippet,
@@ -18169,7 +18641,8 @@ async function unifiedSearch(query, opts = {}) {
18169
18641
  return {
18170
18642
  search: { ...search, resultCount: finalResults.length, duration },
18171
18643
  results: finalResults,
18172
- errors: errors2
18644
+ errors: errors2,
18645
+ ...routing && { routing }
18173
18646
  };
18174
18647
  }
18175
18648
  async function searchSingleProvider(provider, query, options, db) {
@@ -18403,7 +18876,7 @@ function findLocal(query, opts = {}, db) {
18403
18876
  return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
18404
18877
  }
18405
18878
  if (opts.refresh !== false)
18406
- autoRefreshStaleRoots(db);
18879
+ scheduleAutoRefreshStaleRoots(db);
18407
18880
  const queryOpts = {
18408
18881
  root: opts.root,
18409
18882
  ext: opts.ext,
@@ -28643,13 +29116,15 @@ function buildServer() {
28643
29116
  providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
28644
29117
  profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
28645
29118
  limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
28646
- dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
28647
- }, async ({ query, providers, profile, limit, dedup }) => {
29119
+ dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
29120
+ smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
29121
+ }, async ({ query, providers, profile, limit, dedup, smart }) => {
28648
29122
  const response = await unifiedSearch(query, {
28649
29123
  providers,
28650
29124
  profile,
28651
29125
  options: limit ? { limit } : undefined,
28652
- dedup
29126
+ dedup,
29127
+ smart
28653
29128
  });
28654
29129
  return {
28655
29130
  content: [
@@ -28668,7 +29143,8 @@ function buildServer() {
28668
29143
  source: r.source,
28669
29144
  score: r.score
28670
29145
  })),
28671
- errors: response.errors
29146
+ errors: response.errors,
29147
+ routing: response.routing
28672
29148
  }, null, 2)
28673
29149
  }
28674
29150
  ]
@@ -28923,12 +29399,14 @@ function buildServer() {
28923
29399
  default_limit: exports_external.number().int().optional(),
28924
29400
  dedup: exports_external.boolean().optional(),
28925
29401
  max_concurrent: exports_external.number().int().optional(),
29402
+ provider_timeout_ms: exports_external.number().int().optional(),
28926
29403
  default_profile: exports_external.string().nullable().optional()
28927
29404
  }, async (updates) => {
28928
29405
  const config2 = setConfig({
28929
29406
  ...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
28930
29407
  ...updates.dedup !== undefined && { dedup: updates.dedup },
28931
29408
  ...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
29409
+ ...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
28932
29410
  ...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
28933
29411
  });
28934
29412
  return {
@@ -29071,10 +29549,12 @@ function startServer(port) {
29071
29549
  const providers = url.searchParams.get("providers")?.split(",");
29072
29550
  const profile = url.searchParams.get("profile") ?? undefined;
29073
29551
  const limit = url.searchParams.get("limit") ? parseInt(url.searchParams.get("limit")) : undefined;
29552
+ const smart = url.searchParams.get("smart") === "1" || url.searchParams.get("smart") === "true";
29074
29553
  const response = await unifiedSearch(q, {
29075
29554
  providers,
29076
29555
  profile,
29077
- options: limit ? { limit } : undefined
29556
+ options: limit ? { limit } : undefined,
29557
+ smart
29078
29558
  });
29079
29559
  return json(response);
29080
29560
  }