@hasna/search 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11644,7 +11644,7 @@ var require_lib2 = __commonJS((exports, module) => {
11644
11644
  var require_package = __commonJS((exports, module) => {
11645
11645
  module.exports = {
11646
11646
  name: "@hasna/search",
11647
- version: "0.0.9",
11647
+ version: "0.0.11",
11648
11648
  description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
11649
11649
  type: "module",
11650
11650
  main: "dist/index.js",
@@ -15756,12 +15756,19 @@ var DEFAULT_CONFIG = {
15756
15756
  defaultLimit: 10,
15757
15757
  defaultProviders: [],
15758
15758
  defaultProfile: null,
15759
+ router: {
15760
+ enabled: false,
15761
+ model: "gpt-oss-120b",
15762
+ maxProviders: 3,
15763
+ timeoutMs: 1200
15764
+ },
15759
15765
  transcriber: {
15760
15766
  baseUrl: "http://localhost:19600",
15761
15767
  fallbackCli: "microservice-transcriber"
15762
15768
  },
15763
15769
  dedup: true,
15764
15770
  maxConcurrent: 5,
15771
+ providerTimeoutMs: 15000,
15765
15772
  indexStaleMinutes: 5,
15766
15773
  indexAutoRefresh: true,
15767
15774
  recordLocalResults: false
@@ -16496,6 +16503,31 @@ var migrations = [
16496
16503
  );
16497
16504
  `);
16498
16505
  }
16506
+ },
16507
+ {
16508
+ version: 2,
16509
+ description: "Local file index filter indexes",
16510
+ up: (db) => {
16511
+ db.exec(`
16512
+ CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
16513
+ CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
16514
+ `);
16515
+ }
16516
+ },
16517
+ {
16518
+ version: 3,
16519
+ description: "Local content short-token filter grams",
16520
+ up: (db) => {
16521
+ db.exec(`
16522
+ CREATE TABLE IF NOT EXISTS file_content_grams (
16523
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
16524
+ gram TEXT NOT NULL,
16525
+ PRIMARY KEY (file_id, gram)
16526
+ );
16527
+ CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
16528
+ ON file_content_grams(gram, file_id);
16529
+ `);
16530
+ }
16499
16531
  }
16500
16532
  ];
16501
16533
  function runIndexMigrations(db) {
@@ -16567,7 +16599,18 @@ function getConfig() {
16567
16599
  try {
16568
16600
  const raw = readFileSync(path, "utf-8");
16569
16601
  const parsed = JSON.parse(raw);
16570
- return { ...DEFAULT_CONFIG, ...parsed };
16602
+ return {
16603
+ ...DEFAULT_CONFIG,
16604
+ ...parsed,
16605
+ router: {
16606
+ ...DEFAULT_CONFIG.router,
16607
+ ...parsed.router ?? {}
16608
+ },
16609
+ transcriber: {
16610
+ ...DEFAULT_CONFIG.transcriber,
16611
+ ...parsed.transcriber ?? {}
16612
+ }
16613
+ };
16571
16614
  } catch {
16572
16615
  return { ...DEFAULT_CONFIG };
16573
16616
  }
@@ -16985,6 +17028,7 @@ function removeRoot(idOrPath, db) {
16985
17028
  d.exec("BEGIN");
16986
17029
  try {
16987
17030
  d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
17031
+ d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
16988
17032
  d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
16989
17033
  d.exec("COMMIT");
16990
17034
  } catch (err) {
@@ -16996,6 +17040,21 @@ function removeRoot(idOrPath, db) {
16996
17040
  function shouldIndexContent(root, file) {
16997
17041
  return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
16998
17042
  }
17043
+ function contentShortGrams(body) {
17044
+ const grams = new Set;
17045
+ const words = body.toLowerCase().matchAll(/[a-z0-9_$]+/g);
17046
+ for (const match of words) {
17047
+ const word = match[0];
17048
+ for (let i = 0;i < word.length; i++) {
17049
+ grams.add(word[i]);
17050
+ if (i + 1 < word.length)
17051
+ grams.add(word.slice(i, i + 2));
17052
+ }
17053
+ if (grams.size >= 2048)
17054
+ break;
17055
+ }
17056
+ return [...grams];
17057
+ }
16999
17058
  function indexRoot(idOrPath, opts = {}, db) {
17000
17059
  const d = db ?? getIndexDb();
17001
17060
  const root = getRoot(idOrPath, d);
@@ -17014,6 +17073,8 @@ function indexRoot(idOrPath, opts = {}, db) {
17014
17073
  const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
17015
17074
  const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
17016
17075
  const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
17076
+ const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
17077
+ const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
17017
17078
  const stats = {
17018
17079
  rootId: root.id,
17019
17080
  added: 0,
@@ -17024,38 +17085,57 @@ function indexRoot(idOrPath, opts = {}, db) {
17024
17085
  skippedDirs: skippedDirs.length,
17025
17086
  durationMs: 0
17026
17087
  };
17088
+ const seen = new Set;
17089
+ const changes = [];
17090
+ for (const file of scanned) {
17091
+ seen.add(file.relPath);
17092
+ const prev = existing.get(file.relPath);
17093
+ const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
17094
+ if (prev && !changed && !opts.force)
17095
+ continue;
17096
+ const wantContent = shouldIndexContent(root, file);
17097
+ const absPath = `${root.path}/${file.relPath}`;
17098
+ let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
17099
+ let body = null;
17100
+ if (wantContent && !isBinary) {
17101
+ try {
17102
+ body = readFileSync3(absPath, "utf-8");
17103
+ } catch {
17104
+ isBinary = true;
17105
+ }
17106
+ }
17107
+ changes.push({
17108
+ file,
17109
+ prev,
17110
+ isBinary,
17111
+ body,
17112
+ grams: body !== null ? contentShortGrams(body) : [],
17113
+ contentIndexed: body !== null ? 1 : 0
17114
+ });
17115
+ }
17027
17116
  d.exec("BEGIN");
17028
17117
  try {
17029
- const seen = new Set;
17030
- for (const file of scanned) {
17031
- seen.add(file.relPath);
17032
- const prev = existing.get(file.relPath);
17033
- const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
17034
- if (prev && !changed && !opts.force)
17035
- continue;
17036
- const wantContent = shouldIndexContent(root, file);
17037
- const absPath = `${root.path}/${file.relPath}`;
17038
- let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
17039
- let body = null;
17040
- if (wantContent && !isBinary) {
17041
- try {
17042
- body = readFileSync3(absPath, "utf-8");
17043
- } catch {
17044
- isBinary = true;
17045
- }
17046
- }
17047
- const contentIndexed = body !== null ? 1 : 0;
17118
+ for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
17048
17119
  if (prev) {
17049
- if (prev.content_indexed)
17120
+ if (prev.content_indexed) {
17050
17121
  deleteContent.run(prev.id);
17122
+ deleteContentGrams.run(prev.id);
17123
+ }
17051
17124
  updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
17052
- if (body !== null)
17125
+ if (body !== null) {
17053
17126
  insertContent.run(prev.id, body);
17127
+ for (const gram of grams)
17128
+ insertContentGram.run(prev.id, gram);
17129
+ }
17054
17130
  stats.updated++;
17055
17131
  } else {
17056
17132
  const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
17057
- if (body !== null)
17058
- insertContent.run(Number(inserted.lastInsertRowid), body);
17133
+ if (body !== null) {
17134
+ const fileId = Number(inserted.lastInsertRowid);
17135
+ insertContent.run(fileId, body);
17136
+ for (const gram of grams)
17137
+ insertContentGram.run(fileId, gram);
17138
+ }
17059
17139
  stats.added++;
17060
17140
  }
17061
17141
  if (contentIndexed)
@@ -17064,8 +17144,10 @@ function indexRoot(idOrPath, opts = {}, db) {
17064
17144
  for (const [relPath, row] of existing) {
17065
17145
  if (seen.has(relPath))
17066
17146
  continue;
17067
- if (row.content_indexed)
17147
+ if (row.content_indexed) {
17068
17148
  deleteContent.run(row.id);
17149
+ deleteContentGrams.run(row.id);
17150
+ }
17069
17151
  deleteFile.run(row.id);
17070
17152
  stats.deleted++;
17071
17153
  }
@@ -17087,6 +17169,9 @@ function indexAllRoots(opts = {}, db) {
17087
17169
  return listRoots(db).map((root) => indexRoot(root.id, opts, db));
17088
17170
  }
17089
17171
  var refreshing = new Set;
17172
+ var lastDefaultAutoRefreshCheckAt = 0;
17173
+ var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
17174
+ var defaultRefreshScheduled = false;
17090
17175
  function refreshStaleRoots(staleMinutes, db) {
17091
17176
  const cutoff = Date.now() - staleMinutes * 60000;
17092
17177
  const stats = [];
@@ -17110,8 +17195,43 @@ function autoRefreshStaleRoots(db) {
17110
17195
  const config = getConfig();
17111
17196
  if (!config.indexAutoRefresh)
17112
17197
  return [];
17198
+ if (!db) {
17199
+ const now = Date.now();
17200
+ if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
17201
+ return [];
17202
+ lastDefaultAutoRefreshCheckAt = now;
17203
+ }
17113
17204
  return refreshStaleRoots(config.indexStaleMinutes, db);
17114
17205
  }
17206
+ function scheduleAutoRefreshStaleRoots(db) {
17207
+ if (db)
17208
+ return autoRefreshStaleRoots(db);
17209
+ const config = getConfig();
17210
+ if (!config.indexAutoRefresh || defaultRefreshScheduled)
17211
+ return [];
17212
+ defaultRefreshScheduled = true;
17213
+ const timer = setTimeout(() => {
17214
+ try {
17215
+ autoRefreshStaleRoots();
17216
+ } catch {} finally {
17217
+ defaultRefreshScheduled = false;
17218
+ }
17219
+ }, 0);
17220
+ timer.unref?.();
17221
+ return [];
17222
+ }
17223
+ function startBackgroundRefresh() {
17224
+ const minutes = Math.max(1, getConfig().indexStaleMinutes);
17225
+ const timer = setInterval(() => {
17226
+ try {
17227
+ autoRefreshStaleRoots();
17228
+ } catch (err) {
17229
+ console.error("Index refresh failed:", err);
17230
+ }
17231
+ }, minutes * 60000);
17232
+ timer.unref?.();
17233
+ return timer;
17234
+ }
17115
17235
 
17116
17236
  // src/lib/local/query.ts
17117
17237
  import { existsSync as existsSync2, readFileSync as readFileSync4 } from "fs";
@@ -17338,6 +17458,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
17338
17458
  // src/lib/local/query.ts
17339
17459
  var MAX_LINE_LENGTH = 200;
17340
17460
  var MAX_MATCHES_PER_FILE = 5;
17461
+ var MAX_PATH_CANDIDATES = 20000;
17462
+ var MAX_CONTENT_CANDIDATES = 50000;
17463
+ var MAX_REGEX_CANDIDATES = 50000;
17341
17464
  function tokenize(query) {
17342
17465
  return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
17343
17466
  }
@@ -17368,11 +17491,40 @@ function filterClauses(opts, db) {
17368
17491
  }
17369
17492
  if (opts.dir) {
17370
17493
  clauses.push("f.dir LIKE ? ESCAPE '\\'");
17371
- const dir = opts.dir.replace(/^\/|\/$/g, "").replace(/[\\%_]/g, "\\$&");
17494
+ const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
17372
17495
  params.push(`%${dir}%`);
17373
17496
  }
17374
17497
  return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
17375
17498
  }
17499
+ function escapeLike(value) {
17500
+ return value.replace(/[\\%_]/g, "\\$&");
17501
+ }
17502
+ function shortTokenClauses(tokens) {
17503
+ if (tokens.length === 0)
17504
+ return { sql: "", params: [] };
17505
+ return {
17506
+ sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
17507
+ params: tokens.map((token) => `%${escapeLike(token)}%`)
17508
+ };
17509
+ }
17510
+ function contentGramClauses(tokens) {
17511
+ const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
17512
+ if (gramTokens.length === 0)
17513
+ return { sql: "", params: [] };
17514
+ return {
17515
+ sql: gramTokens.map((_token, index) => ` AND (
17516
+ NOT EXISTS (
17517
+ SELECT 1 FROM file_content_grams cg_any_${index}
17518
+ WHERE cg_any_${index}.file_id = f.id
17519
+ )
17520
+ OR EXISTS (
17521
+ SELECT 1 FROM file_content_grams cg_${index}
17522
+ WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
17523
+ )
17524
+ )`).join(""),
17525
+ params: gramTokens
17526
+ };
17527
+ }
17376
17528
  function rowToHit(row, score) {
17377
17529
  return {
17378
17530
  rootId: row.root_id,
@@ -17442,6 +17594,8 @@ function searchFilePaths(query, opts = {}, db) {
17442
17594
  return [];
17443
17595
  const ftsQuery = buildFtsQuery(query);
17444
17596
  const filters = filterClauses(opts, d);
17597
+ const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
17598
+ const shortFilters = shortTokenClauses(shortTokens);
17445
17599
  const candidateLimit = Math.max(200, limit * 10);
17446
17600
  let rows;
17447
17601
  if (ftsQuery) {
@@ -17449,16 +17603,16 @@ function searchFilePaths(query, opts = {}, db) {
17449
17603
  FROM files_fts fts
17450
17604
  JOIN files f ON f.id = fts.rowid
17451
17605
  JOIN index_roots r ON r.id = f.root_id
17452
- WHERE files_fts MATCH ?${filters.sql}
17606
+ WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
17453
17607
  ORDER BY bm25(files_fts, 10.0, 1.0)
17454
- LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
17455
- const namePattern = `${query.trim().replace(/[\\%_]/g, "\\$&")}%`;
17608
+ LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
17609
+ const namePattern = `${escapeLike(query.trim())}%`;
17456
17610
  const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17457
17611
  FROM files f
17458
17612
  JOIN index_roots r ON r.id = f.root_id
17459
- WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
17613
+ WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
17460
17614
  ORDER BY length(f.name)
17461
- LIMIT 100`).all(namePattern, ...filters.params);
17615
+ LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
17462
17616
  const seen = new Set(rows.map((row) => row.id));
17463
17617
  for (const row of nameRows) {
17464
17618
  if (!seen.has(row.id))
@@ -17466,14 +17620,14 @@ function searchFilePaths(query, opts = {}, db) {
17466
17620
  }
17467
17621
  } else {
17468
17622
  const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
17469
- const likeParams = tokens.map((t) => `%${t.replace(/[\\%_]/g, "\\$&")}%`);
17623
+ const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
17470
17624
  rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17471
17625
  FROM files f
17472
17626
  JOIN index_roots r ON r.id = f.root_id
17473
17627
  WHERE ${likeClauses}${filters.sql}
17474
- LIMIT ?`).all(...likeParams, ...filters.params, candidateLimit);
17628
+ ORDER BY length(f.name), length(f.rel_path), f.rel_path
17629
+ LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
17475
17630
  }
17476
- const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
17477
17631
  const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
17478
17632
  return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync2(hit.absPath)).slice(0, limit);
17479
17633
  }
@@ -17511,24 +17665,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
17511
17665
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
17512
17666
  }
17513
17667
  const filters = filterClauses(opts, d);
17514
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17515
- FROM files_fts fts
17516
- JOIN files f ON f.id = fts.rowid
17517
- JOIN index_roots r ON r.id = f.root_id
17518
- WHERE files_fts MATCH ?${filters.sql}
17519
- ORDER BY fts.rank
17520
- LIMIT 5000`).all(ftsQuery, ...filters.params);
17521
17668
  const hits = [];
17522
- for (const row of rows) {
17523
- if (!regex.test(row.rel_path) && !regex.test(row.name))
17524
- continue;
17525
- const depth = row.rel_path.split("/").length - 1;
17526
- const score = Math.max(0.05, 0.6 - depth * 0.02);
17527
- const hit = rowToHit(row, score);
17528
- if (!existsSync2(hit.absPath))
17529
- continue;
17530
- hits.push(hit);
17531
- if (hits.length >= limit)
17669
+ const pageSize = Math.max(500, limit * 20);
17670
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
17671
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17672
+ FROM files_fts fts
17673
+ JOIN files f ON f.id = fts.rowid
17674
+ JOIN index_roots r ON r.id = f.root_id
17675
+ WHERE files_fts MATCH ?${filters.sql}
17676
+ ORDER BY fts.rank
17677
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
17678
+ if (rows.length === 0)
17679
+ break;
17680
+ for (const row of rows) {
17681
+ if (!regex.test(row.rel_path) && !regex.test(row.name))
17682
+ continue;
17683
+ const depth = row.rel_path.split("/").length - 1;
17684
+ const score = Math.max(0.05, 0.6 - depth * 0.02);
17685
+ const hit = rowToHit(row, score);
17686
+ if (!existsSync2(hit.absPath))
17687
+ continue;
17688
+ hits.push(hit);
17689
+ if (hits.length >= limit)
17690
+ break;
17691
+ }
17692
+ if (rows.length < pageSize)
17532
17693
  break;
17533
17694
  }
17534
17695
  return hits;
@@ -17542,40 +17703,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
17542
17703
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
17543
17704
  }
17544
17705
  const filters = filterClauses(opts, d);
17545
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17546
- FROM file_content_fts fts
17547
- JOIN files f ON f.id = fts.rowid
17548
- JOIN index_roots r ON r.id = f.root_id
17549
- WHERE file_content_fts MATCH ?${filters.sql}
17550
- ORDER BY fts.rank
17551
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
17552
17706
  const hits = [];
17553
- for (let i = 0;i < rows.length && hits.length < limit; i++) {
17554
- const row = rows[i];
17555
- const absPath = `${row.root_path}/${row.rel_path}`;
17556
- let content;
17557
- try {
17558
- content = readFileSync4(absPath, "utf-8");
17559
- } catch {
17560
- continue;
17561
- }
17562
- const lines = content.split(`
17707
+ const pageSize = Math.max(200, limit * 10);
17708
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
17709
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17710
+ FROM file_content_fts fts
17711
+ JOIN files f ON f.id = fts.rowid
17712
+ JOIN index_roots r ON r.id = f.root_id
17713
+ WHERE file_content_fts MATCH ?${filters.sql}
17714
+ ORDER BY fts.rank
17715
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
17716
+ if (rows.length === 0)
17717
+ break;
17718
+ for (let i = 0;i < rows.length && hits.length < limit; i++) {
17719
+ const row = rows[i];
17720
+ const absPath = `${row.root_path}/${row.rel_path}`;
17721
+ let content;
17722
+ try {
17723
+ content = readFileSync4(absPath, "utf-8");
17724
+ } catch {
17725
+ continue;
17726
+ }
17727
+ const lines = content.split(`
17563
17728
  `);
17564
- const matches = [];
17565
- for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
17566
- if (regex.test(lines[n])) {
17567
- matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
17729
+ const matches = [];
17730
+ for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
17731
+ if (regex.test(lines[n])) {
17732
+ matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
17733
+ }
17568
17734
  }
17735
+ if (matches.length === 0)
17736
+ continue;
17737
+ const rankIndex = offset + i;
17738
+ const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
17739
+ hits.push({
17740
+ ...rowToHit(row, score),
17741
+ line: matches[0].line,
17742
+ lineText: matches[0].text,
17743
+ matches
17744
+ });
17569
17745
  }
17570
- if (matches.length === 0)
17571
- continue;
17572
- const score = Math.max(0.25, 0.65 - i * 0.05);
17573
- hits.push({
17574
- ...rowToHit(row, score),
17575
- line: matches[0].line,
17576
- lineText: matches[0].text,
17577
- matches
17578
- });
17746
+ if (rows.length < pageSize)
17747
+ break;
17579
17748
  }
17580
17749
  return hits;
17581
17750
  }
@@ -17586,42 +17755,51 @@ function searchFileContent(query, opts = {}, db) {
17586
17755
  if (!ftsQuery)
17587
17756
  return [];
17588
17757
  const filters = filterClauses(opts, d);
17589
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17590
- FROM file_content_fts fts
17591
- JOIN files f ON f.id = fts.rowid
17592
- JOIN index_roots r ON r.id = f.root_id
17593
- WHERE file_content_fts MATCH ?${filters.sql}
17594
- ORDER BY fts.rank
17595
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
17596
17758
  const tokens = tokenize(query);
17597
17759
  const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
17760
+ const gramFilters = contentGramClauses(shortTokens);
17598
17761
  const scored = [];
17599
- for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
17600
- const row = rows[i];
17601
- const absPath = `${row.root_path}/${row.rel_path}`;
17602
- let content;
17603
- try {
17604
- content = readFileSync4(absPath, "utf-8");
17605
- } catch {
17606
- continue;
17607
- }
17608
- if (shortTokens.length > 0) {
17609
- const lower = content.toLowerCase();
17610
- if (!shortTokens.every((t) => lower.includes(t)))
17762
+ const pageSize = Math.max(50, limit * 3);
17763
+ for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
17764
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
17765
+ FROM file_content_fts fts
17766
+ JOIN files f ON f.id = fts.rowid
17767
+ JOIN index_roots r ON r.id = f.root_id
17768
+ WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
17769
+ ORDER BY fts.rank
17770
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
17771
+ if (rows.length === 0)
17772
+ break;
17773
+ for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
17774
+ const row = rows[i];
17775
+ const absPath = `${row.root_path}/${row.rel_path}`;
17776
+ let content;
17777
+ try {
17778
+ content = readFileSync4(absPath, "utf-8");
17779
+ } catch {
17611
17780
  continue;
17781
+ }
17782
+ if (shortTokens.length > 0) {
17783
+ const lower = content.toLowerCase();
17784
+ if (!shortTokens.every((t) => lower.includes(t)))
17785
+ continue;
17786
+ }
17787
+ const { matches, tier } = findLineMatches(content, query, tokens);
17788
+ if (matches.length === 0)
17789
+ continue;
17790
+ const rankIndex = offset + i;
17791
+ const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
17792
+ const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
17793
+ const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
17794
+ scored.push({
17795
+ ...rowToHit(row, score),
17796
+ line: matches[0].line,
17797
+ lineText: matches[0].text,
17798
+ matches
17799
+ });
17612
17800
  }
17613
- const { matches, tier } = findLineMatches(content, query, tokens);
17614
- if (matches.length === 0)
17615
- continue;
17616
- const base = Math.max(0.25, 0.55 - i * 0.04);
17617
- const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
17618
- const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
17619
- scored.push({
17620
- ...rowToHit(row, score),
17621
- line: matches[0].line,
17622
- lineText: matches[0].text,
17623
- matches
17624
- });
17801
+ if (rows.length < pageSize)
17802
+ break;
17625
17803
  }
17626
17804
  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
17627
17805
  }
@@ -17634,7 +17812,7 @@ class FilesProvider {
17634
17812
  return hasReadyRoot();
17635
17813
  }
17636
17814
  async search(query, options) {
17637
- autoRefreshStaleRoots();
17815
+ scheduleAutoRefreshStaleRoots();
17638
17816
  const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
17639
17817
  return hits.map((hit) => ({
17640
17818
  title: hit.name,
@@ -17662,7 +17840,7 @@ class ContentProvider {
17662
17840
  return hasReadyRoot();
17663
17841
  }
17664
17842
  async search(query, options) {
17665
- autoRefreshStaleRoots();
17843
+ scheduleAutoRefreshStaleRoots();
17666
17844
  const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
17667
17845
  return hits.map((hit) => ({
17668
17846
  title: hit.name,
@@ -17789,7 +17967,7 @@ function rowToSearch(row) {
17789
17967
  }
17790
17968
  function createSearch(data, db) {
17791
17969
  const d = db ?? getDb();
17792
- const id = generateId();
17970
+ const id = data.id ?? generateId();
17793
17971
  const now = new Date().toISOString();
17794
17972
  d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
17795
17973
  VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
@@ -17871,7 +18049,7 @@ function createResults(results, db) {
17871
18049
  d.exec("BEGIN");
17872
18050
  try {
17873
18051
  for (const data of results) {
17874
- const id = generateId();
18052
+ const id = data.id ?? generateId();
17875
18053
  stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
17876
18054
  created.push({
17877
18055
  id,
@@ -18028,13 +18206,300 @@ function isProviderConfigured(provider) {
18028
18206
  return !!Bun.env[provider.apiKeyEnv];
18029
18207
  }
18030
18208
 
18209
+ // src/lib/router.ts
18210
+ var PROVIDER_DESCRIPTIONS = {
18211
+ files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
18212
+ content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
18213
+ google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
18214
+ serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
18215
+ exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
18216
+ perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
18217
+ brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
18218
+ bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
18219
+ twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
18220
+ reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
18221
+ youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
18222
+ hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
18223
+ github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
18224
+ arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
18225
+ };
18226
/**
 * Normalizes the requested provider cap.
 *
 * @param {number|undefined} value - Requested maximum number of providers.
 * @returns {number} 3 when `value` is undefined or not a finite number,
 *   otherwise `value` rounded down and limited to the range 1..5.
 */
function clampMaxProviders(value) {
  const usable = value !== undefined && Number.isFinite(value);
  if (!usable) {
    return 3;
  }
  const whole = Math.floor(value);
  return Math.min(5, Math.max(1, whole));
}
18231
/**
 * Coerces a confidence value into the closed interval [0, 1].
 *
 * @param {unknown} value - Candidate confidence.
 * @returns {number} The clamped number, or 0.5 when `value` is not a finite number.
 */
function clampConfidence(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0.5;
  }
  return Math.min(1, Math.max(0, value));
}
18234
/**
 * Filters a candidate list down to names present in PROVIDER_NAMES,
 * dropping repeats while keeping first-seen order.
 *
 * @param {Iterable<string>} candidates - Provider names to validate.
 * @returns {string[]} Unique provider names, in their original order.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  // A Set iterates in insertion order, so the first occurrence of each name wins.
  const unique = new Set();
  for (const name of candidates) {
    if (known.has(name)) {
      unique.add(name);
    }
  }
  return [...unique];
}
18246
/**
 * Adds `amount` to a provider's running score, but only when that provider
 * is among the routable candidates. Mutates `scores` in place.
 *
 * @param {Map<string, number>} scores - Running score per provider.
 * @param {Set<string>} candidateSet - Providers eligible for scoring.
 * @param {string} provider - Provider to boost.
 * @param {number} amount - Score increment.
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    const current = scores.get(provider) ?? 0;
    scores.set(provider, current + amount);
  }
}
18251
/**
 * Reports whether at least one of the given regular expressions matches the query.
 * Stops at the first match.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns to try, in order.
 * @returns {boolean} True on the first matching pattern, false if none match.
 */
function hasAny(query, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(query)) {
      return true;
    }
  }
  return false;
}
18254
/**
 * Picks search providers for a query using keyword/regex signals only.
 *
 * Every candidate starts at a small base score. Each matching signal rule adds
 * its boosts and records a reason. When no rule matches, a general-purpose
 * fallback set of boosts is applied instead. Providers are ranked by score,
 * with ties resolved by their position in the normalized candidate list.
 *
 * @param {string} query - Raw user query; trimmed and lower-cased before matching.
 * @param {string[]} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number }} [options] - Optional cap on selected providers.
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 *   Routing decision; confidence is 0 when there are no candidates, otherwise within 0.35..0.9.
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  const cap = clampMaxProviders(options.maxProviders);
  if (pool.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; keep this order so per-provider sums
  // accumulate in the same sequence (floating-point addition is order-sensitive).
  const signalRules = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      reason: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      reason: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      reason: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      reason: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      reason: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      reason: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      reason: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      reason: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      reason: "query appears time-sensitive"
    }
  ];
  const fallbackBoosts = [
    ["exa", 2.5],
    ["perplexity", 2],
    ["brave", 1.5],
    ["google", 1.5],
    ["hackernews", 0.75]
  ];

  const poolSet = new Set(pool);
  const tally = new Map(pool.map((name) => [name, 0.05]));
  const matchedReasons = [];
  const needle = query.trim().toLowerCase();

  const applyBoosts = (boosts) => {
    for (const [provider, amount] of boosts) {
      addScore(tally, poolSet, provider, amount);
    }
  };

  for (const rule of signalRules) {
    if (!hasAny(needle, rule.patterns)) {
      continue;
    }
    applyBoosts(rule.boosts);
    matchedReasons.push(rule.reason);
  }
  if (matchedReasons.length === 0) {
    applyBoosts(fallbackBoosts);
    matchedReasons.push("general query fallback");
  }

  // Highest score first; equal scores keep the caller's candidate order.
  const position = new Map(pool.map((name, index) => [name, index]));
  const ranked = [...tally.entries()];
  ranked.sort((left, right) => right[1] - left[1] || position.get(left[0]) - position.get(right[0]));
  const selectedProviders = ranked
    .slice(0, Math.min(cap, pool.length))
    .map(([provider]) => provider);

  const topScore = tally.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: pool,
    reason: matchedReasons.join("; "),
    confidence
  };
}
18344
/**
 * Builds the JSON Schema describing the router's expected reply.
 *
 * @param {string[]} candidates - Provider names allowed in `selectedProviders`.
 * @param {number} maxProviders - Upper bound on the number of selected providers.
 * @returns {object} Schema requiring `selectedProviders`, `reason` and `confidence`,
 *   with no additional properties allowed.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const reason = { type: "string" };
  const confidence = { type: "number", minimum: 0, maximum: 1 };
  return {
    type: "object",
    properties: { selectedProviders, reason, confidence },
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
18361
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {string[]} candidates - Provider names the model was allowed to choose from.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The validated selection, or null when the reply is not valid JSON, is not an
 *   object with a `selectedProviders` array, or names no allowed provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse can legitimately yield null ("null"); optional chaining keeps that
  // case on the "invalid reply" path instead of raising a TypeError.
  if (!Array.isArray(parsed?.selectedProviders))
    return null;
  const candidateSet = new Set(candidates);
  // Drop unknown names and repeats so one provider is never selected twice.
  const seen = new Set();
  const selectedProviders = parsed.selectedProviders.filter((provider) => {
    if (typeof provider !== "string" || !candidateSet.has(provider) || seen.has(provider))
      return false;
    seen.add(provider);
    return true;
  }).slice(0, maxProviders);
  if (selectedProviders.length === 0)
    return null;
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
18380
/**
 * Asks the Cerebras chat-completions endpoint to choose providers for a query.
 *
 * When CEREBRAS_API_KEY is absent from Bun.env, no request is made: the heuristic
 * routing result is returned with an explanatory `error` field.
 *
 * @param {string} query - User query to route.
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options - Request settings.
 * @returns {Promise<object>} Routing decision with `strategy: "cerebras"`, or the
 *   heuristic decision plus `error` when no API key is configured.
 * @throws {Error} On a non-OK HTTP status, an empty completion, or a reply that
 *   fails validation. Errors raised by fetch itself (including the timeout abort)
 *   are not caught here.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  const { maxProviders, timeoutMs, model } = options;
  const providerGuide = candidates.map((name) => ({
    name,
    description: PROVIDER_DESCRIPTIONS[name]
  }));

  // The abort signal bounds the whole request by the configured timeout.
  const signal = AbortSignal.timeout(timeoutMs);
  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders,
      providers: providerGuide
    })
  };
  const payload = {
    model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Request schema-constrained JSON output so the reply can be parsed directly.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, maxProviders)
      }
    }
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal,
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }

  const parsed = parseCerebrasRouting(content, candidates, maxProviders);
  if (!parsed) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...parsed
  };
}
18442
/**
 * Routes a query to a subset of providers, trying the Cerebras router first and
 * falling back to heuristic routing when that attempt throws.
 *
 * @param {string} query - User query to route.
 * @param {string[]} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options]
 *   Optional overrides. `timeoutMs` is floored and raised to at least 250, defaulting
 *   to 1200 when missing, zero or non-finite. `model` falls back to
 *   Bun.env.CEREBRAS_MODEL and then "gpt-oss-120b".
 * @returns {Promise<object>} Routing decision; carries an `error` message when the
 *   Cerebras attempt failed and the heuristic result was used instead.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const pool = normalizeCandidates(candidates);
  // Never ask for more providers than exist, but keep the cap at 1 or above.
  const cap = Math.min(clampMaxProviders(options.maxProviders), Math.max(1, pool.length));

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }
  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  const heuristic = () => routeSearchProvidersHeuristic(query, pool, { maxProviders: cap });
  if (pool.length === 0) {
    return heuristic();
  }

  try {
    return await routeWithCerebras(query, pool, { maxProviders: cap, timeoutMs, model });
  } catch (err) {
    const fallback = heuristic();
    const message = err instanceof Error ? err.message : String(err);
    return { ...fallback, error: message };
  }
}
18459
+
18031
18460
  // src/lib/search.ts
18461
/**
 * Races a promise against a timer and rejects if the timer fires first.
 *
 * The wrapped promise is not cancelled on timeout; only the wait is abandoned.
 * The timer is unref'd where supported and is always cleared once the race settles.
 *
 * @param {Promise<T>} promise - Work to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds; a non-finite or
 *   non-positive value disables the limit.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The settled value of `promise`.
 * @throws {Error} `${label} timed out after ${timeoutMs}ms` when the limit elapses first.
 * @template T
 */
async function withTimeout(promise, timeoutMs, label) {
  const limited = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!limited) {
    return promise;
  }

  let handle;
  try {
    const deadline = new Promise((_resolve, reject) => {
      const expire = () => reject(new Error(`${label} timed out after ${timeoutMs}ms`));
      handle = setTimeout(expire, timeoutMs);
      // Do not let a pending deadline keep the process alive on its own.
      handle.unref?.();
    });
    return await Promise.race([promise, deadline]);
  } finally {
    if (handle) {
      clearTimeout(handle);
    }
  }
}
18478
/**
 * Runs `task` over every item with at most `concurrency` tasks in flight and
 * reports each outcome in the same shape as Promise.allSettled.
 *
 * @param {T[]} items - Inputs to process; result order matches this order.
 * @param {number} concurrency - Maximum number of simultaneous tasks. Values below 1
 *   are raised to 1; a value that is not a number (NaN after coercion) also falls
 *   back to 1 so that every item is still processed.
 * @param {(item: T) => Promise<R> | R} task - Work to perform per item.
 * @returns {Promise<Array<{ status: "fulfilled", value: R } | { status: "rejected", reason: unknown }>>}
 *   One settled record per item; never rejects because of a failing task.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }
  // Math.max(1, NaN) is NaN, which would start zero workers and leave `results`
  // full of holes; treat an unusable limit as "one at a time" instead.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
18032
18496
  async function unifiedSearch(query, opts = {}) {
18033
18497
  const config = getConfig();
18034
18498
  const startTime = Date.now();
18035
18499
  const db = opts.db;
18036
18500
  let providerNames = opts.providers ?? [];
18037
- if (opts.profile) {
18501
+ const smartProfile = opts.profile === "smart";
18502
+ if (opts.profile && !smartProfile) {
18038
18503
  const profile = getProfileByName(opts.profile, db);
18039
18504
  if (profile) {
18040
18505
  providerNames = profile.providers;
@@ -18050,7 +18515,7 @@ async function unifiedSearch(query, opts = {}) {
18050
18515
  }
18051
18516
  const errors2 = [];
18052
18517
  const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
18053
- const activeProviders = providerNames.filter((name) => {
18518
+ let activeProviders = providerNames.filter((name) => {
18054
18519
  try {
18055
18520
  if (getProvider(name).isConfigured())
18056
18521
  return true;
@@ -18061,20 +18526,36 @@ async function unifiedSearch(query, opts = {}) {
18061
18526
  });
18062
18527
  }
18063
18528
  return false;
18064
- } catch {
18529
+ } catch (err) {
18530
+ if (explicitRequest) {
18531
+ errors2.push({
18532
+ provider: name,
18533
+ error: err instanceof Error ? err.message : "unknown provider"
18534
+ });
18535
+ }
18065
18536
  return false;
18066
18537
  }
18067
18538
  });
18539
+ const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
18540
+ let routing;
18541
+ if (routingRequested && activeProviders.length > 0) {
18542
+ routing = await routeSearchProviders(query, activeProviders, {
18543
+ maxProviders: config.router.maxProviders,
18544
+ timeoutMs: config.router.timeoutMs,
18545
+ model: config.router.model
18546
+ });
18547
+ activeProviders = routing.selectedProviders;
18548
+ }
18068
18549
  const searchOptions = {
18069
18550
  limit: config.defaultLimit,
18070
18551
  ...opts.options
18071
18552
  };
18072
- const results = await Promise.allSettled(activeProviders.map(async (name) => {
18553
+ const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
18073
18554
  const provider = getProvider(name);
18074
- const rawResults = await provider.search(query, searchOptions);
18555
+ const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
18075
18556
  updateProviderLastUsed(name, db);
18076
18557
  return { name, results: rawResults };
18077
- }));
18558
+ });
18078
18559
  const allResults = [];
18079
18560
  const searchId = generateId();
18080
18561
  for (const result of results) {
@@ -18128,11 +18609,13 @@ async function unifiedSearch(query, opts = {}) {
18128
18609
  createdAt: new Date().toISOString()
18129
18610
  },
18130
18611
  results: finalResults,
18131
- errors: errors2
18612
+ errors: errors2,
18613
+ ...routing && { routing }
18132
18614
  };
18133
18615
  }
18134
18616
  const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
18135
18617
  const search = createSearch({
18618
+ id: searchId,
18136
18619
  query,
18137
18620
  providers: activeProviders,
18138
18621
  resultCount: persistable.length,
@@ -18141,6 +18624,7 @@ async function unifiedSearch(query, opts = {}) {
18141
18624
  if (persistable.length > 0) {
18142
18625
  createResults(persistable.map((r) => ({
18143
18626
  searchId: search.id,
18627
+ id: r.id,
18144
18628
  title: r.title,
18145
18629
  url: r.url,
18146
18630
  snippet: r.snippet,
@@ -18157,7 +18641,8 @@ async function unifiedSearch(query, opts = {}) {
18157
18641
  return {
18158
18642
  search: { ...search, resultCount: finalResults.length, duration },
18159
18643
  results: finalResults,
18160
- errors: errors2
18644
+ errors: errors2,
18645
+ ...routing && { routing }
18161
18646
  };
18162
18647
  }
18163
18648
  async function searchSingleProvider(provider, query, options, db) {
@@ -18391,7 +18876,7 @@ function findLocal(query, opts = {}, db) {
18391
18876
  return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
18392
18877
  }
18393
18878
  if (opts.refresh !== false)
18394
- autoRefreshStaleRoots(db);
18879
+ scheduleAutoRefreshStaleRoots(db);
18395
18880
  const queryOpts = {
18396
18881
  root: opts.root,
18397
18882
  ext: opts.ext,
@@ -28631,13 +29116,15 @@ function buildServer() {
28631
29116
  providers: exports_external.array(SearchProviderNameSchema).optional().describe("Providers to search (default: all enabled)"),
28632
29117
  profile: exports_external.string().optional().describe("Search profile name (e.g. research, social, code)"),
28633
29118
  limit: exports_external.number().int().min(1).max(100).optional().describe("Max results per provider"),
28634
- dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)")
28635
- }, async ({ query, providers, profile, limit, dedup }) => {
29119
+ dedup: exports_external.boolean().optional().describe("Deduplicate results by URL (default: true)"),
29120
+ smart: exports_external.boolean().optional().describe("Route to the best configured providers before searching")
29121
+ }, async ({ query, providers, profile, limit, dedup, smart }) => {
28636
29122
  const response = await unifiedSearch(query, {
28637
29123
  providers,
28638
29124
  profile,
28639
29125
  options: limit ? { limit } : undefined,
28640
- dedup
29126
+ dedup,
29127
+ smart
28641
29128
  });
28642
29129
  return {
28643
29130
  content: [
@@ -28656,7 +29143,8 @@ function buildServer() {
28656
29143
  source: r.source,
28657
29144
  score: r.score
28658
29145
  })),
28659
- errors: response.errors
29146
+ errors: response.errors,
29147
+ routing: response.routing
28660
29148
  }, null, 2)
28661
29149
  }
28662
29150
  ]
@@ -28911,12 +29399,14 @@ function buildServer() {
28911
29399
  default_limit: exports_external.number().int().optional(),
28912
29400
  dedup: exports_external.boolean().optional(),
28913
29401
  max_concurrent: exports_external.number().int().optional(),
29402
+ provider_timeout_ms: exports_external.number().int().optional(),
28914
29403
  default_profile: exports_external.string().nullable().optional()
28915
29404
  }, async (updates) => {
28916
29405
  const config2 = setConfig({
28917
29406
  ...updates.default_limit !== undefined && { defaultLimit: updates.default_limit },
28918
29407
  ...updates.dedup !== undefined && { dedup: updates.dedup },
28919
29408
  ...updates.max_concurrent !== undefined && { maxConcurrent: updates.max_concurrent },
29409
+ ...updates.provider_timeout_ms !== undefined && { providerTimeoutMs: updates.provider_timeout_ms },
28920
29410
  ...updates.default_profile !== undefined && { defaultProfile: updates.default_profile }
28921
29411
  });
28922
29412
  return {
@@ -29059,10 +29549,12 @@ function startServer(port) {
29059
29549
  const providers = url.searchParams.get("providers")?.split(",");
29060
29550
  const profile = url.searchParams.get("profile") ?? undefined;
29061
29551
  const limit = url.searchParams.get("limit") ? parseInt(url.searchParams.get("limit")) : undefined;
29552
+ const smart = url.searchParams.get("smart") === "1" || url.searchParams.get("smart") === "true";
29062
29553
  const response = await unifiedSearch(q, {
29063
29554
  providers,
29064
29555
  profile,
29065
- options: limit ? { limit } : undefined
29556
+ options: limit ? { limit } : undefined,
29557
+ smart
29066
29558
  });
29067
29559
  return json(response);
29068
29560
  }
@@ -29284,14 +29776,7 @@ function startServer(port) {
29284
29776
  }
29285
29777
  }
29286
29778
  });
29287
- const refreshMinutes = Math.max(1, getConfig().indexStaleMinutes);
29288
- setInterval(() => {
29289
- try {
29290
- autoRefreshStaleRoots();
29291
- } catch (err2) {
29292
- console.error("Index refresh failed:", err2);
29293
- }
29294
- }, refreshMinutes * 60000).unref?.();
29779
+ startBackgroundRefresh();
29295
29780
  console.log(`open-search server running at http://localhost:${port}`);
29296
29781
  }
29297
29782