@hasna/search 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -7411,7 +7411,7 @@ var init_pg_migrate = __esm(() => {
7411
7411
  var require_package = __commonJS((exports, module) => {
7412
7412
  module.exports = {
7413
7413
  name: "@hasna/search",
7414
- version: "0.0.9",
7414
+ version: "0.0.11",
7415
7415
  description: "Unified search \u2014 local file index (find files by name/path/content/regex in ms, trigram FTS) + 12 web providers (Google, SerpAPI, Exa, Perplexity, Twitter, Reddit, YouTube, Brave, Bing, Hacker News, GitHub, arXiv) + YouTube transcription. CLI + MCP + REST API + Dashboard.",
7416
7416
  type: "module",
7417
7417
  main: "dist/index.js",
@@ -7980,6 +7980,31 @@ var migrations2 = [
7980
7980
  );
7981
7981
  `);
7982
7982
  }
7983
+ },
7984
+ {
7985
+ version: 2,
7986
+ description: "Local file index filter indexes",
7987
+ up: (db) => {
7988
+ db.exec(`
7989
+ CREATE INDEX IF NOT EXISTS idx_files_root_ext ON files(root_id, ext);
7990
+ CREATE INDEX IF NOT EXISTS idx_files_root_dir ON files(root_id, dir);
7991
+ `);
7992
+ }
7993
+ },
7994
+ {
7995
+ version: 3,
7996
+ description: "Local content short-token filter grams",
7997
+ up: (db) => {
7998
+ db.exec(`
7999
+ CREATE TABLE IF NOT EXISTS file_content_grams (
8000
+ file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
8001
+ gram TEXT NOT NULL,
8002
+ PRIMARY KEY (file_id, gram)
8003
+ );
8004
+ CREATE INDEX IF NOT EXISTS idx_file_content_grams_gram_file
8005
+ ON file_content_grams(gram, file_id);
8006
+ `);
8007
+ }
7983
8008
  }
7984
8009
  ];
7985
8010
  function runIndexMigrations(db) {
@@ -12047,12 +12072,19 @@ var DEFAULT_CONFIG = {
12047
12072
  defaultLimit: 10,
12048
12073
  defaultProviders: [],
12049
12074
  defaultProfile: null,
12075
+ router: {
12076
+ enabled: false,
12077
+ model: "gpt-oss-120b",
12078
+ maxProviders: 3,
12079
+ timeoutMs: 1200
12080
+ },
12050
12081
  transcriber: {
12051
12082
  baseUrl: "http://localhost:19600",
12052
12083
  fallbackCli: "microservice-transcriber"
12053
12084
  },
12054
12085
  dedup: true,
12055
12086
  maxConcurrent: 5,
12087
+ providerTimeoutMs: 15000,
12056
12088
  indexStaleMinutes: 5,
12057
12089
  indexAutoRefresh: true,
12058
12090
  recordLocalResults: false
@@ -12083,7 +12115,18 @@ function getConfig() {
12083
12115
  try {
12084
12116
  const raw = readFileSync2(path, "utf-8");
12085
12117
  const parsed = JSON.parse(raw);
12086
- return { ...DEFAULT_CONFIG, ...parsed };
12118
+ return {
12119
+ ...DEFAULT_CONFIG,
12120
+ ...parsed,
12121
+ router: {
12122
+ ...DEFAULT_CONFIG.router,
12123
+ ...parsed.router ?? {}
12124
+ },
12125
+ transcriber: {
12126
+ ...DEFAULT_CONFIG.transcriber,
12127
+ ...parsed.transcriber ?? {}
12128
+ }
12129
+ };
12087
12130
  } catch {
12088
12131
  return { ...DEFAULT_CONFIG };
12089
12132
  }
@@ -12511,6 +12554,7 @@ function removeRoot(idOrPath, db) {
12511
12554
  d.exec("BEGIN");
12512
12555
  try {
12513
12556
  d.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
12557
+ d.prepare("DELETE FROM file_content_grams WHERE file_id IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)").run(root.id);
12514
12558
  d.prepare("DELETE FROM index_roots WHERE id = ?").run(root.id);
12515
12559
  d.exec("COMMIT");
12516
12560
  } catch (err) {
@@ -12522,6 +12566,21 @@ function removeRoot(idOrPath, db) {
12522
12566
  function shouldIndexContent(root, file) {
12523
12567
  return root.contentIndexing && file.size > 0 && file.size <= root.maxFileSize && !hasBinaryExtension(file.ext) && !isContentExcluded(file.name);
12524
12568
  }
12569
+ function contentShortGrams(body) {
12570
+ const grams = new Set;
12571
+ const words = body.toLowerCase().matchAll(/[a-z0-9_$]+/g);
12572
+ for (const match of words) {
12573
+ const word = match[0];
12574
+ for (let i = 0;i < word.length; i++) {
12575
+ grams.add(word[i]);
12576
+ if (i + 1 < word.length)
12577
+ grams.add(word.slice(i, i + 2));
12578
+ }
12579
+ if (grams.size >= 2048)
12580
+ break;
12581
+ }
12582
+ return [...grams];
12583
+ }
12525
12584
  function indexRoot(idOrPath, opts = {}, db) {
12526
12585
  const d = db ?? getIndexDb();
12527
12586
  const root = getRoot(idOrPath, d);
@@ -12540,6 +12599,8 @@ function indexRoot(idOrPath, opts = {}, db) {
12540
12599
  const deleteFile = d.prepare("DELETE FROM files WHERE id = ?");
12541
12600
  const insertContent = d.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)");
12542
12601
  const deleteContent = d.prepare("DELETE FROM file_content_fts WHERE rowid = ?");
12602
+ const insertContentGram = d.prepare("INSERT OR IGNORE INTO file_content_grams (file_id, gram) VALUES (?, ?)");
12603
+ const deleteContentGrams = d.prepare("DELETE FROM file_content_grams WHERE file_id = ?");
12543
12604
  const stats = {
12544
12605
  rootId: root.id,
12545
12606
  added: 0,
@@ -12550,38 +12611,57 @@ function indexRoot(idOrPath, opts = {}, db) {
12550
12611
  skippedDirs: skippedDirs.length,
12551
12612
  durationMs: 0
12552
12613
  };
12614
+ const seen = new Set;
12615
+ const changes = [];
12616
+ for (const file of scanned) {
12617
+ seen.add(file.relPath);
12618
+ const prev = existing.get(file.relPath);
12619
+ const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
12620
+ if (prev && !changed && !opts.force)
12621
+ continue;
12622
+ const wantContent = shouldIndexContent(root, file);
12623
+ const absPath = `${root.path}/${file.relPath}`;
12624
+ let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
12625
+ let body = null;
12626
+ if (wantContent && !isBinary) {
12627
+ try {
12628
+ body = readFileSync4(absPath, "utf-8");
12629
+ } catch {
12630
+ isBinary = true;
12631
+ }
12632
+ }
12633
+ changes.push({
12634
+ file,
12635
+ prev,
12636
+ isBinary,
12637
+ body,
12638
+ grams: body !== null ? contentShortGrams(body) : [],
12639
+ contentIndexed: body !== null ? 1 : 0
12640
+ });
12641
+ }
12553
12642
  d.exec("BEGIN");
12554
12643
  try {
12555
- const seen = new Set;
12556
- for (const file of scanned) {
12557
- seen.add(file.relPath);
12558
- const prev = existing.get(file.relPath);
12559
- const changed = !prev || prev.size !== file.size || prev.mtime_ms !== file.mtimeMs;
12560
- if (prev && !changed && !opts.force)
12561
- continue;
12562
- const wantContent = shouldIndexContent(root, file);
12563
- const absPath = `${root.path}/${file.relPath}`;
12564
- let isBinary = wantContent ? isBinaryFile(absPath) : hasBinaryExtension(file.ext);
12565
- let body = null;
12566
- if (wantContent && !isBinary) {
12567
- try {
12568
- body = readFileSync4(absPath, "utf-8");
12569
- } catch {
12570
- isBinary = true;
12571
- }
12572
- }
12573
- const contentIndexed = body !== null ? 1 : 0;
12644
+ for (const { file, prev, isBinary, body, grams, contentIndexed } of changes) {
12574
12645
  if (prev) {
12575
- if (prev.content_indexed)
12646
+ if (prev.content_indexed) {
12576
12647
  deleteContent.run(prev.id);
12648
+ deleteContentGrams.run(prev.id);
12649
+ }
12577
12650
  updateFile.run(file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now, prev.id);
12578
- if (body !== null)
12651
+ if (body !== null) {
12579
12652
  insertContent.run(prev.id, body);
12653
+ for (const gram of grams)
12654
+ insertContentGram.run(prev.id, gram);
12655
+ }
12580
12656
  stats.updated++;
12581
12657
  } else {
12582
12658
  const inserted = insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, isBinary ? 1 : 0, contentIndexed, now);
12583
- if (body !== null)
12584
- insertContent.run(Number(inserted.lastInsertRowid), body);
12659
+ if (body !== null) {
12660
+ const fileId = Number(inserted.lastInsertRowid);
12661
+ insertContent.run(fileId, body);
12662
+ for (const gram of grams)
12663
+ insertContentGram.run(fileId, gram);
12664
+ }
12585
12665
  stats.added++;
12586
12666
  }
12587
12667
  if (contentIndexed)
@@ -12590,8 +12670,10 @@ function indexRoot(idOrPath, opts = {}, db) {
12590
12670
  for (const [relPath, row] of existing) {
12591
12671
  if (seen.has(relPath))
12592
12672
  continue;
12593
- if (row.content_indexed)
12673
+ if (row.content_indexed) {
12594
12674
  deleteContent.run(row.id);
12675
+ deleteContentGrams.run(row.id);
12676
+ }
12595
12677
  deleteFile.run(row.id);
12596
12678
  stats.deleted++;
12597
12679
  }
@@ -12613,6 +12695,9 @@ function indexAllRoots(opts = {}, db) {
12613
12695
  return listRoots(db).map((root) => indexRoot(root.id, opts, db));
12614
12696
  }
12615
12697
  var refreshing = new Set;
12698
+ var lastDefaultAutoRefreshCheckAt = 0;
12699
+ var AUTO_REFRESH_CHECK_THROTTLE_MS = 1000;
12700
+ var defaultRefreshScheduled = false;
12616
12701
  function refreshStaleRoots(staleMinutes, db) {
12617
12702
  const cutoff = Date.now() - staleMinutes * 60000;
12618
12703
  const stats = [];
@@ -12636,8 +12721,31 @@ function autoRefreshStaleRoots(db) {
12636
12721
  const config = getConfig();
12637
12722
  if (!config.indexAutoRefresh)
12638
12723
  return [];
12724
+ if (!db) {
12725
+ const now = Date.now();
12726
+ if (now - lastDefaultAutoRefreshCheckAt < AUTO_REFRESH_CHECK_THROTTLE_MS)
12727
+ return [];
12728
+ lastDefaultAutoRefreshCheckAt = now;
12729
+ }
12639
12730
  return refreshStaleRoots(config.indexStaleMinutes, db);
12640
12731
  }
12732
+ function scheduleAutoRefreshStaleRoots(db) {
12733
+ if (db)
12734
+ return autoRefreshStaleRoots(db);
12735
+ const config = getConfig();
12736
+ if (!config.indexAutoRefresh || defaultRefreshScheduled)
12737
+ return [];
12738
+ defaultRefreshScheduled = true;
12739
+ const timer = setTimeout(() => {
12740
+ try {
12741
+ autoRefreshStaleRoots();
12742
+ } catch {} finally {
12743
+ defaultRefreshScheduled = false;
12744
+ }
12745
+ }, 0);
12746
+ timer.unref?.();
12747
+ return [];
12748
+ }
12641
12749
 
12642
12750
  // src/lib/local/query.ts
12643
12751
  import { existsSync as existsSync3, readFileSync as readFileSync5 } from "fs";
@@ -12864,6 +12972,9 @@ function compileSearchRegex(pattern, caseSensitive = false) {
12864
12972
  // src/lib/local/query.ts
12865
12973
  var MAX_LINE_LENGTH = 200;
12866
12974
  var MAX_MATCHES_PER_FILE = 5;
12975
+ var MAX_PATH_CANDIDATES = 20000;
12976
+ var MAX_CONTENT_CANDIDATES = 50000;
12977
+ var MAX_REGEX_CANDIDATES = 50000;
12867
12978
  function tokenize(query) {
12868
12979
  return query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "").split(/\s+/).filter(Boolean);
12869
12980
  }
@@ -12894,11 +13005,40 @@ function filterClauses(opts, db) {
12894
13005
  }
12895
13006
  if (opts.dir) {
12896
13007
  clauses.push("f.dir LIKE ? ESCAPE '\\'");
12897
- const dir = opts.dir.replace(/^\/|\/$/g, "").replace(/[\\%_]/g, "\\$&");
13008
+ const dir = escapeLike(opts.dir.replace(/^\/|\/$/g, ""));
12898
13009
  params.push(`%${dir}%`);
12899
13010
  }
12900
13011
  return { sql: clauses.length > 0 ? ` AND ${clauses.join(" AND ")}` : "", params };
12901
13012
  }
13013
+ function escapeLike(value) {
13014
+ return value.replace(/[\\%_]/g, "\\$&");
13015
+ }
13016
+ function shortTokenClauses(tokens) {
13017
+ if (tokens.length === 0)
13018
+ return { sql: "", params: [] };
13019
+ return {
13020
+ sql: ` AND ${tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ")}`,
13021
+ params: tokens.map((token) => `%${escapeLike(token)}%`)
13022
+ };
13023
+ }
13024
+ function contentGramClauses(tokens) {
13025
+ const gramTokens = tokens.filter((token) => /^[a-z0-9_$]{1,2}$/.test(token));
13026
+ if (gramTokens.length === 0)
13027
+ return { sql: "", params: [] };
13028
+ return {
13029
+ sql: gramTokens.map((_token, index) => ` AND (
13030
+ NOT EXISTS (
13031
+ SELECT 1 FROM file_content_grams cg_any_${index}
13032
+ WHERE cg_any_${index}.file_id = f.id
13033
+ )
13034
+ OR EXISTS (
13035
+ SELECT 1 FROM file_content_grams cg_${index}
13036
+ WHERE cg_${index}.file_id = f.id AND cg_${index}.gram = ?
13037
+ )
13038
+ )`).join(""),
13039
+ params: gramTokens
13040
+ };
13041
+ }
12902
13042
  function rowToHit(row, score) {
12903
13043
  return {
12904
13044
  rootId: row.root_id,
@@ -12968,6 +13108,8 @@ function searchFilePaths(query, opts = {}, db) {
12968
13108
  return [];
12969
13109
  const ftsQuery = buildFtsQuery(query);
12970
13110
  const filters = filterClauses(opts, d);
13111
+ const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
13112
+ const shortFilters = shortTokenClauses(shortTokens);
12971
13113
  const candidateLimit = Math.max(200, limit * 10);
12972
13114
  let rows;
12973
13115
  if (ftsQuery) {
@@ -12975,16 +13117,16 @@ function searchFilePaths(query, opts = {}, db) {
12975
13117
  FROM files_fts fts
12976
13118
  JOIN files f ON f.id = fts.rowid
12977
13119
  JOIN index_roots r ON r.id = f.root_id
12978
- WHERE files_fts MATCH ?${filters.sql}
13120
+ WHERE files_fts MATCH ?${filters.sql}${shortFilters.sql}
12979
13121
  ORDER BY bm25(files_fts, 10.0, 1.0)
12980
- LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
12981
- const namePattern = `${query.trim().replace(/[\\%_]/g, "\\$&")}%`;
13122
+ LIMIT ?`).all(ftsQuery, ...filters.params, ...shortFilters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
13123
+ const namePattern = `${escapeLike(query.trim())}%`;
12982
13124
  const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
12983
13125
  FROM files f
12984
13126
  JOIN index_roots r ON r.id = f.root_id
12985
- WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
13127
+ WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}${shortFilters.sql}
12986
13128
  ORDER BY length(f.name)
12987
- LIMIT 100`).all(namePattern, ...filters.params);
13129
+ LIMIT 100`).all(namePattern, ...filters.params, ...shortFilters.params);
12988
13130
  const seen = new Set(rows.map((row) => row.id));
12989
13131
  for (const row of nameRows) {
12990
13132
  if (!seen.has(row.id))
@@ -12992,14 +13134,14 @@ function searchFilePaths(query, opts = {}, db) {
12992
13134
  }
12993
13135
  } else {
12994
13136
  const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
12995
- const likeParams = tokens.map((t) => `%${t.replace(/[\\%_]/g, "\\$&")}%`);
13137
+ const likeParams = tokens.map((t) => `%${escapeLike(t)}%`);
12996
13138
  rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
12997
13139
  FROM files f
12998
13140
  JOIN index_roots r ON r.id = f.root_id
12999
13141
  WHERE ${likeClauses}${filters.sql}
13000
- LIMIT ?`).all(...likeParams, ...filters.params, candidateLimit);
13142
+ ORDER BY length(f.name), length(f.rel_path), f.rel_path
13143
+ LIMIT ?`).all(...likeParams, ...filters.params, Math.min(candidateLimit, MAX_PATH_CANDIDATES));
13001
13144
  }
13002
- const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
13003
13145
  const filtered = shortTokens.length > 0 ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t))) : rows;
13004
13146
  return filtered.map((row) => rowToHit(row, scoreFileName(query, tokens, row))).sort((a, b) => b.score - a.score).filter((hit) => existsSync3(hit.absPath)).slice(0, limit);
13005
13147
  }
@@ -13037,24 +13179,31 @@ function searchFilePathsRegex(pattern, opts = {}, db) {
13037
13179
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
13038
13180
  }
13039
13181
  const filters = filterClauses(opts, d);
13040
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
13041
- FROM files_fts fts
13042
- JOIN files f ON f.id = fts.rowid
13043
- JOIN index_roots r ON r.id = f.root_id
13044
- WHERE files_fts MATCH ?${filters.sql}
13045
- ORDER BY fts.rank
13046
- LIMIT 5000`).all(ftsQuery, ...filters.params);
13047
13182
  const hits = [];
13048
- for (const row of rows) {
13049
- if (!regex.test(row.rel_path) && !regex.test(row.name))
13050
- continue;
13051
- const depth = row.rel_path.split("/").length - 1;
13052
- const score = Math.max(0.05, 0.6 - depth * 0.02);
13053
- const hit = rowToHit(row, score);
13054
- if (!existsSync3(hit.absPath))
13055
- continue;
13056
- hits.push(hit);
13057
- if (hits.length >= limit)
13183
+ const pageSize = Math.max(500, limit * 20);
13184
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
13185
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
13186
+ FROM files_fts fts
13187
+ JOIN files f ON f.id = fts.rowid
13188
+ JOIN index_roots r ON r.id = f.root_id
13189
+ WHERE files_fts MATCH ?${filters.sql}
13190
+ ORDER BY fts.rank
13191
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
13192
+ if (rows.length === 0)
13193
+ break;
13194
+ for (const row of rows) {
13195
+ if (!regex.test(row.rel_path) && !regex.test(row.name))
13196
+ continue;
13197
+ const depth = row.rel_path.split("/").length - 1;
13198
+ const score = Math.max(0.05, 0.6 - depth * 0.02);
13199
+ const hit = rowToHit(row, score);
13200
+ if (!existsSync3(hit.absPath))
13201
+ continue;
13202
+ hits.push(hit);
13203
+ if (hits.length >= limit)
13204
+ break;
13205
+ }
13206
+ if (rows.length < pageSize)
13058
13207
  break;
13059
13208
  }
13060
13209
  return hits;
@@ -13068,40 +13217,48 @@ function searchFileContentRegex(pattern, opts = {}, db) {
13068
13217
  throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
13069
13218
  }
13070
13219
  const filters = filterClauses(opts, d);
13071
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
13072
- FROM file_content_fts fts
13073
- JOIN files f ON f.id = fts.rowid
13074
- JOIN index_roots r ON r.id = f.root_id
13075
- WHERE file_content_fts MATCH ?${filters.sql}
13076
- ORDER BY fts.rank
13077
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(200, limit * 10));
13078
13220
  const hits = [];
13079
- for (let i = 0;i < rows.length && hits.length < limit; i++) {
13080
- const row = rows[i];
13081
- const absPath = `${row.root_path}/${row.rel_path}`;
13082
- let content;
13083
- try {
13084
- content = readFileSync5(absPath, "utf-8");
13085
- } catch {
13086
- continue;
13087
- }
13088
- const lines = content.split(`
13221
+ const pageSize = Math.max(200, limit * 10);
13222
+ for (let offset = 0;hits.length < limit && offset < MAX_REGEX_CANDIDATES; offset += pageSize) {
13223
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
13224
+ FROM file_content_fts fts
13225
+ JOIN files f ON f.id = fts.rowid
13226
+ JOIN index_roots r ON r.id = f.root_id
13227
+ WHERE file_content_fts MATCH ?${filters.sql}
13228
+ ORDER BY fts.rank
13229
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, pageSize, offset);
13230
+ if (rows.length === 0)
13231
+ break;
13232
+ for (let i = 0;i < rows.length && hits.length < limit; i++) {
13233
+ const row = rows[i];
13234
+ const absPath = `${row.root_path}/${row.rel_path}`;
13235
+ let content;
13236
+ try {
13237
+ content = readFileSync5(absPath, "utf-8");
13238
+ } catch {
13239
+ continue;
13240
+ }
13241
+ const lines = content.split(`
13089
13242
  `);
13090
- const matches = [];
13091
- for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
13092
- if (regex.test(lines[n])) {
13093
- matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
13243
+ const matches = [];
13244
+ for (let n = 0;n < lines.length && matches.length < MAX_MATCHES_PER_FILE; n++) {
13245
+ if (regex.test(lines[n])) {
13246
+ matches.push({ line: n + 1, text: lines[n].trim().slice(0, MAX_LINE_LENGTH) });
13247
+ }
13094
13248
  }
13249
+ if (matches.length === 0)
13250
+ continue;
13251
+ const rankIndex = offset + i;
13252
+ const score = Math.max(0.25, 0.65 - rankIndex * 0.05);
13253
+ hits.push({
13254
+ ...rowToHit(row, score),
13255
+ line: matches[0].line,
13256
+ lineText: matches[0].text,
13257
+ matches
13258
+ });
13095
13259
  }
13096
- if (matches.length === 0)
13097
- continue;
13098
- const score = Math.max(0.25, 0.65 - i * 0.05);
13099
- hits.push({
13100
- ...rowToHit(row, score),
13101
- line: matches[0].line,
13102
- lineText: matches[0].text,
13103
- matches
13104
- });
13260
+ if (rows.length < pageSize)
13261
+ break;
13105
13262
  }
13106
13263
  return hits;
13107
13264
  }
@@ -13112,42 +13269,51 @@ function searchFileContent(query, opts = {}, db) {
13112
13269
  if (!ftsQuery)
13113
13270
  return [];
13114
13271
  const filters = filterClauses(opts, d);
13115
- const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
13116
- FROM file_content_fts fts
13117
- JOIN files f ON f.id = fts.rowid
13118
- JOIN index_roots r ON r.id = f.root_id
13119
- WHERE file_content_fts MATCH ?${filters.sql}
13120
- ORDER BY fts.rank
13121
- LIMIT ?`).all(ftsQuery, ...filters.params, Math.max(50, limit * 3));
13122
13272
  const tokens = tokenize(query);
13123
13273
  const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
13274
+ const gramFilters = contentGramClauses(shortTokens);
13124
13275
  const scored = [];
13125
- for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
13126
- const row = rows[i];
13127
- const absPath = `${row.root_path}/${row.rel_path}`;
13128
- let content;
13129
- try {
13130
- content = readFileSync5(absPath, "utf-8");
13131
- } catch {
13132
- continue;
13133
- }
13134
- if (shortTokens.length > 0) {
13135
- const lower = content.toLowerCase();
13136
- if (!shortTokens.every((t) => lower.includes(t)))
13276
+ const pageSize = Math.max(50, limit * 3);
13277
+ for (let offset = 0;scored.length < limit * 2 && offset < MAX_CONTENT_CANDIDATES; offset += pageSize) {
13278
+ const rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
13279
+ FROM file_content_fts fts
13280
+ JOIN files f ON f.id = fts.rowid
13281
+ JOIN index_roots r ON r.id = f.root_id
13282
+ WHERE file_content_fts MATCH ?${filters.sql}${gramFilters.sql}
13283
+ ORDER BY fts.rank
13284
+ LIMIT ? OFFSET ?`).all(ftsQuery, ...filters.params, ...gramFilters.params, pageSize, offset);
13285
+ if (rows.length === 0)
13286
+ break;
13287
+ for (let i = 0;i < rows.length && scored.length < limit * 2; i++) {
13288
+ const row = rows[i];
13289
+ const absPath = `${row.root_path}/${row.rel_path}`;
13290
+ let content;
13291
+ try {
13292
+ content = readFileSync5(absPath, "utf-8");
13293
+ } catch {
13294
+ continue;
13295
+ }
13296
+ if (shortTokens.length > 0) {
13297
+ const lower = content.toLowerCase();
13298
+ if (!shortTokens.every((t) => lower.includes(t)))
13299
+ continue;
13300
+ }
13301
+ const { matches, tier } = findLineMatches(content, query, tokens);
13302
+ if (matches.length === 0)
13137
13303
  continue;
13304
+ const rankIndex = offset + i;
13305
+ const base = Math.max(0.25, 0.55 - rankIndex * 0.04);
13306
+ const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
13307
+ const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
13308
+ scored.push({
13309
+ ...rowToHit(row, score),
13310
+ line: matches[0].line,
13311
+ lineText: matches[0].text,
13312
+ matches
13313
+ });
13138
13314
  }
13139
- const { matches, tier } = findLineMatches(content, query, tokens);
13140
- if (matches.length === 0)
13141
- continue;
13142
- const base = Math.max(0.25, 0.55 - i * 0.04);
13143
- const tierBoost = tier === "phrase" ? 0.1 : tier === "all" ? 0.05 : 0;
13144
- const score = Math.min(CONTENT_MAX_SCORE, base + tierBoost);
13145
- scored.push({
13146
- ...rowToHit(row, score),
13147
- line: matches[0].line,
13148
- lineText: matches[0].text,
13149
- matches
13150
- });
13315
+ if (rows.length < pageSize)
13316
+ break;
13151
13317
  }
13152
13318
  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
13153
13319
  }
@@ -13164,7 +13330,7 @@ function findLocal(query, opts = {}, db) {
13164
13330
  return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
13165
13331
  }
13166
13332
  if (opts.refresh !== false)
13167
- autoRefreshStaleRoots(db);
13333
+ scheduleAutoRefreshStaleRoots(db);
13168
13334
  const queryOpts = {
13169
13335
  root: opts.root,
13170
13336
  ext: opts.ext,
@@ -14029,7 +14195,7 @@ class FilesProvider {
14029
14195
  return hasReadyRoot();
14030
14196
  }
14031
14197
  async search(query, options) {
14032
- autoRefreshStaleRoots();
14198
+ scheduleAutoRefreshStaleRoots();
14033
14199
  const hits = searchFilePaths(query, { limit: options?.limit ?? 10 });
14034
14200
  return hits.map((hit) => ({
14035
14201
  title: hit.name,
@@ -14057,7 +14223,7 @@ class ContentProvider {
14057
14223
  return hasReadyRoot();
14058
14224
  }
14059
14225
  async search(query, options) {
14060
- autoRefreshStaleRoots();
14226
+ scheduleAutoRefreshStaleRoots();
14061
14227
  const hits = searchFileContent(query, { limit: options?.limit ?? 10 });
14062
14228
  return hits.map((hit) => ({
14063
14229
  title: hit.name,
@@ -14184,7 +14350,7 @@ function rowToSearch(row) {
14184
14350
  }
14185
14351
  function createSearch(data, db) {
14186
14352
  const d = db ?? getDb();
14187
- const id = generateId();
14353
+ const id = data.id ?? generateId();
14188
14354
  const now = new Date().toISOString();
14189
14355
  d.prepare(`INSERT INTO searches (id, query, providers, profile_id, result_count, duration, created_at)
14190
14356
  VALUES (?, ?, ?, ?, ?, ?, ?)`).run(id, data.query, JSON.stringify(data.providers), data.profileId ?? null, data.resultCount ?? 0, data.duration ?? 0, now);
@@ -14266,7 +14432,7 @@ function createResults(results, db) {
14266
14432
  d.exec("BEGIN");
14267
14433
  try {
14268
14434
  for (const data of results) {
14269
- const id = generateId();
14435
+ const id = data.id ?? generateId();
14270
14436
  stmt.run(id, data.searchId, data.title, data.url, data.snippet, data.source, data.provider, data.rank, data.score ?? null, data.publishedAt ?? null, data.thumbnail ?? null, JSON.stringify(data.metadata ?? {}), now);
14271
14437
  created.push({
14272
14438
  id,
@@ -14408,13 +14574,300 @@ function isProviderConfigured(provider) {
14408
14574
  return !!Bun.env[provider.apiKeyEnv];
14409
14575
  }
14410
14576
 
14577
+ // src/lib/router.ts
14578
/**
 * Human-readable description of each search provider, keyed by provider name.
 * The descriptions are sent to the LLM router as its provider guide, so they
 * state what each provider is best at. Frozen because this is a shared lookup
 * table that nothing should mutate at runtime.
 */
var PROVIDER_DESCRIPTIONS = Object.freeze({
  files: "Local file names and paths. Best for known filenames, path fragments, extensions, and repo navigation.",
  content: "Local indexed file contents. Best for code symbols, exact phrases, docs, snippets, and grep-style discovery.",
  google: "General web search through SerpAPI. Best for broad web coverage and current public pages.",
  serpapi: "SerpAPI multi-engine web search. Best for general web queries when Google-style results are desired.",
  exa: "Neural/semantic web search. Best for research, conceptual queries, docs, and high-relevance pages.",
  perplexity: "Answer-oriented web research with citations. Best for synthesized factual questions and research summaries.",
  brave: "General independent web search. Best for current web, news-like, product, and navigational queries.",
  bing: "General web search. Best for current web and Microsoft/Bing-indexed pages.",
  twitter: "X/Twitter search. Best for tweets, social reactions, breaking discourse, and people posting updates.",
  reddit: "Reddit search. Best for opinions, product experiences, troubleshooting threads, and community recommendations.",
  youtube: "YouTube search. Best for videos, tutorials, talks, demos, and channels.",
  hackernews: "Hacker News search. Best for startup, programming, launch, and technical discussion threads.",
  github: "GitHub code and repository search. Best for open-source repos, code examples, packages, and implementation details.",
  arxiv: "arXiv academic search. Best for papers, preprints, ML/AI/math/physics research, and scholarly topics."
});
14594
/**
 * Normalizes a requested provider cap to a whole number from 1 to 5.
 *
 * @param {number|undefined} value - Requested maximum number of providers.
 * @returns {number} 3 when the value is missing or not a finite number,
 *   otherwise the floored value limited to the range 1..5.
 */
function clampMaxProviders(value) {
  const usable = value !== undefined && Number.isFinite(value);
  if (!usable) {
    return 3;
  }
  const whole = Math.floor(value);
  if (whole < 1) {
    return 1;
  }
  return whole > 5 ? 5 : whole;
}
14599
/**
 * Coerces a confidence value into the closed interval [0, 1].
 *
 * @param {unknown} value - Confidence reported by the router.
 * @returns {number} 0.5 when the value is not a finite number, otherwise the
 *   value limited to the range 0..1.
 */
function clampConfidence(value) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0.5;
  }
  const capped = Math.min(1, value);
  return Math.max(0, capped);
}
14602
/**
 * Reduces a candidate list to known provider names without duplicates.
 *
 * @param {Iterable<string>} candidates - Provider names to consider.
 * @returns {string[]} Names that appear in PROVIDER_NAMES, each kept once, in
 *   the order they first occur in `candidates`.
 */
function normalizeCandidates(candidates) {
  const known = new Set(PROVIDER_NAMES);
  // A Set keeps first-insertion order, so it de-duplicates and preserves order at once.
  const unique = new Set();
  for (const name of candidates) {
    if (known.has(name)) {
      unique.add(name);
    }
  }
  return [...unique];
}
14614
/**
 * Adds `amount` to a provider's running score, but only when that provider is
 * one of the routable candidates. Providers outside the candidate set are
 * ignored.
 *
 * @param {Map<string, number>} scores - Running score per provider (mutated).
 * @param {Set<string>} candidateSet - Providers eligible for routing.
 * @param {string} provider - Provider to boost.
 * @param {number} amount - Score increment.
 */
function addScore(scores, candidateSet, provider, amount) {
  if (candidateSet.has(provider)) {
    const current = scores.get(provider) ?? 0;
    scores.set(provider, current + amount);
  }
}
14619
/**
 * Reports whether at least one of the given regular expressions matches the query.
 *
 * @param {string} query - Text to test.
 * @param {RegExp[]} patterns - Patterns to try, in order.
 * @returns {boolean} True on the first match; false when nothing matches.
 */
function hasAny(query, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(query)) {
      return true;
    }
  }
  return false;
}
14622
/**
 * Picks search providers for a query using keyword and regex heuristics only
 * (no network call).
 *
 * Every candidate starts with a small base score. Each rule whose patterns
 * match the lower-cased query boosts the providers it names, and when no rule
 * matches a general-purpose fallback boost is applied. The highest-scoring
 * providers are selected, with ties broken by candidate order.
 *
 * @param {string} query - Raw user query.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number }} [options] - Optional cap on selected providers.
 * @returns {{ strategy: "heuristic", selectedProviders: string[], candidates: string[], reason: string, confidence: number }}
 */
function routeSearchProvidersHeuristic(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);
  const maxProviders = clampMaxProviders(options.maxProviders);
  if (normalized.length === 0) {
    return {
      strategy: "heuristic",
      selectedProviders: [],
      candidates: [],
      reason: "No configured providers were available to route.",
      confidence: 0
    };
  }

  // Rules are evaluated top to bottom; their order fixes the order of `reasons`.
  const rules = [
    {
      patterns: [
        /\b(file|filename|path|folder|directory|repo|workspace)\b/,
        /(^|[/\s])[\w.-]+\.(ts|tsx|js|jsx|py|rs|go|md|json|yaml|yml|css|html)\b/
      ],
      boosts: [["files", 5], ["content", 3]],
      reason: "query looks local-file oriented"
    },
    {
      patterns: [
        /\b(function|class|interface|type|const|import|export|error|stack|symbol|grep|regex)\b/,
        /[A-Za-z_$][\w$]*\([^)]*\)/,
        /[A-Za-z_$][\w$]*::[A-Za-z_$]/
      ],
      boosts: [["content", 5], ["files", 2], ["github", 1.5]],
      reason: "query contains code/content lookup signals"
    },
    {
      patterns: [/\b(paper|papers|arxiv|preprint|doi|citation|survey|benchmark|research)\b/],
      boosts: [["arxiv", 5], ["exa", 3], ["perplexity", 2]],
      reason: "query asks for scholarly or research material"
    },
    {
      patterns: [/\b(github|repo|repository|source code|open source|package|library|sdk|api example)\b/],
      boosts: [["github", 5], ["exa", 2]],
      reason: "query asks for code or repository material"
    },
    {
      patterns: [/\b(video|youtube|tutorial|demo|talk|lecture|channel)\b/],
      boosts: [["youtube", 5]],
      reason: "query asks for video material"
    },
    {
      patterns: [/\b(reddit|subreddit|opinion|experience|reviews?|worth it|recommendations?)\b/],
      boosts: [["reddit", 5], ["hackernews", 1.5]],
      reason: "query asks for community discussion"
    },
    {
      patterns: [/\b(hacker news|hn|show hn|launch|startup)\b/],
      boosts: [["hackernews", 5]],
      reason: "query asks for Hacker News style discussion"
    },
    {
      patterns: [/\b(twitter|tweet|tweets|x\.com|social reaction|trending)\b/],
      boosts: [["twitter", 5]],
      reason: "query asks for social posts"
    },
    {
      patterns: [/\b(latest|today|yesterday|news|current|2025|2026|price|release|launched)\b/],
      boosts: [["brave", 3], ["bing", 2.5], ["google", 2.5], ["serpapi", 2]],
      reason: "query appears time-sensitive"
    }
  ];
  const fallbackBoosts = [
    ["exa", 2.5],
    ["perplexity", 2],
    ["brave", 1.5],
    ["google", 1.5],
    ["hackernews", 0.75]
  ];

  const candidateSet = new Set(normalized);
  const scores = new Map();
  const reasons = [];
  const q = query.trim().toLowerCase();

  // Base score keeps every candidate rankable even when no rule mentions it.
  for (const candidate of normalized) {
    scores.set(candidate, 0.05);
  }

  for (const rule of rules) {
    if (!hasAny(q, rule.patterns)) {
      continue;
    }
    for (const [provider, amount] of rule.boosts) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push(rule.reason);
  }

  if (reasons.length === 0) {
    for (const [provider, amount] of fallbackBoosts) {
      addScore(scores, candidateSet, provider, amount);
    }
    reasons.push("general query fallback");
  }

  // Highest score first; equal scores keep the caller's candidate order.
  const ranked = [...scores.entries()];
  ranked.sort((left, right) => {
    const byScore = right[1] - left[1];
    if (byScore) {
      return byScore;
    }
    return normalized.indexOf(left[0]) - normalized.indexOf(right[0]);
  });
  const keep = Math.min(maxProviders, normalized.length);
  const selectedProviders = ranked.slice(0, keep).map(([provider]) => provider);

  const topScore = scores.get(selectedProviders[0]) ?? 0;
  const confidence = Math.max(0.35, Math.min(0.9, topScore / 6));
  return {
    strategy: "heuristic",
    selectedProviders,
    candidates: normalized,
    reason: reasons.join("; "),
    confidence
  };
}
14712
/**
 * Builds the JSON Schema that constrains the LLM router's structured response.
 *
 * @param {string[]} candidates - Provider names the model may choose from.
 * @param {number} maxProviders - Upper bound on how many providers may be selected.
 * @returns {object} Schema requiring `selectedProviders`, `reason` and
 *   `confidence`, with no additional properties allowed.
 */
function routerSchema(candidates, maxProviders) {
  const selectedProviders = {
    type: "array",
    items: { type: "string", enum: candidates },
    minItems: 1,
    maxItems: maxProviders
  };
  const reason = { type: "string" };
  const confidence = { type: "number", minimum: 0, maximum: 1 };
  return {
    type: "object",
    properties: { selectedProviders, reason, confidence },
    required: ["selectedProviders", "reason", "confidence"],
    additionalProperties: false
  };
}
14729
/**
 * Parses and validates the router model's JSON reply.
 *
 * @param {string} raw - Message content returned by the model.
 * @param {string[]} candidates - Provider names the model was allowed to pick.
 * @param {number} maxProviders - Maximum number of providers to keep.
 * @returns {{ selectedProviders: string[], reason: string, confidence: number } | null}
 *   The sanitized selection, or null when the reply is not valid JSON, is not
 *   an object with a `selectedProviders` array, or names no allowed provider.
 */
function parseCerebrasRouting(raw, candidates, maxProviders) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  // JSON.parse("null") succeeds and yields null; optional chaining keeps that
  // case on the "invalid reply" path instead of throwing a TypeError.
  if (!Array.isArray(parsed?.selectedProviders)) {
    return null;
  }
  const candidateSet = new Set(candidates);
  const allowed = parsed.selectedProviders.filter(
    (provider) => typeof provider === "string" && candidateSet.has(provider)
  );
  // De-duplicate before applying the cap so a repeated name neither wastes a
  // slot nor makes the same provider get queried twice.
  const selectedProviders = [...new Set(allowed)].slice(0, maxProviders);
  if (selectedProviders.length === 0) {
    return null;
  }
  return {
    selectedProviders,
    reason: typeof parsed.reason === "string" ? parsed.reason : "Cerebras router selected providers.",
    confidence: clampConfidence(parsed.confidence)
  };
}
14748
/**
 * Asks the Cerebras chat-completions API to choose providers for a query.
 *
 * When CEREBRAS_API_KEY is not set, no request is made and the heuristic
 * result is returned with an explanatory `error` field.
 *
 * @param {string} query - User query to route.
 * @param {string[]} candidates - Provider names the model may select.
 * @param {{ maxProviders: number, timeoutMs: number, model: string }} options
 * @returns {Promise<object>} Routing result tagged with strategy "cerebras",
 *   or the heuristic result when the API key is missing.
 * @throws {Error} When the HTTP response is not ok, the reply has no content,
 *   or the content is not a valid provider selection. Errors from `fetch`
 *   itself (including the timeout abort) also propagate.
 */
async function routeWithCerebras(query, candidates, options) {
  const apiKey = Bun.env.CEREBRAS_API_KEY;
  if (!apiKey) {
    const fallback = routeSearchProvidersHeuristic(query, candidates, options);
    return {
      ...fallback,
      error: "CEREBRAS_API_KEY is not configured; used heuristic routing."
    };
  }

  // Each candidate is sent with its description so the model can choose sensibly.
  const providerGuide = candidates.map((name) => {
    return { name, description: PROVIDER_DESCRIPTIONS[name] };
  });
  const systemMessage = {
    role: "system",
    content: "You route a search query to the smallest useful set of available search providers. Select only listed providers. Prefer local providers for local files/code in the indexed workspace. Prefer scholarly, code, video, social, or web providers when the query clearly asks for those domains."
  };
  const userMessage = {
    role: "user",
    content: JSON.stringify({
      query,
      maxProviders: options.maxProviders,
      providers: providerGuide
    })
  };
  const payload = {
    model: options.model,
    temperature: 0,
    messages: [systemMessage, userMessage],
    // Structured output: the reply must conform to the router schema.
    response_format: {
      type: "json_schema",
      json_schema: {
        name: "search_router",
        strict: true,
        schema: routerSchema(candidates, options.maxProviders)
      }
    }
  };

  const res = await fetch("https://api.cerebras.ai/v1/chat/completions", {
    method: "POST",
    signal: AbortSignal.timeout(options.timeoutMs),
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    throw new Error(`Cerebras router error: ${res.status} ${res.statusText}`);
  }

  const data = await res.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error("Cerebras router returned no content");
  }
  const parsed = parseCerebrasRouting(content, candidates, options.maxProviders);
  if (!parsed) {
    throw new Error("Cerebras router returned invalid provider selection");
  }
  return {
    strategy: "cerebras",
    candidates,
    ...parsed
  };
}
14810
/**
 * Routes a query to providers, preferring the Cerebras router and falling
 * back to the heuristic router when that call throws.
 *
 * @param {string} query - User query to route.
 * @param {Iterable<string>} candidates - Provider names available for routing.
 * @param {{ maxProviders?: number, timeoutMs?: number, model?: string }} [options]
 *   `timeoutMs` is floored and raised to at least 250 (default 1200). `model`
 *   defaults to CEREBRAS_MODEL from the environment, then "gpt-oss-120b".
 * @returns {Promise<object>} Routing result; on a router failure this is the
 *   heuristic result plus an `error` message.
 */
async function routeSearchProviders(query, candidates, options = {}) {
  const normalized = normalizeCandidates(candidates);

  // Never ask for more providers than exist, but always at least one.
  const available = Math.max(1, normalized.length);
  const maxProviders = Math.min(clampMaxProviders(options.maxProviders), available);

  let timeoutMs = 1200;
  if (options.timeoutMs && Number.isFinite(options.timeoutMs)) {
    timeoutMs = Math.max(250, Math.floor(options.timeoutMs));
  }
  const model = options.model ?? Bun.env.CEREBRAS_MODEL ?? "gpt-oss-120b";

  if (normalized.length === 0) {
    return routeSearchProvidersHeuristic(query, normalized, { maxProviders });
  }

  try {
    const routed = await routeWithCerebras(query, normalized, { maxProviders, timeoutMs, model });
    return routed;
  } catch (err) {
    const fallback = routeSearchProvidersHeuristic(query, normalized, { maxProviders });
    const message = err instanceof Error ? err.message : String(err);
    return { ...fallback, error: message };
  }
}
14827
+
14411
14828
  // src/lib/search.ts
14829
/**
 * Resolves or rejects with `promise`, unless `timeoutMs` elapses first, in
 * which case it rejects with a timeout error. The underlying operation is not
 * cancelled; only the wait is abandoned.
 *
 * @param {Promise<T>} promise - Operation to wait for.
 * @param {number} timeoutMs - Time limit in milliseconds. A non-finite or
 *   non-positive value disables the timeout.
 * @param {string} label - Name used in the timeout error message.
 * @returns {Promise<T>} The settled value of `promise`.
 * @throws {Error} "<label> timed out after <timeoutMs>ms" when the limit is hit.
 * @template T
 */
async function withTimeout(promise, timeoutMs, label) {
  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
    return promise;
  }
  // Timer delays are stored as 32-bit signed integers. Node.js resets a larger
  // delay to 1 ms, which would turn a very long timeout into an immediate
  // failure, so clamp to the largest representable delay.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const delay = Math.min(timeoutMs, MAX_TIMER_DELAY_MS);
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} timed out after ${timeoutMs}ms`)), delay);
    // Do not let a pending timeout keep the process alive on its own.
    timer.unref?.();
  });
  try {
    return await Promise.race([promise, deadline]);
  } finally {
    clearTimeout(timer);
  }
}
14846
/**
 * Runs `task` over `items` with at most `concurrency` tasks in flight and
 * reports every outcome in the same shape as `Promise.allSettled`.
 *
 * @param {T[]} items - Inputs to process.
 * @param {number} concurrency - Maximum simultaneous tasks. Values below 1
 *   are raised to 1. A value that is not a number (for example `undefined`
 *   from a missing config entry) means "no limit".
 * @param {(item: T) => Promise<R>} task - Async worker invoked once per item.
 * @returns {Promise<Array<{status: "fulfilled", value: R} | {status: "rejected", reason: unknown}>>}
 *   Outcomes in the same order as `items`; never rejects because of a task failure.
 * @template T, R
 */
async function allSettledLimited(items, concurrency, task) {
  const results = new Array(items.length);
  let next = 0;

  // Each worker pulls the next unclaimed index until the list is exhausted.
  // Claiming the index is synchronous, so no two workers take the same item.
  async function worker() {
    while (next < items.length) {
      const index = next++;
      const item = items[index];
      try {
        results[index] = { status: "fulfilled", value: await task(item) };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  }

  // A NaN limit would make Array.from create zero workers and leave `results`
  // full of holes, so treat an unusable limit as unbounded instead.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? items.length : Math.max(1, requested);
  const workerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
14412
14864
  async function unifiedSearch(query, opts = {}) {
14413
14865
  const config = getConfig();
14414
14866
  const startTime = Date.now();
14415
14867
  const db = opts.db;
14416
14868
  let providerNames = opts.providers ?? [];
14417
- if (opts.profile) {
14869
+ const smartProfile = opts.profile === "smart";
14870
+ if (opts.profile && !smartProfile) {
14418
14871
  const profile = getProfileByName(opts.profile, db);
14419
14872
  if (profile) {
14420
14873
  providerNames = profile.providers;
@@ -14430,7 +14883,7 @@ async function unifiedSearch(query, opts = {}) {
14430
14883
  }
14431
14884
  const errors2 = [];
14432
14885
  const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
14433
- const activeProviders = providerNames.filter((name) => {
14886
+ let activeProviders = providerNames.filter((name) => {
14434
14887
  try {
14435
14888
  if (getProvider(name).isConfigured())
14436
14889
  return true;
@@ -14441,20 +14894,36 @@ async function unifiedSearch(query, opts = {}) {
14441
14894
  });
14442
14895
  }
14443
14896
  return false;
14444
- } catch {
14897
+ } catch (err) {
14898
+ if (explicitRequest) {
14899
+ errors2.push({
14900
+ provider: name,
14901
+ error: err instanceof Error ? err.message : "unknown provider"
14902
+ });
14903
+ }
14445
14904
  return false;
14446
14905
  }
14447
14906
  });
14907
+ const routingRequested = opts.smart === true || smartProfile || !explicitRequest && config.router.enabled;
14908
+ let routing;
14909
+ if (routingRequested && activeProviders.length > 0) {
14910
+ routing = await routeSearchProviders(query, activeProviders, {
14911
+ maxProviders: config.router.maxProviders,
14912
+ timeoutMs: config.router.timeoutMs,
14913
+ model: config.router.model
14914
+ });
14915
+ activeProviders = routing.selectedProviders;
14916
+ }
14448
14917
  const searchOptions = {
14449
14918
  limit: config.defaultLimit,
14450
14919
  ...opts.options
14451
14920
  };
14452
- const results = await Promise.allSettled(activeProviders.map(async (name) => {
14921
+ const results = await allSettledLimited(activeProviders, config.maxConcurrent, async (name) => {
14453
14922
  const provider = getProvider(name);
14454
- const rawResults = await provider.search(query, searchOptions);
14923
+ const rawResults = await withTimeout(provider.search(query, searchOptions), config.providerTimeoutMs, provider.displayName);
14455
14924
  updateProviderLastUsed(name, db);
14456
14925
  return { name, results: rawResults };
14457
- }));
14926
+ });
14458
14927
  const allResults = [];
14459
14928
  const searchId = generateId();
14460
14929
  for (const result of results) {
@@ -14508,11 +14977,13 @@ async function unifiedSearch(query, opts = {}) {
14508
14977
  createdAt: new Date().toISOString()
14509
14978
  },
14510
14979
  results: finalResults,
14511
- errors: errors2
14980
+ errors: errors2,
14981
+ ...routing && { routing }
14512
14982
  };
14513
14983
  }
14514
14984
  const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
14515
14985
  const search = createSearch({
14986
+ id: searchId,
14516
14987
  query,
14517
14988
  providers: activeProviders,
14518
14989
  resultCount: persistable.length,
@@ -14521,6 +14992,7 @@ async function unifiedSearch(query, opts = {}) {
14521
14992
  if (persistable.length > 0) {
14522
14993
  createResults(persistable.map((r) => ({
14523
14994
  searchId: search.id,
14995
+ id: r.id,
14524
14996
  title: r.title,
14525
14997
  url: r.url,
14526
14998
  snippet: r.snippet,
@@ -14537,7 +15009,8 @@ async function unifiedSearch(query, opts = {}) {
14537
15009
  return {
14538
15010
  search: { ...search, resultCount: finalResults.length, duration },
14539
15011
  results: finalResults,
14540
- errors: errors2
15012
+ errors: errors2,
15013
+ ...routing && { routing }
14541
15014
  };
14542
15015
  }
14543
15016
  async function searchSingleProvider(provider, query, options, db) {
@@ -14814,7 +15287,7 @@ var program2 = new Command;
14814
15287
  program2.name("search").version(pkg.version).description("Unified search \u2014 local file index + 12 web providers, one interface");
14815
15288
  registerStorageCommands(program2);
14816
15289
  registerLocalCommands(program2);
14817
- program2.command("query").alias("q").argument("<query...>", "Search query").option("-p, --providers <providers>", "Comma-separated providers").option("--profile <name>", "Use a search profile").option("-l, --limit <n>", "Max results per provider", "10").option("-f, --format <format>", "Output format: table, json", "table").option("--no-dedup", "Disable deduplication").action(async (queryParts, opts) => {
15290
+ program2.command("query").alias("q").argument("<query...>", "Search query").option("-p, --providers <providers>", "Comma-separated providers").option("--profile <name>", "Use a search profile").option("-l, --limit <n>", "Max results per provider", "10").option("-f, --format <format>", "Output format: table, json", "table").option("--smart", "Route the query to the best configured providers with the smart router").option("--no-dedup", "Disable deduplication").action(async (queryParts, opts) => {
14818
15291
  const query = queryParts.join(" ");
14819
15292
  const providers = opts.providers ? opts.providers.split(",") : undefined;
14820
15293
  try {
@@ -14822,7 +15295,8 @@ program2.command("query").alias("q").argument("<query...>", "Search query").opti
14822
15295
  providers,
14823
15296
  profile: opts.profile,
14824
15297
  options: { limit: parseInt(opts.limit) },
14825
- dedup: opts.dedup
15298
+ dedup: opts.dedup,
15299
+ smart: opts.smart
14826
15300
  });
14827
15301
  if (opts.format === "json") {
14828
15302
  console.log(JSON.stringify(response, null, 2));