@prom.codes/context-mcp 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +9 -0
  2. package/dist/bin.js +749 -73
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -25,4 +25,13 @@ your workspace into a local SQLite database (`~/.prometheus/<hash>.db`)
25
25
  and embeds via the Prometheus API. Your code never leaves your machine —
26
26
  only embedding text transits to the API.
27
27
 
28
+ ## Native modules
29
+
30
+ Uses native Tree-sitter grammars + `better-sqlite3` for parsing and storage.
31
+ Prebuilt binaries are fetched automatically on the mainstream platforms
32
+ (macOS x64/arm64, Linux x64, Windows x64) — no compiler needed. On other
33
+ platforms (e.g. Linux/Windows arm64) or a Node ABI without a prebuild, install
34
+ C/C++ build tools so the native modules can compile (Windows: VS Build Tools).
35
+ Requires Node ≥ 20.10.
36
+
28
37
  Docs: https://prom.codes/docs
package/dist/bin.js CHANGED
@@ -3354,6 +3354,116 @@ async function runEmbedPass(storage, embedder, options, driftRecovered) {
3354
3354
  import chokidar from "chokidar";
3355
3355
  import { resolve, sep } from "node:path";
3356
3356
  import { EventEmitter } from "node:events";
3357
+
3358
+ // ../shared/dist/types.js
3359
// Language identifiers, kept in two groups. LANGUAGE_IDS is the grammar group
// followed by the document group, in that order.
var GRAMMAR_LANGUAGE_IDS = [
  "typescript", "tsx", "javascript", "python", "php", "go", "rust",
  "java", "csharp", "c", "cpp", "ruby", "kotlin", "html"
];
var DOCUMENT_LANGUAGE_IDS = ["markdown", "text", "json", "yaml", "toml"];
var LANGUAGE_IDS = GRAMMAR_LANGUAGE_IDS.concat(DOCUMENT_LANGUAGE_IDS);
// Names of the edge types; order is significant for anything that iterates it.
var EDGE_TYPES = ["defines", "calls", "imports", "same-file", "co-change"];
3393
+
3394
+ // ../shared/dist/sensitive-paths.js
3395
// Directory names that make every path underneath them sensitive.
var SENSITIVE_DIRECTORIES = /* @__PURE__ */ new Set([
  "secrets",
  "credentials",
  ".aws",
  ".ssh",
  ".gnupg"
]);
// Exact (lower-cased) file names that are always sensitive.
var SENSITIVE_BASENAMES = /* @__PURE__ */ new Set([
  ".npmrc",
  ".pypirc",
  ".netrc",
  ".pgpass",
  "known_hosts",
  // Project-specific: CLAUDE.md carries live credentials by convention.
  "claude.md"
]);
// File-name suffixes of key / keystore material.
var SENSITIVE_EXTENSIONS = [
  ".pem",
  ".key",
  ".p12",
  ".pfx",
  ".jks",
  ".keystore",
  ".ppk"
];
// `.env.*` names that are templates and therefore allowed.
var ENV_TEMPLATE_BASENAMES = /* @__PURE__ */ new Set([
  ".env.example",
  ".env.sample",
  ".env.template"
]);
/**
 * Decide whether a path matches the sensitive-file deny-list.
 *
 * Matching is case-insensitive and separator-agnostic: backslashes are
 * treated as `/`, leading/trailing slashes and `.` segments are dropped.
 *
 * @param {string} path - Path to test (relative or absolute, any separator).
 * @returns {boolean} `true` when any segment is a sensitive directory name or
 *   the final segment looks like a secret-bearing file; `false` otherwise,
 *   including for an empty path.
 */
function isSensitivePath(path) {
  const normalised = path.replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
  if (normalised === "")
    return false;
  const segments = normalised.split("/").filter((s) => s !== "" && s !== ".").map((s) => s.toLowerCase());
  if (segments.length === 0)
    return false;
  // Every segment is checked, so a file that is itself named e.g. "secrets"
  // is denied as well.
  if (segments.some((segment) => SENSITIVE_DIRECTORIES.has(segment)))
    return true;
  const base = segments[segments.length - 1];
  if (SENSITIVE_BASENAMES.has(base))
    return true;
  if (base === ".env")
    return true;
  if (base.startsWith(".env.") && !ENV_TEMPLATE_BASENAMES.has(base))
    return true;
  if (SENSITIVE_EXTENSIONS.some((ext) => base.endsWith(ext)))
    return true;
  // Default OpenSSH identity file names. `id_dsa` and `id_ecdsa` are included
  // alongside `id_rsa` / `id_ed25519` so that no default key type slips
  // through when the key lives outside a `.ssh` directory.
  if (/^id_(rsa|dsa|ecdsa|ed25519)/.test(base))
    return true;
  if (/\.tfstate(\.|$)/.test(base))
    return true;
  if (base.endsWith(".json")) {
    if (base.startsWith("serviceaccount"))
      return true;
    if (base.startsWith("gcp-"))
      return true;
    if (base.endsWith("-credentials.json"))
      return true;
  }
  return false;
}
3461
+ var SENSITIVE_PATH_ERROR = "path matches the sensitive-file deny-list";
3462
+
3463
+ // ../shared/dist/index.js
3464
+ var PROMETHEUS_VERSION = "0.1.0";
3465
+
3466
+ // ../indexer/dist/watcher.js
3357
3467
  var DEFAULT_IGNORED = [
3358
3468
  /(^|[/\\])\.git([/\\]|$)/,
3359
3469
  /(^|[/\\])node_modules([/\\]|$)/,
@@ -3389,8 +3499,12 @@ var WorkspaceWatcher = class extends EventEmitter {
3389
3499
  async start() {
3390
3500
  if (this.#watcher !== null)
3391
3501
  return;
3502
+ const denySensitive = (abs) => {
3503
+ const rel = toRelative(this.#root, abs);
3504
+ return rel !== abs && isSensitivePath(rel);
3505
+ };
3392
3506
  const watcher = chokidar.watch(this.#root, {
3393
- ignored: this.#ignored,
3507
+ ignored: [...this.#ignored, denySensitive],
3394
3508
  ignoreInitial: false,
3395
3509
  persistent: true,
3396
3510
  awaitWriteFinish: { stabilityThreshold: 50, pollInterval: 10 }
@@ -3415,6 +3529,8 @@ var WorkspaceWatcher = class extends EventEmitter {
3415
3529
  this.#watcher = null;
3416
3530
  }
3417
3531
  #schedule(kind, abs) {
3532
+ if (isSensitivePath(toRelative(this.#root, abs)))
3533
+ return;
3418
3534
  const key = `${kind}::${abs}`;
3419
3535
  const existing = this.#pending.get(key);
3420
3536
  if (existing !== void 0)
@@ -3728,6 +3844,8 @@ var WorkspaceIndexer = class {
3728
3844
  }
3729
3845
  }
3730
3846
  #isIgnored(relPath) {
3847
+ if (isSensitivePath(relPath))
3848
+ return true;
3731
3849
  const patterns = this.#ignored ?? DEFAULT_IGNORED_PATTERNS;
3732
3850
  for (const pat of patterns) {
3733
3851
  if (pat instanceof RegExp) {
@@ -3807,7 +3925,10 @@ var WorkspaceIndexer = class {
3807
3925
  hasParseErrors: parsed.hasParseErrors
3808
3926
  },
3809
3927
  symbols: parsed.symbols,
3810
- references: parsed.references
3928
+ references: parsed.references,
3929
+ // Full source so the SQLite adapter can store per-symbol body
3930
+ // text for the full-text lexical channel (sliced by byte offset).
3931
+ source
3811
3932
  });
3812
3933
  return "indexed";
3813
3934
  } catch (err) {
@@ -3836,47 +3957,8 @@ import { basename as basename5, dirname as dirname2, join as join3, resolve as r
3836
3957
  // ../storage-sqlite/dist/adapter.js
3837
3958
  import Database from "better-sqlite3";
3838
3959
 
3839
- // ../shared/dist/types.js
3840
- var GRAMMAR_LANGUAGE_IDS = [
3841
- "typescript",
3842
- "tsx",
3843
- "javascript",
3844
- "python",
3845
- "php",
3846
- "go",
3847
- "rust",
3848
- "java",
3849
- "csharp",
3850
- "c",
3851
- "cpp",
3852
- "ruby",
3853
- "kotlin",
3854
- "html"
3855
- ];
3856
- var DOCUMENT_LANGUAGE_IDS = [
3857
- "markdown",
3858
- "text",
3859
- "json",
3860
- "yaml",
3861
- "toml"
3862
- ];
3863
- var LANGUAGE_IDS = [
3864
- ...GRAMMAR_LANGUAGE_IDS,
3865
- ...DOCUMENT_LANGUAGE_IDS
3866
- ];
3867
- var EDGE_TYPES = [
3868
- "defines",
3869
- "calls",
3870
- "imports",
3871
- "same-file",
3872
- "co-change"
3873
- ];
3874
-
3875
- // ../shared/dist/index.js
3876
- var PROMETHEUS_VERSION = "0.1.0";
3877
-
3878
3960
  // ../storage-sqlite/dist/schema.js
3879
- var SCHEMA_VERSION = 3;
3961
+ var SCHEMA_VERSION = 4;
3880
3962
  var SCHEMA_STATEMENTS = [
3881
3963
  `PRAGMA journal_mode = WAL`,
3882
3964
  `PRAGMA foreign_keys = ON`,
@@ -3907,7 +3989,8 @@ var SCHEMA_STATEMENTS = [
3907
3989
  end_row INTEGER NOT NULL,
3908
3990
  end_col INTEGER NOT NULL,
3909
3991
  start_byte INTEGER NOT NULL,
3910
- end_byte INTEGER NOT NULL
3992
+ end_byte INTEGER NOT NULL,
3993
+ body TEXT
3911
3994
  )`,
3912
3995
  `CREATE TABLE IF NOT EXISTS refs (
3913
3996
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -3937,25 +4020,27 @@ var SCHEMA_STATEMENTS = [
3937
4020
  `CREATE INDEX IF NOT EXISTS idx_refs_name_nocase ON refs(name COLLATE NOCASE)`,
3938
4021
  // FTS5 virtual table: external-content design — actual rows live in
3939
4022
  // `symbols`, FTS holds only the tokenised index. Triggers below keep
3940
- // the FTS index in sync so we never write to it directly.
4023
+ // the FTS index in sync so we never write to it directly. The `body`
4024
+ // column (version 4) carries each symbol's source slice, so the lexical
4025
+ // channel matches symbol *source text*, not just name + container.
3941
4026
  `CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
3942
- name, container,
4027
+ name, container, body,
3943
4028
  content='symbols', content_rowid='id',
3944
4029
  tokenize='unicode61 remove_diacritics 2'
3945
4030
  )`,
3946
4031
  `CREATE TRIGGER IF NOT EXISTS symbols_ai AFTER INSERT ON symbols BEGIN
3947
- INSERT INTO symbols_fts(rowid, name, container)
3948
- VALUES (new.id, new.name, COALESCE(new.container, ''));
4032
+ INSERT INTO symbols_fts(rowid, name, container, body)
4033
+ VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
3949
4034
  END`,
3950
4035
  `CREATE TRIGGER IF NOT EXISTS symbols_ad AFTER DELETE ON symbols BEGIN
3951
- INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
3952
- VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
4036
+ INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
4037
+ VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
3953
4038
  END`,
3954
4039
  `CREATE TRIGGER IF NOT EXISTS symbols_au AFTER UPDATE ON symbols BEGIN
3955
- INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
3956
- VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
3957
- INSERT INTO symbols_fts(rowid, name, container)
3958
- VALUES (new.id, new.name, COALESCE(new.container, ''));
4040
+ INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
4041
+ VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
4042
+ INSERT INTO symbols_fts(rowid, name, container, body)
4043
+ VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
3959
4044
  END`,
3960
4045
  // Phase 2.15.8 — co-change pairs (file-level historical coupling).
3961
4046
  // Pairs are canonical (`file_a < file_b`) so each unordered pair has
@@ -3981,6 +4066,7 @@ var SCHEMA_STATEMENTS = [
3981
4066
  ];
3982
4067
 
3983
4068
  // ../storage-sqlite/dist/adapter.js
4069
+ var MAX_BODY_BYTES = 8e3;
3984
4070
  function vectorToBlob(v) {
3985
4071
  return Buffer.from(v.buffer, v.byteOffset, v.byteLength);
3986
4072
  }
@@ -4054,6 +4140,14 @@ var SqliteStorageAdapter = class {
4054
4140
  #options;
4055
4141
  #db = null;
4056
4142
  #stmts = null;
4143
+ /**
4144
+ * Lazily-prepared, weight-keyed FTS statements for the adaptive
4145
+ * column-weighting path (see {@link searchByText}). The default
4146
+ * (equal-weight) statement lives in {@link PreparedStatements.searchFts};
4147
+ * non-default weight triples get one cached statement each here. Cleared
4148
+ * on `close` together with the handle.
4149
+ */
4150
+ #weightedFts = /* @__PURE__ */ new Map();
4057
4151
  constructor(options) {
4058
4152
  this.#options = options;
4059
4153
  }
@@ -4070,6 +4164,16 @@ var SqliteStorageAdapter = class {
4070
4164
  }
4071
4165
  const hasMeta = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='meta'`).get();
4072
4166
  const priorVersion = hasMeta ? Number(db.prepare(`SELECT value FROM meta WHERE key = 'schema_version'`).get()?.value ?? 0) : 0;
4167
+ if (priorVersion > 0 && priorVersion < 4) {
4168
+ const cols = db.prepare(`PRAGMA table_info(symbols)`).all();
4169
+ if (!cols.some((c) => c.name === "body")) {
4170
+ db.exec(`ALTER TABLE symbols ADD COLUMN body TEXT`);
4171
+ }
4172
+ db.exec(`DROP TRIGGER IF EXISTS symbols_ai;
4173
+ DROP TRIGGER IF EXISTS symbols_ad;
4174
+ DROP TRIGGER IF EXISTS symbols_au;
4175
+ DROP TABLE IF EXISTS symbols_fts;`);
4176
+ }
4073
4177
  for (const ddl of SCHEMA_STATEMENTS)
4074
4178
  db.exec(ddl);
4075
4179
  if (priorVersion < SCHEMA_VERSION) {
@@ -4087,6 +4191,7 @@ var SqliteStorageAdapter = class {
4087
4191
  this.#db.close();
4088
4192
  this.#db = null;
4089
4193
  this.#stmts = null;
4194
+ this.#weightedFts.clear();
4090
4195
  }
4091
4196
  async getFileHash(path) {
4092
4197
  const row = this.#requireStmts().getFileHash.get(path);
@@ -4104,11 +4209,12 @@ var SqliteStorageAdapter = class {
4104
4209
  async upsertFile(payload) {
4105
4210
  const db = this.#requireDb();
4106
4211
  const stmts = this.#requireStmts();
4212
+ const sourceBuf = payload.source === void 0 ? null : Buffer.from(payload.source, "utf8");
4107
4213
  const tx = db.transaction((p) => {
4108
4214
  stmts.deleteFile.run(p.file.path);
4109
4215
  stmts.insertFile.run(p.file.path, p.file.language, p.file.contentHash, p.file.size, p.file.mtimeMs, p.file.indexedAt, p.file.hasParseErrors ? 1 : 0);
4110
4216
  for (const s of p.symbols)
4111
- this.#insertSymbol(p.file.path, s, stmts);
4217
+ this.#insertSymbol(p.file.path, s, sourceBuf, stmts);
4112
4218
  for (const r of p.references)
4113
4219
  this.#insertRef(p.file.path, r, stmts);
4114
4220
  });
@@ -4238,18 +4344,53 @@ var SqliteStorageAdapter = class {
4238
4344
  vector: byKey.get(`${r.filePath}\0${r.chunkIndex}`) ?? null
4239
4345
  }));
4240
4346
  }
4241
- async searchByText(query, limit) {
4347
+ async searchByText(query, limit, options) {
4242
4348
  if (limit <= 0)
4243
4349
  return [];
4244
4350
  const sanitised = sanitiseFtsQuery(query);
4245
4351
  if (sanitised === "")
4246
4352
  return [];
4247
- const rows = this.#requireStmts().searchFts.all(sanitised, limit);
4353
+ const stmt = this.#ftsStmt(options?.columnWeights);
4354
+ const rows = stmt.all(sanitised, limit);
4248
4355
  return rows.map((row) => ({
4249
4356
  symbol: rowToStoredSymbol(row),
4250
4357
  score: row.bm25_score
4251
4358
  }));
4252
4359
  }
4360
+ /**
4361
+ * Pick the FTS statement for a given `[name, container, body]` BM25
4362
+ * column-weight triple. No weights (or an all-1 triple) returns the
4363
+ * cached default statement — byte-identical, equal-weight ranking. A
4364
+ * non-default triple gets a lazily-prepared, cached statement whose
4365
+ * `bm25()` carries the weights as literal arguments. The weights are
4366
+ * internal, validated finite numbers (never user input), so inlining
4367
+ * them in the SQL is injection-safe; FTS5 requires them as function
4368
+ * arguments, not bindable parameters.
4369
+ */
4370
+ #ftsStmt(weights) {
4371
+ if (weights === void 0)
4372
+ return this.#requireStmts().searchFts;
4373
+ for (const w of weights) {
4374
+ if (!Number.isFinite(w) || w < 0) {
4375
+ throw new Error(`searchByText: columnWeights must be finite and >= 0, got [${weights.join(", ")}]`);
4376
+ }
4377
+ }
4378
+ const [n, c, b] = weights;
4379
+ if (n === 1 && c === 1 && b === 1)
4380
+ return this.#requireStmts().searchFts;
4381
+ const key = `${n},${c},${b}`;
4382
+ const cached = this.#weightedFts.get(key);
4383
+ if (cached !== void 0)
4384
+ return cached;
4385
+ const stmt = this.#requireDb().prepare(`SELECT s.*, bm25(symbols_fts, ${n}, ${c}, ${b}) AS bm25_score
4386
+ FROM symbols_fts
4387
+ JOIN symbols s ON s.id = symbols_fts.rowid
4388
+ WHERE symbols_fts MATCH ?
4389
+ ORDER BY bm25_score
4390
+ LIMIT ?`);
4391
+ this.#weightedFts.set(key, stmt);
4392
+ return stmt;
4393
+ }
4253
4394
  async expandSymbolGraph(symbolName, limit, options) {
4254
4395
  if (limit <= 0)
4255
4396
  return [];
@@ -4353,8 +4494,9 @@ var SqliteStorageAdapter = class {
4353
4494
  commitCount: r.commit_count
4354
4495
  }));
4355
4496
  }
4356
- #insertSymbol(filePath, s, stmts) {
4357
- stmts.insertSymbol.run(filePath, s.name, s.kind, s.language, s.container, s.exported ? 1 : 0, s.range.start.row, s.range.start.column, s.range.end.row, s.range.end.column, s.range.startByte, s.range.endByte);
4497
+ #insertSymbol(filePath, s, sourceBuf, stmts) {
4498
+ const body = sourceBuf === null ? null : sourceBuf.subarray(s.range.startByte, Math.min(s.range.endByte, s.range.startByte + MAX_BODY_BYTES)).toString("utf8");
4499
+ stmts.insertSymbol.run(filePath, s.name, s.kind, s.language, s.container, s.exported ? 1 : 0, s.range.start.row, s.range.start.column, s.range.end.row, s.range.end.column, s.range.startByte, s.range.endByte, body);
4358
4500
  }
4359
4501
  #insertRef(filePath, r, stmts) {
4360
4502
  stmts.insertRef.run(filePath, r.name, r.kind, r.fromContainer, r.moduleSpecifier, r.range.start.row, r.range.start.column, r.range.end.row, r.range.end.column, r.range.startByte, r.range.endByte);
@@ -4378,8 +4520,9 @@ var SqliteStorageAdapter = class {
4378
4520
  insertFile: db.prepare(`INSERT INTO files(path, language, content_hash, size, mtime_ms, indexed_at, has_parse_errors)
4379
4521
  VALUES(?, ?, ?, ?, ?, ?, ?)`),
4380
4522
  insertSymbol: db.prepare(`INSERT INTO symbols(file_path, name, kind, language, container, exported,
4381
- start_row, start_col, end_row, end_col, start_byte, end_byte)
4382
- VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
4523
+ start_row, start_col, end_row, end_col, start_byte, end_byte,
4524
+ body)
4525
+ VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
4383
4526
  insertRef: db.prepare(`INSERT INTO refs(file_path, name, kind, from_container, module_specifier,
4384
4527
  start_row, start_col, end_row, end_col, start_byte, end_byte)
4385
4528
  VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
@@ -4403,6 +4546,14 @@ var SqliteStorageAdapter = class {
4403
4546
  WHERE s.file_path = ?
4404
4547
  ORDER BY s.id
4405
4548
  LIMIT 1 OFFSET ?`),
4549
+ // BM25 over (name, container, body). Default (equal) column weights:
4550
+ // SWE-bench django showed name-dominant weights (10/5/1) regress
4551
+ // localization vs equal weights — common name tokens ("field",
4552
+ // "model") then crowd out the specific body match that pinpoints the
4553
+ // right file. The body column (Schema v4) lets lexical search match a
4554
+ // symbol's source text, which lifts file localization (bm25-only
4555
+ // any@5 25→50 on django). Column weighting is left as a future tuning
4556
+ // knob if a name-vs-body balance proves worthwhile per-corpus.
4406
4557
  searchFts: db.prepare(`SELECT s.*, bm25(symbols_fts) AS bm25_score
4407
4558
  FROM symbols_fts
4408
4559
  JOIN symbols s ON s.id = symbols_fts.rowid
@@ -5109,7 +5260,7 @@ var SupabaseStorageAdapter = class {
5109
5260
  }));
5110
5261
  });
5111
5262
  }
5112
- async searchByText(query, limit) {
5263
+ async searchByText(query, limit, _options) {
5113
5264
  if (limit <= 0)
5114
5265
  return [];
5115
5266
  const tokens = query.toLowerCase().split(/[^a-z0-9_]+/u).filter((t) => t.length >= 2);
@@ -5945,7 +6096,8 @@ var HybridRetriever = class {
5945
6096
  const activeEdgeTypes = EDGE_TYPES.filter((t) => effectiveEdgeWeights[t] > 0);
5946
6097
  const staticEdgeTypes = activeEdgeTypes.filter((t) => t !== CO_CHANGE);
5947
6098
  const coChangeActive = effectiveEdgeWeights[CO_CHANGE] > 0;
5948
- const lexicalPromise = wLex > 0 ? this.#storage.searchByText(trimmed, candidateLimit) : Promise.resolve([]);
6099
+ const lexCols = options.lexicalColumnWeights;
6100
+ const lexicalPromise = wLex > 0 ? this.#storage.searchByText(trimmed, candidateLimit, lexCols !== void 0 ? { columnWeights: lexCols } : void 0) : Promise.resolve([]);
5949
6101
  const vectorPromise = wVec > 0 ? this.#runVector(trimmed, candidateLimit, options.signal) : Promise.resolve({ hits: [], queryVector: null });
5950
6102
  const [lexicalHits, vectorResult] = await Promise.all([
5951
6103
  lexicalPromise,
@@ -5959,7 +6111,23 @@ var HybridRetriever = class {
5959
6111
  if (wLex > 0)
5960
6112
  firstPassLists.push({ ...lexicalAsList(lexicalHits), weight: wLex });
5961
6113
  const firstPass = reciprocalRankFusion(firstPassLists, { k: rrfK });
5962
- const seedSymbols = wGraph > 0 && expandN > 0 && firstPass.length > 0 ? firstPass.slice(0, expandN).map((r) => r.payload) : [];
6114
+ let seedSymbols = [];
6115
+ if (wGraph > 0 && expandN > 0) {
6116
+ const seedCols = options.graphSeedColumnWeights;
6117
+ if (seedCols !== void 0 && wLex > 0) {
6118
+ const seedLexHits = await this.#storage.searchByText(trimmed, candidateLimit, {
6119
+ columnWeights: seedCols
6120
+ });
6121
+ const seedLists = [];
6122
+ if (wVec > 0)
6123
+ seedLists.push({ ...vectorAsList(vectorHits), weight: wVec });
6124
+ seedLists.push({ ...lexicalAsList(seedLexHits), weight: wLex });
6125
+ const seedPass = reciprocalRankFusion(seedLists, { k: rrfK });
6126
+ seedSymbols = seedPass.slice(0, expandN).map((r) => r.payload);
6127
+ } else if (firstPass.length > 0) {
6128
+ seedSymbols = firstPass.slice(0, expandN).map((r) => r.payload);
6129
+ }
6130
+ }
5963
6131
  const graphPromise = seedSymbols.length > 0 && staticEdgeTypes.length > 0 ? this.#runGraphExpansion(seedSymbols, candidateLimit, staticEdgeTypes, maxDepth, adaptiveThreshold) : Promise.resolve(/* @__PURE__ */ new Map());
5964
6132
  const coChangePromise = coChangeActive && seedSymbols.length > 0 && ccFilesPerSeed > 0 && ccPerFileCap > 0 ? this.#runCoChangeStage(seedSymbols, ccFilesPerSeed, ccPerFileCap, ccMinWeight) : Promise.resolve([]);
5965
6133
  const [graphBuckets, coChangeSymbols] = await Promise.all([
@@ -6992,6 +7160,377 @@ var HashEmbeddingProvider = class {
6992
7160
  }
6993
7161
  };
6994
7162
 
7163
+ // ../rerank-voyage/dist/index.js
7164
+ var DEFAULT_MODEL2 = "rerank-2.5";
7165
+ var DEFAULT_BASE_URL = "https://api.voyageai.com/v1";
7166
+ var DEFAULT_BATCH5 = 100;
7167
+ var DEFAULT_RETRIES5 = 6;
7168
+ var DEFAULT_BACKOFF5 = 2e3;
7169
+ var DEFAULT_RETRY_MAX2 = 6e4;
7170
/**
 * Convert a `Retry-After` header value into a delay in milliseconds.
 *
 * @param {string|null} value - Raw header value, or `null` when absent.
 * @param {number} [now=Date.now()] - Reference time (ms since epoch) used
 *   for date-style values.
 * @returns {number|null} Delay in ms: seconds × 1000 (rounded) for a plain
 *   decimal number, or the time remaining until the parsed date (never
 *   negative). `null` when the value is absent, blank, or unparseable.
 */
function parseRetryAfterMs2(value, now = Date.now()) {
  if (value === null) {
    return null;
  }
  const header = value.trim();
  if (header === "") {
    return null;
  }
  const looksNumeric = /^[0-9]+(\.[0-9]+)?$/.test(header);
  if (looksNumeric) {
    const seconds = Number(header);
    return Number.isFinite(seconds) && seconds >= 0 ? Math.round(seconds * 1e3) : null;
  }
  // Only strings containing a letter are handed to Date.parse, so bare
  // digit/punctuation strings are never interpreted as dates.
  const when = /[A-Za-z]/.test(header) ? Date.parse(header) : Number.NaN;
  if (!Number.isFinite(when)) {
    return null;
  }
  const remaining = when - now;
  if (remaining > 0) {
    return remaining;
  }
  return 0;
}
7190
/**
 * Wait `ms` milliseconds, or reject early when `signal` aborts.
 *
 * The rejection is an `Error` with message "aborted" and `name` set to
 * "AbortError", so callers that detect cancellation via
 * `err.name === "AbortError"` recognise an abort that happens during the
 * wait. The signal's `reason` is attached as `cause`.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep5(ms, signal) {
  return new Promise((resolve, reject) => {
    const makeAbortError = () => {
      const err = new Error("aborted", { cause: signal?.reason });
      err.name = "AbortError";
      return err;
    };
    if (signal?.aborted === true) {
      reject(makeAbortError());
      return;
    }
    const onAbort = () => {
      // Cancel the pending timer so it cannot keep the event loop alive.
      clearTimeout(timer);
      reject(makeAbortError());
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
7207
/**
 * Build an Error flagged with `nonRetryable: true`.
 *
 * @param {string} message - Error message.
 * @returns {Error} The flagged error (returned, not thrown).
 */
function nonRetryable5(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
7212
/**
 * Rerank client that POSTs `{ query, documents, model }` to
 * `<baseUrl>/rerank` with a bearer token and reads scores from the
 * response's `data` array.
 *
 * Candidates are sent in batches of `batchSize`. Each batch is retried on
 * HTTP 429 / 5xx and on thrown errors, with exponential backoff that
 * honours `Retry-After`, up to `maxRetries` retries.
 */
var VoyageRerankProvider = class {
  name;
  model;
  region;
  #baseUrl;
  #apiKey;
  #batchSize;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #fetch;
  /**
   * @param opts Provider options. `apiKey` is required; `batchSize`, when
   *   given, must be an integer in 1..1000. Every other option falls back
   *   to a module-level default.
   * @throws {Error} On a missing `apiKey` or an out-of-range `batchSize`.
   */
  constructor(opts) {
    if (typeof opts.apiKey !== "string" || opts.apiKey === "") {
      throw new Error("VoyageRerankProvider: apiKey is required");
    }
    if (opts.batchSize !== void 0 && (!Number.isInteger(opts.batchSize) || opts.batchSize <= 0 || opts.batchSize > 1e3)) {
      throw new Error(`VoyageRerankProvider: batchSize must be an integer in 1..1000, got ${opts.batchSize}`);
    }
    this.model = opts.model ?? DEFAULT_MODEL2;
    this.name = opts.name ?? `voyage:${this.model}`;
    this.region = opts.region ?? "us";
    // Trailing slashes are stripped so `${baseUrl}/rerank` never doubles up.
    this.#baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
    this.#apiKey = opts.apiKey;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH5;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES5;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF5;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX2;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Score every candidate against `query`.
   *
   * @param query      Query text.
   * @param candidates Array of `{ id, text }`.
   * @param opts       Optional `{ signal, topK }`.
   * @returns Array of `{ id, index, score }` sorted by descending score,
   *   where `index` is the candidate's position in `candidates`. Cut to
   *   `topK` entries when `topK` is in `0..length-1`.
   */
  async rerank(query, candidates, opts) {
    if (candidates.length === 0)
      return [];
    const all = new Array(candidates.length);
    let cursor = 0;
    for (let start = 0; start < candidates.length; start += this.#batchSize) {
      const slice = candidates.slice(start, start + this.#batchSize);
      const scored = await this.#rerankBatch(query, slice, opts?.signal);
      for (const hit of scored) {
        // Row indices are batch-local; shift them back into `candidates`.
        const globalIndex = start + hit.localIndex;
        const cand = candidates[globalIndex];
        all[cursor++] = { id: cand.id, index: globalIndex, score: hit.score };
      }
    }
    all.sort((a, b) => b.score - a.score);
    if (opts?.topK !== void 0 && opts.topK >= 0 && opts.topK < all.length) {
      return all.slice(0, opts.topK);
    }
    return all;
  }
  /** Send one batch, retrying transient failures. Returns decoded rows. */
  async #rerankBatch(query, batch, signal) {
    const body = {
      query,
      documents: batch.map((c) => c.text),
      model: this.model,
      return_documents: false,
      truncation: true
    };
    const init = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.#apiKey}`
      },
      body: JSON.stringify(body)
    };
    if (signal !== void 0)
      init.signal = signal;
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(`${this.#baseUrl}/rerank`, init);
        if (res.status === 429 || res.status >= 500 && res.status < 600) {
          // The body of a response we are about to retry is never read;
          // cancel it so the underlying connection is released promptly.
          await this.#discardBody(res);
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep5(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          throw nonRetryable5(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        // Stop as soon as the caller has cancelled, whatever the error is
        // called: an abort during the backoff sleep or a transport error
        // raised after the abort must not be retried.
        if (err?.name === "AbortError" || signal?.aborted === true)
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep5(this.#computeBackoff(attempt, null), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort release of an unread response body; failures are ignored. */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful to do: the response is being discarded anyway.
    }
  }
  /** Exponential backoff, raised to `Retry-After` if larger, capped at `retryMaxMs`. */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs2(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate the response payload and map it to `{ localIndex, score }`.
   * Throws a non-retryable error on a row-count mismatch, an out-of-range
   * index, or a non-finite score.
   */
  #decode(payload, expected) {
    if (!Array.isArray(payload.data) || payload.data.length !== expected) {
      throw nonRetryable5(`${this.name}: expected ${expected} rerank rows, got ${payload.data?.length ?? 0}`);
    }
    return payload.data.map((row) => {
      if (!Number.isInteger(row.index) || row.index < 0 || row.index >= expected) {
        throw nonRetryable5(`${this.name}: invalid index ${row.index} in rerank response`);
      }
      if (typeof row.relevance_score !== "number" || !Number.isFinite(row.relevance_score)) {
        throw nonRetryable5(`${this.name}: invalid relevance_score ${row.relevance_score} at index ${row.index}`);
      }
      return { localIndex: row.index, score: row.relevance_score };
    });
  }
};
7334
+
7335
+ // ../rerank-openai-compat/dist/index.js
7336
+ var DEFAULT_MODEL3 = "bge-reranker-base";
7337
+ var DEFAULT_BATCH6 = 100;
7338
+ var DEFAULT_RETRIES6 = 6;
7339
+ var DEFAULT_BACKOFF6 = 2e3;
7340
+ var DEFAULT_RETRY_MAX3 = 6e4;
7341
+ var DEFAULT_TIMEOUT = 18e4;
7342
/**
 * Translate a `Retry-After` header into milliseconds to wait.
 *
 * @param {string|null} value - Header text, or `null` if the header is missing.
 * @param {number} [now=Date.now()] - "Current" time in ms since epoch, used
 *   when the header is a date.
 * @returns {number|null} For a plain decimal number: that many seconds in
 *   ms, rounded. For a date string: ms until that date, floored at 0.
 *   `null` for a missing, blank, or unparseable header.
 */
function parseRetryAfterMs3(value, now = Date.now()) {
  if (value === null) {
    return null;
  }
  const text = value.trim();
  if (text === "") {
    return null;
  }
  if (/^[0-9]+(\.[0-9]+)?$/.test(text)) {
    const secs = Number(text);
    const usable = Number.isFinite(secs) && secs >= 0;
    return usable ? Math.round(secs * 1e3) : null;
  }
  // A date form must contain at least one letter; anything else is rejected
  // before it reaches Date.parse.
  if (!/[A-Za-z]/.test(text)) {
    return null;
  }
  const target = Date.parse(text);
  if (!Number.isFinite(target)) {
    return null;
  }
  const wait = target - now;
  return wait > 0 ? wait : 0;
}
7362
/**
 * Resolve after `ms` milliseconds unless `signal` aborts first.
 *
 * On abort the promise rejects with an `Error` whose message is "aborted"
 * and whose `name` is "AbortError" (the signal's `reason` is kept as
 * `cause`). Naming the error lets callers that test
 * `err.name === "AbortError"` tell a cancellation during the wait apart
 * from a transient failure.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects on abort.
 */
function sleep6(ms, signal) {
  return new Promise((resolve, reject) => {
    const abortError = () => {
      const err = new Error("aborted", { cause: signal?.reason });
      err.name = "AbortError";
      return err;
    };
    if (signal?.aborted === true) {
      reject(abortError());
      return;
    }
    const onAbort = () => {
      // Drop the timer so an aborted wait leaves nothing pending.
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
7379
/**
 * Create an Error carrying a `nonRetryable: true` marker property.
 *
 * @param {string} message - Error message.
 * @returns {Error} The marked error; the caller decides whether to throw it.
 */
function nonRetryable6(message) {
  const failure = new Error(message);
  return Object.assign(failure, { nonRetryable: true });
}
7384
/**
 * Rerank client that POSTs `{ query, documents, model }` to
 * `<baseUrl>/rerank` and reads scores from the response's `results` array.
 * The bearer token is optional.
 *
 * Candidates are sent in batches of `batchSize`. Each attempt has its own
 * timeout (`timeoutMs`, 0 disables). A batch is retried on HTTP 429 / 5xx,
 * on timeouts and on thrown errors, with exponential backoff that honours
 * `Retry-After`, up to `maxRetries` retries.
 */
var OpenAICompatRerankProvider = class {
  name;
  model;
  region;
  #baseUrl;
  #apiKey;
  #batchSize;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #timeoutMs;
  #fetch;
  /**
   * @param opts Provider options. `baseUrl` is required; `batchSize`, when
   *   given, must be an integer in 1..1000 and `timeoutMs` a non-negative
   *   integer. An empty `apiKey` is treated as "no key".
   * @throws {Error} On a missing `baseUrl` or an invalid `batchSize` /
   *   `timeoutMs`.
   */
  constructor(opts) {
    if (typeof opts.baseUrl !== "string" || opts.baseUrl === "") {
      throw new Error("OpenAICompatRerankProvider: baseUrl is required");
    }
    if (opts.batchSize !== void 0 && (!Number.isInteger(opts.batchSize) || opts.batchSize <= 0 || opts.batchSize > 1e3)) {
      throw new Error(`OpenAICompatRerankProvider: batchSize must be an integer in 1..1000, got ${opts.batchSize}`);
    }
    if (opts.timeoutMs !== void 0 && (!Number.isInteger(opts.timeoutMs) || opts.timeoutMs < 0)) {
      throw new Error(`OpenAICompatRerankProvider: timeoutMs must be a non-negative integer (0 disables), got ${opts.timeoutMs}`);
    }
    this.model = opts.model ?? DEFAULT_MODEL3;
    this.name = opts.name ?? `openai-compat:${this.model}`;
    this.region = opts.region ?? "self-hosted";
    // Trailing slashes are stripped so `${baseUrl}/rerank` never doubles up.
    this.#baseUrl = opts.baseUrl.replace(/\/+$/, "");
    this.#apiKey = opts.apiKey === void 0 || opts.apiKey === "" ? void 0 : opts.apiKey;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH6;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES6;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF6;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX3;
    this.#timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Score every candidate against `query`.
   *
   * @param query      Query text.
   * @param candidates Array of `{ id, text }`.
   * @param opts       Optional `{ signal, topK }`.
   * @returns Array of `{ id, index, score }` sorted by descending score,
   *   where `index` is the candidate's position in `candidates`. Cut to
   *   `topK` entries when `topK` is in `0..length-1`.
   */
  async rerank(query, candidates, opts) {
    if (candidates.length === 0)
      return [];
    const all = new Array(candidates.length);
    let cursor = 0;
    for (let start = 0; start < candidates.length; start += this.#batchSize) {
      const slice = candidates.slice(start, start + this.#batchSize);
      const scored = await this.#rerankBatch(query, slice, opts?.signal);
      for (const hit of scored) {
        // Row indices are batch-local; shift them back into `candidates`.
        const globalIndex = start + hit.localIndex;
        const cand = candidates[globalIndex];
        all[cursor++] = { id: cand.id, index: globalIndex, score: hit.score };
      }
    }
    all.sort((a, b) => b.score - a.score);
    if (opts?.topK !== void 0 && opts.topK >= 0 && opts.topK < all.length) {
      return all.slice(0, opts.topK);
    }
    return all;
  }
  /** Send one batch, retrying transient failures. Returns decoded rows. */
  async #rerankBatch(query, batch, signal) {
    const body = {
      query,
      documents: batch.map((c) => c.text),
      model: this.model,
      return_documents: false
    };
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0)
      headers.authorization = `Bearer ${this.#apiKey}`;
    const payloadJson = JSON.stringify(body);
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      // Each attempt gets its own controller: it aborts on the per-attempt
      // timeout or when the caller's signal aborts.
      const controller = new AbortController();
      let timedOut = false;
      let timer;
      const stopTimer = () => {
        if (timer !== void 0) {
          clearTimeout(timer);
          timer = void 0;
        }
      };
      if (this.#timeoutMs > 0) {
        timer = setTimeout(() => {
          timedOut = true;
          controller.abort();
        }, this.#timeoutMs);
      }
      const onParentAbort = () => controller.abort();
      if (signal !== void 0) {
        if (signal.aborted)
          controller.abort();
        else
          signal.addEventListener("abort", onParentAbort, { once: true });
      }
      const init = {
        method: "POST",
        headers,
        body: payloadJson,
        signal: controller.signal
      };
      try {
        const res = await this.#fetch(`${this.#baseUrl}/rerank`, init);
        if (res.status === 429 || res.status >= 500 && res.status < 600) {
          // The request phase is over: disarm the timeout so it cannot fire
          // during the backoff sleep and mislabel a later failure as a
          // timeout, then release the unread body.
          stopTimer();
          await this.#discardBody(res);
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep6(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          throw nonRetryable6(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        // Caller cancellation always wins, whatever the error is called.
        if (signal?.aborted === true)
          throw err;
        const isAbort = err?.name === "AbortError";
        if (isAbort && !timedOut)
          throw err;
        if (!isAbort && err?.nonRetryable === true)
          throw err;
        const normalized = timedOut ? new Error(`${this.name}: request timed out after ${this.#timeoutMs}ms`, { cause: err }) : err;
        if (attempt >= this.#maxRetries)
          throw normalized;
        lastError = normalized;
        attempt += 1;
        await sleep6(this.#computeBackoff(attempt, null), signal);
      } finally {
        stopTimer();
        if (signal !== void 0)
          signal.removeEventListener("abort", onParentAbort);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort release of an unread response body; failures are ignored. */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful to do: the response is being discarded anyway.
    }
  }
  /** Exponential backoff, raised to `Retry-After` if larger, capped at `retryMaxMs`. */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs3(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate the response payload and map it to `{ localIndex, score }`.
   * Throws a non-retryable error on a row-count mismatch, an out-of-range
   * index, or a non-finite score.
   */
  #decode(payload, expected) {
    if (!Array.isArray(payload.results) || payload.results.length !== expected) {
      throw nonRetryable6(`${this.name}: expected ${expected} rerank rows, got ${payload.results?.length ?? 0}`);
    }
    return payload.results.map((row) => {
      if (!Number.isInteger(row.index) || row.index < 0 || row.index >= expected) {
        throw nonRetryable6(`${this.name}: invalid index ${row.index} in rerank response`);
      }
      if (typeof row.relevance_score !== "number" || !Number.isFinite(row.relevance_score)) {
        throw nonRetryable6(`${this.name}: invalid relevance_score ${row.relevance_score} at index ${row.index}`);
      }
      return { localIndex: row.index, score: row.relevance_score };
    });
  }
};
7533
+
6995
7534
  // dist/composition.js
6996
7535
  var RegionModeViolation = class extends Error {
6997
7536
  mode;
@@ -7227,6 +7766,56 @@ function discoverEmbeddingProvider(env, fetchImpl) {
7227
7766
  validateRegionMode(regionMode, picked.id, picked.provider.region);
7228
7767
  return { ...picked, regionMode };
7229
7768
  }
7769
/**
 * Builds the optional rerank provider selected by `PROMETHEUS_RERANK_PROVIDER`.
 *
 * The provider name is trimmed and lower-cased before matching:
 * - unset, empty or `"none"`: reranking is disabled (`provider` is `null`);
 * - `"voyage"`: requires `VOYAGE_API_KEY` and is only permitted when the region
 *   mode is `"default"`;
 * - `"bge"` or `"generic"`: an OpenAI-compatible endpoint taken from
 *   `PROMETHEUS_RERANK_ENDPOINT`; both spellings report the id `"bge"`.
 *
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @param {*} fetchImpl - Optional fetch implementation, forwarded via `fetchOpt`.
 * @returns {{id: string, provider: object | null}} The selected provider.
 * @throws {NoProviderError} When a required variable is missing or the
 *   provider name is not recognised.
 * @throws {RegionModeViolation} When `"voyage"` is requested under a
 *   non-default region mode.
 */
function discoverRerankProvider(env, fetchImpl) {
  const regionMode = parseRegionMode(env.PROMETHEUS_REGION_MODE);
  // Trim so that values such as " voyage " (common in .env files) still match.
  const forced = env.PROMETHEUS_RERANK_PROVIDER?.trim().toLowerCase() ?? "none";
  if (forced === "" || forced === "none") {
    return { id: "none", provider: null };
  }
  if (forced === "voyage") {
    const apiKey = env.VOYAGE_API_KEY;
    if (apiKey === undefined || apiKey === "") {
      throw new NoProviderError(`rerank provider "voyage" requested but VOYAGE_API_KEY is missing`);
    }
    const model = env.VOYAGE_RERANK_MODEL ?? "rerank-2.5";
    const region = "us";
    if (regionMode !== "default") {
      // NOTE(review): the alternatives listed here look like embedding
      // provider ids rather than rerank provider ids - confirm they are the
      // intended suggestions for a rerank violation.
      throw new RegionModeViolation(
        regionMode,
        "voyage",
        region,
        regionMode === "eu-strict" ? ["nomic", "bge-m3", "mistral", "hash"] : ["nomic", "bge-m3", "hash"]
      );
    }
    const provider = new VoyageRerankProvider({
      name: "voyage-rerank",
      apiKey,
      model,
      region,
      baseUrl: env.VOYAGE_BASE_URL ?? "https://api.voyageai.com/v1",
      maxRetries: intEnv(env, "VOYAGE_RERANK_MAX_RETRIES", 6),
      retryBaseMs: intEnv(env, "VOYAGE_RERANK_RETRY_BASE_MS", 2e3),
      batchSize: intEnv(env, "VOYAGE_RERANK_BATCH", 100),
      ...fetchOpt(fetchImpl)
    });
    return { id: "voyage", provider };
  }
  if (forced === "bge" || forced === "generic") {
    const baseUrl = env.PROMETHEUS_RERANK_ENDPOINT;
    if (baseUrl === undefined || baseUrl === "") {
      throw new NoProviderError(`rerank provider "${forced}" requested but PROMETHEUS_RERANK_ENDPOINT is missing`);
    }
    const model = env.PROMETHEUS_RERANK_MODEL ?? "bge-reranker-base";
    const provider = new OpenAICompatRerankProvider({
      name: env.PROMETHEUS_RERANK_NAME ?? `bge-rerank:${model}`,
      model,
      region: "self-hosted",
      baseUrl,
      maxRetries: intEnv(env, "PROMETHEUS_RERANK_MAX_RETRIES", 6),
      retryBaseMs: intEnv(env, "PROMETHEUS_RERANK_RETRY_BASE_MS", 2e3),
      batchSize: intEnv(env, "PROMETHEUS_RERANK_BATCH", 100),
      timeoutMs: intEnv(env, "PROMETHEUS_RERANK_TIMEOUT_MS", 18e4),
      ...apiKeyOpt(env.PROMETHEUS_RERANK_API_KEY),
      ...fetchOpt(fetchImpl)
    });
    return { id: "bge", provider };
  }
  // List every accepted spelling, including the "generic" alias.
  throw new NoProviderError(`unknown PROMETHEUS_RERANK_PROVIDER="${forced}" (expected "none", "voyage", "bge", or "generic")`);
}
7230
7819
  function getStableDbPath(workspaceRoot) {
7231
7820
  const abs = resolve4(workspaceRoot);
7232
7821
  const hash = createHash3("sha256").update(abs).digest("hex").slice(0, 16);
@@ -7310,6 +7899,8 @@ async function composeFromEnv(opts) {
7310
7899
  const { id: storageBackend, adapter: storage, dbPath } = discoverStorageBackend(env, regionMode, storageOptions);
7311
7900
  await storage.init();
7312
7901
  const retriever = new HybridRetriever({ storage, embedder });
7902
+ const { id: rerankId, provider: reranker } = discoverRerankProvider(env, opts.fetch);
7903
+ const rerankTopN = intEnv(env, "PROMETHEUS_RERANK_TOP_N", 100);
7313
7904
  const managed = apiKeyPresent && storageBackend === "sqlite";
7314
7905
  let closed = false;
7315
7906
  return {
@@ -7324,6 +7915,9 @@ async function composeFromEnv(opts) {
7324
7915
  storageBackend,
7325
7916
  managed,
7326
7917
  dbPath,
7918
+ reranker,
7919
+ rerankId,
7920
+ rerankTopN,
7327
7921
  async close() {
7328
7922
  if (closed)
7329
7923
  return;
@@ -7476,6 +8070,8 @@ var FRAMEWORK_MANIFESTS = [
7476
8070
  // Upper bound for the `k` argument accepted by the search input schema.
  var MAX_K = 50;
7477
8071
  // NOTE(review): presumably the `k` used when a caller supplies none - the
  // consumer (clampK) is not visible here; confirm.
  var DEFAULT_K2 = 10;
7478
8072
  // 256 KiB. NOTE(review): looks like a per-file read limit - confirm at the use site.
  var MAX_FILE_BYTES = 256 * 1024;
8073
  // Default byte cap for a snippet returned by snippetForSymbol.
+ var MAX_SNIPPET_BYTES = 1500;
8074
  // Byte cap rerankHead passes to snippetForSymbol for each rerank candidate.
+ var RERANK_DOC_BYTES = 4096;
7479
8075
  function symbolToJson(s) {
7480
8076
  return {
7481
8077
  name: s.name,
@@ -7502,6 +8098,64 @@ function textResult(payload) {
7502
8098
  content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
7503
8099
  };
7504
8100
  }
8101
/**
 * Reads the source text of a symbol from its file, capped at `capBytes`.
 *
 * File contents are memoised in `cache` by relative path; a failed read is
 * cached as `null` so the same file is not retried within one request.
 * When the symbol is longer than the cap, the cut is moved back to the nearest
 * UTF-8 character boundary so the snippet never ends in a half-decoded
 * character (U+FFFD).
 *
 * @param {string} workspaceRoot - Workspace root passed to `resolveInWorkspace`.
 * @param {{filePath: string, range: {startByte: number, endByte: number}}} symbol
 *   Symbol whose byte range should be extracted.
 * @param {Map<string, Buffer | null>} cache - Per-request file content cache.
 * @param {number} [capBytes=MAX_SNIPPET_BYTES] - Maximum snippet size in bytes.
 * @returns {Promise<{text: string, truncated: boolean} | null>} The snippet, or
 *   `null` when the path is sensitive, the file cannot be read, the range is
 *   empty, or any other error occurs.
 */
async function snippetForSymbol(workspaceRoot, symbol, cache, capBytes = MAX_SNIPPET_BYTES) {
  try {
    const relPath = symbol.filePath;
    if (isSensitivePath(relPath)) {
      return null;
    }
    let buf = cache.get(relPath);
    if (buf === undefined) {
      const abs = resolveInWorkspace(workspaceRoot, relPath);
      buf = await readFile3(abs).catch(() => null);
      cache.set(relPath, buf);
    }
    if (buf === null) {
      return null;
    }
    const startByte = Math.max(0, symbol.range.startByte);
    const endByte = Math.min(symbol.range.endByte, buf.byteLength);
    if (!(endByte > startByte)) {
      return null;
    }
    const full = buf.subarray(startByte, endByte);
    if (!(full.byteLength > capBytes)) {
      return { text: full.toString("utf8"), truncated: false };
    }
    // UTF-8 continuation bytes look like 10xxxxxx. If the first dropped byte
    // is one, the cut is inside a character: back up to that character's lead
    // byte so the whole character is dropped instead of being split.
    let cut = capBytes;
    while (cut > 0 && (full[cut] & 0xc0) === 0x80) {
      cut -= 1;
    }
    return { text: full.subarray(0, cut).toString("utf8"), truncated: true };
  } catch {
    // Snippets are best-effort decoration; any failure means "no snippet".
    return null;
  }
}
8126
/**
 * Reorders the leading search results using an external reranker.
 *
 * Each entry of `head` that has a readable snippet is submitted as a candidate
 * whose id is its index in `head`. Entries the reranker returns come first, in
 * the reranker's order; every other entry (no snippet, or not returned) keeps
 * its original relative order and is appended afterwards, so the result always
 * contains each entry of `head` exactly once.
 *
 * Returned ids are validated against the set that was actually submitted.
 * Blank ids (which `Number()` would coerce to 0), non-numeric, out-of-range,
 * duplicate and never-submitted ids are ignored.
 *
 * @param {{rerank: Function}} reranker - Provider exposing `rerank(query, candidates, opts)`.
 * @param {string} query - The user's search query.
 * @param {Array<{symbol: object}>} head - Results to reorder.
 * @param {string} workspaceRoot - Workspace root used to load snippets.
 * @param {Map<string, Buffer | null>} cache - Per-request file content cache.
 * @returns {Promise<Array | null>} The reordered entries, or `null` when there
 *   is nothing to rerank or the reranker fails (caller keeps original order).
 */
async function rerankHead(reranker, query, head, workspaceRoot, cache) {
  const candidates = [];
  const submitted = new Set();
  // Sequential on purpose: consecutive symbols often share a file, and the
  // cache is only populated once the first read has completed.
  for (let i = 0; i < head.length; i++) {
    const snip = await snippetForSymbol(workspaceRoot, head[i].symbol, cache, RERANK_DOC_BYTES);
    if (snip === null) {
      continue;
    }
    candidates.push({ id: String(i), text: snip.text });
    submitted.add(i);
  }
  if (candidates.length === 0) {
    return null;
  }
  let hits;
  try {
    hits = await reranker.rerank(query, candidates, { topK: candidates.length });
  } catch {
    // Reranking is an optional refinement; fall back to the fused order.
    return null;
  }
  if (!Array.isArray(hits)) {
    return null;
  }
  const out = [];
  const seen = new Set();
  for (const hit of hits) {
    const rawId = hit?.id;
    let index = Number.NaN;
    if (typeof rawId === "number") {
      index = rawId;
    } else if (typeof rawId === "string" && rawId.trim() !== "") {
      index = Number(rawId);
    }
    // `submitted` only holds valid integer indexes into `head`.
    if (!submitted.has(index) || seen.has(index)) {
      continue;
    }
    seen.add(index);
    out.push(head[index]);
  }
  for (let i = 0; i < head.length; i++) {
    if (!seen.has(i)) {
      out.push(head[i]);
    }
  }
  return out;
}
7505
8159
  function resolveInWorkspace(workspaceRoot, input) {
7506
8160
  if (input === "")
7507
8161
  throw new Error("path must not be empty.");
@@ -7541,7 +8195,8 @@ function clampK(k) {
7541
8195
  }
7542
8196
  // Input schema for the search_code tool.
  //   query          - required, non-empty search text.
  //   k              - optional positive integer, at most MAX_K.
  //   includeSnippet - optional flag; the search_code handler treats a missing
  //                    value as true (inline snippets included).
  var searchInput = {
7543
8197
  query: z.string().min(1, "query must not be empty"),
7544
- k: z.number().int().positive().max(MAX_K).optional()
8198
+ k: z.number().int().positive().max(MAX_K).optional(),
8199
+ includeSnippet: z.boolean().optional()
7545
8200
  };
7546
8201
  var lookupInput = {
7547
8202
  name: z.string().min(1, "name must not be empty"),
@@ -7573,23 +8228,41 @@ var changedSinceInput = {
7573
8228
  };
7574
8229
  var emptyInput = {};
7575
8230
  function registerTools(server, deps) {
7576
- const { storage, retriever, workspaceRoot, workspaceId, workspaceName, regionMode, providerId, storageBackend } = deps;
8231
+ const { storage, retriever, workspaceRoot, workspaceId, workspaceName, regionMode, providerId, storageBackend, reranker, rerankTopN } = deps;
7577
8232
  server.registerTool("search_code", {
7578
8233
  title: "Hybrid code search",
7579
- description: "Hybrid retrieval over the indexed workspace (lexical FTS + vector + symbol graph) fused with RRF. Returns the top-k symbols with provenance per source.",
8234
+ description: "PRIMARY code search for this workspace \u2014 call this FIRST to find where something is defined, used or implemented, before reading files or guessing paths. Hybrid retrieval (lexical FTS + vector + symbol graph, RRF-fused) over natural-language or symbol queries. Returns the top-k symbols with provenance AND an inline source snippet per hit, so the result is usually actionable without a follow-up get_file. Set `includeSnippet: false` to omit the inline code (symbols only).",
7580
8235
  inputSchema: searchInput
7581
8236
  }, async (args) => {
7582
8237
  const k = clampK(args.k);
7583
- const results = await retriever.search(args.query, { k });
7584
- return textResult({
7585
- query: args.query,
7586
- k,
7587
- results: results.map((r) => ({
8238
+ const includeSnippet = args.includeSnippet ?? true;
8239
+ const cache = /* @__PURE__ */ new Map();
8240
+ const poolK = reranker ? Math.max(k, rerankTopN ?? 100) : k;
8241
+ const pool = await retriever.search(args.query, { k: poolK });
8242
+ let ordered = pool;
8243
+ let reranked = false;
8244
+ if (reranker && pool.length > 0) {
8245
+ const head = pool.slice(0, rerankTopN ?? 100);
8246
+ const tail = pool.slice(rerankTopN);
8247
+ const reorderedHead = await rerankHead(reranker, args.query, head, workspaceRoot, cache);
8248
+ if (reorderedHead !== null) {
8249
+ ordered = reorderedHead.concat(tail);
8250
+ reranked = true;
8251
+ }
8252
+ }
8253
+ const results = ordered.slice(0, k);
8254
+ const mapped = await Promise.all(results.map(async (r) => {
8255
+ const base = {
7588
8256
  score: r.score,
7589
8257
  provenance: r.provenance,
7590
8258
  symbol: symbolToJson(r.symbol)
7591
- }))
7592
- });
8259
+ };
8260
+ if (!includeSnippet)
8261
+ return base;
8262
+ const snip = await snippetForSymbol(workspaceRoot, r.symbol, cache);
8263
+ return snip === null ? base : { ...base, snippet: snip.text, snippetTruncated: snip.truncated };
8264
+ }));
8265
+ return textResult({ query: args.query, k, reranked, results: mapped });
7593
8266
  });
7594
8267
  server.registerTool("get_symbol", {
7595
8268
  title: "Exact symbol lookup",
@@ -7656,6 +8329,9 @@ function registerTools(server, deps) {
7656
8329
  inputSchema: getFileInput
7657
8330
  }, async (args) => {
7658
8331
  const abs = resolveInWorkspace(workspaceRoot, args.path);
8332
+ if (isSensitivePath(relative(workspaceRoot, abs))) {
8333
+ throw new Error(`${SENSITIVE_PATH_ERROR}: "${args.path}".`);
8334
+ }
7659
8335
  const buf = await readFile3(abs);
7660
8336
  const start = args.startByte ?? 0;
7661
8337
  const end = args.endByte ?? buf.byteLength;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prom.codes/context-mcp",
3
- "version": "0.1.0",
3
+ "version": "0.2.1",
4
4
  "description": "Prometheus Context Engine — local-first codebase indexing & retrieval as an MCP server.",
5
5
  "type": "module",
6
6
  "bin": {