@prom.codes/context-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +9 -0
  2. package/dist/bin.js +274 -71
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -25,4 +25,13 @@ your workspace into a local SQLite database (`~/.prometheus/<hash>.db`)
25
25
  and embeds via the Prometheus API. Your code never leaves your machine —
26
26
  only embedding text transits to the API.
27
27
 
28
+ ## Native modules
29
+
30
+ Uses native Tree-sitter grammars + `better-sqlite3` for parsing and storage.
31
+ Prebuilt binaries are fetched automatically on the mainstream platforms
32
+ (macOS x64/arm64, Linux x64, Windows x64) — no compiler needed. On other
33
+ platforms (e.g. Linux/Windows arm64) or a Node ABI without a prebuild, install
34
+ C/C++ build tools so the native modules can compile (Windows: VS Build Tools).
35
+ Requires Node ≥ 20.10.
36
+
28
37
  Docs: https://prom.codes/docs
package/dist/bin.js CHANGED
@@ -3354,6 +3354,116 @@ async function runEmbedPass(storage, embedder, options, driftRecovered) {
3354
3354
  import chokidar from "chokidar";
3355
3355
  import { resolve, sep } from "node:path";
3356
3356
  import { EventEmitter } from "node:events";
3357
+
3358
+ // ../shared/dist/types.js
3359
+ var GRAMMAR_LANGUAGE_IDS = [
3360
+ "typescript",
3361
+ "tsx",
3362
+ "javascript",
3363
+ "python",
3364
+ "php",
3365
+ "go",
3366
+ "rust",
3367
+ "java",
3368
+ "csharp",
3369
+ "c",
3370
+ "cpp",
3371
+ "ruby",
3372
+ "kotlin",
3373
+ "html"
3374
+ ];
3375
+ var DOCUMENT_LANGUAGE_IDS = [
3376
+ "markdown",
3377
+ "text",
3378
+ "json",
3379
+ "yaml",
3380
+ "toml"
3381
+ ];
3382
+ var LANGUAGE_IDS = [
3383
+ ...GRAMMAR_LANGUAGE_IDS,
3384
+ ...DOCUMENT_LANGUAGE_IDS
3385
+ ];
3386
+ var EDGE_TYPES = [
3387
+ "defines",
3388
+ "calls",
3389
+ "imports",
3390
+ "same-file",
3391
+ "co-change"
3392
+ ];
3393
+
3394
+ // ../shared/dist/sensitive-paths.js
3395
// Directory names (lower-case) that make every path passing through them sensitive.
var SENSITIVE_DIRECTORIES = /* @__PURE__ */ new Set([
  "secrets",
  "credentials",
  ".aws",
  ".ssh",
  ".gnupg"
]);
// Exact basenames (lower-case) that are always denied.
var SENSITIVE_BASENAMES = /* @__PURE__ */ new Set([
  ".npmrc",
  ".pypirc",
  ".netrc",
  ".pgpass",
  "known_hosts",
  // Project-specific: CLAUDE.md carries live credentials by convention.
  "claude.md"
]);
// Basename suffixes of key / certificate / keystore containers.
var SENSITIVE_EXTENSIONS = [
  ".pem",
  ".key",
  ".p12",
  ".pfx",
  ".jks",
  ".keystore",
  ".ppk"
];
// Basename prefixes of SSH identity files. Prefix matching also catches
// variants such as `id_ed25519_sk` and the matching `.pub` files, exactly as
// the previous `id_rsa` / `id_ed25519` checks did; `id_dsa` and `id_ecdsa`
// close the gap for the remaining OpenSSH key types.
var SENSITIVE_SSH_KEY_PREFIXES = [
  "id_rsa",
  "id_dsa",
  "id_ecdsa",
  "id_ed25519"
];
// `.env.*` basenames that are checked-in templates rather than real secrets.
var ENV_TEMPLATE_BASENAMES = /* @__PURE__ */ new Set([
  ".env.example",
  ".env.sample",
  ".env.template"
]);
/**
 * Decide whether a path matches the sensitive-file deny-list.
 *
 * Matching is case-insensitive and separator-agnostic: backslashes are
 * treated as `/`, leading/trailing slashes are dropped, and empty or `.`
 * segments are ignored.
 *
 * @param {string} path Path to classify (callers in this file pass
 *   workspace-relative paths).
 * @returns {boolean} `true` when any segment is a sensitive directory name or
 *   the basename matches one of the basename / extension / prefix rules;
 *   `false` otherwise, including for an empty path.
 */
function isSensitivePath(path) {
  const normalised = path.replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
  if (normalised === "") {
    return false;
  }
  const segments = normalised
    .split("/")
    .filter((s) => s !== "" && s !== ".")
    .map((s) => s.toLowerCase());
  if (segments.length === 0) {
    return false;
  }
  // Every segment is checked, so a *file* literally named `secrets` is denied too.
  if (segments.some((segment) => SENSITIVE_DIRECTORIES.has(segment))) {
    return true;
  }
  const base = segments[segments.length - 1];
  if (SENSITIVE_BASENAMES.has(base)) {
    return true;
  }
  // `.env` and every `.env.<suffix>` except the known template names.
  if (base === ".env") {
    return true;
  }
  if (base.startsWith(".env.") && !ENV_TEMPLATE_BASENAMES.has(base)) {
    return true;
  }
  if (SENSITIVE_EXTENSIONS.some((ext) => base.endsWith(ext))) {
    return true;
  }
  if (SENSITIVE_SSH_KEY_PREFIXES.some((prefix) => base.startsWith(prefix))) {
    return true;
  }
  // Terraform state, including suffixed copies such as `.tfstate.backup`.
  if (/\.tfstate(\.|$)/.test(base)) {
    return true;
  }
  // JSON files whose names follow cloud service-account / credential patterns.
  if (base.endsWith(".json")) {
    if (base.startsWith("serviceaccount")) {
      return true;
    }
    if (base.startsWith("gcp-")) {
      return true;
    }
    if (base.endsWith("-credentials.json")) {
      return true;
    }
  }
  return false;
}
// Message prefix used when a request is rejected because of the deny-list.
var SENSITIVE_PATH_ERROR = "path matches the sensitive-file deny-list";
3462
+
3463
+ // ../shared/dist/index.js
3464
+ var PROMETHEUS_VERSION = "0.1.0";
3465
+
3466
+ // ../indexer/dist/watcher.js
3357
3467
  var DEFAULT_IGNORED = [
3358
3468
  /(^|[/\\])\.git([/\\]|$)/,
3359
3469
  /(^|[/\\])node_modules([/\\]|$)/,
@@ -3389,8 +3499,12 @@ var WorkspaceWatcher = class extends EventEmitter {
3389
3499
  async start() {
3390
3500
  if (this.#watcher !== null)
3391
3501
  return;
3502
+ const denySensitive = (abs) => {
3503
+ const rel = toRelative(this.#root, abs);
3504
+ return rel !== abs && isSensitivePath(rel);
3505
+ };
3392
3506
  const watcher = chokidar.watch(this.#root, {
3393
- ignored: this.#ignored,
3507
+ ignored: [...this.#ignored, denySensitive],
3394
3508
  ignoreInitial: false,
3395
3509
  persistent: true,
3396
3510
  awaitWriteFinish: { stabilityThreshold: 50, pollInterval: 10 }
@@ -3415,6 +3529,8 @@ var WorkspaceWatcher = class extends EventEmitter {
3415
3529
  this.#watcher = null;
3416
3530
  }
3417
3531
  #schedule(kind, abs) {
3532
+ if (isSensitivePath(toRelative(this.#root, abs)))
3533
+ return;
3418
3534
  const key = `${kind}::${abs}`;
3419
3535
  const existing = this.#pending.get(key);
3420
3536
  if (existing !== void 0)
@@ -3728,6 +3844,8 @@ var WorkspaceIndexer = class {
3728
3844
  }
3729
3845
  }
3730
3846
  #isIgnored(relPath) {
3847
+ if (isSensitivePath(relPath))
3848
+ return true;
3731
3849
  const patterns = this.#ignored ?? DEFAULT_IGNORED_PATTERNS;
3732
3850
  for (const pat of patterns) {
3733
3851
  if (pat instanceof RegExp) {
@@ -3807,7 +3925,10 @@ var WorkspaceIndexer = class {
3807
3925
  hasParseErrors: parsed.hasParseErrors
3808
3926
  },
3809
3927
  symbols: parsed.symbols,
3810
- references: parsed.references
3928
+ references: parsed.references,
3929
+ // Full source so the SQLite adapter can store per-symbol body
3930
+ // text for the full-text lexical channel (sliced by byte offset).
3931
+ source
3811
3932
  });
3812
3933
  return "indexed";
3813
3934
  } catch (err) {
@@ -3836,47 +3957,8 @@ import { basename as basename5, dirname as dirname2, join as join3, resolve as r
3836
3957
  // ../storage-sqlite/dist/adapter.js
3837
3958
  import Database from "better-sqlite3";
3838
3959
 
3839
- // ../shared/dist/types.js
3840
- var GRAMMAR_LANGUAGE_IDS = [
3841
- "typescript",
3842
- "tsx",
3843
- "javascript",
3844
- "python",
3845
- "php",
3846
- "go",
3847
- "rust",
3848
- "java",
3849
- "csharp",
3850
- "c",
3851
- "cpp",
3852
- "ruby",
3853
- "kotlin",
3854
- "html"
3855
- ];
3856
- var DOCUMENT_LANGUAGE_IDS = [
3857
- "markdown",
3858
- "text",
3859
- "json",
3860
- "yaml",
3861
- "toml"
3862
- ];
3863
- var LANGUAGE_IDS = [
3864
- ...GRAMMAR_LANGUAGE_IDS,
3865
- ...DOCUMENT_LANGUAGE_IDS
3866
- ];
3867
- var EDGE_TYPES = [
3868
- "defines",
3869
- "calls",
3870
- "imports",
3871
- "same-file",
3872
- "co-change"
3873
- ];
3874
-
3875
- // ../shared/dist/index.js
3876
- var PROMETHEUS_VERSION = "0.1.0";
3877
-
3878
3960
  // ../storage-sqlite/dist/schema.js
3879
- var SCHEMA_VERSION = 3;
3961
+ var SCHEMA_VERSION = 4;
3880
3962
  var SCHEMA_STATEMENTS = [
3881
3963
  `PRAGMA journal_mode = WAL`,
3882
3964
  `PRAGMA foreign_keys = ON`,
@@ -3907,7 +3989,8 @@ var SCHEMA_STATEMENTS = [
3907
3989
  end_row INTEGER NOT NULL,
3908
3990
  end_col INTEGER NOT NULL,
3909
3991
  start_byte INTEGER NOT NULL,
3910
- end_byte INTEGER NOT NULL
3992
+ end_byte INTEGER NOT NULL,
3993
+ body TEXT
3911
3994
  )`,
3912
3995
  `CREATE TABLE IF NOT EXISTS refs (
3913
3996
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -3937,25 +4020,27 @@ var SCHEMA_STATEMENTS = [
3937
4020
  `CREATE INDEX IF NOT EXISTS idx_refs_name_nocase ON refs(name COLLATE NOCASE)`,
3938
4021
  // FTS5 virtual table: external-content design — actual rows live in
3939
4022
  // `symbols`, FTS holds only the tokenised index. Triggers below keep
3940
- // the FTS index in sync so we never write to it directly.
4023
+ // the FTS index in sync so we never write to it directly. The `body`
4024
+ // column (version 4) carries each symbol's source slice, so the lexical
4025
+ // channel matches symbol *source text*, not just name + container.
3941
4026
  `CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
3942
- name, container,
4027
+ name, container, body,
3943
4028
  content='symbols', content_rowid='id',
3944
4029
  tokenize='unicode61 remove_diacritics 2'
3945
4030
  )`,
3946
4031
  `CREATE TRIGGER IF NOT EXISTS symbols_ai AFTER INSERT ON symbols BEGIN
3947
- INSERT INTO symbols_fts(rowid, name, container)
3948
- VALUES (new.id, new.name, COALESCE(new.container, ''));
4032
+ INSERT INTO symbols_fts(rowid, name, container, body)
4033
+ VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
3949
4034
  END`,
3950
4035
  `CREATE TRIGGER IF NOT EXISTS symbols_ad AFTER DELETE ON symbols BEGIN
3951
- INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
3952
- VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
4036
+ INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
4037
+ VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
3953
4038
  END`,
3954
4039
  `CREATE TRIGGER IF NOT EXISTS symbols_au AFTER UPDATE ON symbols BEGIN
3955
- INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
3956
- VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
3957
- INSERT INTO symbols_fts(rowid, name, container)
3958
- VALUES (new.id, new.name, COALESCE(new.container, ''));
4040
+ INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
4041
+ VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
4042
+ INSERT INTO symbols_fts(rowid, name, container, body)
4043
+ VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
3959
4044
  END`,
3960
4045
  // Phase 2.15.8 — co-change pairs (file-level historical coupling).
3961
4046
  // Pairs are canonical (`file_a < file_b`) so each unordered pair has
@@ -3981,6 +4066,7 @@ var SCHEMA_STATEMENTS = [
3981
4066
  ];
3982
4067
 
3983
4068
  // ../storage-sqlite/dist/adapter.js
4069
+ var MAX_BODY_BYTES = 8e3;
3984
4070
  function vectorToBlob(v) {
3985
4071
  return Buffer.from(v.buffer, v.byteOffset, v.byteLength);
3986
4072
  }
@@ -4054,6 +4140,14 @@ var SqliteStorageAdapter = class {
4054
4140
  #options;
4055
4141
  #db = null;
4056
4142
  #stmts = null;
4143
+ /**
4144
+ * Lazily-prepared, weight-keyed FTS statements for the adaptive
4145
+ * column-weighting path (see {@link searchByText}). The default
4146
+ * (equal-weight) statement lives in {@link PreparedStatements.searchFts};
4147
+ * non-default weight triples get one cached statement each here. Cleared
4148
+ * on `close` together with the handle.
4149
+ */
4150
+ #weightedFts = /* @__PURE__ */ new Map();
4057
4151
  constructor(options) {
4058
4152
  this.#options = options;
4059
4153
  }
@@ -4070,6 +4164,16 @@ var SqliteStorageAdapter = class {
4070
4164
  }
4071
4165
  const hasMeta = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='meta'`).get();
4072
4166
  const priorVersion = hasMeta ? Number(db.prepare(`SELECT value FROM meta WHERE key = 'schema_version'`).get()?.value ?? 0) : 0;
4167
+ if (priorVersion > 0 && priorVersion < 4) {
4168
+ const cols = db.prepare(`PRAGMA table_info(symbols)`).all();
4169
+ if (!cols.some((c) => c.name === "body")) {
4170
+ db.exec(`ALTER TABLE symbols ADD COLUMN body TEXT`);
4171
+ }
4172
+ db.exec(`DROP TRIGGER IF EXISTS symbols_ai;
4173
+ DROP TRIGGER IF EXISTS symbols_ad;
4174
+ DROP TRIGGER IF EXISTS symbols_au;
4175
+ DROP TABLE IF EXISTS symbols_fts;`);
4176
+ }
4073
4177
  for (const ddl of SCHEMA_STATEMENTS)
4074
4178
  db.exec(ddl);
4075
4179
  if (priorVersion < SCHEMA_VERSION) {
@@ -4087,6 +4191,7 @@ var SqliteStorageAdapter = class {
4087
4191
  this.#db.close();
4088
4192
  this.#db = null;
4089
4193
  this.#stmts = null;
4194
+ this.#weightedFts.clear();
4090
4195
  }
4091
4196
  async getFileHash(path) {
4092
4197
  const row = this.#requireStmts().getFileHash.get(path);
@@ -4104,11 +4209,12 @@ var SqliteStorageAdapter = class {
4104
4209
  async upsertFile(payload) {
4105
4210
  const db = this.#requireDb();
4106
4211
  const stmts = this.#requireStmts();
4212
+ const sourceBuf = payload.source === void 0 ? null : Buffer.from(payload.source, "utf8");
4107
4213
  const tx = db.transaction((p) => {
4108
4214
  stmts.deleteFile.run(p.file.path);
4109
4215
  stmts.insertFile.run(p.file.path, p.file.language, p.file.contentHash, p.file.size, p.file.mtimeMs, p.file.indexedAt, p.file.hasParseErrors ? 1 : 0);
4110
4216
  for (const s of p.symbols)
4111
- this.#insertSymbol(p.file.path, s, stmts);
4217
+ this.#insertSymbol(p.file.path, s, sourceBuf, stmts);
4112
4218
  for (const r of p.references)
4113
4219
  this.#insertRef(p.file.path, r, stmts);
4114
4220
  });
@@ -4238,18 +4344,53 @@ var SqliteStorageAdapter = class {
4238
4344
  vector: byKey.get(`${r.filePath}\0${r.chunkIndex}`) ?? null
4239
4345
  }));
4240
4346
  }
4241
- async searchByText(query, limit) {
4347
+ async searchByText(query, limit, options) {
4242
4348
  if (limit <= 0)
4243
4349
  return [];
4244
4350
  const sanitised = sanitiseFtsQuery(query);
4245
4351
  if (sanitised === "")
4246
4352
  return [];
4247
- const rows = this.#requireStmts().searchFts.all(sanitised, limit);
4353
+ const stmt = this.#ftsStmt(options?.columnWeights);
4354
+ const rows = stmt.all(sanitised, limit);
4248
4355
  return rows.map((row) => ({
4249
4356
  symbol: rowToStoredSymbol(row),
4250
4357
  score: row.bm25_score
4251
4358
  }));
4252
4359
  }
4360
+ /**
4361
+ * Pick the FTS statement for a given `[name, container, body]` BM25
4362
+ * column-weight triple. No weights (or an all-1 triple) returns the
4363
+ * cached default statement — byte-identical, equal-weight ranking. A
4364
+ * non-default triple gets a lazily-prepared, cached statement whose
4365
+ * `bm25()` carries the weights as literal arguments. The weights are
4366
+ * internal, validated finite numbers (never user input), so inlining
4367
+ * them in the SQL is injection-safe; FTS5 requires them as function
4368
+ * arguments, not bindable parameters.
4369
+ */
4370
+ #ftsStmt(weights) {
4371
+ if (weights === void 0)
4372
+ return this.#requireStmts().searchFts;
4373
+ for (const w of weights) {
4374
+ if (!Number.isFinite(w) || w < 0) {
4375
+ throw new Error(`searchByText: columnWeights must be finite and >= 0, got [${weights.join(", ")}]`);
4376
+ }
4377
+ }
4378
+ const [n, c, b] = weights;
4379
+ if (n === 1 && c === 1 && b === 1)
4380
+ return this.#requireStmts().searchFts;
4381
+ const key = `${n},${c},${b}`;
4382
+ const cached = this.#weightedFts.get(key);
4383
+ if (cached !== void 0)
4384
+ return cached;
4385
+ const stmt = this.#requireDb().prepare(`SELECT s.*, bm25(symbols_fts, ${n}, ${c}, ${b}) AS bm25_score
4386
+ FROM symbols_fts
4387
+ JOIN symbols s ON s.id = symbols_fts.rowid
4388
+ WHERE symbols_fts MATCH ?
4389
+ ORDER BY bm25_score
4390
+ LIMIT ?`);
4391
+ this.#weightedFts.set(key, stmt);
4392
+ return stmt;
4393
+ }
4253
4394
  async expandSymbolGraph(symbolName, limit, options) {
4254
4395
  if (limit <= 0)
4255
4396
  return [];
@@ -4353,8 +4494,9 @@ var SqliteStorageAdapter = class {
4353
4494
  commitCount: r.commit_count
4354
4495
  }));
4355
4496
  }
4356
- #insertSymbol(filePath, s, stmts) {
4357
- stmts.insertSymbol.run(filePath, s.name, s.kind, s.language, s.container, s.exported ? 1 : 0, s.range.start.row, s.range.start.column, s.range.end.row, s.range.end.column, s.range.startByte, s.range.endByte);
4497
+ #insertSymbol(filePath, s, sourceBuf, stmts) {
4498
+ const body = sourceBuf === null ? null : sourceBuf.subarray(s.range.startByte, Math.min(s.range.endByte, s.range.startByte + MAX_BODY_BYTES)).toString("utf8");
4499
+ stmts.insertSymbol.run(filePath, s.name, s.kind, s.language, s.container, s.exported ? 1 : 0, s.range.start.row, s.range.start.column, s.range.end.row, s.range.end.column, s.range.startByte, s.range.endByte, body);
4358
4500
  }
4359
4501
  #insertRef(filePath, r, stmts) {
4360
4502
  stmts.insertRef.run(filePath, r.name, r.kind, r.fromContainer, r.moduleSpecifier, r.range.start.row, r.range.start.column, r.range.end.row, r.range.end.column, r.range.startByte, r.range.endByte);
@@ -4378,8 +4520,9 @@ var SqliteStorageAdapter = class {
4378
4520
  insertFile: db.prepare(`INSERT INTO files(path, language, content_hash, size, mtime_ms, indexed_at, has_parse_errors)
4379
4521
  VALUES(?, ?, ?, ?, ?, ?, ?)`),
4380
4522
  insertSymbol: db.prepare(`INSERT INTO symbols(file_path, name, kind, language, container, exported,
4381
- start_row, start_col, end_row, end_col, start_byte, end_byte)
4382
- VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
4523
+ start_row, start_col, end_row, end_col, start_byte, end_byte,
4524
+ body)
4525
+ VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
4383
4526
  insertRef: db.prepare(`INSERT INTO refs(file_path, name, kind, from_container, module_specifier,
4384
4527
  start_row, start_col, end_row, end_col, start_byte, end_byte)
4385
4528
  VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
@@ -4403,6 +4546,14 @@ var SqliteStorageAdapter = class {
4403
4546
  WHERE s.file_path = ?
4404
4547
  ORDER BY s.id
4405
4548
  LIMIT 1 OFFSET ?`),
4549
+ // BM25 over (name, container, body). Default (equal) column weights:
4550
+ // SWE-bench django showed name-dominant weights (10/5/1) regress
4551
+ // localization vs equal weights — common name tokens ("field",
4552
+ // "model") then crowd out the specific body match that pinpoints the
4553
+ // right file. The body column (Schema v4) lets lexical search match a
4554
+ // symbol's source text, which lifts file localization (bm25-only
4555
+ // any@5 25→50 on django). Per-corpus name-vs-body tuning goes through the
4556
+ // `columnWeights` option of `searchByText`; this is the equal-weight default.
4406
4557
  searchFts: db.prepare(`SELECT s.*, bm25(symbols_fts) AS bm25_score
4407
4558
  FROM symbols_fts
4408
4559
  JOIN symbols s ON s.id = symbols_fts.rowid
@@ -5109,7 +5260,7 @@ var SupabaseStorageAdapter = class {
5109
5260
  }));
5110
5261
  });
5111
5262
  }
5112
- async searchByText(query, limit) {
5263
+ async searchByText(query, limit, _options) {
5113
5264
  if (limit <= 0)
5114
5265
  return [];
5115
5266
  const tokens = query.toLowerCase().split(/[^a-z0-9_]+/u).filter((t) => t.length >= 2);
@@ -5945,7 +6096,8 @@ var HybridRetriever = class {
5945
6096
  const activeEdgeTypes = EDGE_TYPES.filter((t) => effectiveEdgeWeights[t] > 0);
5946
6097
  const staticEdgeTypes = activeEdgeTypes.filter((t) => t !== CO_CHANGE);
5947
6098
  const coChangeActive = effectiveEdgeWeights[CO_CHANGE] > 0;
5948
- const lexicalPromise = wLex > 0 ? this.#storage.searchByText(trimmed, candidateLimit) : Promise.resolve([]);
6099
+ const lexCols = options.lexicalColumnWeights;
6100
+ const lexicalPromise = wLex > 0 ? this.#storage.searchByText(trimmed, candidateLimit, lexCols !== void 0 ? { columnWeights: lexCols } : void 0) : Promise.resolve([]);
5949
6101
  const vectorPromise = wVec > 0 ? this.#runVector(trimmed, candidateLimit, options.signal) : Promise.resolve({ hits: [], queryVector: null });
5950
6102
  const [lexicalHits, vectorResult] = await Promise.all([
5951
6103
  lexicalPromise,
@@ -5959,7 +6111,23 @@ var HybridRetriever = class {
5959
6111
  if (wLex > 0)
5960
6112
  firstPassLists.push({ ...lexicalAsList(lexicalHits), weight: wLex });
5961
6113
  const firstPass = reciprocalRankFusion(firstPassLists, { k: rrfK });
5962
- const seedSymbols = wGraph > 0 && expandN > 0 && firstPass.length > 0 ? firstPass.slice(0, expandN).map((r) => r.payload) : [];
6114
+ let seedSymbols = [];
6115
+ if (wGraph > 0 && expandN > 0) {
6116
+ const seedCols = options.graphSeedColumnWeights;
6117
+ if (seedCols !== void 0 && wLex > 0) {
6118
+ const seedLexHits = await this.#storage.searchByText(trimmed, candidateLimit, {
6119
+ columnWeights: seedCols
6120
+ });
6121
+ const seedLists = [];
6122
+ if (wVec > 0)
6123
+ seedLists.push({ ...vectorAsList(vectorHits), weight: wVec });
6124
+ seedLists.push({ ...lexicalAsList(seedLexHits), weight: wLex });
6125
+ const seedPass = reciprocalRankFusion(seedLists, { k: rrfK });
6126
+ seedSymbols = seedPass.slice(0, expandN).map((r) => r.payload);
6127
+ } else if (firstPass.length > 0) {
6128
+ seedSymbols = firstPass.slice(0, expandN).map((r) => r.payload);
6129
+ }
6130
+ }
5963
6131
  const graphPromise = seedSymbols.length > 0 && staticEdgeTypes.length > 0 ? this.#runGraphExpansion(seedSymbols, candidateLimit, staticEdgeTypes, maxDepth, adaptiveThreshold) : Promise.resolve(/* @__PURE__ */ new Map());
5964
6132
  const coChangePromise = coChangeActive && seedSymbols.length > 0 && ccFilesPerSeed > 0 && ccPerFileCap > 0 ? this.#runCoChangeStage(seedSymbols, ccFilesPerSeed, ccPerFileCap, ccMinWeight) : Promise.resolve([]);
5965
6133
  const [graphBuckets, coChangeSymbols] = await Promise.all([
@@ -7476,6 +7644,7 @@ var FRAMEWORK_MANIFESTS = [
7476
7644
  var MAX_K = 50;
7477
7645
  var DEFAULT_K2 = 10;
7478
7646
  var MAX_FILE_BYTES = 256 * 1024;
7647
+ var MAX_SNIPPET_BYTES = 1500;
7479
7648
  function symbolToJson(s) {
7480
7649
  return {
7481
7650
  name: s.name,
@@ -7502,6 +7671,31 @@ function textResult(payload) {
7502
7671
  content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
7503
7672
  };
7504
7673
  }
7674
/**
 * Load the source text of one symbol for inline display in search results.
 *
 * Best-effort by design: any failure (sensitive path, path resolution error,
 * unreadable file, empty range) yields `null` so a search hit is still
 * returned without its snippet.
 *
 * @param {string} workspaceRoot Root passed to `resolveInWorkspace`.
 * @param symbol Stored symbol; reads `filePath`, `range.startByte` and
 *   `range.endByte`.
 * @param {Map<string, any>} cache Per-request cache keyed by `filePath`. Holds
 *   the pending read (or an already-loaded Buffer / `null`), so concurrent
 *   lookups for symbols in the same file share a single read.
 * @returns {Promise<{ text: string, truncated: boolean } | null>} The snippet,
 *   capped at `MAX_SNIPPET_BYTES` bytes, or `null` when none is available.
 */
async function snippetForSymbol(workspaceRoot, symbol, cache) {
  try {
    const relPath = symbol.filePath;
    if (isSensitivePath(relPath)) {
      return null;
    }
    let pending = cache.get(relPath);
    if (pending === void 0) {
      const abs = resolveInWorkspace(workspaceRoot, relPath);
      // Cache the in-flight promise *before* awaiting: callers fan out with
      // Promise.all, so caching only the resolved buffer would let every hit
      // in the same file start its own read.
      pending = readFile3(abs).catch(() => null);
      cache.set(relPath, pending);
    }
    const buf = await pending;
    if (buf === null) {
      return null;
    }
    const startByte = Math.max(0, symbol.range.startByte);
    const endByte = Math.min(symbol.range.endByte, buf.byteLength);
    if (!(endByte > startByte)) {
      return null;
    }
    const full = buf.subarray(startByte, endByte);
    const truncated = full.byteLength > MAX_SNIPPET_BYTES;
    if (!truncated) {
      return { text: full.toString("utf8"), truncated };
    }
    // Back the cut up (at most 3 bytes, the longest run of UTF-8 continuation
    // bytes in one character) so a multi-byte character is never split, which
    // would otherwise decode to U+FFFD at the end of the snippet.
    let cut = MAX_SNIPPET_BYTES;
    while (cut > 0 && cut > MAX_SNIPPET_BYTES - 3 && (full[cut] & 192) === 128) {
      cut -= 1;
    }
    return { text: full.subarray(0, cut).toString("utf8"), truncated };
  } catch {
    // Deliberate best-effort: a snippet failure must not fail the search.
    return null;
  }
}
7505
7699
  function resolveInWorkspace(workspaceRoot, input) {
7506
7700
  if (input === "")
7507
7701
  throw new Error("path must not be empty.");
@@ -7541,7 +7735,8 @@ function clampK(k) {
7541
7735
  }
7542
7736
  var searchInput = {
7543
7737
  query: z.string().min(1, "query must not be empty"),
7544
- k: z.number().int().positive().max(MAX_K).optional()
7738
+ k: z.number().int().positive().max(MAX_K).optional(),
7739
+ includeSnippet: z.boolean().optional()
7545
7740
  };
7546
7741
  var lookupInput = {
7547
7742
  name: z.string().min(1, "name must not be empty"),
@@ -7576,20 +7771,25 @@ function registerTools(server, deps) {
7576
7771
  const { storage, retriever, workspaceRoot, workspaceId, workspaceName, regionMode, providerId, storageBackend } = deps;
7577
7772
  server.registerTool("search_code", {
7578
7773
  title: "Hybrid code search",
7579
- description: "Hybrid retrieval over the indexed workspace (lexical FTS + vector + symbol graph) fused with RRF. Returns the top-k symbols with provenance per source.",
7774
+ description: "PRIMARY code search for this workspace \u2014 call this FIRST to find where something is defined, used or implemented, before reading files or guessing paths. Hybrid retrieval (lexical FTS + vector + symbol graph, RRF-fused) over natural-language or symbol queries. Returns the top-k symbols with provenance AND an inline source snippet per hit, so the result is usually actionable without a follow-up get_file. Set `includeSnippet: false` to omit the inline code (symbols only).",
7580
7775
  inputSchema: searchInput
7581
7776
  }, async (args) => {
7582
7777
  const k = clampK(args.k);
7778
+ const includeSnippet = args.includeSnippet ?? true;
7583
7779
  const results = await retriever.search(args.query, { k });
7584
- return textResult({
7585
- query: args.query,
7586
- k,
7587
- results: results.map((r) => ({
7780
+ const cache = /* @__PURE__ */ new Map();
7781
+ const mapped = await Promise.all(results.map(async (r) => {
7782
+ const base = {
7588
7783
  score: r.score,
7589
7784
  provenance: r.provenance,
7590
7785
  symbol: symbolToJson(r.symbol)
7591
- }))
7592
- });
7786
+ };
7787
+ if (!includeSnippet)
7788
+ return base;
7789
+ const snip = await snippetForSymbol(workspaceRoot, r.symbol, cache);
7790
+ return snip === null ? base : { ...base, snippet: snip.text, snippetTruncated: snip.truncated };
7791
+ }));
7792
+ return textResult({ query: args.query, k, results: mapped });
7593
7793
  });
7594
7794
  server.registerTool("get_symbol", {
7595
7795
  title: "Exact symbol lookup",
@@ -7656,6 +7856,9 @@ function registerTools(server, deps) {
7656
7856
  inputSchema: getFileInput
7657
7857
  }, async (args) => {
7658
7858
  const abs = resolveInWorkspace(workspaceRoot, args.path);
7859
+ if (isSensitivePath(relative(workspaceRoot, abs))) {
7860
+ throw new Error(`${SENSITIVE_PATH_ERROR}: "${args.path}".`);
7861
+ }
7659
7862
  const buf = await readFile3(abs);
7660
7863
  const start = args.startByte ?? 0;
7661
7864
  const end = args.endByte ?? buf.byteLength;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prom.codes/context-mcp",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Prometheus Context Engine — local-first codebase indexing & retrieval as an MCP server.",
5
5
  "type": "module",
6
6
  "bin": {