grepmax 0.17.16 → 0.17.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -158,6 +158,14 @@ const TOOLS = [
158
158
  type: "string",
159
159
  description: "Project names to exclude (comma-separated)",
160
160
  },
161
+ seed_files: {
162
+ type: "string",
163
+ description: "Bias results toward your working context: comma-separated paths you have open (e.g. 'src/lib/llm/server.ts'). On-topic chunks in these files get lifted; off-topic ones are not.",
164
+ },
165
+ seed_symbols: {
166
+ type: "string",
167
+ description: "Bias results toward identifiers you're working with: comma-separated symbol names. Chunks defining a seeded symbol are preferred over mere callers.",
168
+ },
161
169
  },
162
170
  required: ["query"],
163
171
  },
@@ -784,7 +792,22 @@ exports.mcp = new commander_1.Command("mcp")
784
792
  }
785
793
  }
786
794
  }
787
- const result = yield searcher.search(query, limit, { rerank: process.env.GMAX_RERANK === "1" }, Object.keys(filters).length > 0 ? filters : undefined, pathPrefix);
795
+ // Aider-style seeding: the agent passes its open files / discussed
796
+ // symbols; the searcher biases candidate generation toward them.
797
/**
 * Normalizes a seed argument into a list of non-empty, trimmed entries.
 *
 * Accepts either a comma-separated string or an array whose elements may
 * themselves be comma-separated (matching the CLI's --seed-file /
 * --seed-symbol handling, so both front ends parse seeds the same way).
 * Null/undefined array elements are dropped instead of being stringified
 * into literal "null"/"undefined" seeds. Any other input shape yields no
 * seeds.
 *
 * @param {unknown} v Raw `seed_files` / `seed_symbols` tool argument.
 * @returns {string[] | undefined} The parsed entries, or undefined when none
 *   remain, so the caller can tell "no seeds" apart from an empty list.
 */
const parseSeedList = (v) => {
    const raw = Array.isArray(v) ? v : typeof v === "string" ? [v] : [];
    const items = raw
        .filter((x) => x !== null && x !== undefined)
        .flatMap((x) => String(x).split(","))
        .map((s) => s.trim())
        .filter((s) => s.length > 0);
    return items.length > 0 ? items : undefined;
};
807
+ const seedFiles = parseSeedList(args.seed_files);
808
+ const seedSymbols = parseSeedList(args.seed_symbols);
809
+ const seeds = seedFiles || seedSymbols ? { files: seedFiles, symbols: seedSymbols } : undefined;
810
+ const result = yield searcher.search(query, limit, { rerank: process.env.GMAX_RERANK === "1", seeds }, Object.keys(filters).length > 0 ? filters : undefined, pathPrefix);
788
811
  if (!result.data || result.data.length === 0) {
789
812
  return ok("No matches found. Try broadening your query, using fewer keywords, or check `gmax status` to verify the project is indexed.");
790
813
  }
@@ -377,6 +377,8 @@ exports.search = new commander_1.Command("search")
377
377
  .option("--name <regex>", "Filter results by symbol name regex")
378
378
  .option("-C, --context <n>", "Include N lines before/after each result")
379
379
  .option("--agent", "Ultra-compact output for AI agents (one line per result)", false)
380
+ .option("--seed-file <path>", "Bias results toward your working context (repeatable; comma-separated also accepted)", (value, prev) => (prev ? [...prev, value] : [value]))
381
+ .option("--seed-symbol <name>", "Bias results toward an identifier you're working with (repeatable; comma-separated also accepted)", (value, prev) => (prev ? [...prev, value] : [value]))
380
382
  .argument("<pattern>", 'Natural language query (e.g. "where do we handle auth?")')
381
383
  .argument("[path]", "Restrict search to this path prefix")
382
384
  .addHelpText("after", `
@@ -569,6 +571,19 @@ Examples:
569
571
  searchFilters.inPrefixes = scope.inPrefixes;
570
572
  if (scope.excludePrefixes.length > 0)
571
573
  searchFilters.excludePrefixes = scope.excludePrefixes;
574
+ // Aider-style seeding: --seed-file / --seed-symbol (repeatable, also
575
+ // comma-separated) bias candidate generation toward the caller's working
576
+ // context. Absent → undefined → inert.
577
/**
 * Flattens the values collected for a repeatable seed option into a list of
 * non-empty, trimmed entries. Each value may itself be comma-separated.
 *
 * A bare string is treated as a single value and nullish elements are
 * skipped, so an unexpected option shape degrades to "fewer seeds" rather
 * than throwing a TypeError.
 *
 * @param {string[] | string | null | undefined} vals Collected option values.
 * @returns {string[] | undefined} The parsed entries, or undefined when none
 *   remain (seeding then stays inert).
 */
const splitSeeds = (vals) => {
    const raw = Array.isArray(vals) ? vals : typeof vals === "string" ? [vals] : [];
    const items = raw
        .filter((v) => v !== null && v !== undefined)
        .flatMap((v) => String(v).split(","))
        .map((s) => s.trim())
        .filter((s) => s.length > 0);
    return items.length > 0 ? items : undefined;
};
584
+ const seedFiles = splitSeeds(options.seedFile);
585
+ const seedSymbols = splitSeeds(options.seedSymbol);
586
+ const seeds = seedFiles || seedSymbols ? { files: seedFiles, symbols: seedSymbols } : undefined;
572
587
  // Daemon-mediated search: ships query+args over IPC, daemon runs the
573
588
  // hybrid+rerank against its already-warm VectorDB and worker pool.
574
589
  // Drops cold-start cost (~17s wall, 6GB RAM in the CLI) to <1s. Falls
@@ -592,6 +607,7 @@ Examples:
592
607
  pathPrefix: pathFilter,
593
608
  rerank: process.env.GMAX_RERANK === "1",
594
609
  explain: options.explain,
610
+ seeds,
595
611
  includeSkeletons: options.skeleton,
596
612
  includeGraph: options.symbol,
597
613
  }, { timeoutMs: 60000 });
@@ -695,7 +711,7 @@ Examples:
695
711
  }
696
712
  }
697
713
  const searcher = new searcher_1.Searcher(vectorDb);
698
- searchResult = yield searcher.search(pattern, parseInt(options.m, 10), { rerank: process.env.GMAX_RERANK === "1", explain: options.explain }, Object.keys(searchFilters).length > 0
714
+ searchResult = yield searcher.search(pattern, parseInt(options.m, 10), { rerank: process.env.GMAX_RERANK === "1", explain: options.explain, seeds }, Object.keys(searchFilters).length > 0
699
715
  ? searchFilters
700
716
  : undefined, pathFilter);
701
717
  } // end if (!searchResult) — in-process fallback
@@ -0,0 +1,284 @@
1
+ "use strict";
2
+ /**
3
+ * Seed-eval harness — measures Aider-style chat/file seeding (Phase 4).
4
+ *
5
+ * WHY A SEPARATE HARNESS. `bench:oss` (src/eval-oss.ts) uses bare-symbol P1
6
+ * lookups with NO seed context, so it can only serve as a *no-seed regression
7
+ * guard* for seeding (seeded path absent → results must be unchanged). It
8
+ * cannot demonstrate that seeding *helps*: attaching a seed equal to the answer
9
+ * file would be circular, and the fixtures carry no realistic "open files"
10
+ * annotation.
11
+ *
12
+ * THE HONEST DESIGN. Every case here uses an *ambiguous* natural-language query
13
+ * that legitimately matches several subsystems, plus a realistic seed (a file
14
+ * an agent would have open, or a symbol they're discussing). The metric is the
15
+ * rank of the *contextually-correct* answer file, measured twice: baseline (no
16
+ * seed) vs seeded. Three case kinds:
17
+ *
18
+ * - route: same query, seed points at subsystem A → answer should be A's
19
+ * file (which a no-seed search ranks below a different subsystem).
20
+ * Non-circular because the SAME query under a DIFFERENT seed must
21
+ * route to a DIFFERENT, independently-valid answer — something no
22
+ * static ranking can do.
23
+ * - recover: the contextually-correct file is OUT of the no-seed top-K
24
+ * entirely; seeding must pull it back via candidate-generation
25
+ * weight (a rerank-only seed could never recover an out-of-pool
26
+ * item — this case is the load-bearing proof of "weight in
27
+ * candidate generation, not rerank").
28
+ * - guard: the seed is IRRELEVANT to the query; the no-seed rank-1 file must
29
+ * stay rank 1. Catches seeding doing harm.
30
+ *
31
+ * Baselines below were measured live against the gmax index on 2026-06-02
32
+ * (granite-small, gpu) and are quoted per case. They are documentation, not
33
+ * assertions — the harness recomputes them every run.
34
+ *
35
+ * Usage:
36
+ * npx tsx src/eval-seed.ts # table output
37
+ * npx tsx src/eval-seed.ts --json # machine-readable
38
+ */
39
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
40
+ if (k2 === undefined) k2 = k;
41
+ var desc = Object.getOwnPropertyDescriptor(m, k);
42
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
43
+ desc = { enumerable: true, get: function() { return m[k]; } };
44
+ }
45
+ Object.defineProperty(o, k2, desc);
46
+ }) : (function(o, m, k, k2) {
47
+ if (k2 === undefined) k2 = k;
48
+ o[k2] = m[k];
49
+ }));
50
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
51
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
52
+ }) : function(o, v) {
53
+ o["default"] = v;
54
+ });
55
+ var __importStar = (this && this.__importStar) || (function () {
56
+ var ownKeys = function(o) {
57
+ ownKeys = Object.getOwnPropertyNames || function (o) {
58
+ var ar = [];
59
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
60
+ return ar;
61
+ };
62
+ return ownKeys(o);
63
+ };
64
+ return function (mod) {
65
+ if (mod && mod.__esModule) return mod;
66
+ var result = {};
67
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
68
+ __setModuleDefault(result, mod);
69
+ return result;
70
+ };
71
+ })();
72
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
73
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
74
+ return new (P || (P = Promise))(function (resolve, reject) {
75
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
76
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
77
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
78
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
79
+ });
80
+ };
81
+ var _a, _b, _c;
82
+ var _d;
83
+ Object.defineProperty(exports, "__esModule", { value: true });
84
+ (_a = (_d = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_d.GMAX_WORKER_COUNT = "1");
85
+ const path = __importStar(require("node:path"));
86
+ const searcher_1 = require("./lib/search/searcher");
87
+ const vector_db_1 = require("./lib/store/vector-db");
88
+ const exit_1 = require("./lib/utils/exit");
89
+ const config_1 = require("./config");
90
+ // Route/recover cases target the gmax repo itself — the corpus whose graph the
91
+ // author can verify by hand. The "idle timeout" concept lives in three
92
+ // subsystems (worker reap / LLM server / daemon); "health check" in three more.
93
+ // That natural polysemy is what makes the routing test honest.
94
+ //
95
+ // The no-harm guards instead target the immutable express fixture: querying
96
+ // gmax for "rank fusion" is contaminated by this harness's own source (which is
97
+ // full of "fusion" prose and gets live-indexed), so a stable external corpus is
98
+ // the honest place to assert "an irrelevant seed must not displace the winner".
99
+ const REPO_ROOTS = {
100
+ gmax: path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/beyond/tools/gmax"),
101
+ express: path.join((_c = process.env.HOME) !== null && _c !== void 0 ? _c : "", "Development/sandbox/bench-fixtures/express"),
102
+ };
103
+ const GMAX_CASES = [
104
+ // ── Triple A: "idle timeout shutdown" routes to worker / LLM / daemon ──────
105
+ {
106
+ id: "idle-pool",
107
+ query: "idle timeout shutdown",
108
+ seedFiles: ["src/lib/workers/pool.ts"],
109
+ expectedFile: "src/lib/workers/pool.ts",
110
+ kind: "guard", // already rank 1 without seeds — seeding must not demote it
111
+ baselineRankNote: 1,
112
+ note: "worker-reap is the no-seed winner; seeding its own file keeps it #1",
113
+ },
114
+ {
115
+ id: "idle-llm",
116
+ query: "idle timeout shutdown",
117
+ seedFiles: ["src/lib/llm/server.ts"],
118
+ expectedFile: "src/lib/llm/server.ts",
119
+ kind: "route",
120
+ baselineRankNote: 5,
121
+ note: "LLM idle watchdog at #5 behind worker-reap chunks; seed should lift it to #1",
122
+ },
123
+ {
124
+ id: "idle-daemon",
125
+ query: "idle timeout shutdown",
126
+ seedFiles: ["src/lib/daemon/daemon.ts"],
127
+ expectedFile: "src/lib/daemon/daemon.ts",
128
+ kind: "recover",
129
+ baselineRankNote: 0,
130
+ note: "daemon idle checker is OUT of the no-seed top-25; candidate-gen weight must recover it",
131
+ },
132
+ // ── Triple B: "health check probe" routes to doctor / mlx / llm ────────────
133
+ {
134
+ id: "health-doctor",
135
+ query: "health check probe",
136
+ seedFiles: ["src/commands/doctor.ts"],
137
+ expectedFile: "src/commands/doctor.ts",
138
+ kind: "guard",
139
+ baselineRankNote: 1,
140
+ note: "doctor is the no-seed winner; seeding its own file keeps it #1",
141
+ },
142
+ {
143
+ id: "health-mlx",
144
+ query: "health check probe",
145
+ seedFiles: ["src/lib/workers/embeddings/mlx-client.ts"],
146
+ expectedFile: "src/lib/workers/embeddings/mlx-client.ts",
147
+ kind: "route",
148
+ baselineRankNote: 3,
149
+ note: "mlx checkHealth at #3; seed should lift the embed-server probe to #1",
150
+ },
151
+ {
152
+ id: "health-llm",
153
+ query: "health check probe",
154
+ seedFiles: ["src/lib/llm/server.ts"],
155
+ expectedFile: "src/lib/llm/server.ts",
156
+ kind: "route",
157
+ baselineRankNote: 5,
158
+ note: "llm-server healthy() at #5; seed should lift it to #1",
159
+ },
160
+ // ── Symbol seeding: discussed identifier instead of open file ──────────────
161
+ {
162
+ id: "idle-llm-sym",
163
+ query: "idle timeout shutdown",
164
+ seedSymbols: ["LlmServer"],
165
+ expectedFile: "src/lib/llm/server.ts",
166
+ kind: "route",
167
+ baselineRankNote: 5,
168
+ note: "symbol-seed analog of idle-llm: discussing LlmServer biases toward its file",
169
+ },
170
+ // ── Guards: irrelevant seed must not perturb a strong no-seed winner.
171
+ // On the immutable express fixture so the assertion can't be polluted by
172
+ // live-indexing this harness's own source. ────────────────────────────────
173
+ {
174
+ id: "guard-express-file",
175
+ repo: "express",
176
+ query: "create the application factory",
177
+ seedFiles: ["lib/view.js"],
178
+ expectedFile: "lib/express.js",
179
+ kind: "guard",
180
+ baselineRankNote: 1,
181
+ note: "view.js (rank ~150 for this query) is off-topic; express.js must stay #1",
182
+ },
183
+ {
184
+ id: "guard-express-sym",
185
+ repo: "express",
186
+ query: "create the application factory",
187
+ seedSymbols: ["View"],
188
+ expectedFile: "lib/express.js",
189
+ kind: "guard",
190
+ baselineRankNote: 1,
191
+ note: "View is defined in the off-topic view.js; express.js must stay #1",
192
+ },
193
+ ];
194
/**
 * Rank (1-indexed) of the first result whose path is `expectedFile`; 0 = miss.
 *
 * The comparison is case-insensitive. A result matches when its path equals
 * `expectedFile` or ends with it on a path-segment boundary, so
 * "lib/express.js" matches "/repo/lib/express.js" but not
 * "/repo/mylib/express.js".
 *
 * @param {{ data?: Array<{ metadata?: { path?: unknown } | null }> }} response
 *   Search response; a missing `data` array is treated as no results.
 * @param {string} expectedFile Repo-relative path of the expected file.
 * @returns {number} 1-indexed rank of the first matching chunk, or 0.
 */
function rankOf(response, expectedFile) {
    const want = expectedFile.toLowerCase();
    const suffix = `/${want}`;
    const chunks = response && Array.isArray(response.data) ? response.data : [];
    const idx = chunks.findIndex((chunk) => {
        const meta = chunk === null || chunk === undefined ? undefined : chunk.metadata;
        const raw = meta === null || meta === undefined ? undefined : meta.path;
        const p = String(raw || "").toLowerCase();
        // Require a "/" before the expected path (or an exact match); a bare
        // suffix test would also accept sibling directories sharing a tail.
        return p === want || p.endsWith(suffix);
    });
    // findIndex yields -1 on a miss, which maps to the documented 0.
    return idx + 1;
}
204
/**
 * Decides whether a case passes, given the expected file's rank without
 * seeds (`baseline`) and with seeds (`seeded`). A rank of 0 means "not found
 * in top-K" and is treated as worse than any found rank.
 *
 * - "route":   the seeded rank must be 1. Rank 1 can never be worse than the
 *              baseline, so no separate "did not regress" check is needed.
 * - "recover": the file must be absent from the baseline results and at
 *              rank 1 once seeded.
 * - "guard":   the file must be present in the seeded results and must not
 *              have lost rank. A file found in neither run fails, because a
 *              vacuous pass would hide an unindexed or missing corpus.
 *
 * @param {string} kind Case kind: "route", "recover" or "guard".
 * @param {number} baseline 1-indexed no-seed rank, 0 when not found.
 * @param {number} seeded 1-indexed seeded rank, 0 when not found.
 * @returns {boolean} True when the case passes; unknown kinds never pass.
 */
function judge(kind, baseline, seeded) {
    const b = baseline === 0 ? Infinity : baseline;
    const s = seeded === 0 ? Infinity : seeded;
    switch (kind) {
        case "route":
            return s === 1;
        case "recover":
            return baseline === 0 && s === 1;
        case "guard":
            return Number.isFinite(s) && s <= b;
        default:
            // Always return a boolean so `pass` survives JSON serialization.
            return false;
    }
}
221
/**
 * Runs every case in GMAX_CASES twice — once without seeds and once with the
 * case's seeds — records the expected file's rank in both runs, and prints a
 * per-case table (or JSON with `--json` / GMAX_EVAL_JSON=1) plus a summary
 * grouped by case kind.
 *
 * Rerank is enabled only when GMAX_EVAL_RERANK=1. The process exits through
 * gracefulExit(0) once the report has been written, regardless of how many
 * cases passed.
 *
 * @returns {Promise<void>} Rejects if a case names an unknown repo or a
 *   search fails; the vector DB is closed in either case.
 */
function run() {
    return __awaiter(this, void 0, void 0, function* () {
        const jsonMode = process.argv.includes("--json") || process.env.GMAX_EVAL_JSON === "1";
        const topK = 25;
        const rerank = process.env.GMAX_EVAL_RERANK === "1";
        const vectorDb = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        const results = [];
        try {
            const searcher = new searcher_1.Searcher(vectorDb);
            for (const c of GMAX_CASES) {
                const repoKey = c.repo !== null && c.repo !== void 0 ? c.repo : "gmax";
                const repoRoot = REPO_ROOTS[repoKey];
                if (typeof repoRoot !== "string") {
                    // Fail loudly: an unknown key would otherwise become the
                    // prefix "undefined/" and silently match nothing.
                    throw new Error(`Unknown repo "${repoKey}" in seed-eval case "${c.id}"`);
                }
                const pathPrefix = `${repoRoot}/`;
                const baseRes = yield searcher.search(c.query, topK, { rerank }, undefined, pathPrefix);
                const seededRes = yield searcher.search(c.query, topK, { rerank, seeds: { files: c.seedFiles, symbols: c.seedSymbols } }, undefined, pathPrefix);
                const baselineRank = rankOf(baseRes, c.expectedFile);
                const seededRank = rankOf(seededRes, c.expectedFile);
                results.push({
                    id: c.id,
                    kind: c.kind,
                    query: c.query,
                    expectedFile: c.expectedFile,
                    baselineRank,
                    seededRank,
                    pass: judge(c.kind, baselineRank, seededRank),
                    note: c.note,
                });
            }
        }
        finally {
            // Release the DB handle even when a search throws mid-run.
            yield vectorDb.close();
        }
        const passes = results.filter((r) => r.pass).length;
        const byKind = (k) => results.filter((r) => r.kind === k);
        const summary = {
            cases: results.length,
            passes,
            route: { total: byKind("route").length, pass: byKind("route").filter((r) => r.pass).length },
            recover: { total: byKind("recover").length, pass: byKind("recover").filter((r) => r.pass).length },
            guard: { total: byKind("guard").length, pass: byKind("guard").filter((r) => r.pass).length },
        };
        if (jsonMode) {
            process.stdout.write(`${JSON.stringify({ rerank, summary, results }, null, 2)}\n`);
        }
        else {
            console.log(`Seed eval (rerank=${rerank ? "on" : "off"})\n`);
            // Rank 0 means "not found in top-K"; render it as a dash.
            const fmtRank = (r) => (r === 0 ? "—" : `#${r}`);
            for (const r of results) {
                const arrow = `${fmtRank(r.baselineRank)} → ${fmtRank(r.seededRank)}`;
                const mark = r.pass ? "✓" : "✗";
                const seed = `[${r.kind}]`;
                console.log(` ${mark} ${r.id.padEnd(18)} ${seed.padEnd(10)} ${arrow.padEnd(12)} ${r.expectedFile}`);
                if (r.note)
                    console.log(` ${r.note}`);
            }
            console.log(`\n → ${passes}/${results.length} pass ` +
                `(route ${summary.route.pass}/${summary.route.total}, ` +
                `recover ${summary.recover.pass}/${summary.recover.total}, ` +
                `guard ${summary.guard.pass}/${summary.guard.total})`);
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
279
+ if (require.main === module && process.env.GMAX_EVAL_AUTORUN !== "0") {
280
+ run().catch((e) => {
281
+ console.error(e);
282
+ process.exit(1);
283
+ });
284
+ }
@@ -867,7 +867,11 @@ class Daemon {
867
867
  this.lastActivity = Date.now();
868
868
  let result;
869
869
  try {
870
- result = yield searcher.search(payload.query, payload.limit, { rerank: payload.rerank === true, explain: payload.explain === true }, payload.filters, payload.pathPrefix, undefined, signal);
870
+ result = yield searcher.search(payload.query, payload.limit, {
871
+ rerank: payload.rerank === true,
872
+ explain: payload.explain === true,
873
+ seeds: payload.seeds,
874
+ }, payload.filters, payload.pathPrefix, undefined, signal);
871
875
  }
872
876
  catch (err) {
873
877
  if ((err === null || err === void 0 ? void 0 : err.name) === "AbortError") {
@@ -171,6 +171,9 @@ function handleCommand(daemon, cmd, conn) {
171
171
  pathPrefix: typeof cmd.pathPrefix === "string" ? cmd.pathPrefix : undefined,
172
172
  rerank: cmd.rerank === true,
173
173
  explain: cmd.explain === true,
174
+ seeds: cmd.seeds && typeof cmd.seeds === "object" && !Array.isArray(cmd.seeds)
175
+ ? cmd.seeds
176
+ : undefined,
174
177
  includeSkeletons: cmd.includeSkeletons === true,
175
178
  skeletonLimit: skeletonLimitRaw,
176
179
  includeGraph: cmd.includeGraph === true,
@@ -17,6 +17,7 @@ const filter_builder_1 = require("../utils/filter-builder");
17
17
  const pool_1 = require("../workers/pool");
18
18
  const intent_1 = require("./intent");
19
19
  const pagerank_1 = require("./pagerank");
20
+ const seed_weight_1 = require("./seed-weight");
20
21
  // Reads a defined_symbols / referenced_symbols column that may arrive as a plain
21
22
  // array or a LanceDB Arrow proxy (.toArray()).
22
23
  function readSymbolArray(val) {
@@ -367,7 +368,7 @@ class Searcher {
367
368
  }
368
369
  search(query, top_k, _search_options, _filters, pathPrefix, intent, signal) {
369
370
  return __awaiter(this, void 0, void 0, function* () {
370
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
371
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
371
372
  const finalLimit = top_k !== null && top_k !== void 0 ? top_k : 10;
372
373
  // ColBERT rerank is opt-in as of v0.17.1. On the 97-case eval it
373
374
  // regresses MRR@10 by ~3% and doubles query latency; sweep across
@@ -375,6 +376,10 @@ class Searcher {
375
376
  // fused scores ~30:1 so blend tuning can't recover the loss.
376
377
  let doRerank = (_a = _search_options === null || _search_options === void 0 ? void 0 : _search_options.rerank) !== null && _a !== void 0 ? _a : false;
377
378
  const explain = (_b = _search_options === null || _search_options === void 0 ? void 0 : _search_options.explain) !== null && _b !== void 0 ? _b : false;
379
+ // Aider-style seeding (Phase 4): bias candidate generation toward the
380
+ // agent's working context. Inert unless the caller supplied seed files or
381
+ // symbols, so the default search path is unchanged.
382
+ const seedCtx = (0, seed_weight_1.buildSeedContext)(_search_options === null || _search_options === void 0 ? void 0 : _search_options.seeds);
378
383
  const searchIntent = intent || (0, intent_1.detectIntent)(query);
379
384
  // Bare-identifier queries get symbol-definition promotion (see below).
380
385
  const symbolQuery = asSymbolQuery(query);
@@ -402,7 +407,7 @@ class Searcher {
402
407
  try {
403
408
  table = yield this.db.ensureTable();
404
409
  }
405
- catch (_o) {
410
+ catch (_q) {
406
411
  return { data: [] };
407
412
  }
408
413
  // Ensure FTS index exists (lazy init, retry periodically on failure)
@@ -428,7 +433,9 @@ class Searcher {
428
433
  const pagerankEnabled = process.env.GMAX_PAGERANK === "1" && !!pathPrefix;
429
434
  // Symbol-definition promotion needs defined_symbols on every candidate, not
430
435
  // just the final display set — load it for bare-symbol queries too.
431
- const needDefinedSymbols = pagerankEnabled || symbolQuery !== null;
436
+ // Seed-symbol matching reads defined_symbols (referenced_symbols is always
437
+ // loaded), so pull it into the lightweight path when symbols were seeded.
438
+ const needDefinedSymbols = pagerankEnabled || symbolQuery !== null || seedCtx.symbols.size > 0;
432
439
  const LIGHTWEIGHT_COLUMNS = [
433
440
  "id", "path", "hash", "chunk_index", "start_line", "end_line",
434
441
  "is_anchor", "chunk_type", "role", "complexity", "is_exported",
@@ -471,7 +478,7 @@ class Searcher {
471
478
  this.ftsAvailable = true;
472
479
  console.warn("[Searcher] Rebuilt FTS index with position support — retry search");
473
480
  }
474
- catch (_p) { }
481
+ catch (_r) { }
475
482
  }
476
483
  else {
477
484
  console.warn(`[Searcher] FTS search failed (will retry later): ${msg}`);
@@ -487,11 +494,23 @@ class Searcher {
487
494
  const RRF_K = 60;
488
495
  const candidateScores = new Map();
489
496
  const docMap = new Map();
497
+ // Best (lowest) 1-indexed rank each candidate reached in any retriever —
498
+ // the relevance gate for seeding (see the seed block below). Only tracked
499
+ // when seeding is active; otherwise it stays empty and costs nothing.
500
+ const bestRank = new Map();
501
+ const noteRank = seedCtx.active
502
+ ? (key, rank) => {
503
+ const prev = bestRank.get(key);
504
+ if (prev === undefined || rank + 1 < prev)
505
+ bestRank.set(key, rank + 1);
506
+ }
507
+ : () => { };
490
508
  vectorResults.forEach((doc, rank) => {
491
509
  const key = doc.id || `${doc.path}:${doc.chunk_index}`;
492
510
  docMap.set(key, doc);
493
511
  const score = 1.0 / (RRF_K + rank + 1);
494
512
  candidateScores.set(key, (candidateScores.get(key) || 0) + score);
513
+ noteRank(key, rank);
495
514
  });
496
515
  ftsResults.forEach((doc, rank) => {
497
516
  const key = doc.id || `${doc.path}:${doc.chunk_index}`;
@@ -499,6 +518,7 @@ class Searcher {
499
518
  docMap.set(key, doc);
500
519
  const score = 1.0 / (RRF_K + rank + 1);
501
520
  candidateScores.set(key, (candidateScores.get(key) || 0) + score);
521
+ noteRank(key, rank);
502
522
  });
503
523
  const fused = Array.from(candidateScores.entries())
504
524
  .sort((a, b) => b[1] - a[1])
@@ -507,6 +527,45 @@ class Searcher {
507
527
  // Free raw search results — docMap holds the only needed references
508
528
  vectorResults.length = 0;
509
529
  ftsResults.length = 0;
530
+ // Aider-style seeding (Phase 4): bump the RRF score of candidates matching
531
+ // the agent's working context, gated by each candidate's own relevance so
532
+ // off-topic seed files are never injected (the safety invariant). Because
533
+ // the final ordering also reads candidateScores, this one bump propagates
534
+ // through the stage-1 cosine cut, the stage-2 window, the rerank set, AND
535
+ // the final score — and can recover a candidate fusion buried below the
536
+ // display cut, which a rerank-only seed could not. See ./seed-weight.ts.
537
+ if (seedCtx.active) {
538
+ // Bound the scan to the relevant head of the pool. The gate is each
539
+ // candidate's best retriever rank (bestRank), so off-topic seed chunks
540
+ // that only appear deep in the pool are never lifted.
541
+ const SEED_WINDOW = 200;
542
+ const seedParams = (0, seed_weight_1.seedParamsFromEnv)();
543
+ let boosted = false;
544
+ for (const doc of fused.slice(0, SEED_WINDOW)) {
545
+ const sym = seedCtx.symbols.size > 0
546
+ ? (0, seed_weight_1.matchesSeedSymbol)(seedCtx, readSymbolArray(doc.defined_symbols), readSymbolArray(doc.referenced_symbols))
547
+ : { def: false, ref: false };
548
+ const match = {
549
+ file: (0, seed_weight_1.matchesSeedFile)(seedCtx, doc.path),
550
+ symbolDef: sym.def,
551
+ symbolRef: sym.ref && !sym.def,
552
+ };
553
+ const key = doc.id || `${doc.path}:${doc.chunk_index}`;
554
+ const bonus = (0, seed_weight_1.seedBoost)(match, (_d = bestRank.get(key)) !== null && _d !== void 0 ? _d : Infinity, seedParams);
555
+ if (bonus > 0) {
556
+ candidateScores.set(key, ((_e = candidateScores.get(key)) !== null && _e !== void 0 ? _e : 0) + bonus);
557
+ boosted = true;
558
+ }
559
+ }
560
+ if (boosted) {
561
+ fused.sort((a, b) => {
562
+ var _a, _b;
563
+ const ka = a.id || `${a.path}:${a.chunk_index}`;
564
+ const kb = b.id || `${b.path}:${b.chunk_index}`;
565
+ return ((_a = candidateScores.get(kb)) !== null && _a !== void 0 ? _a : 0) - ((_b = candidateScores.get(ka)) !== null && _b !== void 0 ? _b : 0);
566
+ });
567
+ }
568
+ }
510
569
  // Candidate-concentration gate (Bundle B, v0.17.2 OSS-fixture finding):
511
570
  // ColBERT rerank is shape-sensitive. When the post-fusion pool clusters
512
571
  // into one file (single-file-repo / concentrated shape, e.g. lodash) rerank
@@ -515,7 +574,7 @@ class Searcher {
515
574
  // here and *add* rerank-on for it. This only ever flips doRerank false→true:
516
575
  // an explicit GMAX_RERANK=1 (doRerank already true) is never overridden off.
517
576
  if (!doRerank) {
518
- const envConcThreshold = Number.parseFloat((_d = process.env.GMAX_CONCENTRATION_THRESHOLD) !== null && _d !== void 0 ? _d : "");
577
+ const envConcThreshold = Number.parseFloat((_f = process.env.GMAX_CONCENTRATION_THRESHOLD) !== null && _f !== void 0 ? _f : "");
519
578
  // <= 0 (or NaN with the default) keeps the gate active at 0.7; a value > 1
520
579
  // disables it (no possible share reaches it), giving a rerank-fully-off
521
580
  // baseline for sweeps without touching the doRerank default. 0.7 is the
@@ -531,7 +590,7 @@ class Searcher {
531
590
  if (window.length > 0 && CONCENTRATION_THRESHOLD <= 1) {
532
591
  const buckets = new Map();
533
592
  for (const doc of window) {
534
- buckets.set(doc.path, ((_e = buckets.get(doc.path)) !== null && _e !== void 0 ? _e : 0) + 1);
593
+ buckets.set(doc.path, ((_g = buckets.get(doc.path)) !== null && _g !== void 0 ? _g : 0) + 1);
535
594
  }
536
595
  let maxBucket = 0;
537
596
  for (const count of buckets.values()) {
@@ -546,7 +605,7 @@ class Searcher {
546
605
  }
547
606
  // Item 8: Widen PRE_RERANK_K
548
607
  // Retrieve a wide set for Stage 1 filtering
549
- const envStage1 = Number.parseInt((_f = process.env.GMAX_STAGE1_K) !== null && _f !== void 0 ? _f : "", 10);
608
+ const envStage1 = Number.parseInt((_h = process.env.GMAX_STAGE1_K) !== null && _h !== void 0 ? _h : "", 10);
550
609
  const STAGE1_K = Number.isFinite(envStage1) && envStage1 > 0 ? envStage1 : 200;
551
610
  const topCandidates = fused.slice(0, STAGE1_K);
552
611
  // Free docMap — topCandidates already holds record references
@@ -554,11 +613,11 @@ class Searcher {
554
613
  // Item 9: Two-stage rerank
555
614
  // Stage 1: Cheap pooled cosine filter
556
615
  let stage2Candidates = topCandidates;
557
- const envStage2K = Number.parseInt((_g = process.env.GMAX_STAGE2_K) !== null && _g !== void 0 ? _g : "", 10);
616
+ const envStage2K = Number.parseInt((_j = process.env.GMAX_STAGE2_K) !== null && _j !== void 0 ? _j : "", 10);
558
617
  const STAGE2_K = Number.isFinite(envStage2K) && envStage2K > 0 ? envStage2K : 40;
559
- const envRerankTop = Number.parseInt((_h = process.env.GMAX_RERANK_TOP) !== null && _h !== void 0 ? _h : "", 10);
618
+ const envRerankTop = Number.parseInt((_k = process.env.GMAX_RERANK_TOP) !== null && _k !== void 0 ? _k : "", 10);
560
619
  const RERANK_TOP = Number.isFinite(envRerankTop) && envRerankTop > 0 ? envRerankTop : 20;
561
- const envBlend = Number.parseFloat((_j = process.env.GMAX_RERANK_BLEND) !== null && _j !== void 0 ? _j : "");
620
+ const envBlend = Number.parseFloat((_l = process.env.GMAX_RERANK_BLEND) !== null && _l !== void 0 ? _l : "");
562
621
  const FUSED_WEIGHT = Number.isFinite(envBlend) && envBlend >= 0 ? envBlend : 0.5;
563
622
  if (queryPooled && topCandidates.length > STAGE2_K) {
564
623
  const cosineScores = topCandidates.map((doc) => {
@@ -662,7 +721,7 @@ class Searcher {
662
721
  // constructor/toJSON, which otherwise score higher on the literal and evict
663
722
  // the parent in overlap dedup). Multiplicative keeps it scale-invariant
664
723
  // across the rerank-on (ColBERT maxsim) and rerank-off (fusion) score ranges.
665
- const envDefBoost = Number.parseFloat((_k = process.env.GMAX_DEF_BOOST) !== null && _k !== void 0 ? _k : "");
724
+ const envDefBoost = Number.parseFloat((_m = process.env.GMAX_DEF_BOOST) !== null && _m !== void 0 ? _m : "");
666
725
  const DEF_MATCH_BOOST = Number.isFinite(envDefBoost) && envDefBoost >= 1 ? envDefBoost : 5;
667
726
  const scored = rerankCandidates.map((doc, idx) => {
668
727
  var _a, _b;
@@ -691,7 +750,7 @@ class Searcher {
691
750
  try {
692
751
  const { scores: prScores, max: prMax } = yield (0, pagerank_1.loadOrComputePageRank)(this.db, pathPrefix);
693
752
  if (prMax > 0) {
694
- const envWeight = Number.parseFloat((_l = process.env.GMAX_PR_WEIGHT) !== null && _l !== void 0 ? _l : "");
753
+ const envWeight = Number.parseFloat((_o = process.env.GMAX_PR_WEIGHT) !== null && _o !== void 0 ? _o : "");
695
754
  const PR_WEIGHT = Number.isFinite(envWeight) && envWeight >= 0 ? envWeight : 0.05;
696
755
  for (const item of scored) {
697
756
  const raw = item.record.defined_symbols;
@@ -706,7 +765,7 @@ class Searcher {
706
765
  defs = arr.filter((v) => typeof v === "string");
707
766
  }
708
767
  }
709
- catch (_q) { }
768
+ catch (_s) { }
710
769
  }
711
770
  const norm = (0, pagerank_1.pageRankBoostForSymbols)(defs, prScores, prMax);
712
771
  item.score += PR_WEIGHT * norm;
@@ -724,7 +783,7 @@ class Searcher {
724
783
  // Item 10: Per-file diversification
725
784
  const seenFiles = new Map();
726
785
  const diversified = [];
727
- const envMaxPerFile = Number.parseInt((_m = process.env.GMAX_MAX_PER_FILE) !== null && _m !== void 0 ? _m : "", 10);
786
+ const envMaxPerFile = Number.parseInt((_p = process.env.GMAX_MAX_PER_FILE) !== null && _p !== void 0 ? _p : "", 10);
728
787
  const MAX_PER_FILE = Number.isFinite(envMaxPerFile) && envMaxPerFile > 0 ? envMaxPerFile : 3;
729
788
  for (const item of uniqueScored) {
730
789
  const path = item.record.path || "";
@@ -0,0 +1,125 @@
1
+ "use strict";
2
+ /**
3
+ * Aider-style chat/file seeding (Phase 4) — pure scoring helpers.
4
+ *
5
+ * Seeding biases search toward the agent's *working context*: files it has open
6
+ * ("chat files", weighted heavily in Aider's repo-map) and identifiers it is
7
+ * discussing. gmax applies the bias in **candidate generation** — it bumps the
8
+ * Reciprocal Rank Fusion score of seed-matching candidates — NOT as a post-hoc
9
+ * rerank tiebreaker. Bundle B (see docs/plans/2026-05-25-semantic-search-
10
+ * landscape.md) showed a tiebreaker over a saturated rerank pool is a no-op;
11
+ * lifting the fusion score instead lets a seeded candidate climb through the
12
+ * stage-1 cosine cut, the stage-2 window, and the final ordering in one move,
13
+ * and can even *recover* a candidate that fusion alone buried below the display
14
+ * cut (something a rerank-only seed could never do).
15
+ *
16
+ * THE SAFETY INVARIANT. Seeding must never inject *off-topic* context: an agent
17
+ * working in `pool.ts` who searches for "rank fusion scoring" should still get
18
+ * `searcher.ts`, because `pool.ts` has nothing relevant to say. So the bonus is
19
+ * **relevance-gated** — a seed match is only boosted when the candidate already
20
+ * ranked highly in at least one retriever (vector OR full-text). A genuinely
21
+ * on-topic seed chunk surfaces near the top of some retriever; an off-topic one
22
+ * sits deep in every retriever and is left exactly where the query put it. We
23
+ * gate on retriever rank (always available, even for an FTS-only hit) rather
24
+ * than pooled-ColBERT cosine, which is not reliably populated on every index.
25
+ *
26
+ * All functions here are pure so the gating/weighting math can be unit-tested
27
+ * (tests/seed-weight.test.ts) independently of the LanceDB-backed searcher.
28
+ */
29
+ Object.defineProperty(exports, "__esModule", { value: true });
30
+ exports.DEFAULT_SEED_PARAMS = void 0;
31
+ exports.seedParamsFromEnv = seedParamsFromEnv;
32
+ exports.buildSeedContext = buildSeedContext;
33
+ exports.matchesSeedFile = matchesSeedFile;
34
+ exports.matchesSeedSymbol = matchesSeedSymbol;
35
+ exports.seedBoost = seedBoost;
36
+ exports.DEFAULT_SEED_PARAMS = {
37
+ // RRF scores live around 1/(60+rank) ≈ 0.008–0.016, so a ~0.02 bonus is
38
+ // strong enough to lift a genuinely-relevant seed match several ranks while
39
+ // staying in the same order of magnitude as the fusion signal it augments.
40
+ fileWeight: 0.02,
41
+ symbolDefWeight: 0.02,
42
+ symbolRefWeight: 0.006,
43
+ // A genuinely on-topic seed chunk reaches the top handful of some retriever
44
+ // (the route/recover fixtures land at ranks 1–7); an off-topic one sits mid-
45
+ // pool or deeper (an irrelevant express seed file is rank ~150 for an
46
+ // unrelated query). 8 is the eligibility ceiling separating the two without
47
+ // boosting mid-pool noise (see tests/seed-weight.test.ts and eval-seed.ts).
48
+ maxRank: 8,
49
+ };
50
/**
 * Build the seed-boost parameters from environment variables.
 *
 * Each field is read from its own variable (GMAX_SEED_FILE_W,
 * GMAX_SEED_SYMBOL_DEF_W, GMAX_SEED_SYMBOL_REF_W, GMAX_SEED_MAX_RANK) and
 * parsed with Number.parseFloat. A value that is missing, not a finite number,
 * or below the field's minimum (0 for the weights, 1 for maxRank) is replaced
 * by the matching DEFAULT_SEED_PARAMS entry.
 *
 * @param env - Object to read the variables from; defaults to process.env.
 * @returns An object with fileWeight, symbolDefWeight, symbolRefWeight and maxRank.
 */
function seedParamsFromEnv(env = process.env) {
    const defaults = exports.DEFAULT_SEED_PARAMS;
    // Parse one raw value, rejecting anything non-finite or below `floor`.
    const readNumber = (raw, fallback, floor) => {
        const parsed = Number.parseFloat(raw == null ? "" : raw);
        if (!Number.isFinite(parsed) || parsed < floor) {
            return fallback;
        }
        return parsed;
    };
    const fileWeight = readNumber(env.GMAX_SEED_FILE_W, defaults.fileWeight, 0);
    const symbolDefWeight = readNumber(env.GMAX_SEED_SYMBOL_DEF_W, defaults.symbolDefWeight, 0);
    const symbolRefWeight = readNumber(env.GMAX_SEED_SYMBOL_REF_W, defaults.symbolRefWeight, 0);
    const maxRank = readNumber(env.GMAX_SEED_MAX_RANK, defaults.maxRank, 1);
    return { fileWeight, symbolDefWeight, symbolRefWeight, maxRank };
}
63
/**
 * Turn a raw seed spec into the context used by the matching helpers.
 *
 * File entries are trimmed, lower-cased and stripped of one leading "./" or
 * "/"; symbol entries are trimmed and de-duplicated through a Set. Entries
 * that end up empty are dropped. A missing spec, or a missing `files` /
 * `symbols` list, is treated as an empty list.
 *
 * @param spec - Optional object with `files` and/or `symbols` string arrays.
 * @returns `{ fileSuffixes, symbols, active }`, where `active` is true when at
 *   least one file suffix or symbol survived normalization.
 */
function buildSeedContext(spec) {
    const isNonEmpty = (text) => text.length > 0;
    const rawFiles = spec == null || spec.files == null ? [] : spec.files;
    const fileSuffixes = rawFiles
        .map((file) => file.trim().toLowerCase().replace(/^\.?\//, ""))
        .filter(isNonEmpty);
    const rawSymbols = spec == null || spec.symbols == null ? [] : spec.symbols;
    const trimmedSymbols = rawSymbols.map((name) => name.trim()).filter(isNonEmpty);
    const symbols = new Set(trimmedSymbols);
    const active = fileSuffixes.length > 0 || symbols.size > 0;
    return { fileSuffixes, symbols, active };
}
76
/**
 * Does a candidate path match any seed file?
 *
 * The comparison is case-insensitive on the candidate side (the candidate is
 * lower-cased here and compared against `ctx.fileSuffixes` as given). A seed
 * matches only on a whole-path-segment boundary: the candidate must equal the
 * suffix or end with "/" + suffix. A bare `endsWith(suffix)` is deliberately
 * not used, because it would let the seed "pool.ts" match
 * "src/threadpool.ts" and boost a file that was never seeded.
 *
 * @param ctx - Seed context exposing a `fileSuffixes` string array.
 * @param candidatePath - Path of the candidate chunk's file.
 * @returns true when the path matches a seed suffix; false when there are no
 *   seed files or the path is not a non-empty string.
 */
function matchesSeedFile(ctx, candidatePath) {
    if (ctx.fileSuffixes.length === 0)
        return false;
    // A record without a usable path cannot match any seed file.
    if (typeof candidatePath !== "string" || candidatePath.length === 0)
        return false;
    const p = candidatePath.toLowerCase();
    return ctx.fileSuffixes.some((suffix) => p === suffix || p.endsWith(`/${suffix}`));
}
83
/**
 * Report how a candidate relates to the seed symbols.
 *
 * `def` is true when any entry of `definedSymbols` is a seed symbol, and `ref`
 * is true when any entry of `referencedSymbols` is. Both flags are reported
 * independently; choosing between them is left to the caller. With no seed
 * symbols the result is `{ def: false, ref: false }` and neither list is read.
 *
 * @param ctx - Seed context exposing a `symbols` Set.
 * @param definedSymbols - Iterable of symbol names the candidate defines.
 * @param referencedSymbols - Iterable of symbol names the candidate references.
 * @returns `{ def, ref }` booleans.
 */
function matchesSeedSymbol(ctx, definedSymbols, referencedSymbols) {
    const seeded = ctx.symbols;
    if (seeded.size === 0) {
        return { def: false, ref: false };
    }
    // True as soon as one name in the iterable is a seed symbol.
    const containsSeed = (names) => {
        for (const name of names) {
            if (seeded.has(name)) {
                return true;
            }
        }
        return false;
    };
    const def = containsSeed(definedSymbols);
    const ref = containsSeed(referencedSymbols);
    return { def, ref };
}
106
/**
 * Compute the additive RRF-score bonus for one candidate.
 *
 * The result is 0 when the candidate matches no seed at all, and also when it
 * does match but `bestRank` is not at least 1 or exceeds `params.maxRank`
 * (the relevance gate: a seed match that no retriever ranked near the top is
 * left where the query put it). `bestRank` is the 1-indexed best position the
 * candidate reached across retrievers; 0, NaN and Infinity all fail the gate.
 *
 * For an eligible candidate the file bonus and the symbol bonus add together,
 * and a definition match takes the place of a reference match rather than
 * stacking with it.
 *
 * @param match - Flags `{ file, symbolDef, symbolRef }` for the candidate.
 * @param bestRank - Best 1-indexed retriever rank of the candidate.
 * @param params - `{ fileWeight, symbolDefWeight, symbolRefWeight, maxRank }`.
 * @returns The bonus to add to the candidate's fusion score.
 */
function seedBoost(match, bestRank, params) {
    const matchedAnySeed = match.file || match.symbolDef || match.symbolRef;
    if (!matchedAnySeed) {
        return 0;
    }
    // Written as a negated ">" so a NaN rank or ceiling behaves as it always has.
    const passesRankGate = bestRank >= 1 && !(bestRank > params.maxRank);
    if (!passesRankGate) {
        return 0;
    }
    const fileBonus = match.file ? params.fileWeight : 0;
    if (match.symbolDef) {
        return fileBonus + params.symbolDefWeight;
    }
    if (match.symbolRef) {
        return fileBonus + params.symbolRefWeight;
    }
    return fileBonus;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.16",
3
+ "version": "0.17.17",
4
4
  "author": "Robert Owens <78518764+reowens@users.noreply.github.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.16",
3
+ "version": "0.17.17",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",