grepmax 0.17.7 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,170 @@
1
+ "use strict";
2
+ /**
3
+ * Phase 3 design probe. For each platform symbol-name miss, determine whether
4
+ * the expected DEFINITION chunk is:
5
+ * (a) inside the top-200 fusion pool but lost downstream -> a ranking fix
6
+ * (b) outside the pool (but inside the 500-row retrieval) -> recovery fix
7
+ * (c) outside retrieval entirely -> unreachable
8
+ * and whether a 1-hop ref->def walk from the top-K fusion seeds reaches it
9
+ * (i.e. some seed's referenced_symbols contains the query symbol).
10
+ *
11
+ * Run: npx tsx src/eval-graph-recovery-probe.ts
12
+ */
// Module-interop helper (looks compiler-emitted — the header comment points at a .ts source; TODO confirm).
// Reuses `this.__createBinding` when one already exists; otherwise defines it here.
// Purpose: expose `m[k]` on `o` under the name `k2` (`k2` defaults to `k`).
//  - When Object.create exists: copies m's own property descriptor for `k`, but substitutes an
//    enumerable getter that reads `m[k]` at access time when the descriptor is missing, is an
//    accessor on a non-`__esModule` object, or is a writable/configurable data property.
//  - Otherwise: falls back to a plain one-time copy `o[k2] = m[k]`.
13
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
14
+ if (k2 === undefined) k2 = k;
15
+ var desc = Object.getOwnPropertyDescriptor(m, k);
16
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
17
+ desc = { enumerable: true, get: function() { return m[k]; } };
18
+ }
19
+ Object.defineProperty(o, k2, desc);
20
+ }) : (function(o, m, k, k2) {
21
+ if (k2 === undefined) k2 = k;
22
+ o[k2] = m[k];
23
+ }));
// Module-interop helper (looks compiler-emitted — TODO confirm).
// Reuses `this.__setModuleDefault` when present. Otherwise: sets `o.default = v`, as an
// enumerable property via Object.defineProperty when Object.create exists (no `writable` flag
// is passed), or by plain assignment when it does not.
24
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
25
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
26
+ }) : function(o, v) {
27
+ o["default"] = v;
28
+ });
// Module-interop helper (looks compiler-emitted — TODO confirm).
// Reuses `this.__importStar` when present. The returned function takes a required module `mod`:
//  - if `mod` is truthy and has `__esModule`, it is returned unchanged;
//  - otherwise a fresh object is built: every own key of `mod` except "default" is bound onto it
//    via __createBinding, and `mod` itself becomes its `default` via __setModuleDefault.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in + hasOwnProperty fallback when that is unavailable, so the choice is made only once.
29
+ var __importStar = (this && this.__importStar) || (function () {
30
+ var ownKeys = function(o) {
31
+ ownKeys = Object.getOwnPropertyNames || function (o) {
32
+ var ar = [];
33
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
34
+ return ar;
35
+ };
36
+ return ownKeys(o);
37
+ };
38
+ return function (mod) {
39
+ if (mod && mod.__esModule) return mod;
40
+ var result = {};
41
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
42
+ __setModuleDefault(result, mod);
43
+ return result;
44
+ };
45
+ })();
// Generator-driven async helper (looks compiler-emitted — TODO confirm).
// Reuses `this.__awaiter` when present. Otherwise: starts `generator` with `thisArg` and
// `_arguments` (or no arguments) and returns a promise of type `P` (defaults to the global
// Promise). Each yielded value is wrapped in a `P` unless it already is one; its fulfilment is
// fed back through generator.next() and its rejection through generator.throw(). The returned
// promise resolves with the generator's final value and rejects on any exception thrown while
// stepping the generator.
46
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
47
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
48
+ return new (P || (P = Promise))(function (resolve, reject) {
49
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
50
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
51
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
52
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
53
+ });
54
+ };
// Module prologue: scratch temporaries used by the down-levelled nullish expressions below,
// the `__esModule` export marker, and an environment default.
// GMAX_WORKER_COUNT is set to "1" only when it is currently null/undefined; an existing value
// (including an empty string) is kept. This runs before the require() calls that follow, so
// those modules see the value when they load.
55
+ var _a, _b;
56
+ var _c;
57
+ Object.defineProperty(exports, "__esModule", { value: true });
58
+ (_a = (_c = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_c.GMAX_WORKER_COUNT = "1");
59
+ const path = __importStar(require("node:path"));
60
+ const config_1 = require("./config");
61
+ const vector_db_1 = require("./lib/store/vector-db");
62
+ const exit_1 = require("./lib/utils/exit");
63
+ const pool_1 = require("./lib/workers/pool");
64
+ const filter_builder_1 = require("./lib/utils/filter-builder");
// Root directory whose indexed chunks are probed: <HOME>/Development/beyond/platform.
// When HOME is unset the empty string is used, which yields a relative path.
65
+ const PLATFORM_ROOT = path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/beyond/platform");
// Row limit applied to each of the vector and full-text retrieval queries in probe().
66
+ const PRE_K = 500;
// Fused rows ranked below this count are reported as "pool#…"; later ones as "union#…".
67
+ const STAGE1_K = 200;
// Number of top fused rows inspected for a reference to the query symbol.
68
+ const SEED_K = 20;
// Smoothing constant in the per-list rank score 1 / (RRF_K + rank + 1) used when fusing.
69
+ const RRF_K = 60;
70
+ // (query symbol, expected definition file) — the rank-0 platform misses.
// Each file path is matched case-insensitively as a "/"-anchored suffix of the indexed path.
71
+ const CASES = [
72
+ ["BeyondError", "packages/shared/src/errors.ts"],
73
+ ["ErrorCodes", "packages/shared/src/errors.ts"],
74
+ ["createDb", "packages/db/src/index.ts"],
75
+ ["createDbAsync", "packages/db/src/index.ts"],
76
+ ["authMiddleware", "packages/api/src/middleware/auth.ts"],
77
+ ["requireAuth", "packages/api/src/middleware/auth.ts"],
78
+ ["resolveActor", "packages/api/src/middleware/auth.ts"],
79
+ ["getActor", "packages/api/src/middleware/auth.ts"],
80
+ ["errorHandler", "packages/api/src/middleware/error.ts"],
81
+ ["initializeApp", "packages/api/src/app.ts"],
82
+ ];
/**
 * Normalises a list-like column value into a plain array of strings.
 *
 * Accepts either a real array or any object exposing a `toArray()` method
 * whose result is a real array. Non-string entries are dropped. Every other
 * input (falsy values, primitives, objects without `toArray`, a `toArray`
 * that throws or returns a non-array) produces an empty array.
 *
 * @param {*} val Raw column value.
 * @returns {string[]} The string entries of `val`, in their original order.
 */
function toStrArr(val) {
    const keepStrings = (items) => items.filter((item) => typeof item === "string");
    if (Array.isArray(val)) {
        return keepStrings(val);
    }
    const convertible = Boolean(val) && typeof val.toArray === "function";
    if (convertible) {
        try {
            const converted = val.toArray();
            if (Array.isArray(converted)) {
                return keepStrings(converted);
            }
        }
        catch (_err) {
            // A failing conversion is treated the same as "no symbols".
        }
    }
    return [];
}
/**
 * Classifies where the defining chunk of `sym` lands in the retrieval pipeline.
 *
 * Steps:
 *  1. Encode `sym` and run a vector search plus a full-text search, each limited
 *     to PRE_K rows and restricted to paths under PLATFORM_ROOT.
 *  2. Fuse both ranked lists with reciprocal-rank scoring, 1 / (RRF_K + rank + 1),
 *     summed per chunk key.
 *  3. Find the first fused row whose path ends with `/expectedFile`
 *     (case-insensitive) and whose `defined_symbols` contains `sym`.
 *  4. If no fused row qualifies, query the table directly to tell "retrieved
 *     nowhere" apart from "not indexed".
 *  5. Count how many of the top SEED_K fused rows list `sym` in `referenced_symbols`.
 *
 * @param {object} table Table handle exposing `vectorSearch`, `search` and `query` builders.
 * @param {string} sym Symbol name; used both as the query text and as the expected defined symbol.
 * @param {string} expectedFile Path suffix of the file expected to define `sym`.
 * @returns {Promise<{sym: string, expectedFile: string, loc: string, seedsRefSym: number, firstRefSeedRank: number}>}
 *   `loc` is `pool#N`, `union#N(>STAGE1_K)`, `OUTSIDE-500` or `NOT-IN-INDEX?`;
 *   `firstRefSeedRank` is 1-based, or -1 when no seed references `sym`.
 */
function probe(table, sym, expectedFile) {
    return __awaiter(this, void 0, void 0, function* () {
        const pool = (0, pool_1.getWorkerPool)();
        const { dense } = yield pool.encodeQuery(sym);
        const prefix = PLATFORM_ROOT.endsWith("/") ? PLATFORM_ROOT : `${PLATFORM_ROOT}/`;
        const where = `path LIKE '${(0, filter_builder_1.escapeSqlString)(prefix)}%'`;
        const columns = ["id", "path", "chunk_index", "defined_symbols", "referenced_symbols"];
        const vectorRows = (yield table.vectorSearch(dense).select([...columns, "_distance"]).where(where).limit(PRE_K).toArray());
        let ftsRows = [];
        try {
            ftsRows = (yield table.search(sym).select([...columns, "_score"]).where(where).limit(PRE_K).toArray());
        }
        catch (err) {
            // Full-text search stays best-effort, but a failure is reported (on stderr,
            // so the stdout table is untouched) instead of silently skewing the fusion.
            console.warn(`[probe] full-text search failed for "${sym}"; continuing with vector rows only:`, err);
        }
        // Reciprocal-rank fusion. A chunk found by both searches keeps its vector row.
        const scores = new Map();
        const docMap = new Map();
        const keyOf = (d) => d.id || `${d.path}:${d.chunk_index}`;
        const addRanked = (rows, overwriteDoc) => {
            rows.forEach((d, r) => {
                const k = keyOf(d);
                if (overwriteDoc || !docMap.has(k)) {
                    docMap.set(k, d);
                }
                scores.set(k, (scores.get(k) || 0) + 1 / (RRF_K + r + 1));
            });
        };
        addRanked(vectorRows, true);
        addRanked(ftsRows, false);
        const fused = Array.from(scores.entries())
            .sort((a, b) => b[1] - a[1])
            .map(([k]) => docMap.get(k));
        // Locate the expected definition chunk among the fused rows.
        const expectedSuffix = `/${expectedFile.toLowerCase()}`;
        const isExpectedPath = (d) => String(d.path).toLowerCase().endsWith(expectedSuffix);
        const isExpected = (d) => isExpectedPath(d) && toStrArr(d.defined_symbols).includes(sym);
        const defIndex = fused.findIndex(isExpected);
        const defRetrievalRank = defIndex < 0 ? -1 : defIndex + 1; // 1-based rank in fusion order
        const defInPool = defIndex >= 0 && defIndex < STAGE1_K;
        // Not retrieved at all: ask the table directly whether the definition is indexed.
        let defExistsInIndex = defIndex >= 0;
        if (!defExistsInIndex) {
            const direct = (yield table.query().select(columns).where(`${where} AND array_contains(defined_symbols, '${(0, filter_builder_1.escapeSqlString)(sym)}')`).limit(50).toArray());
            defExistsInIndex = direct.some(isExpectedPath);
        }
        // ref->def reachability: which of the top seeds reference `sym`?
        let seedsRefSym = 0;
        let firstRefSeedRank = -1;
        fused.slice(0, SEED_K).forEach((d, i) => {
            if (!toStrArr(d.referenced_symbols).includes(sym)) {
                return;
            }
            seedsRefSym++;
            if (firstRefSeedRank < 0) {
                firstRefSeedRank = i + 1;
            }
        });
        let loc;
        if (defRetrievalRank > 0) {
            // The threshold in the label follows STAGE1_K so it cannot drift from the cutoff.
            loc = defInPool ? `pool#${defRetrievalRank}` : `union#${defRetrievalRank}(>${STAGE1_K})`;
        }
        else {
            loc = defExistsInIndex ? "OUTSIDE-500" : "NOT-IN-INDEX?";
        }
        return { sym, expectedFile, loc, seedsRefSym, firstRefSeedRank };
    });
}
/**
 * Entry point: opens the vector store, probes every entry of CASES in order and
 * prints one row per symbol (location of the defining chunk, how many of the
 * top SEED_K seeds reference the symbol, and the rank of the first such seed).
 *
 * The database handle is closed in a `finally` block so that a failing
 * `ensureTable()` or `probe()` does not leave it open; the error then
 * propagates to the `.catch` below, which logs it and exits with status 1.
 *
 * @returns {Promise<void>}
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        const db = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        try {
            const table = yield db.ensureTable();
            console.log("sym expectedDefChunk seeds_ref firstRefSeed");
            for (const [sym, file] of CASES) {
                const r = yield probe(table, sym, file);
                const firstSeed = r.firstRefSeedRank > 0 ? `rank${r.firstRefSeedRank}` : "-";
                // The denominator follows SEED_K so the column stays accurate if the seed count changes.
                console.log(`${r.sym.padEnd(15)} ${r.loc.padEnd(24)} ${String(r.seedsRefSym).padStart(3)}/${SEED_K} ${firstSeed}`);
            }
        }
        finally {
            yield db.close();
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
main().catch((e) => { console.error(e); process.exit(1); });
package/dist/eval-oss.js CHANGED
@@ -133,25 +133,44 @@ const DATASETS = {
133
133
  cases: PLATFORM_CASES,
134
134
  },
135
135
  };
136
- // A chunk matches when its file path ends with the expected file AND the
137
- // expected line falls within [start_line, end_line]. end_line falls back
138
- // to start_line + 200 when missing (lodash's mega-IIFE chunks are
139
- // typically much smaller than this, but the guard avoids false negatives
140
- // from short tail chunks).
141
- function chunkMatches(chunk, expectedFile, expectedLine) {
/**
 * Decides whether a returned chunk is the expected answer for an eval case.
 *
 * The chunk's `metadata.path` must equal `expectedFile` or end with
 * `/expectedFile` (case-insensitive). Anchoring on the path separator keeps
 * `my-errors.ts` from being credited for `errors.ts`. Given a path match, the
 * chunk qualifies when EITHER:
 *   (a) `expectedSymbol` is provided and the chunk's `defined_symbols` array
 *       contains it. This is the drift-robust criterion: it does not depend on
 *       where a hand-curated `expectedLine` lands relative to chunk boundaries.
 *   (b) `expectedLine` (1-indexed) falls inside the chunk's line range. This is
 *       the fallback for answer chunks that do not carry the symbol in
 *       `defined_symbols` (e.g. re-export / binding sites).
 *
 * The line range starts at `generated_metadata.start_line`, falling back to
 * `chunk.start_line`, then 0 (treated as 0-indexed). It spans
 * `generated_metadata.num_lines`, or 200 lines when that is missing or zero.
 *
 * @param {object} chunk Search result row.
 * @param {string} expectedFile Expected file path (suffix) of the answer.
 * @param {number} expectedLine 1-indexed line expected to fall inside the chunk.
 * @param {string} [expectedSymbol] Symbol the chunk should define; optional.
 * @returns {boolean} True when the chunk counts as a hit.
 */
function chunkMatches(chunk, expectedFile, expectedLine, expectedSymbol) {
    const firstDefined = (...candidates) => candidates.find((v) => v !== null && v !== undefined);
    const meta = chunk.metadata === null || chunk.metadata === undefined ? {} : chunk.metadata;
    const path = String(meta.path || "").toLowerCase();
    const wanted = expectedFile.toLowerCase();
    if (path !== wanted && !path.endsWith(`/${wanted}`)) {
        return false;
    }
    // (a) defining-chunk match
    if (expectedSymbol) {
        const defs = chunk.defined_symbols;
        if (Array.isArray(defs) && defs.includes(expectedSymbol)) {
            return true;
        }
    }
    // (b) line-range match — chunk start_line is 0-indexed; expected line is 1-indexed
    const generated = chunk.generated_metadata === null || chunk.generated_metadata === undefined
        ? {}
        : chunk.generated_metadata;
    const start = Number(firstDefined(generated.start_line, chunk.start_line, 0));
    const numLines = Number(firstDefined(generated.num_lines, 0));
    const end = numLines > 0 ? start + numLines : start + 200;
    const zeroBasedLine = expectedLine - 1;
    return zeroBasedLine >= start && zeroBasedLine <= end;
}
153
169
  function evaluateOss(response, c, timeMs) {
154
- const idx = response.data.findIndex((chunk) => chunkMatches(chunk, c.expectedFile, c.expectedLine));
170
+ const idx = response.data.findIndex((chunk) =>
171
+ // `query` is the symbol name (sverklo P1 shape), so it doubles as the
172
+ // expected defined-symbol for the drift-robust match branch.
173
+ chunkMatches(chunk, c.expectedFile, c.expectedLine, c.query));
155
174
  const rank = idx + 1; // 0 = miss
156
175
  const rr = rank > 0 ? 1 / rank : 0;
157
176
  const recall10 = rank > 0 && rank <= 10 ? 1 : 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.7",
3
+ "version": "0.17.8",
4
4
  "author": "Robert Owens <78518764+reowens@users.noreply.github.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.17.7",
3
+ "version": "0.17.8",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",