grepmax 0.17.2 → 0.17.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/commands/context.js +118 -21
- package/dist/commands/mcp.js +305 -118
- package/dist/commands/search.js +40 -102
- package/dist/eval-graph-sanity.js +225 -0
- package/dist/eval-graph-spotcheck.js +83 -0
- package/dist/eval-graph-totals.js +131 -0
- package/dist/eval-oss.js +244 -0
- package/dist/eval.js +6 -1
- package/dist/lib/output/agent-search-formatter.js +163 -0
- package/dist/lib/search/pagerank.js +267 -0
- package/dist/lib/search/searcher.js +44 -4
- package/mlx-embed-server/server.py +24 -0
- package/package.json +3 -1
- package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
- package/plugins/grepmax/skills/grepmax/SKILL.md +15 -5
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Diagnostic counterpart to eval-graph-sanity.ts.
|
|
4
|
+
*
|
|
5
|
+
* Phase 0 found that 0/200 fusion-pool chunks reference any of the 4
|
|
6
|
+
* hard-miss targets via `referenced_symbols`. That can mean either:
|
|
7
|
+
* (a) chunker isn't extracting these refs (upstream miss), OR
|
|
8
|
+
* (b) refs exist but live outside the fusion top-200.
|
|
9
|
+
*
|
|
10
|
+
* This script measures the *total* `array_contains(referenced_symbols, X)`
|
|
11
|
+
* count across the whole platform index for each target. If those counts
|
|
12
|
+
* are also ~0, the graph is empty at the data-extraction layer, not the
|
|
13
|
+
* retrieval layer.
|
|
14
|
+
*/
|
|
15
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
18
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
19
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
20
|
+
}
|
|
21
|
+
Object.defineProperty(o, k2, desc);
|
|
22
|
+
}) : (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
o[k2] = m[k];
|
|
25
|
+
}));
|
|
26
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
27
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
28
|
+
}) : function(o, v) {
|
|
29
|
+
o["default"] = v;
|
|
30
|
+
});
|
|
31
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
32
|
+
var ownKeys = function(o) {
|
|
33
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
34
|
+
var ar = [];
|
|
35
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
36
|
+
return ar;
|
|
37
|
+
};
|
|
38
|
+
return ownKeys(o);
|
|
39
|
+
};
|
|
40
|
+
return function (mod) {
|
|
41
|
+
if (mod && mod.__esModule) return mod;
|
|
42
|
+
var result = {};
|
|
43
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
44
|
+
__setModuleDefault(result, mod);
|
|
45
|
+
return result;
|
|
46
|
+
};
|
|
47
|
+
})();
|
|
48
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
49
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
50
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
51
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
52
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
53
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
54
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
55
|
+
});
|
|
56
|
+
};
|
|
57
|
+
var _a, _b;
|
|
58
|
+
var _c;
|
|
59
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
60
|
+
(_a = (_c = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_c.GMAX_WORKER_COUNT = "1");
|
|
61
|
+
const path = __importStar(require("node:path"));
|
|
62
|
+
const config_1 = require("./config");
|
|
63
|
+
const vector_db_1 = require("./lib/store/vector-db");
|
|
64
|
+
const exit_1 = require("./lib/utils/exit");
|
|
65
|
+
const filter_builder_1 = require("./lib/utils/filter-builder");
|
|
66
|
+
const PLATFORM_ROOT = path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/beyond/platform");
|
|
67
|
+
const TARGETS = ["BeyondError", "ErrorCodes", "resolveActor", "errorHandler"];
|
|
68
|
+
/**
 * Prints graph-density diagnostics for chunks under PLATFORM_ROOT.
 *
 * For every symbol in TARGETS it logs how many chunks list the symbol in
 * `defined_symbols` and in `referenced_symbols` (each query capped at 2000
 * rows), then logs how many of up to 20000 scoped chunks carry a non-empty
 * `referenced_symbols` array and the average number of refs per such chunk.
 *
 * The database handle is closed even when a query fails, and the percentage
 * is reported as 0.0 (not NaN) when the scope matches no chunks.
 *
 * @returns {Promise<void>} settles after gracefulExit(0) has been awaited.
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        const db = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        try {
            const table = yield db.ensureTable();
            // Trailing slash keeps the LIKE prefix from matching sibling
            // directories that merely share the same name prefix.
            const pathPrefix = PLATFORM_ROOT.endsWith("/") ? PLATFORM_ROOT : `${PLATFORM_ROOT}/`;
            const scope = `path LIKE '${(0, filter_builder_1.escapeSqlString)(pathPrefix)}%'`;
            console.log(`Platform graph density check — pathPrefix=${PLATFORM_ROOT}\n`);
            for (const sym of TARGETS) {
                const esc = (0, filter_builder_1.escapeSqlString)(sym);
                const refRows = yield table
                    .query()
                    .select(["path"])
                    .where(`${scope} AND array_contains(referenced_symbols, '${esc}')`)
                    .limit(2000)
                    .toArray();
                const defRows = yield table
                    .query()
                    .select(["path"])
                    .where(`${scope} AND array_contains(defined_symbols, '${esc}')`)
                    .limit(2000)
                    .toArray();
                console.log(`${sym.padEnd(16)} def-chunks=${String(defRows.length).padStart(3)} ref-chunks=${String(refRows.length).padStart(4)}`);
            }
            // Also count chunks with non-empty referenced_symbols overall to
            // baseline graph density.
            const allRows = yield table
                .query()
                .select(["referenced_symbols"])
                .where(scope)
                .limit(20000)
                .toArray();
            let nonEmpty = 0;
            let totalRefs = 0;
            for (const row of allRows) {
                const raw = row.referenced_symbols;
                let arr = [];
                if (Array.isArray(raw)) {
                    arr = raw.filter((v) => typeof v === "string");
                }
                else if (raw && typeof raw.toArray === "function") {
                    // The column may come back as a list-like object exposing
                    // toArray(); conversion is best-effort and a failure just
                    // counts the row as having no refs.
                    try {
                        const a = raw.toArray();
                        if (Array.isArray(a))
                            arr = a.filter((v) => typeof v === "string");
                    }
                    catch (_a) { }
                }
                if (arr.length > 0) {
                    nonEmpty++;
                    totalRefs += arr.length;
                }
            }
            // Guard the ratio: an empty scope would otherwise print "NaN%".
            const nonEmptyPct = allRows.length > 0 ? (nonEmpty / allRows.length) * 100 : 0;
            console.log(`\nPlatform corpus: ${allRows.length} chunks sampled (cap 20k), ` +
                `${nonEmpty} with non-empty referenced_symbols (${nonEmptyPct.toFixed(1)}%), ` +
                `avg refs/chunk = ${(totalRefs / Math.max(1, nonEmpty)).toFixed(1)}`);
        }
        finally {
            // Release the handle on both the success and the failure path.
            yield db.close();
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
|
|
126
|
+
if (require.main === module) {
|
|
127
|
+
main().catch((e) => {
|
|
128
|
+
console.error(e);
|
|
129
|
+
process.exit(1);
|
|
130
|
+
});
|
|
131
|
+
}
|
package/dist/eval-oss.js
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* OSS-fixture evaluation harness — runs the search pipeline against
|
|
4
|
+
* sverklo-bench P1 (definition lookup) fixtures ported from
|
|
5
|
+
* github.com/sverklo/sverklo-bench (tasks/express.gen.ts,
|
|
6
|
+
* tasks/lodash.gen.ts). Used to answer "does ColBERT rerank help on a
|
|
7
|
+
* third-party fixture set, or are our 97 internal cases biased?"
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* pnpm tsx src/eval-oss.ts express # rerank off (default)
|
|
11
|
+
* GMAX_EVAL_RERANK=1 pnpm tsx src/eval-oss.ts express
|
|
12
|
+
* pnpm tsx src/eval-oss.ts lodash --json
|
|
13
|
+
* pnpm tsx src/eval-oss.ts all # all datasets
|
|
14
|
+
*/
|
|
15
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
18
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
19
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
20
|
+
}
|
|
21
|
+
Object.defineProperty(o, k2, desc);
|
|
22
|
+
}) : (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
o[k2] = m[k];
|
|
25
|
+
}));
|
|
26
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
27
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
28
|
+
}) : function(o, v) {
|
|
29
|
+
o["default"] = v;
|
|
30
|
+
});
|
|
31
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
32
|
+
var ownKeys = function(o) {
|
|
33
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
34
|
+
var ar = [];
|
|
35
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
36
|
+
return ar;
|
|
37
|
+
};
|
|
38
|
+
return ownKeys(o);
|
|
39
|
+
};
|
|
40
|
+
return function (mod) {
|
|
41
|
+
if (mod && mod.__esModule) return mod;
|
|
42
|
+
var result = {};
|
|
43
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
44
|
+
__setModuleDefault(result, mod);
|
|
45
|
+
return result;
|
|
46
|
+
};
|
|
47
|
+
})();
|
|
48
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
49
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
50
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
51
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
52
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
53
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
54
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
55
|
+
});
|
|
56
|
+
};
|
|
57
|
+
var _a, _b, _c, _d;
|
|
58
|
+
var _e;
|
|
59
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
60
|
+
// Same precaution as src/eval.ts — pin worker pool to 1 to avoid ONNX
|
|
61
|
+
// concurrency issues during the back-to-back search runs.
|
|
62
|
+
(_a = (_e = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_e.GMAX_WORKER_COUNT = "1");
|
|
63
|
+
const path = __importStar(require("node:path"));
|
|
64
|
+
const searcher_1 = require("./lib/search/searcher");
|
|
65
|
+
const vector_db_1 = require("./lib/store/vector-db");
|
|
66
|
+
const exit_1 = require("./lib/utils/exit");
|
|
67
|
+
const config_1 = require("./config");
|
|
68
|
+
// ─── express 4.21.1 P1 — sverklo-bench/tasks/express.gen.ts resolved against
|
|
69
|
+
// a fresh checkout. `merge` dropped: it's the utils-merge package, not
|
|
70
|
+
// an in-tree definition, so the sverklo resolver returns no location.
|
|
71
|
+
const EXPRESS_CASES = [
|
|
72
|
+
{ id: "ex-p1-01", query: "createApplication", expectedFile: "lib/express.js", expectedLine: 37 },
|
|
73
|
+
{ id: "ex-p1-02", query: "Route", expectedFile: "lib/router/route.js", expectedLine: 43 },
|
|
74
|
+
{ id: "ex-p1-03", query: "Layer", expectedFile: "lib/router/layer.js", expectedLine: 33 },
|
|
75
|
+
{ id: "ex-p1-04", query: "View", expectedFile: "lib/view.js", expectedLine: 52 },
|
|
76
|
+
{ id: "ex-p1-05", query: "query", expectedFile: "lib/express.js", expectedLine: 79, note: "re-export site — middleware/query.js doesn't match the function-decl patterns" },
|
|
77
|
+
{ id: "ex-p1-06", query: "init", expectedFile: "lib/middleware/init.js", expectedLine: 28 },
|
|
78
|
+
{ id: "ex-p1-07", query: "acceptParams", expectedFile: "lib/utils.js", expectedLine: 126 },
|
|
79
|
+
{ id: "ex-p1-08", query: "stringify", expectedFile: "lib/response.js", expectedLine: 1155 },
|
|
80
|
+
{ id: "ex-p1-09", query: "compileETag", expectedFile: "lib/utils.js", expectedLine: 150 },
|
|
81
|
+
];
|
|
82
|
+
// ─── lodash 4.17.21 P1 — hand-verified line numbers in lodash.js, copied
|
|
83
|
+
// verbatim from sverklo-bench/tasks/lodash.gen.ts. All 10 live in the
|
|
84
|
+
// same 17K-line UMD file, so line-window matching (NOT path-only match)
|
|
85
|
+
// is what makes this fixture set discriminating.
|
|
86
|
+
const LODASH_CASES = [
|
|
87
|
+
{ id: "ld-p1-01", query: "map", expectedFile: "lodash.js", expectedLine: 9620 },
|
|
88
|
+
{ id: "ld-p1-02", query: "filter", expectedFile: "lodash.js", expectedLine: 9239 },
|
|
89
|
+
{ id: "ld-p1-03", query: "reduce", expectedFile: "lodash.js", expectedLine: 9745 },
|
|
90
|
+
{ id: "ld-p1-04", query: "debounce", expectedFile: "lodash.js", expectedLine: 10372 },
|
|
91
|
+
{ id: "ld-p1-05", query: "throttle", expectedFile: "lodash.js", expectedLine: 10965 },
|
|
92
|
+
{ id: "ld-p1-06", query: "merge", expectedFile: "lodash.js", expectedLine: 13505, note: "var merge = createAssigner(...) binding site, not the re-export at 16689" },
|
|
93
|
+
{ id: "ld-p1-07", query: "cloneDeep", expectedFile: "lodash.js", expectedLine: 11155 },
|
|
94
|
+
{ id: "ld-p1-08", query: "get", expectedFile: "lodash.js", expectedLine: 13194 },
|
|
95
|
+
{ id: "ld-p1-09", query: "set", expectedFile: "lodash.js", expectedLine: 13741 },
|
|
96
|
+
{ id: "ld-p1-10", query: "chunk", expectedFile: "lodash.js", expectedLine: 6903 },
|
|
97
|
+
];
|
|
98
|
+
// ─── platform monorepo (private) — 15 hand-curated P1 cases across packages
|
|
99
|
+
// to test the "modular monorepo" shape that neither express nor lodash
|
|
100
|
+
// covers. Symbols resolved against the live checkout. Bare-symbol
|
|
101
|
+
// queries match sverklo's P1 methodology so results are comparable.
|
|
102
|
+
const PLATFORM_CASES = [
|
|
103
|
+
{ id: "pf-p1-01", query: "formatCents", expectedFile: "packages/shared/src/format.ts", expectedLine: 8 },
|
|
104
|
+
{ id: "pf-p1-02", query: "formatTimeAgo", expectedFile: "packages/shared/src/format.ts", expectedLine: 44 },
|
|
105
|
+
{ id: "pf-p1-03", query: "BeyondError", expectedFile: "packages/shared/src/errors.ts", expectedLine: 37 },
|
|
106
|
+
{ id: "pf-p1-04", query: "ErrorCodes", expectedFile: "packages/shared/src/errors.ts", expectedLine: 5 },
|
|
107
|
+
{ id: "pf-p1-05", query: "createDb", expectedFile: "packages/db/src/index.ts", expectedLine: 42 },
|
|
108
|
+
{ id: "pf-p1-06", query: "createDbAsync", expectedFile: "packages/db/src/index.ts", expectedLine: 50 },
|
|
109
|
+
{ id: "pf-p1-07", query: "authMiddleware", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 39 },
|
|
110
|
+
{ id: "pf-p1-08", query: "requireAuth", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 45 },
|
|
111
|
+
{ id: "pf-p1-09", query: "resolveActor", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 71 },
|
|
112
|
+
{ id: "pf-p1-10", query: "getActor", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 1136 },
|
|
113
|
+
{ id: "pf-p1-11", query: "rateLimit", expectedFile: "packages/api/src/middleware/rate-limit.ts", expectedLine: 94 },
|
|
114
|
+
{ id: "pf-p1-12", query: "checkRateLimitKey", expectedFile: "packages/api/src/middleware/rate-limit.ts", expectedLine: 158 },
|
|
115
|
+
{ id: "pf-p1-13", query: "errorHandler", expectedFile: "packages/api/src/middleware/error.ts", expectedLine: 128 },
|
|
116
|
+
{ id: "pf-p1-14", query: "activityTracker", expectedFile: "packages/api/src/middleware/activity-tracker.ts", expectedLine: 17 },
|
|
117
|
+
{ id: "pf-p1-15", query: "initializeApp", expectedFile: "packages/api/src/app.ts", expectedLine: 68 },
|
|
118
|
+
];
|
|
119
|
+
const DATASETS = {
|
|
120
|
+
express: {
|
|
121
|
+
name: "express",
|
|
122
|
+
projectRoot: path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/sandbox/bench-fixtures/express"),
|
|
123
|
+
cases: EXPRESS_CASES,
|
|
124
|
+
},
|
|
125
|
+
lodash: {
|
|
126
|
+
name: "lodash",
|
|
127
|
+
projectRoot: path.join((_c = process.env.HOME) !== null && _c !== void 0 ? _c : "", "Development/sandbox/bench-fixtures/lodash"),
|
|
128
|
+
cases: LODASH_CASES,
|
|
129
|
+
},
|
|
130
|
+
platform: {
|
|
131
|
+
name: "platform",
|
|
132
|
+
projectRoot: path.join((_d = process.env.HOME) !== null && _d !== void 0 ? _d : "", "Development/beyond/platform"),
|
|
133
|
+
cases: PLATFORM_CASES,
|
|
134
|
+
},
|
|
135
|
+
};
|
|
136
|
+
// A chunk matches when its file path ends with the expected file AND the
|
|
137
|
+
// expected line falls within [start_line, end_line]. end_line falls back
|
|
138
|
+
// to start_line + 200 when missing (lodash's mega-IIFE chunks are
|
|
139
|
+
// typically much smaller than this, but the guard avoids false negatives
|
|
140
|
+
// from short tail chunks).
|
|
141
|
+
/**
 * Decides whether a search-result chunk covers an expected definition site.
 *
 * The chunk's `metadata.path` must either equal `expectedFile` or end with
 * `/<expectedFile>` (case-insensitive). Requiring the separator stops
 * `notlodash.js` from counting as a hit for `lodash.js`.
 *
 * The line window is `[start, start + num_lines]`, where `start` is read from
 * `generated_metadata.start_line`, then `chunk.start_line`, then 0. When
 * `num_lines` is missing or not positive the window falls back to 200 lines.
 *
 * @param {object} chunk search result chunk.
 * @param {string} expectedFile path suffix of the expected file.
 * @param {number} expectedLine 1-indexed expected line.
 * @returns {boolean} true when both the file and the line window match.
 */
function chunkMatches(chunk, expectedFile, expectedLine) {
    const metadata = chunk.metadata;
    const chunkFile = String((metadata && metadata.path) || "").toLowerCase();
    const wantedFile = expectedFile.toLowerCase();
    // Match on a whole path-segment boundary, never on a bare substring tail.
    if (chunkFile !== wantedFile && !chunkFile.endsWith(`/${wantedFile}`)) {
        return false;
    }
    const generated = chunk.generated_metadata;
    let rawStart = generated == null ? undefined : generated.start_line;
    if (rawStart == null)
        rawStart = chunk.start_line;
    if (rawStart == null)
        rawStart = 0;
    let rawNumLines = generated == null ? undefined : generated.num_lines;
    if (rawNumLines == null)
        rawNumLines = 0;
    const start = Number(rawStart);
    const numLines = Number(rawNumLines);
    const end = numLines > 0 ? start + numLines : start + 200;
    // chunks are 0-indexed start_line; expected line is 1-indexed
    const zeroBasedLine = expectedLine - 1;
    return zeroBasedLine >= start && zeroBasedLine <= end;
}
|
|
153
|
+
/**
 * Scores one search response against one fixture case.
 *
 * `rank` is the 1-based position of the first chunk accepted by
 * chunkMatches, or 0 for a miss. Both `rr` and `recall10` are cut off at
 * rank 10: the summary reports the mean of `rr` as MRR@10, so a hit found at
 * rank 11 or later must contribute 0 rather than 1/rank.
 *
 * @param {{data: object[]}} response search response; `data` is the ranked chunk list.
 * @param {{id: string, query: string, expectedFile: string, expectedLine: number, note?: string}} c fixture case.
 * @param {number} timeMs wall-clock time of the search in milliseconds.
 * @returns {object} per-case result record (id, query, expectation, rank, rr, recall10, timeMs, note).
 */
function evaluateOss(response, c, timeMs) {
    const CUTOFF = 10;
    const idx = response.data.findIndex((chunk) => chunkMatches(chunk, c.expectedFile, c.expectedLine));
    const rank = idx + 1; // 0 = miss
    const withinCutoff = rank > 0 && rank <= CUTOFF;
    const rr = withinCutoff ? 1 / rank : 0;
    const recall10 = withinCutoff ? 1 : 0;
    return {
        id: c.id,
        query: c.query,
        expectedFile: c.expectedFile,
        expectedLine: c.expectedLine,
        rank,
        rr,
        recall10,
        timeMs,
        note: c.note,
    };
}
|
|
170
|
+
/**
 * Runs every case of one dataset through the searcher and aggregates metrics.
 *
 * Searches are issued sequentially, scoped to the dataset's project root
 * (with a trailing slash), and each is timed with performance.now(). The
 * vector DB is closed whether or not a search fails. Averages use a
 * denominator of at least 1, so a dataset with no cases reports zeros rather
 * than NaN.
 *
 * @param {{name: string, projectRoot: string, cases: object[]}} ds dataset descriptor.
 * @param {boolean} rerank passed through as the `rerank` search option.
 * @param {number} topK number of results requested per query.
 * @returns {Promise<{summary: object, results: object[]}>} aggregate summary plus per-case results.
 */
function runDataset(ds, rerank, topK) {
    return __awaiter(this, void 0, void 0, function* () {
        const vectorDb = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        const results = [];
        try {
            const searcher = new searcher_1.Searcher(vectorDb);
            const pathPrefix = ds.projectRoot.endsWith("/") ? ds.projectRoot : `${ds.projectRoot}/`;
            for (const c of ds.cases) {
                const t0 = performance.now();
                const res = yield searcher.search(c.query, topK, { rerank }, undefined, pathPrefix);
                const timeMs = performance.now() - t0;
                results.push(evaluateOss(res, c, timeMs));
            }
        }
        finally {
            // Release the handle even when a search rejects mid-run.
            yield vectorDb.close();
        }
        // Avoid 0/0 when the dataset has no cases.
        const denominator = Math.max(1, results.length);
        const hits = results.filter((r) => r.rank > 0).length;
        const hitsAt1 = results.filter((r) => r.rank === 1).length;
        const mrr = results.reduce((s, r) => s + r.rr, 0) / denominator;
        const recall10 = results.reduce((s, r) => s + r.recall10, 0) / denominator;
        const avgTime = results.reduce((s, r) => s + r.timeMs, 0) / denominator;
        return {
            summary: {
                dataset: ds.name,
                rerank,
                cases: results.length,
                hits,
                hitsAt1,
                mrrAt10: Number(mrr.toFixed(4)),
                recallAt10: Number(recall10.toFixed(4)),
                avgTimeMs: Math.round(avgTime),
            },
            results,
        };
    });
}
|
|
203
|
+
/**
 * CLI entry point for the OSS-fixture evaluation.
 *
 * The first argument that does not start with `--` selects the dataset
 * (default "all"). `--json` or GMAX_EVAL_JSON=1 switches to JSON mode, where
 * progress goes to stderr and a single JSON document is written to stdout.
 * GMAX_EVAL_RERANK=1 enables reranking. Each query requests 20 results.
 *
 * Dataset names are resolved as own properties of DATASETS only, so inherited
 * keys such as "constructor" are reported as unknown instead of crashing.
 *
 * @returns {Promise<void>} settles after gracefulExit(0) has been awaited.
 */
function run() {
    return __awaiter(this, void 0, void 0, function* () {
        var _a;
        const args = process.argv.slice(2);
        const datasetArg = (_a = args.find((a) => !a.startsWith("--"))) !== null && _a !== void 0 ? _a : "all";
        const jsonMode = args.includes("--json") || process.env.GMAX_EVAL_JSON === "1";
        const rerank = process.env.GMAX_EVAL_RERANK === "1";
        const topK = 20;
        const wanted = datasetArg === "all" ? Object.keys(DATASETS) : [datasetArg];
        const out = [];
        // In JSON mode stdout is reserved for the final document.
        const log = jsonMode ? console.error : console.log;
        log(`OSS eval (rerank=${rerank ? "on" : "off"})`);
        for (const name of wanted) {
            // Own-property lookup: a plain DATASETS[name] would resolve
            // prototype members (e.g. "toString") to truthy values.
            const ds = Object.prototype.hasOwnProperty.call(DATASETS, name) ? DATASETS[name] : undefined;
            if (!ds) {
                console.error(`Unknown dataset: ${name}. Known: ${Object.keys(DATASETS).join(", ")}`);
                process.exit(1);
            }
            log(`\n── ${ds.name} (${ds.cases.length} cases, ${ds.projectRoot})`);
            const r = yield runDataset(ds, rerank, topK);
            out.push(r);
            if (!jsonMode) {
                for (const res of r.results) {
                    const status = res.rank > 0 ? `rank ${res.rank}` : "miss";
                    console.log(` ${res.id} ${res.query.padEnd(20)} → ${status.padEnd(8)} ${res.expectedFile}:${res.expectedLine} [${res.timeMs.toFixed(0)}ms]`);
                }
                const s = r.summary;
                console.log(` → MRR@10=${s.mrrAt10} Recall@10=${s.recallAt10} hits@1=${s.hitsAt1}/${s.cases} avg=${s.avgTimeMs}ms`);
            }
        }
        if (jsonMode) {
            process.stdout.write(`${JSON.stringify({ rerank, datasets: out }, null, 2)}\n`);
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
|
|
239
|
+
if (require.main === module && process.env.GMAX_EVAL_AUTORUN !== "0") {
|
|
240
|
+
run().catch((e) => {
|
|
241
|
+
console.error(e);
|
|
242
|
+
process.exit(1);
|
|
243
|
+
});
|
|
244
|
+
}
|
package/dist/eval.js
CHANGED
|
@@ -582,9 +582,14 @@ function run() {
|
|
|
582
582
|
// changes show up clearly. Set GMAX_EVAL_RERANK=1 to measure the full
|
|
583
583
|
// production pipeline (slower, but more representative).
|
|
584
584
|
const rerank = process.env.GMAX_EVAL_RERANK === "1";
|
|
585
|
+
// Optional pathPrefix scoping — needed by PageRank tiebreaker (which is
|
|
586
|
+
// per-project) and to isolate this bench from cross-project chunks indexed
|
|
587
|
+
// by the same daemon. Defaults to unscoped so historical numbers stay
|
|
588
|
+
// comparable.
|
|
589
|
+
const evalPathPrefix = process.env.GMAX_EVAL_PATH_PREFIX || undefined;
|
|
585
590
|
for (const c of exports.cases) {
|
|
586
591
|
const queryStart = performance.now();
|
|
587
|
-
const res = yield searcher.search(c.query, topK, { rerank });
|
|
592
|
+
const res = yield searcher.search(c.query, topK, { rerank }, undefined, evalPathPrefix);
|
|
588
593
|
const queryEnd = performance.now();
|
|
589
594
|
const timeMs = queryEnd - queryStart;
|
|
590
595
|
results.push(evaluateCase(res, c, timeMs));
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.formatAgentSearchResults = formatAgentSearchResults;
|
|
37
|
+
const path = __importStar(require("node:path"));
|
|
38
|
+
/**
 * Resolves the file path recorded on a chunk.
 *
 * A truthy `chunk.path` wins, then a truthy `chunk.metadata.path`; otherwise
 * the empty string is returned. The result is always coerced to a string.
 *
 * @param {object} chunk search result chunk.
 * @returns {string} the chunk's path, or "" when none is recorded.
 */
function chunkPath(chunk) {
    const direct = chunk.path;
    if (direct) {
        return String(direct);
    }
    const meta = chunk.metadata;
    const nested = meta === null || meta === undefined ? undefined : meta.path;
    return String(nested || "");
}
|
|
42
|
+
/**
 * Returns the 1-based start line of a chunk for display.
 *
 * The stored value is read from `startLine`, then `start_line`, then
 * `generated_metadata.start_line` (first non-nullish wins, default 0) and is
 * incremented by one. Results are clamped to a minimum of 1, and a stored
 * value that does not convert to a finite number yields 1 instead of NaN.
 *
 * @param {object} chunk search result chunk.
 * @returns {number} display line number, always >= 1.
 */
function chunkStartLine(chunk) {
    let stored = chunk.startLine;
    if (stored == null)
        stored = chunk.start_line;
    if (stored == null) {
        const generated = chunk.generated_metadata;
        stored = generated == null ? undefined : generated.start_line;
    }
    if (stored == null)
        stored = 0;
    const zeroBased = Number(stored);
    // Math.max(1, NaN) is NaN, which would render as ":NaN" in locators.
    if (!Number.isFinite(zeroBased))
        return 1;
    return Math.max(1, zeroBased + 1);
}
|
|
46
|
+
/**
 * Extracts the symbol names a chunk defines.
 *
 * Reads `defined_symbols`, falling back to `definedSymbols` when the former
 * is null or undefined. A plain array is used directly; an object exposing
 * `toArray()` is converted first. Non-string entries are dropped, and any
 * other shape (or a throwing `toArray`) yields an empty list.
 *
 * @param {object} chunk search result chunk.
 * @returns {string[]} defined symbol names, possibly empty.
 */
function definedSymbols(chunk) {
    const onlyStrings = (values) => values.filter((value) => typeof value === "string");
    let source = chunk.defined_symbols;
    if (source === null || source === undefined) {
        source = chunk.definedSymbols;
    }
    if (Array.isArray(source)) {
        return onlyStrings(source);
    }
    if (!source || typeof source.toArray !== "function") {
        return [];
    }
    try {
        const converted = source.toArray();
        return Array.isArray(converted) ? onlyStrings(converted) : [];
    }
    catch (_ignored) {
        // Conversion is best-effort; an unreadable column means "no symbols".
        return [];
    }
}
|
|
62
|
+
/**
 * Converts a chunk path into a project-relative path for display.
 *
 * @param {string} projectRoot directory that absolute paths are made relative to.
 * @param {string} absPath chunk path; may be empty or already relative.
 * @returns {string} "" for an empty path, the input unchanged when it is not
 *   absolute, otherwise `path.relative(projectRoot, absPath)`.
 */
function relativePath(projectRoot, absPath) {
    if (!absPath) {
        return "";
    }
    if (!path.isAbsolute(absPath)) {
        return absPath;
    }
    return path.relative(projectRoot, absPath);
}
|
|
69
|
+
/**
 * Picks the first line of a chunk that looks like a declaration/signature.
 *
 * The text comes from `chunk.content`, else `chunk.text`. Lines are trimmed
 * and skipped when they are shorter than five characters, start with a
 * comment/import/heading/list/closing marker, start with punctuation, or
 * start with a lowercase letter without one of the recognised keyword
 * prefixes. The surviving line is truncated to 120 characters (117 + "...").
 *
 * @param {object} chunk search result chunk.
 * @returns {string} the chosen line, or "" when no line qualifies.
 */
function firstSignatureLine(chunk) {
    const SKIPPED_PREFIXES = ["//", "/*", "*", "import ", "#", "File:", "} ", "- ", "..."];
    const PUNCTUATION_LEAD = /^[.),;:}\]|&(+`'"!~]/;
    const LOWERCASE_LEAD = /^[a-z]/;
    const KEYWORD_LEAD = /^(export|function|class|interface|type|const|let|var|async|return|if|for|while|switch|enum|struct|pub |fn |def |impl |mod |use )/;
    let source = chunk.content;
    if (source === null || source === undefined) {
        source = chunk.text;
    }
    if (source === null || source === undefined) {
        source = "";
    }
    for (const rawLine of String(source).split("\n")) {
        const text = rawLine.trim();
        // Lines under five characters also cover blanks and lone braces.
        const tooShort = text.length < 5;
        const hasSkippedPrefix = SKIPPED_PREFIXES.some((prefix) => text.startsWith(prefix));
        const isContinuation = PUNCTUATION_LEAD.test(text);
        const isPlainLowercase = LOWERCASE_LEAD.test(text) && !KEYWORD_LEAD.test(text);
        if (tooShort || hasSkippedPrefix || isContinuation || isPlainLowercase) {
            continue;
        }
        if (text.length > 120) {
            return `${text.slice(0, 117)}...`;
        }
        return text;
    }
    return "";
}
|
|
104
|
+
/**
 * Chooses the short description shown next to a result.
 *
 * @param {object} chunk search result chunk.
 * @returns {string} `chunk.summary` when it is a non-empty string, otherwise
 *   whatever firstSignatureLine returns for the chunk.
 */
function hintFor(chunk) {
    const summary = chunk.summary;
    const hasSummary = typeof summary === "string" && summary.length > 0;
    return hasSummary ? summary : firstSignatureLine(chunk);
}
|
|
110
|
+
/**
 * Builds the tab-prefixed score-explanation suffix for a result line.
 *
 * @param {object} chunk search result chunk; `scoreBreakdown` supplies the
 *   numeric `rerank`, `fused`, `boost` and `normalized` fields.
 * @param {boolean} enabled whether explanations were requested.
 * @returns {string} "" when disabled or when the chunk has no breakdown,
 *   otherwise `\texplain:rerank=…,fused=…,boost=…x,score=…`.
 */
function explainSuffix(chunk, enabled) {
    if (!enabled) {
        return "";
    }
    const breakdown = chunk.scoreBreakdown;
    if (!breakdown) {
        return "";
    }
    const fields = [
        `rerank=${breakdown.rerank.toFixed(3)}`,
        `fused=${breakdown.fused.toFixed(3)}`,
        `boost=${breakdown.boost.toFixed(2)}x`,
        `score=${breakdown.normalized.toFixed(3)}`,
    ];
    return `\texplain:${fields.join(",")}`;
}
|
|
116
|
+
/**
 * Renders search results as compact, line-oriented text for agent consumers.
 *
 * Results are grouped by chunk path in first-seen order. A file with several
 * hits gets a `<rel> (<n> hits):` header followed by ` :<line>` entries; a
 * file with one hit is rendered as `<rel>:<line>`. Each entry may carry a
 * tab-separated score, the first defined symbol, the first four characters of
 * the role in upper case, a hint, and an explain suffix.
 *
 * @param {object[]} results ranked search result chunks.
 * @param {string} projectRoot root used to relativise chunk paths.
 * @param {object} [options] `includeImports` plus `getImportsForFile(absPath)`
 *   emit an `[imports <rel>]` line once per file; `explain` enables the
 *   score-breakdown suffix.
 * @returns {string} newline-joined listing, or "(none)" for no results.
 */
function formatAgentSearchResults(results, projectRoot, options = {}) {
    if (!results.length) {
        return "(none)";
    }
    // Map iteration order keeps files in the order they first appeared.
    const byFile = new Map();
    for (const hit of results) {
        const file = chunkPath(hit);
        if (!byFile.has(file)) {
            byFile.set(file, []);
        }
        byFile.get(file).push(hit);
    }
    const output = [];
    byFile.forEach((hits, file) => {
        const rel = relativePath(projectRoot, file);
        // Each file is visited exactly once, so its imports are emitted once.
        if (options.includeImports && file && options.getImportsForFile) {
            const imports = options.getImportsForFile(file);
            if (imports) {
                output.push(`[imports ${rel}] ${imports.split("\n").join(" | ")}`);
            }
        }
        const isGrouped = hits.length > 1;
        if (isGrouped) {
            output.push(`${rel} (${hits.length} hits):`);
        }
        for (const hit of hits) {
            const firstSymbol = definedSymbols(hit)[0];
            const symbol = firstSymbol === null || firstSymbol === undefined ? "" : firstSymbol;
            const rawRole = hit.role === null || hit.role === undefined ? "" : hit.role;
            const role = String(rawRole).slice(0, 4).toUpperCase();
            let score = "";
            if (typeof hit.score === "number") {
                score = `\ts=${hit.score.toFixed(3)}`;
            }
            const hint = hintFor(hit);
            const line = chunkStartLine(hit);
            const pieces = [
                isGrouped ? ` :${line}` : `${rel}:${line}`,
                score,
                symbol ? ` ${symbol}` : "",
                role ? ` [${role}]` : "",
                hint ? ` — ${hint}` : "",
                explainSuffix(hit, options.explain),
            ];
            output.push(pieces.join(""));
        }
    });
    return output.join("\n");
}
|