grepmax 0.17.1 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/commands/dead.js +183 -0
- package/dist/commands/mcp.js +66 -0
- package/dist/eval-graph-sanity.js +225 -0
- package/dist/eval-graph-spotcheck.js +83 -0
- package/dist/eval-graph-totals.js +131 -0
- package/dist/eval-oss.js +244 -0
- package/dist/eval.js +6 -1
- package/dist/index.js +2 -0
- package/dist/lib/search/pagerank.js +267 -0
- package/dist/lib/search/searcher.js +44 -4
- package/mlx-embed-server/server.py +24 -0
- package/package.json +3 -1
- package/plugins/grepmax/.claude-plugin/plugin.json +1 -1
- package/plugins/grepmax/skills/grepmax/SKILL.md +15 -5
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Diagnostic counterpart to eval-graph-sanity.ts.
|
|
4
|
+
*
|
|
5
|
+
* Phase 0 found that 0/200 fusion-pool chunks reference any of the 4
|
|
6
|
+
* hard-miss targets via `referenced_symbols`. That can mean either:
|
|
7
|
+
* (a) chunker isn't extracting these refs (upstream miss), OR
|
|
8
|
+
* (b) refs exist but live outside the fusion top-200.
|
|
9
|
+
*
|
|
10
|
+
* This script measures the *total* `array_contains(referenced_symbols, X)`
|
|
11
|
+
* count across the whole platform index for each target. If those counts
|
|
12
|
+
* are also ~0, the graph is empty at the data-extraction layer, not the
|
|
13
|
+
* retrieval layer.
|
|
14
|
+
*/
|
|
15
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
18
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
19
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
20
|
+
}
|
|
21
|
+
Object.defineProperty(o, k2, desc);
|
|
22
|
+
}) : (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
o[k2] = m[k];
|
|
25
|
+
}));
|
|
26
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
27
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
28
|
+
}) : function(o, v) {
|
|
29
|
+
o["default"] = v;
|
|
30
|
+
});
|
|
31
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
32
|
+
var ownKeys = function(o) {
|
|
33
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
34
|
+
var ar = [];
|
|
35
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
36
|
+
return ar;
|
|
37
|
+
};
|
|
38
|
+
return ownKeys(o);
|
|
39
|
+
};
|
|
40
|
+
return function (mod) {
|
|
41
|
+
if (mod && mod.__esModule) return mod;
|
|
42
|
+
var result = {};
|
|
43
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
44
|
+
__setModuleDefault(result, mod);
|
|
45
|
+
return result;
|
|
46
|
+
};
|
|
47
|
+
})();
|
|
48
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
49
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
50
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
51
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
52
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
53
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
54
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
55
|
+
});
|
|
56
|
+
};
|
|
57
|
+
var _a, _b;
|
|
58
|
+
var _c;
|
|
59
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
60
|
+
(_a = (_c = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_c.GMAX_WORKER_COUNT = "1");
|
|
61
|
+
const path = __importStar(require("node:path"));
|
|
62
|
+
const config_1 = require("./config");
|
|
63
|
+
const vector_db_1 = require("./lib/store/vector-db");
|
|
64
|
+
const exit_1 = require("./lib/utils/exit");
|
|
65
|
+
const filter_builder_1 = require("./lib/utils/filter-builder");
|
|
66
|
+
const PLATFORM_ROOT = path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/beyond/platform");
|
|
67
|
+
const TARGETS = ["BeyondError", "ErrorCodes", "resolveActor", "errorHandler"];
|
|
68
|
+
/**
 * Reports how densely the platform index is populated with symbol edges.
 *
 * For each entry in TARGETS, prints the number of chunks under PLATFORM_ROOT
 * whose `defined_symbols` / `referenced_symbols` contain it. Each of those
 * queries is capped at 2000 rows, so a printed 2000 means "at least 2000".
 * Then samples up to 20000 chunks and prints the share with a non-empty
 * `referenced_symbols` list plus the average list length among those.
 *
 * The database handle is closed even when a query throws, and an empty
 * sample prints 0.0% instead of NaN%.
 *
 * @returns {Promise<void>} Settles once gracefulExit(0) has been awaited.
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        // Normalises a `referenced_symbols` cell to a plain string array. The
        // cell may be a JS array or an object exposing toArray(); anything
        // else (or a toArray() that throws) counts as "no references".
        const readSymbols = (raw) => {
            if (Array.isArray(raw)) {
                return raw.filter((v) => typeof v === "string");
            }
            if (raw && typeof raw.toArray === "function") {
                try {
                    const converted = raw.toArray();
                    if (Array.isArray(converted)) {
                        return converted.filter((v) => typeof v === "string");
                    }
                }
                catch (_err) {
                    // Best-effort diagnostic: an unreadable cell is treated as empty.
                }
            }
            return [];
        };
        const db = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        try {
            const table = yield db.ensureTable();
            const pathPrefix = PLATFORM_ROOT.endsWith("/") ? PLATFORM_ROOT : `${PLATFORM_ROOT}/`;
            const scope = `path LIKE '${(0, filter_builder_1.escapeSqlString)(pathPrefix)}%'`;
            console.log(`Platform graph density check — pathPrefix=${PLATFORM_ROOT}\n`);
            for (const sym of TARGETS) {
                const esc = (0, filter_builder_1.escapeSqlString)(sym);
                const refRows = yield table
                    .query()
                    .select(["path"])
                    .where(`${scope} AND array_contains(referenced_symbols, '${esc}')`)
                    .limit(2000)
                    .toArray();
                const defRows = yield table
                    .query()
                    .select(["path"])
                    .where(`${scope} AND array_contains(defined_symbols, '${esc}')`)
                    .limit(2000)
                    .toArray();
                console.log(`${sym.padEnd(16)} def-chunks=${String(defRows.length).padStart(3)} ref-chunks=${String(refRows.length).padStart(4)}`);
            }
            // Also count chunks with non-empty referenced_symbols overall to
            // baseline graph density.
            const allRows = yield table
                .query()
                .select(["referenced_symbols"])
                .where(scope)
                .limit(20000)
                .toArray();
            let nonEmpty = 0;
            let totalRefs = 0;
            for (const row of allRows) {
                const refs = readSymbols(row.referenced_symbols);
                if (refs.length > 0) {
                    nonEmpty++;
                    totalRefs += refs.length;
                }
            }
            // Guard the percentage: with no rows in scope the ratio would be 0/0.
            const nonEmptyPct = allRows.length > 0 ? (nonEmpty / allRows.length) * 100 : 0;
            console.log(`\nPlatform corpus: ${allRows.length} chunks sampled (cap 20k), ` +
                `${nonEmpty} with non-empty referenced_symbols (${nonEmptyPct.toFixed(1)}%), ` +
                `avg refs/chunk = ${(totalRefs / Math.max(1, nonEmpty)).toFixed(1)}`);
        }
        finally {
            // Release the handle on both the success and the failure path.
            yield db.close();
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
|
|
126
|
+
if (require.main === module) {
|
|
127
|
+
main().catch((e) => {
|
|
128
|
+
console.error(e);
|
|
129
|
+
process.exit(1);
|
|
130
|
+
});
|
|
131
|
+
}
|
package/dist/eval-oss.js
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* OSS-fixture evaluation harness — runs the search pipeline against
|
|
4
|
+
* sverklo-bench P1 (definition lookup) fixtures ported from
|
|
5
|
+
* github.com/sverklo/sverklo-bench (tasks/express.gen.ts,
|
|
6
|
+
* tasks/lodash.gen.ts). Used to answer "does ColBERT rerank help on a
|
|
7
|
+
* third-party fixture set, or are our 97 internal cases biased?"
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* pnpm tsx src/eval-oss.ts express # rerank off (default)
|
|
11
|
+
* GMAX_EVAL_RERANK=1 pnpm tsx src/eval-oss.ts express
|
|
12
|
+
* pnpm tsx src/eval-oss.ts lodash --json
|
|
13
|
+
* pnpm tsx src/eval-oss.ts all # all datasets
|
|
14
|
+
*/
|
|
15
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
18
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
19
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
20
|
+
}
|
|
21
|
+
Object.defineProperty(o, k2, desc);
|
|
22
|
+
}) : (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
o[k2] = m[k];
|
|
25
|
+
}));
|
|
26
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
27
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
28
|
+
}) : function(o, v) {
|
|
29
|
+
o["default"] = v;
|
|
30
|
+
});
|
|
31
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
32
|
+
var ownKeys = function(o) {
|
|
33
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
34
|
+
var ar = [];
|
|
35
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
36
|
+
return ar;
|
|
37
|
+
};
|
|
38
|
+
return ownKeys(o);
|
|
39
|
+
};
|
|
40
|
+
return function (mod) {
|
|
41
|
+
if (mod && mod.__esModule) return mod;
|
|
42
|
+
var result = {};
|
|
43
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
44
|
+
__setModuleDefault(result, mod);
|
|
45
|
+
return result;
|
|
46
|
+
};
|
|
47
|
+
})();
|
|
48
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
49
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
50
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
51
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
52
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
53
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
54
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
55
|
+
});
|
|
56
|
+
};
|
|
57
|
+
var _a, _b, _c, _d;
|
|
58
|
+
var _e;
|
|
59
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
60
|
+
// Same precaution as src/eval.ts — pin worker pool to 1 to avoid ONNX
|
|
61
|
+
// concurrency issues during the back-to-back search runs.
|
|
62
|
+
(_a = (_e = process.env).GMAX_WORKER_COUNT) !== null && _a !== void 0 ? _a : (_e.GMAX_WORKER_COUNT = "1");
|
|
63
|
+
const path = __importStar(require("node:path"));
|
|
64
|
+
const searcher_1 = require("./lib/search/searcher");
|
|
65
|
+
const vector_db_1 = require("./lib/store/vector-db");
|
|
66
|
+
const exit_1 = require("./lib/utils/exit");
|
|
67
|
+
const config_1 = require("./config");
|
|
68
|
+
// ─── express 4.21.1 P1 — sverklo-bench/tasks/express.gen.ts resolved against
|
|
69
|
+
// a fresh checkout. `merge` dropped: it's the utils-merge package, not
|
|
70
|
+
// an in-tree definition, so the sverklo resolver returns no location.
|
|
71
|
+
const EXPRESS_CASES = [
|
|
72
|
+
{ id: "ex-p1-01", query: "createApplication", expectedFile: "lib/express.js", expectedLine: 37 },
|
|
73
|
+
{ id: "ex-p1-02", query: "Route", expectedFile: "lib/router/route.js", expectedLine: 43 },
|
|
74
|
+
{ id: "ex-p1-03", query: "Layer", expectedFile: "lib/router/layer.js", expectedLine: 33 },
|
|
75
|
+
{ id: "ex-p1-04", query: "View", expectedFile: "lib/view.js", expectedLine: 52 },
|
|
76
|
+
{ id: "ex-p1-05", query: "query", expectedFile: "lib/express.js", expectedLine: 79, note: "re-export site — middleware/query.js doesn't match the function-decl patterns" },
|
|
77
|
+
{ id: "ex-p1-06", query: "init", expectedFile: "lib/middleware/init.js", expectedLine: 28 },
|
|
78
|
+
{ id: "ex-p1-07", query: "acceptParams", expectedFile: "lib/utils.js", expectedLine: 126 },
|
|
79
|
+
{ id: "ex-p1-08", query: "stringify", expectedFile: "lib/response.js", expectedLine: 1155 },
|
|
80
|
+
{ id: "ex-p1-09", query: "compileETag", expectedFile: "lib/utils.js", expectedLine: 150 },
|
|
81
|
+
];
|
|
82
|
+
// ─── lodash 4.17.21 P1 — hand-verified line numbers in lodash.js, copied
|
|
83
|
+
// verbatim from sverklo-bench/tasks/lodash.gen.ts. All 10 live in the
|
|
84
|
+
// same 17K-line UMD file, so line-window matching (NOT path-only match)
|
|
85
|
+
// is what makes this fixture set discriminating.
|
|
86
|
+
const LODASH_CASES = [
|
|
87
|
+
{ id: "ld-p1-01", query: "map", expectedFile: "lodash.js", expectedLine: 9620 },
|
|
88
|
+
{ id: "ld-p1-02", query: "filter", expectedFile: "lodash.js", expectedLine: 9239 },
|
|
89
|
+
{ id: "ld-p1-03", query: "reduce", expectedFile: "lodash.js", expectedLine: 9745 },
|
|
90
|
+
{ id: "ld-p1-04", query: "debounce", expectedFile: "lodash.js", expectedLine: 10372 },
|
|
91
|
+
{ id: "ld-p1-05", query: "throttle", expectedFile: "lodash.js", expectedLine: 10965 },
|
|
92
|
+
{ id: "ld-p1-06", query: "merge", expectedFile: "lodash.js", expectedLine: 13505, note: "var merge = createAssigner(...) binding site, not the re-export at 16689" },
|
|
93
|
+
{ id: "ld-p1-07", query: "cloneDeep", expectedFile: "lodash.js", expectedLine: 11155 },
|
|
94
|
+
{ id: "ld-p1-08", query: "get", expectedFile: "lodash.js", expectedLine: 13194 },
|
|
95
|
+
{ id: "ld-p1-09", query: "set", expectedFile: "lodash.js", expectedLine: 13741 },
|
|
96
|
+
{ id: "ld-p1-10", query: "chunk", expectedFile: "lodash.js", expectedLine: 6903 },
|
|
97
|
+
];
|
|
98
|
+
// ─── platform monorepo (private) — 15 hand-curated P1 cases across packages
|
|
99
|
+
// to test the "modular monorepo" shape that neither express nor lodash
|
|
100
|
+
// covers. Symbols resolved against the live checkout. Bare-symbol
|
|
101
|
+
// queries match sverklo's P1 methodology so results are comparable.
|
|
102
|
+
const PLATFORM_CASES = [
|
|
103
|
+
{ id: "pf-p1-01", query: "formatCents", expectedFile: "packages/shared/src/format.ts", expectedLine: 8 },
|
|
104
|
+
{ id: "pf-p1-02", query: "formatTimeAgo", expectedFile: "packages/shared/src/format.ts", expectedLine: 44 },
|
|
105
|
+
{ id: "pf-p1-03", query: "BeyondError", expectedFile: "packages/shared/src/errors.ts", expectedLine: 37 },
|
|
106
|
+
{ id: "pf-p1-04", query: "ErrorCodes", expectedFile: "packages/shared/src/errors.ts", expectedLine: 5 },
|
|
107
|
+
{ id: "pf-p1-05", query: "createDb", expectedFile: "packages/db/src/index.ts", expectedLine: 42 },
|
|
108
|
+
{ id: "pf-p1-06", query: "createDbAsync", expectedFile: "packages/db/src/index.ts", expectedLine: 50 },
|
|
109
|
+
{ id: "pf-p1-07", query: "authMiddleware", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 39 },
|
|
110
|
+
{ id: "pf-p1-08", query: "requireAuth", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 45 },
|
|
111
|
+
{ id: "pf-p1-09", query: "resolveActor", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 71 },
|
|
112
|
+
{ id: "pf-p1-10", query: "getActor", expectedFile: "packages/api/src/middleware/auth.ts", expectedLine: 1136 },
|
|
113
|
+
{ id: "pf-p1-11", query: "rateLimit", expectedFile: "packages/api/src/middleware/rate-limit.ts", expectedLine: 94 },
|
|
114
|
+
{ id: "pf-p1-12", query: "checkRateLimitKey", expectedFile: "packages/api/src/middleware/rate-limit.ts", expectedLine: 158 },
|
|
115
|
+
{ id: "pf-p1-13", query: "errorHandler", expectedFile: "packages/api/src/middleware/error.ts", expectedLine: 128 },
|
|
116
|
+
{ id: "pf-p1-14", query: "activityTracker", expectedFile: "packages/api/src/middleware/activity-tracker.ts", expectedLine: 17 },
|
|
117
|
+
{ id: "pf-p1-15", query: "initializeApp", expectedFile: "packages/api/src/app.ts", expectedLine: 68 },
|
|
118
|
+
];
|
|
119
|
+
const DATASETS = {
|
|
120
|
+
express: {
|
|
121
|
+
name: "express",
|
|
122
|
+
projectRoot: path.join((_b = process.env.HOME) !== null && _b !== void 0 ? _b : "", "Development/sandbox/bench-fixtures/express"),
|
|
123
|
+
cases: EXPRESS_CASES,
|
|
124
|
+
},
|
|
125
|
+
lodash: {
|
|
126
|
+
name: "lodash",
|
|
127
|
+
projectRoot: path.join((_c = process.env.HOME) !== null && _c !== void 0 ? _c : "", "Development/sandbox/bench-fixtures/lodash"),
|
|
128
|
+
cases: LODASH_CASES,
|
|
129
|
+
},
|
|
130
|
+
platform: {
|
|
131
|
+
name: "platform",
|
|
132
|
+
projectRoot: path.join((_d = process.env.HOME) !== null && _d !== void 0 ? _d : "", "Development/beyond/platform"),
|
|
133
|
+
cases: PLATFORM_CASES,
|
|
134
|
+
},
|
|
135
|
+
};
|
|
136
|
+
// A chunk matches when its file path ends with the expected file AND the
|
|
137
|
+
// expected line falls within [start_line, end_line]. end_line falls back
|
|
138
|
+
// to start_line + 200 when missing (lodash's mega-IIFE chunks are
|
|
139
|
+
// typically much smaller than this, but the guard avoids false negatives
|
|
140
|
+
// from short tail chunks).
|
|
141
|
+
/**
 * Decides whether a search-result chunk covers an expected definition site.
 *
 * The chunk's `metadata.path` must equal `expectedFile` or end with
 * `/expectedFile` (case-insensitive). Matching on a path-segment boundary
 * keeps `not-lodash.js` from being accepted for `lodash.js`, which a bare
 * suffix test would allow.
 *
 * The expected line must then fall inside the chunk's line window. The window
 * starts at `generated_metadata.start_line` (falling back to
 * `chunk.start_line`, then 0) and spans `generated_metadata.num_lines`, or 200
 * lines when that is missing or not positive.
 *
 * @param {object} chunk Search result carrying `metadata` / `generated_metadata`.
 * @param {string} expectedFile Repo-relative file path of the definition.
 * @param {number} expectedLine 1-indexed line of the definition.
 * @returns {boolean} True when both the path and the line window match.
 */
function chunkMatches(chunk, expectedFile, expectedLine) {
    const meta = chunk.metadata;
    const chunkPath = String((meta == null ? undefined : meta.path) || "").toLowerCase();
    const wantedFile = expectedFile.toLowerCase();
    if (chunkPath !== wantedFile && !chunkPath.endsWith(`/${wantedFile}`)) {
        return false;
    }
    const generated = chunk.generated_metadata;
    let rawStart = generated == null ? undefined : generated.start_line;
    if (rawStart == null) {
        rawStart = chunk.start_line;
    }
    if (rawStart == null) {
        rawStart = 0;
    }
    let rawCount = generated == null ? undefined : generated.num_lines;
    if (rawCount == null) {
        rawCount = 0;
    }
    const start = Number(rawStart);
    const numLines = Number(rawCount);
    const end = numLines > 0 ? start + numLines : start + 200;
    // chunks are 0-indexed start_line; expected line is 1-indexed
    const zeroBasedLine = expectedLine - 1;
    return zeroBasedLine >= start && zeroBasedLine <= end;
}
|
|
153
|
+
/**
 * Scores one search response against a single fixture case.
 *
 * @param {{data: object[]}} response Search response; `data` is scanned in order.
 * @param {object} c Fixture case (`id`, `query`, `expectedFile`, `expectedLine`, optional `note`).
 * @param {number} timeMs Wall-clock duration of the search, passed through.
 * @returns {object} The case fields plus `rank` (1-based, 0 = miss), `rr`
 *   (reciprocal rank) and `recall10` (1 when the hit is in the first ten).
 */
function evaluateOss(response, c, timeMs) {
    const position = response.data.findIndex((candidate) => {
        return chunkMatches(candidate, c.expectedFile, c.expectedLine);
    });
    const found = position !== -1;
    const rank = found ? position + 1 : 0; // 0 = miss
    let rr = 0;
    let recall10 = 0;
    if (found) {
        rr = 1 / rank;
        if (rank <= 10) {
            recall10 = 1;
        }
    }
    return {
        id: c.id,
        query: c.query,
        expectedFile: c.expectedFile,
        expectedLine: c.expectedLine,
        rank,
        rr,
        recall10,
        timeMs,
        note: c.note,
    };
}
|
|
170
|
+
/**
 * Runs every case of one dataset through the searcher and aggregates metrics.
 *
 * Searches are scoped to the dataset's project root (with a trailing slash)
 * and executed one after another so per-query timings are not skewed by
 * overlap. The VectorDB handle is closed even if a search throws. Averages
 * divide by at least 1, so an empty case list yields zeros rather than NaN.
 *
 * @param {{name: string, projectRoot: string, cases: object[]}} ds Dataset to evaluate.
 * @param {boolean} rerank Forwarded to the searcher as `{ rerank }`.
 * @param {number} topK Number of results requested per query.
 * @returns {Promise<{summary: object, results: object[]}>} Aggregate summary
 *   plus the per-case results from evaluateOss.
 */
function runDataset(ds, rerank, topK) {
    return __awaiter(this, void 0, void 0, function* () {
        const vectorDb = new vector_db_1.VectorDB(config_1.PATHS.lancedbDir);
        const results = [];
        try {
            const searcher = new searcher_1.Searcher(vectorDb);
            const pathPrefix = ds.projectRoot.endsWith("/") ? ds.projectRoot : `${ds.projectRoot}/`;
            for (const c of ds.cases) {
                const t0 = performance.now();
                const res = yield searcher.search(c.query, topK, { rerank }, undefined, pathPrefix);
                const timeMs = performance.now() - t0;
                results.push(evaluateOss(res, c, timeMs));
            }
        }
        finally {
            // Release the handle on both the success and the failure path.
            yield vectorDb.close();
        }
        // Avoid 0/0 when the dataset has no cases.
        const denominator = Math.max(1, results.length);
        const hits = results.filter((r) => r.rank > 0).length;
        const hitsAt1 = results.filter((r) => r.rank === 1).length;
        const mrr = results.reduce((s, r) => s + r.rr, 0) / denominator;
        const recall10 = results.reduce((s, r) => s + r.recall10, 0) / denominator;
        const avgTime = results.reduce((s, r) => s + r.timeMs, 0) / denominator;
        return {
            summary: {
                dataset: ds.name,
                rerank,
                cases: results.length,
                hits,
                hitsAt1,
                mrrAt10: Number(mrr.toFixed(4)),
                recallAt10: Number(recall10.toFixed(4)),
                avgTimeMs: Math.round(avgTime),
            },
            results,
        };
    });
}
|
|
203
|
+
/**
 * CLI entry point: evaluates the requested dataset(s) and prints the results.
 *
 * The first non-flag argument selects a dataset ("all" when absent). JSON
 * mode is enabled by `--json` or GMAX_EVAL_JSON=1; it sends progress lines to
 * stderr and writes one JSON document to stdout at the end. Reranking is
 * enabled by GMAX_EVAL_RERANK=1. An unknown dataset name is reported on
 * stderr and the process exits with status 1.
 *
 * @returns {Promise<void>} Settles once gracefulExit(0) has been awaited.
 */
function run() {
    return __awaiter(this, void 0, void 0, function* () {
        const argv = process.argv.slice(2);
        const positional = argv.find((arg) => !arg.startsWith("--"));
        const datasetArg = positional == null ? "all" : positional;
        const jsonMode = argv.includes("--json") || process.env.GMAX_EVAL_JSON === "1";
        const rerank = process.env.GMAX_EVAL_RERANK === "1";
        const topK = 20;
        let wanted;
        if (datasetArg === "all") {
            wanted = Object.keys(DATASETS);
        }
        else {
            wanted = [datasetArg];
        }
        const out = [];
        // In JSON mode stdout is reserved for the final document.
        const log = jsonMode ? console.error : console.log;
        log(`OSS eval (rerank=${rerank ? "on" : "off"})`);
        for (const name of wanted) {
            const ds = DATASETS[name];
            if (!ds) {
                console.error(`Unknown dataset: ${name}. Known: ${Object.keys(DATASETS).join(", ")}`);
                process.exit(1);
            }
            log(`\n── ${ds.name} (${ds.cases.length} cases, ${ds.projectRoot})`);
            const outcome = yield runDataset(ds, rerank, topK);
            out.push(outcome);
            if (jsonMode) {
                continue;
            }
            outcome.results.forEach((res) => {
                const status = res.rank > 0 ? `rank ${res.rank}` : "miss";
                console.log(` ${res.id} ${res.query.padEnd(20)} → ${status.padEnd(8)} ${res.expectedFile}:${res.expectedLine} [${res.timeMs.toFixed(0)}ms]`);
            });
            const s = outcome.summary;
            console.log(` → MRR@10=${s.mrrAt10} Recall@10=${s.recallAt10} hits@1=${s.hitsAt1}/${s.cases} avg=${s.avgTimeMs}ms`);
        }
        if (jsonMode) {
            process.stdout.write(`${JSON.stringify({ rerank, datasets: out }, null, 2)}\n`);
        }
        yield (0, exit_1.gracefulExit)(0);
    });
}
|
|
239
|
+
if (require.main === module && process.env.GMAX_EVAL_AUTORUN !== "0") {
|
|
240
|
+
run().catch((e) => {
|
|
241
|
+
console.error(e);
|
|
242
|
+
process.exit(1);
|
|
243
|
+
});
|
|
244
|
+
}
|
package/dist/eval.js
CHANGED
|
@@ -582,9 +582,14 @@ function run() {
|
|
|
582
582
|
// changes show up clearly. Set GMAX_EVAL_RERANK=1 to measure the full
|
|
583
583
|
// production pipeline (slower, but more representative).
|
|
584
584
|
const rerank = process.env.GMAX_EVAL_RERANK === "1";
|
|
585
|
+
// Optional pathPrefix scoping — needed by PageRank tiebreaker (which is
|
|
586
|
+
// per-project) and to isolate this bench from cross-project chunks indexed
|
|
587
|
+
// by the same daemon. Defaults to unscoped so historical numbers stay
|
|
588
|
+
// comparable.
|
|
589
|
+
const evalPathPrefix = process.env.GMAX_EVAL_PATH_PREFIX || undefined;
|
|
585
590
|
for (const c of exports.cases) {
|
|
586
591
|
const queryStart = performance.now();
|
|
587
|
-
const res = yield searcher.search(c.query, topK, { rerank });
|
|
592
|
+
const res = yield searcher.search(c.query, topK, { rerank }, undefined, evalPathPrefix);
|
|
588
593
|
const queryEnd = performance.now();
|
|
589
594
|
const timeMs = queryEnd - queryStart;
|
|
590
595
|
results.push(evaluateCase(res, c, timeMs));
|
package/dist/index.js
CHANGED
|
@@ -40,6 +40,7 @@ const path = __importStar(require("node:path"));
|
|
|
40
40
|
const commander_1 = require("commander");
|
|
41
41
|
const add_1 = require("./commands/add");
|
|
42
42
|
const context_1 = require("./commands/context");
|
|
43
|
+
const dead_1 = require("./commands/dead");
|
|
43
44
|
const diff_1 = require("./commands/diff");
|
|
44
45
|
const claude_code_1 = require("./commands/claude-code");
|
|
45
46
|
const codex_1 = require("./commands/codex");
|
|
@@ -114,6 +115,7 @@ commander_1.program.addCommand(symbols_1.symbols);
|
|
|
114
115
|
commander_1.program.addCommand(trace_1.trace);
|
|
115
116
|
commander_1.program.addCommand(extract_1.extract);
|
|
116
117
|
commander_1.program.addCommand(peek_1.peek);
|
|
118
|
+
commander_1.program.addCommand(dead_1.dead);
|
|
117
119
|
commander_1.program.addCommand(project_1.project);
|
|
118
120
|
commander_1.program.addCommand(related_1.related);
|
|
119
121
|
commander_1.program.addCommand(log_1.log);
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
36
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
37
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
38
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
39
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
40
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
41
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
42
|
+
});
|
|
43
|
+
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.computePageRank = computePageRank;
|
|
46
|
+
exports.buildGraphFromDb = buildGraphFromDb;
|
|
47
|
+
exports.writeDiskCache = writeDiskCache;
|
|
48
|
+
exports.loadOrComputePageRank = loadOrComputePageRank;
|
|
49
|
+
exports.pageRankBoostForSymbols = pageRankBoostForSymbols;
|
|
50
|
+
exports._clearMemoryCacheForTests = _clearMemoryCacheForTests;
|
|
51
|
+
exports._cachePathForTests = _cachePathForTests;
|
|
52
|
+
const crypto = __importStar(require("node:crypto"));
|
|
53
|
+
const fs = __importStar(require("node:fs"));
|
|
54
|
+
const path = __importStar(require("node:path"));
|
|
55
|
+
const config_1 = require("../../config");
|
|
56
|
+
const filter_builder_1 = require("../utils/filter-builder");
|
|
57
|
+
const DEFAULT_DAMPING = 0.85;
|
|
58
|
+
const DEFAULT_MAX_ITER = 50;
|
|
59
|
+
const DEFAULT_TOL = 1e-6;
|
|
60
|
+
const DEFAULT_TTL_MS = 60 * 60 * 1000;
|
|
61
|
+
const memoryCache = new Map();
|
|
62
|
+
/**
 * Computes PageRank scores for a symbol graph by power iteration.
 *
 * Edges whose source or target is not listed in `graph.nodes` are ignored, as
 * are self-loops and repeated targets within one source's target list. Rank
 * held by nodes without outgoing edges is redistributed evenly across all
 * nodes on every round. Iteration stops after `maxIter` rounds or once the
 * summed absolute change between rounds drops below `tol`.
 *
 * @param {{nodes: string[], edges: Iterable<[string, Iterable<string>]>}} graph
 *   Node names plus, per source node, the names it points to.
 * @param {number} [damping] Probability of following an edge rather than teleporting.
 * @param {number} [maxIter] Upper bound on iteration rounds.
 * @param {number} [tol] Convergence threshold on the L1 change between rounds.
 * @returns {Map<string, number>} Score per node name; empty when the graph has no nodes.
 */
function computePageRank(graph, damping = DEFAULT_DAMPING, maxIter = DEFAULT_MAX_ITER, tol = DEFAULT_TOL) {
    const names = graph.nodes;
    const count = names.length;
    const ranks = new Map();
    if (count === 0) {
        return ranks;
    }
    const position = new Map();
    for (const [i, name] of names.entries()) {
        position.set(name, i);
    }
    // successors[i] lists the distinct node indices that node i points to.
    const successors = Array.from({ length: count }, () => []);
    for (const [source, targets] of graph.edges) {
        const from = position.get(source);
        if (from === undefined) {
            continue;
        }
        // Seeding the visited set with the source itself drops self-loops.
        const visited = new Set([from]);
        for (const target of targets) {
            const to = position.get(target);
            if (to === undefined || visited.has(to)) {
                continue;
            }
            visited.add(to);
            successors[from].push(to);
        }
    }
    const fanOut = Int32Array.from(successors, (list) => list.length);
    // predecessors[i] lists the node indices that point to node i.
    const predecessors = Array.from({ length: count }, () => []);
    successors.forEach((list, from) => {
        for (const to of list) {
            predecessors[to].push(from);
        }
    });
    let current = new Float64Array(count).fill(1 / count);
    let scratch = new Float64Array(count);
    const teleport = (1 - damping) / count;
    for (let round = 0; round < maxIter; round++) {
        let danglingMass = 0;
        for (let i = 0; i < count; i++) {
            if (fanOut[i] === 0) {
                danglingMass += current[i];
            }
        }
        const danglingShare = (damping * danglingMass) / count;
        let delta = 0;
        for (let i = 0; i < count; i++) {
            let inflow = 0;
            for (const j of predecessors[i]) {
                inflow += current[j] / fanOut[j];
            }
            const updated = teleport + danglingShare + damping * inflow;
            scratch[i] = updated;
            delta += Math.abs(updated - current[i]);
        }
        // Swap buffers so the next round reads the values just written.
        [current, scratch] = [scratch, current];
        if (delta < tol) {
            break;
        }
    }
    for (let i = 0; i < count; i++) {
        ranks.set(names[i], current[i]);
    }
    return ranks;
}
|
|
124
|
+
/**
 * Normalises a symbol-list cell into a plain array of strings.
 *
 * Accepts a JS array or any object exposing `toArray()`; non-string entries
 * are dropped. Falsy input, objects without `toArray`, a `toArray()` that
 * throws, or one that returns a non-array all yield an empty array.
 *
 * @param {*} val Raw cell value.
 * @returns {string[]} The string entries, in their original order.
 */
function toStringArray(val) {
    const keepStrings = (items) => items.filter((item) => typeof item === "string");
    if (Array.isArray(val)) {
        return keepStrings(val);
    }
    if (!val || typeof val.toArray !== "function") {
        return [];
    }
    try {
        const converted = val.toArray();
        return Array.isArray(converted) ? keepStrings(converted) : [];
    }
    catch (_err) {
        return [];
    }
}
|
|
143
|
+
/**
 * Builds a symbol reference graph from the indexed chunks under a path prefix.
 *
 * Every symbol defined by a chunk becomes a node. For each chunk that both
 * defines and references symbols, every defined symbol gains an edge to every
 * symbol the chunk references.
 *
 * @param {object} db Store exposing `ensureTable()`.
 * @param {string} pathPrefix Project root; a trailing slash is added if missing.
 * @returns {Promise<{nodes: string[], edges: Map<string, Set<string>>}>}
 *   Defined symbols plus, per defined symbol, the set of referenced names.
 */
function buildGraphFromDb(db, pathPrefix) {
    return __awaiter(this, void 0, void 0, function* () {
        const table = yield db.ensureTable();
        let scopedPrefix = pathPrefix;
        if (!scopedPrefix.endsWith("/")) {
            scopedPrefix = `${scopedPrefix}/`;
        }
        const filter = `path LIKE '${(0, filter_builder_1.escapeSqlString)(scopedPrefix)}%'`;
        const rows = yield table
            .query()
            .select(["defined_symbols", "referenced_symbols"])
            .where(filter)
            .toArray();
        const definedSymbols = new Set();
        const outgoing = new Map();
        for (const row of rows) {
            const defs = toStringArray(row.defined_symbols);
            const refs = toStringArray(row.referenced_symbols);
            defs.forEach((name) => definedSymbols.add(name));
            // A chunk without references adds nodes but no edges.
            if (refs.length === 0) {
                continue;
            }
            for (const name of defs) {
                if (!outgoing.has(name)) {
                    outgoing.set(name, new Set());
                }
                const targets = outgoing.get(name);
                refs.forEach((ref) => targets.add(ref));
            }
        }
        return { nodes: [...definedSymbols], edges: outgoing };
    });
}
|
|
174
|
+
/**
 * Returns the on-disk cache file used for a given path prefix.
 *
 * The file name is the first 16 hex characters of the SHA-1 of `pathPrefix`,
 * placed in the `pagerank` directory under `PATHS.globalRoot`.
 *
 * @param {string} pathPrefix Project root the scores belong to.
 * @returns {string} Path of the JSON cache file.
 */
function cachePathFor(pathPrefix) {
    const digest = crypto.createHash("sha1").update(pathPrefix).digest("hex");
    const fileName = `${digest.slice(0, 16)}.json`;
    return path.join(config_1.PATHS.globalRoot, "pagerank", fileName);
}
|
|
182
|
+
/**
 * Resolves the cache time-to-live in milliseconds.
 *
 * Reads GMAX_PAGERANK_TTL_MS and parses it as a base-10 integer; any value
 * that is missing, unparsable or not positive falls back to DEFAULT_TTL_MS.
 *
 * @returns {number} TTL in milliseconds.
 */
function getTtlMs() {
    const raw = process.env.GMAX_PAGERANK_TTL_MS;
    const parsed = Number.parseInt(raw == null ? "" : raw, 10);
    if (Number.isFinite(parsed) && parsed > 0) {
        return parsed;
    }
    return DEFAULT_TTL_MS;
}
|
|
187
|
+
/**
 * Loads cached PageRank scores for a path prefix from disk.
 *
 * Returns null when the cache file is absent, unreadable, not valid JSON,
 * lacks a parsable `computedAt` timestamp, or is older than the TTL from
 * getTtlMs(). Score entries that are not finite numbers are skipped.
 *
 * @param {string} pathPrefix Project root the scores belong to.
 * @returns {{scores: Map<string, number>, max: number, computedAt: number} | null}
 *   Scores, the largest score seen (0 when there are none), and the
 *   timestamp in epoch milliseconds; or null on a miss.
 */
function readDiskCache(pathPrefix) {
    const file = cachePathFor(pathPrefix);
    if (!fs.existsSync(file)) {
        return null;
    }
    try {
        const payload = JSON.parse(fs.readFileSync(file, "utf8"));
        const computedAt = Date.parse(payload.computedAt);
        if (!Number.isFinite(computedAt) || Date.now() - computedAt > getTtlMs()) {
            return null;
        }
        const scores = new Map();
        let max = 0;
        Object.entries(payload.scores).forEach(([symbol, value]) => {
            const score = Number(value);
            if (!Number.isFinite(score)) {
                return;
            }
            scores.set(symbol, score);
            if (score > max) {
                max = score;
            }
        });
        return { scores, max, computedAt };
    }
    catch (_err) {
        // Any read or parse failure is treated as a cache miss.
        return null;
    }
}
|
|
214
|
+
/**
 * Persists PageRank scores for a path prefix as a JSON file.
 *
 * Creates the cache directory if needed and records the prefix, the current
 * time as an ISO timestamp, the number of scored nodes, and the scores.
 *
 * @param {string} pathPrefix Project root the scores belong to.
 * @param {Map<string, number>} scores Score per symbol name.
 * @returns {void}
 */
function writeDiskCache(pathPrefix, scores) {
    const target = cachePathFor(pathPrefix);
    const directory = path.dirname(target);
    fs.mkdirSync(directory, { recursive: true });
    const serialized = JSON.stringify({
        pathPrefix,
        computedAt: new Date().toISOString(),
        nodeCount: scores.size,
        scores: Object.fromEntries(scores),
    });
    fs.writeFileSync(target, serialized, "utf8");
}
|
|
225
|
+
/**
 * Returns PageRank scores for a path prefix, using caches where possible.
 *
 * Lookup order: the in-process memory cache (if younger than the TTL), then
 * the disk cache, then a fresh computation from the database. A fresh result
 * is stored in memory and written to disk; a failed disk write is ignored.
 *
 * @param {object} db Store passed through to buildGraphFromDb.
 * @param {string} pathPrefix Project root; also the cache key.
 * @returns {Promise<{scores: Map<string, number>, max: number}>} Scores and
 *   the largest score among them (0 when there are none).
 */
function loadOrComputePageRank(db, pathPrefix) {
    return __awaiter(this, void 0, void 0, function* () {
        const cached = memoryCache.get(pathPrefix);
        if (cached) {
            const age = Date.now() - cached.computedAt;
            if (age < getTtlMs()) {
                return { scores: cached.scores, max: cached.max };
            }
        }
        const fromDisk = readDiskCache(pathPrefix);
        if (fromDisk) {
            memoryCache.set(pathPrefix, fromDisk);
            return { scores: fromDisk.scores, max: fromDisk.max };
        }
        const scores = computePageRank(yield buildGraphFromDb(db, pathPrefix));
        let max = 0;
        scores.forEach((value) => {
            if (value > max) {
                max = value;
            }
        });
        memoryCache.set(pathPrefix, { scores, max, computedAt: Date.now() });
        try {
            writeDiskCache(pathPrefix, scores);
        }
        catch (_err) {
            // Disk persistence is best-effort; the in-memory entry still serves.
        }
        return { scores, max };
    });
}
|
|
251
|
+
/**
 * Computes a normalised PageRank boost for a set of symbols.
 *
 * Takes the highest score found among `symbols` and divides it by `max`.
 * Symbols without a score are ignored.
 *
 * @param {string[]} symbols Symbol names to look up.
 * @param {Map<string, number>} scores Score per symbol name.
 * @param {number} max Largest score in `scores`, used as the normaliser.
 * @returns {number} Best score divided by `max`; 0 when `symbols` is empty
 *   or missing, when `max` is not positive, or when no symbol has a score.
 */
function pageRankBoostForSymbols(symbols, scores, max) {
    const hasSymbols = Boolean(symbols) && symbols.length !== 0;
    if (!hasSymbols || max <= 0) {
        return 0;
    }
    let top = 0;
    for (const symbol of symbols) {
        const score = scores.get(symbol);
        if (score === undefined || !(score > top)) {
            continue;
        }
        top = score;
    }
    return top / max;
}
|
|
262
|
+
/**
 * Test hook: empties the in-process PageRank cache so the next lookup falls
 * through to the disk cache or a fresh computation.
 *
 * @returns {void}
 */
function _clearMemoryCacheForTests() {
    memoryCache.clear();
}
|
|
265
|
+
/**
 * Test hook: exposes the cache file path that cachePathFor derives for a
 * path prefix.
 *
 * @param {string} pathPrefix Project root the scores belong to.
 * @returns {string} Whatever cachePathFor returns for `pathPrefix`.
 */
function _cachePathForTests(pathPrefix) {
    const resolved = cachePathFor(pathPrefix);
    return resolved;
}
|