@veewo/gitnexus 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +234 -0
- package/dist/benchmark/agent-context/evaluators.d.ts +9 -0
- package/dist/benchmark/agent-context/evaluators.js +196 -0
- package/dist/benchmark/agent-context/evaluators.test.d.ts +1 -0
- package/dist/benchmark/agent-context/evaluators.test.js +39 -0
- package/dist/benchmark/agent-context/io.d.ts +2 -0
- package/dist/benchmark/agent-context/io.js +23 -0
- package/dist/benchmark/agent-context/io.test.d.ts +1 -0
- package/dist/benchmark/agent-context/io.test.js +19 -0
- package/dist/benchmark/agent-context/report.d.ts +2 -0
- package/dist/benchmark/agent-context/report.js +59 -0
- package/dist/benchmark/agent-context/report.test.d.ts +1 -0
- package/dist/benchmark/agent-context/report.test.js +85 -0
- package/dist/benchmark/agent-context/runner.d.ts +46 -0
- package/dist/benchmark/agent-context/runner.js +111 -0
- package/dist/benchmark/agent-context/runner.test.d.ts +1 -0
- package/dist/benchmark/agent-context/runner.test.js +79 -0
- package/dist/benchmark/agent-context/tool-runner.d.ts +7 -0
- package/dist/benchmark/agent-context/tool-runner.js +18 -0
- package/dist/benchmark/agent-context/tool-runner.test.d.ts +1 -0
- package/dist/benchmark/agent-context/tool-runner.test.js +11 -0
- package/dist/benchmark/agent-context/types.d.ts +40 -0
- package/dist/benchmark/agent-context/types.js +1 -0
- package/dist/benchmark/analyze-runner.d.ts +16 -0
- package/dist/benchmark/analyze-runner.js +51 -0
- package/dist/benchmark/analyze-runner.test.d.ts +1 -0
- package/dist/benchmark/analyze-runner.test.js +37 -0
- package/dist/benchmark/evaluators.d.ts +6 -0
- package/dist/benchmark/evaluators.js +10 -0
- package/dist/benchmark/evaluators.test.d.ts +1 -0
- package/dist/benchmark/evaluators.test.js +12 -0
- package/dist/benchmark/io.d.ts +7 -0
- package/dist/benchmark/io.js +25 -0
- package/dist/benchmark/io.test.d.ts +1 -0
- package/dist/benchmark/io.test.js +35 -0
- package/dist/benchmark/neonspark-candidates.d.ts +19 -0
- package/dist/benchmark/neonspark-candidates.js +94 -0
- package/dist/benchmark/neonspark-candidates.test.d.ts +1 -0
- package/dist/benchmark/neonspark-candidates.test.js +43 -0
- package/dist/benchmark/neonspark-materialize.d.ts +19 -0
- package/dist/benchmark/neonspark-materialize.js +111 -0
- package/dist/benchmark/neonspark-materialize.test.d.ts +1 -0
- package/dist/benchmark/neonspark-materialize.test.js +124 -0
- package/dist/benchmark/neonspark-sync.d.ts +3 -0
- package/dist/benchmark/neonspark-sync.js +53 -0
- package/dist/benchmark/neonspark-sync.test.d.ts +1 -0
- package/dist/benchmark/neonspark-sync.test.js +20 -0
- package/dist/benchmark/report.d.ts +1 -0
- package/dist/benchmark/report.js +7 -0
- package/dist/benchmark/runner.d.ts +48 -0
- package/dist/benchmark/runner.js +302 -0
- package/dist/benchmark/runner.test.d.ts +1 -0
- package/dist/benchmark/runner.test.js +50 -0
- package/dist/benchmark/scoring.d.ts +16 -0
- package/dist/benchmark/scoring.js +27 -0
- package/dist/benchmark/scoring.test.d.ts +1 -0
- package/dist/benchmark/scoring.test.js +24 -0
- package/dist/benchmark/tool-runner.d.ts +6 -0
- package/dist/benchmark/tool-runner.js +17 -0
- package/dist/benchmark/types.d.ts +36 -0
- package/dist/benchmark/types.js +1 -0
- package/dist/cli/ai-context.d.ts +22 -0
- package/dist/cli/ai-context.js +184 -0
- package/dist/cli/ai-context.test.d.ts +1 -0
- package/dist/cli/ai-context.test.js +30 -0
- package/dist/cli/analyze-multi-scope-regression.test.d.ts +1 -0
- package/dist/cli/analyze-multi-scope-regression.test.js +22 -0
- package/dist/cli/analyze-options.d.ts +7 -0
- package/dist/cli/analyze-options.js +56 -0
- package/dist/cli/analyze-options.test.d.ts +1 -0
- package/dist/cli/analyze-options.test.js +36 -0
- package/dist/cli/analyze.d.ts +14 -0
- package/dist/cli/analyze.js +384 -0
- package/dist/cli/augment.d.ts +13 -0
- package/dist/cli/augment.js +33 -0
- package/dist/cli/benchmark-agent-context.d.ts +29 -0
- package/dist/cli/benchmark-agent-context.js +61 -0
- package/dist/cli/benchmark-agent-context.test.d.ts +1 -0
- package/dist/cli/benchmark-agent-context.test.js +80 -0
- package/dist/cli/benchmark-unity.d.ts +15 -0
- package/dist/cli/benchmark-unity.js +31 -0
- package/dist/cli/benchmark-unity.test.d.ts +1 -0
- package/dist/cli/benchmark-unity.test.js +18 -0
- package/dist/cli/claude-hooks.d.ts +22 -0
- package/dist/cli/claude-hooks.js +97 -0
- package/dist/cli/clean.d.ts +10 -0
- package/dist/cli/clean.js +60 -0
- package/dist/cli/eval-server.d.ts +30 -0
- package/dist/cli/eval-server.js +372 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +182 -0
- package/dist/cli/list.d.ts +6 -0
- package/dist/cli/list.js +33 -0
- package/dist/cli/mcp.d.ts +8 -0
- package/dist/cli/mcp.js +34 -0
- package/dist/cli/repo-manager-alias.test.d.ts +1 -0
- package/dist/cli/repo-manager-alias.test.js +40 -0
- package/dist/cli/scope-filter.test.d.ts +1 -0
- package/dist/cli/scope-filter.test.js +49 -0
- package/dist/cli/serve.d.ts +4 -0
- package/dist/cli/serve.js +6 -0
- package/dist/cli/setup.d.ts +8 -0
- package/dist/cli/setup.js +311 -0
- package/dist/cli/setup.test.d.ts +1 -0
- package/dist/cli/setup.test.js +31 -0
- package/dist/cli/status.d.ts +6 -0
- package/dist/cli/status.js +27 -0
- package/dist/cli/tool.d.ts +40 -0
- package/dist/cli/tool.js +94 -0
- package/dist/cli/version.test.d.ts +1 -0
- package/dist/cli/version.test.js +19 -0
- package/dist/cli/wiki.d.ts +15 -0
- package/dist/cli/wiki.js +361 -0
- package/dist/config/ignore-service.d.ts +1 -0
- package/dist/config/ignore-service.js +210 -0
- package/dist/config/supported-languages.d.ts +12 -0
- package/dist/config/supported-languages.js +15 -0
- package/dist/core/augmentation/engine.d.ts +26 -0
- package/dist/core/augmentation/engine.js +213 -0
- package/dist/core/embeddings/embedder.d.ts +60 -0
- package/dist/core/embeddings/embedder.js +251 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
- package/dist/core/embeddings/embedding-pipeline.js +329 -0
- package/dist/core/embeddings/index.d.ts +9 -0
- package/dist/core/embeddings/index.js +9 -0
- package/dist/core/embeddings/text-generator.d.ts +24 -0
- package/dist/core/embeddings/text-generator.js +182 -0
- package/dist/core/embeddings/types.d.ts +87 -0
- package/dist/core/embeddings/types.js +32 -0
- package/dist/core/graph/graph.d.ts +2 -0
- package/dist/core/graph/graph.js +66 -0
- package/dist/core/graph/types.d.ts +61 -0
- package/dist/core/graph/types.js +1 -0
- package/dist/core/ingestion/ast-cache.d.ts +11 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -0
- package/dist/core/ingestion/call-processor.js +327 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
- package/dist/core/ingestion/cluster-enricher.js +170 -0
- package/dist/core/ingestion/community-processor.d.ts +39 -0
- package/dist/core/ingestion/community-processor.js +312 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
- package/dist/core/ingestion/entry-point-scoring.js +260 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
- package/dist/core/ingestion/filesystem-walker.js +80 -0
- package/dist/core/ingestion/framework-detection.d.ts +39 -0
- package/dist/core/ingestion/framework-detection.js +235 -0
- package/dist/core/ingestion/heritage-processor.d.ts +20 -0
- package/dist/core/ingestion/heritage-processor.js +197 -0
- package/dist/core/ingestion/import-processor.d.ts +38 -0
- package/dist/core/ingestion/import-processor.js +778 -0
- package/dist/core/ingestion/parsing-processor.d.ts +15 -0
- package/dist/core/ingestion/parsing-processor.js +291 -0
- package/dist/core/ingestion/pipeline.d.ts +5 -0
- package/dist/core/ingestion/pipeline.js +323 -0
- package/dist/core/ingestion/process-processor.d.ts +51 -0
- package/dist/core/ingestion/process-processor.js +309 -0
- package/dist/core/ingestion/scope-filter.d.ts +25 -0
- package/dist/core/ingestion/scope-filter.js +100 -0
- package/dist/core/ingestion/structure-processor.d.ts +2 -0
- package/dist/core/ingestion/structure-processor.js +36 -0
- package/dist/core/ingestion/symbol-table.d.ts +33 -0
- package/dist/core/ingestion/symbol-table.js +38 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -0
- package/dist/core/ingestion/tree-sitter-queries.js +398 -0
- package/dist/core/ingestion/utils.d.ts +10 -0
- package/dist/core/ingestion/utils.js +50 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +59 -0
- package/dist/core/ingestion/workers/parse-worker.js +672 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
- package/dist/core/ingestion/workers/worker-pool.js +120 -0
- package/dist/core/kuzu/csv-generator.d.ts +29 -0
- package/dist/core/kuzu/csv-generator.js +336 -0
- package/dist/core/kuzu/kuzu-adapter.d.ts +101 -0
- package/dist/core/kuzu/kuzu-adapter.js +753 -0
- package/dist/core/kuzu/schema.d.ts +53 -0
- package/dist/core/kuzu/schema.js +407 -0
- package/dist/core/search/bm25-index.d.ts +23 -0
- package/dist/core/search/bm25-index.js +95 -0
- package/dist/core/search/hybrid-search.d.ts +49 -0
- package/dist/core/search/hybrid-search.js +118 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
- package/dist/core/tree-sitter/parser-loader.js +44 -0
- package/dist/core/wiki/generator.d.ts +110 -0
- package/dist/core/wiki/generator.js +786 -0
- package/dist/core/wiki/graph-queries.d.ts +80 -0
- package/dist/core/wiki/graph-queries.js +238 -0
- package/dist/core/wiki/html-viewer.d.ts +10 -0
- package/dist/core/wiki/html-viewer.js +297 -0
- package/dist/core/wiki/llm-client.d.ts +40 -0
- package/dist/core/wiki/llm-client.js +162 -0
- package/dist/core/wiki/prompts.d.ts +53 -0
- package/dist/core/wiki/prompts.js +174 -0
- package/dist/lib/utils.d.ts +1 -0
- package/dist/lib/utils.js +3 -0
- package/dist/mcp/core/embedder.d.ts +27 -0
- package/dist/mcp/core/embedder.js +108 -0
- package/dist/mcp/core/kuzu-adapter.d.ts +34 -0
- package/dist/mcp/core/kuzu-adapter.js +231 -0
- package/dist/mcp/local/local-backend.d.ts +160 -0
- package/dist/mcp/local/local-backend.js +1646 -0
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +407 -0
- package/dist/mcp/server.d.ts +23 -0
- package/dist/mcp/server.js +251 -0
- package/dist/mcp/staleness.d.ts +15 -0
- package/dist/mcp/staleness.js +29 -0
- package/dist/mcp/tools.d.ts +24 -0
- package/dist/mcp/tools.js +195 -0
- package/dist/server/api.d.ts +10 -0
- package/dist/server/api.js +344 -0
- package/dist/server/mcp-http.d.ts +13 -0
- package/dist/server/mcp-http.js +100 -0
- package/dist/storage/git.d.ts +6 -0
- package/dist/storage/git.js +32 -0
- package/dist/storage/repo-manager.d.ts +125 -0
- package/dist/storage/repo-manager.js +257 -0
- package/dist/types/pipeline.d.ts +34 -0
- package/dist/types/pipeline.js +18 -0
- package/hooks/claude/gitnexus-hook.cjs +135 -0
- package/hooks/claude/pre-tool-use.sh +78 -0
- package/hooks/claude/session-start.sh +42 -0
- package/package.json +92 -0
- package/skills/gitnexus-cli.md +82 -0
- package/skills/gitnexus-debugging.md +89 -0
- package/skills/gitnexus-exploring.md +78 -0
- package/skills/gitnexus-guide.md +64 -0
- package/skills/gitnexus-impact-analysis.md +97 -0
- package/skills/gitnexus-refactoring.md +121 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { runAnalyze, parseAnalyzeSummary } from './analyze-runner.js';
|
|
3
|
+
import { evaluateGates, computePR, computeF1 } from './scoring.js';
|
|
4
|
+
import { buildFailureTriage } from './evaluators.js';
|
|
5
|
+
import { writeReports } from './report.js';
|
|
6
|
+
import { createToolRunner } from './tool-runner.js';
|
|
7
|
+
/**
 * Decide which repository name the benchmark tools should be pointed at.
 *
 * Precedence: `options.repo`, then `options.repoAlias`, then the last path
 * segment of the resolved `options.targetPath`. Falsy values (including empty
 * strings) are treated as "not provided".
 *
 * @param {{ repo?: string, repoAlias?: string, targetPath?: string }} options
 * @returns {string | undefined} The chosen name, or `undefined` when none of
 *   the three inputs is set.
 */
export function resolveBenchmarkRepoName(options) {
    if (options.repo) {
        return options.repo;
    }
    if (options.repoAlias) {
        return options.repoAlias;
    }
    if (!options.targetPath) {
        return undefined;
    }
    const absoluteTarget = path.resolve(options.targetPath);
    return path.basename(absoluteTarget);
}
|
|
10
|
+
/**
 * Canonicalise a string for comparison: strip surrounding whitespace, then
 * lower-case it.
 *
 * @param {string} s
 * @returns {string}
 */
function normalize(s) {
    const trimmed = s.trim();
    return trimmed.toLowerCase();
}
|
|
13
|
+
/**
 * Derive the comparable symbol name from an expectation string.
 *
 * Takes the text after the last `:`; when that tail is empty (no text after
 * the final colon) the whole input is used instead. The result is normalized.
 *
 * @param {string} uid
 * @returns {string}
 */
function expectedName(uid) {
    const segments = uid.split(':');
    const tail = segments[segments.length - 1];
    return normalize(tail ? tail : uid);
}
|
|
17
|
+
/**
 * Collect every symbol uid and name mentioned anywhere in a tool result.
 *
 * Sources are visited in a fixed order: `process_symbols`, `definitions`,
 * `symbol`, `target`, each array under `byDepth`, then `candidates`. Values
 * are kept only when they are non-blank strings; they are trimmed and
 * de-duplicated, preserving first-seen order.
 *
 * @param {object | null | undefined} result Raw tool result.
 * @returns {{ uids: string[], names: string[] }}
 */
function extractHits(result) {
    const seenUids = new Set();
    const seenNames = new Set();
    // Store the trimmed value when it is a non-blank string; ignore anything else.
    const remember = (bucket, value) => {
        if (typeof value !== 'string') {
            return;
        }
        const trimmed = value.trim();
        if (trimmed) {
            bucket.add(trimmed);
        }
    };
    const record = (uid, name) => {
        remember(seenUids, uid);
        remember(seenNames, name);
    };
    for (const processSymbol of result?.process_symbols || []) {
        record(processSymbol?.id, processSymbol?.name);
    }
    for (const definition of result?.definitions || []) {
        record(definition?.id, definition?.name);
    }
    // `symbol` carries its identifier under `uid`, `target` under `id`.
    record(result?.symbol?.uid, result?.symbol?.name);
    record(result?.target?.id, result?.target?.name);
    const depthBuckets = Object.values(result?.byDepth || {});
    for (const rows of depthBuckets) {
        if (Array.isArray(rows)) {
            for (const row of rows) {
                record(row?.id, row?.name);
            }
        }
    }
    for (const candidate of result?.candidates || []) {
        record(candidate?.uid, candidate?.name);
    }
    return { uids: Array.from(seenUids), names: Array.from(seenNames) };
}
|
|
56
|
+
/**
 * Decide whether a required expectation is satisfied by the collected hits.
 *
 * Two modes, chosen by whether `expectedUid` contains a `:`:
 *  - uid mode: only `hitUids` are consulted. A hit matches when, after
 *    normalization, it equals the expectation or one is a suffix of the other.
 *    Names are deliberately ignored so a same-named symbol from another file
 *    does not count.
 *  - name mode: a hit uid matches when it contains the expected name; a hit
 *    name matches when either string contains the other.
 *
 * Blank strings never match. Every string contains (and ends with) the empty
 * string, so without this rule a blank expectation or a blank hit would
 * satisfy any comparison.
 *
 * @param {string} expectedUid Expected uid (contains `:`) or bare symbol name.
 * @param {Iterable<string>} hitUids Uids returned by the tool.
 * @param {Iterable<string>} hitNames Names returned by the tool; only read in
 *   name mode.
 * @returns {boolean}
 */
export function hasRequiredHitFuzzy(expectedUid, hitUids, hitNames) {
    const expectedNorm = normalize(expectedUid);
    if (expectedNorm === '') {
        // Nothing meaningful was asked for, so nothing can satisfy it.
        return false;
    }
    if (expectedUid.includes(':')) {
        for (const uid of hitUids) {
            const candidate = normalize(uid);
            if (candidate === '') {
                continue;
            }
            if (candidate === expectedNorm || candidate.endsWith(expectedNorm) || expectedNorm.endsWith(candidate)) {
                return true;
            }
        }
        return false;
    }
    const expectedSym = expectedName(expectedUid);
    if (expectedSym === '') {
        return false;
    }
    for (const uid of hitUids) {
        if (normalize(uid).includes(expectedSym)) {
            return true;
        }
    }
    for (const name of hitNames) {
        const candidate = normalize(name);
        if (candidate === '') {
            continue;
        }
        // Equality is covered by `includes`; containment is accepted in both directions.
        if (candidate.includes(expectedSym) || expectedSym.includes(candidate)) {
            return true;
        }
    }
    return false;
}
|
|
83
|
+
/**
 * Report whether a forbidden uid appears among the hits.
 *
 * Matching is exact after normalization (trim + lower-case); no suffix or
 * name-based matching is applied.
 *
 * @param {string} forbiddenUid
 * @param {string[]} hitUids
 * @returns {boolean}
 */
export function hasForbiddenUidHitStrict(forbiddenUid, hitUids) {
    const target = normalize(forbiddenUid);
    const isForbidden = (candidate) => target === normalize(candidate);
    return hitUids.some(isForbidden);
}
|
|
87
|
+
/**
 * Build the parameter object passed to a tool for one benchmark task.
 *
 * Works on a shallow copy of `task.input`, so the dataset entry is not mutated.
 *  - A truthy `repo` overrides any `repo` already present in the input.
 *  - `query` tasks: `search_query` is mirrored into `query` when `query` is
 *    absent and `search_query` is a string.
 *  - `impact` tasks: `direction` defaults to `'upstream'` when absent.
 *
 * @param {{ tool: string, input?: object }} task
 * @param {string | undefined} repo
 * @returns {object}
 */
function mapToolInput(task, repo) {
    const params = { ...(task.input || {}) };
    if (repo) {
        params.repo = repo;
    }
    switch (task.tool) {
        case 'query': {
            const needsAlias = !('query' in params) && typeof params.search_query === 'string';
            if (needsAlias) {
                params.query = params.search_query;
            }
            break;
        }
        case 'impact': {
            if (!('direction' in params)) {
                params.direction = 'upstream';
            }
            break;
        }
        default:
            break;
    }
    return params;
}
|
|
102
|
+
/**
 * Count how many items a tool result contains, per tool:
 *  - `query`: `process_symbols` plus `definitions`.
 *  - `context`: all entries under `incoming` and `outgoing` (grouped lists are
 *    flattened one level), plus `processes`, plus 1 when `symbol` is present.
 *  - `impact`: numeric `impactedCount`, plus 1 when `target` is present.
 *  - any other tool: 0.
 *
 * @param {string} tool
 * @param {object | null | undefined} result
 * @returns {number}
 */
function resultCount(tool, result) {
    // Total entries across every group of a `{ group: items[] }` map.
    const groupedSize = (groups) => Object.values(groups || {}).flat().length;
    switch (tool) {
        case 'query': {
            const processSymbols = result?.process_symbols?.length || 0;
            const definitions = result?.definitions?.length || 0;
            return processSymbols + definitions;
        }
        case 'context': {
            const inbound = groupedSize(result?.incoming);
            const outbound = groupedSize(result?.outgoing);
            const processes = result?.processes?.length || 0;
            const self = result?.symbol ? 1 : 0;
            return inbound + outbound + processes + self;
        }
        case 'impact': {
            const impacted = Number(result?.impactedCount || 0);
            const self = result?.target ? 1 : 0;
            return impacted + self;
        }
        default:
            return 0;
    }
}
|
|
116
|
+
/**
 * Run one benchmark task through its tool and score the outcome.
 *
 * Failure kinds recorded:
 *  - `tool-error`: the tool returned an object with a truthy `error`.
 *  - `tool-execution-error`: calling the tool (or scoring its result) threw.
 *  - `ambiguous-name-wrong-hit`: the result has `status === 'ambiguous'`.
 *  - `missing-required-hit`: an entry of `must_hit_uids` was not matched.
 *  - `forbidden-hit-present`: an entry of `must_not_hit_uids` was returned.
 *  - `insufficient-result-count`: fewer results than `min_result_count`.
 *  - `context-empty-refs`: a `context` result has no incoming/outgoing refs.
 *  - `impact-downstream-zero`: an `impact` result has `impactedCount` of 0.
 *
 * The uid lists are normalized to arrays once, up front. The error paths also
 * report `gold`, so reading `task.must_hit_uids.length` inside the `catch`
 * handler would throw again for a task missing that list and abort the whole
 * run instead of recording a failure.
 *
 * @param {number} index Position of the task in the dataset; copied into
 *   every failure as `taskIndex`.
 * @param {object} task Task definition (`tool`, `input`, `must_hit_uids`,
 *   `must_not_hit_uids`, optional `min_result_count`).
 * @param {(tool: string, params: object) => Promise<any>} runTool
 * @param {string | undefined} repo Repository name injected into the input.
 * @returns {Promise<{ smokePass: boolean, tool: string, hits: string[],
 *   names: string[], truePositive: number, predicted: number, gold: number,
 *   failures: object[] }>} Never rejects for tool failures; they are reported
 *   through `failures` with `smokePass: false`.
 */
async function evaluateTask(index, task, runTool, repo) {
    const mustHit = Array.isArray(task.must_hit_uids) ? task.must_hit_uids : [];
    const mustNotHit = Array.isArray(task.must_not_hit_uids) ? task.must_not_hit_uids : [];
    // Shared shape for both "tool reported an error" and "tool threw".
    const aborted = (kind, detail) => ({
        smokePass: false,
        tool: task.tool,
        hits: [],
        names: [],
        truePositive: 0,
        predicted: 0,
        gold: mustHit.length,
        failures: [{ kind, taskIndex: index, detail }],
    });
    const failures = [];
    try {
        const params = mapToolInput(task, repo);
        const result = await runTool(task.tool, params);
        if (result?.error) {
            return aborted('tool-error', String(result.error));
        }
        if (result?.status === 'ambiguous') {
            failures.push({ kind: 'ambiguous-name-wrong-hit', taskIndex: index, detail: result?.message });
        }
        const hits = extractHits(result);
        let truePositive = 0;
        for (const expected of mustHit) {
            if (hasRequiredHitFuzzy(expected, hits.uids, hits.names)) {
                truePositive += 1;
            }
            else {
                failures.push({ kind: 'missing-required-hit', taskIndex: index, detail: expected });
            }
        }
        for (const forbidden of mustNotHit) {
            if (hasForbiddenUidHitStrict(forbidden, hits.uids)) {
                failures.push({ kind: 'forbidden-hit-present', taskIndex: index, detail: forbidden });
            }
        }
        const count = resultCount(task.tool, result);
        if (typeof task.min_result_count === 'number' && count < task.min_result_count) {
            failures.push({ kind: 'insufficient-result-count', taskIndex: index, detail: `${count} < ${task.min_result_count}` });
        }
        if (task.tool === 'context') {
            const refs = Object.values(result?.incoming || {}).flat().length + Object.values(result?.outgoing || {}).flat().length;
            if (refs === 0) {
                failures.push({ kind: 'context-empty-refs', taskIndex: index });
            }
        }
        if (task.tool === 'impact' && Number(result?.impactedCount || 0) === 0) {
            failures.push({ kind: 'impact-downstream-zero', taskIndex: index });
        }
        return {
            // A result with an `error` returned early above, so reaching here means the call itself succeeded.
            smokePass: true,
            tool: task.tool,
            hits: hits.uids,
            names: hits.names,
            truePositive,
            predicted: hits.uids.length,
            gold: mustHit.length,
            failures,
        };
    }
    catch (error) {
        return aborted('tool-execution-error', String(error?.message || error));
    }
}
|
|
190
|
+
/**
 * Execute the benchmark end to end and write its reports.
 *
 * Steps, in order:
 *  1. Unless `options.skipAnalyze` is set, run `runAnalyze` on
 *     `options.targetPath` and parse a summary from its stdout and stderr.
 *  2. Truncate `ds.tasks` / `ds.symbols` to the profile limits.
 *  3. Create a tool runner, evaluate each task sequentially, and always close
 *     the runner afterwards.
 *  4. Aggregate precision/recall for `query` tasks and F1 for all other tasks,
 *     plus the smoke pass rate. `perfRegressionPct` is reported as a constant 0.
 *  5. Evaluate gates against `ds.thresholds`, build the failure triage, and
 *     pass a JSON report and a markdown summary to `writeReports`.
 *
 * @param {object} ds Dataset with `tasks`, `symbols`, `relations`, `thresholds`.
 * @param {object} options Run options: `profile` (`maxTasks`, `maxSymbols`),
 *   `reportDir`, `repo`, `repoAlias`, `targetPath`, `skipAnalyze`,
 *   `extensions`, `scopeManifest`, `scopePrefix`.
 * @returns {Promise<{ pass: boolean, failures: string[], metrics: object,
 *   triage: any, analyze: any, reportDir: string }>}
 * @throws {Error} When analysis is requested but `options.targetPath` is missing.
 */
export async function runBenchmark(ds, options) {
    const reportDir = path.resolve(options.reportDir || '.gitnexus/benchmark');
    const repo = resolveBenchmarkRepoName(options);
    let analyzeSummary;
    if (!options.skipAnalyze) {
        if (!options.targetPath) {
            throw new Error('targetPath is required unless skipAnalyze is true');
        }
        const analyzeOptions = {
            extensions: options.extensions,
            repoAlias: options.repoAlias,
            scopeManifest: options.scopeManifest,
            scopePrefix: options.scopePrefix,
        };
        const analyze = await runAnalyze(path.resolve(options.targetPath), analyzeOptions);
        analyzeSummary = parseAnalyzeSummary(`${analyze.stdout}\n${analyze.stderr}`);
    }
    const limitedTasks = ds.tasks.slice(0, options.profile.maxTasks);
    const limitedSymbols = ds.symbols.slice(0, options.profile.maxSymbols);
    const toolRunner = await createToolRunner();
    const failures = [];
    // Running totals: `query` tasks feed precision/recall, every other tool feeds the context/impact F1.
    const queryTally = { tp: 0, pred: 0, gold: 0 };
    const ciTally = { tp: 0, pred: 0, gold: 0 };
    let smokePassCount = 0;
    // Anything that is not `query` or `context` is dispatched to `impact`.
    const runTool = async (tool, params) => {
        const method = tool === 'query' || tool === 'context' ? tool : 'impact';
        return toolRunner[method](params);
    };
    try {
        for (const [i, task] of limitedTasks.entries()) {
            const outcome = await evaluateTask(i, task, runTool, repo);
            if (outcome.smokePass) {
                smokePassCount += 1;
            }
            failures.push(...outcome.failures);
            const tally = task.tool === 'query' ? queryTally : ciTally;
            tally.tp += outcome.truePositive;
            tally.pred += outcome.predicted;
            tally.gold += outcome.gold;
        }
    }
    finally {
        await toolRunner.close();
    }
    if (limitedSymbols.length === 0) {
        failures.push({ kind: 'dataset-empty-symbols' });
    }
    if (ds.relations.length === 0) {
        failures.push({ kind: 'dataset-empty-relations' });
    }
    // With nothing expected and nothing predicted the score is reported as perfect rather than zero.
    const scoreOf = ({ tp, pred, gold }) => (gold > 0 || pred > 0 ? computePR(tp, pred, gold) : { precision: 1, recall: 1 });
    const queryPR = scoreOf(queryTally);
    const ciPR = scoreOf(ciTally);
    const taskCount = limitedTasks.length;
    const metrics = {
        queryPrecision: queryPR.precision,
        queryRecall: queryPR.recall,
        contextImpactF1: computeF1(ciPR.precision, ciPR.recall),
        smokePassRate: taskCount === 0 ? 1 : smokePassCount / taskCount,
        perfRegressionPct: 0,
    };
    const gateResult = evaluateGates(metrics, ds.thresholds);
    const triage = buildFailureTriage(failures);
    const jsonReport = {
        generatedAt: new Date().toISOString(),
        repo,
        profile: options.profile,
        metrics,
        thresholds: ds.thresholds,
        gate: gateResult,
        analyze: analyzeSummary,
        triage,
        failures,
        stats: {
            symbols: limitedSymbols.length,
            relations: ds.relations.length,
            tasks: taskCount,
        },
    };
    const gateLine = gateResult.failures.length > 0
        ? `- Gate Failures: ${gateResult.failures.join(', ')}`
        : '- Gate Failures: none';
    const markdownLines = [
        '# Unity Benchmark Summary',
        '',
        `- Pass: ${gateResult.pass ? 'YES' : 'NO'}`,
        `- Query Precision: ${metrics.queryPrecision.toFixed(3)}`,
        `- Query Recall: ${metrics.queryRecall.toFixed(3)}`,
        `- Context/Impact F1: ${metrics.contextImpactF1.toFixed(3)}`,
        `- Smoke Pass Rate: ${metrics.smokePassRate.toFixed(3)}`,
        `- Perf Regression: ${metrics.perfRegressionPct.toFixed(2)}%`,
        gateLine,
    ];
    const markdown = markdownLines.join('\n');
    await writeReports(reportDir, jsonReport, markdown);
    return {
        pass: gateResult.pass,
        failures: gateResult.failures,
        metrics,
        triage,
        analyze: analyzeSummary,
        reportDir,
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import { hasForbiddenUidHitStrict, hasRequiredHitFuzzy, resolveBenchmarkRepoName } from './runner.js';
|
|
4
|
+
// --- resolveBenchmarkRepoName: precedence is repo > repoAlias > basename(targetPath) ---
test('resolveBenchmarkRepoName prefers explicit repo', () => {
    const options = { repo: 'my-repo', repoAlias: 'alias-repo', targetPath: '/tmp/source' };
    assert.equal(resolveBenchmarkRepoName(options), 'my-repo');
});
test('resolveBenchmarkRepoName falls back to repo alias', () => {
    const options = { repoAlias: 'neonspark-v1-subset', targetPath: '/tmp/source' };
    assert.equal(resolveBenchmarkRepoName(options), 'neonspark-v1-subset');
});
test('resolveBenchmarkRepoName uses target basename when no repo input exists', () => {
    const options = { targetPath: '/Volumes/Shuttle/unity-projects/neonspark' };
    assert.equal(resolveBenchmarkRepoName(options), 'neonspark');
});
// --- hasRequiredHitFuzzy: uid expectations match on uid only; bare names may match on name ---
test('hasRequiredHitFuzzy does not treat wrong same-name uid as a required hit for uid expectations', () => {
    // Same symbol name, different file: must not count even though the name list contains it.
    const wanted = 'Class:Assets/NEON/Code/Game/LootSystem/LootManager.cs:LootManager';
    const returnedUids = ['Class:Assets/NEON/Code/Game/LootSystem/LootDropRecorder.cs:LootManager'];
    assert.equal(hasRequiredHitFuzzy(wanted, returnedUids, ['LootManager']), false);
});
test('hasRequiredHitFuzzy accepts correct uid for required hit', () => {
    // Comparison is case-insensitive.
    const wanted = 'Class:Assets/NEON/Code/Game/LootSystem/LootManager.cs:LootManager';
    const returnedUids = ['class:assets/neon/code/game/lootsystem/lootmanager.cs:lootmanager'];
    const matched = hasRequiredHitFuzzy(wanted, returnedUids, []);
    assert.equal(matched, true);
});
test('hasRequiredHitFuzzy keeps legacy name fallback for non-uid expectations', () => {
    assert.equal(hasRequiredHitFuzzy('LootManager', [], ['LootManager']), true);
});
// --- hasForbiddenUidHitStrict: exact match after trim + lower-case ---
test('hasForbiddenUidHitStrict ignores same-name symbol with different uid', () => {
    const banned = 'Class:Assets/NEON/Code/Game/LootSystem/LootManager.cs:LootManager';
    const returnedUids = ['Class:Assets/NEON/Code/Game/LootSystem/LootDropRecorder.cs:LootManager'];
    const flagged = hasForbiddenUidHitStrict(banned, returnedUids);
    assert.equal(flagged, false);
});
test('hasForbiddenUidHitStrict matches only exact normalized uid', () => {
    // Surrounding whitespace and letter case are ignored.
    const banned = 'Class:Assets/NEON/Code/Game/LootSystem/LootManager.cs:LootManager';
    const returnedUids = [' class:assets/neon/code/game/lootsystem/lootmanager.cs:lootmanager '];
    const flagged = hasForbiddenUidHitStrict(banned, returnedUids);
    assert.equal(flagged, true);
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { Thresholds } from './types.js';
|
|
2
|
+
/**
 * Compute precision and recall from raw counts.
 *
 * @param truePositive Number of expected items that were found.
 * @param predicted Number of items returned.
 * @param gold Number of items expected.
 * @returns Precision (`truePositive / predicted`) and recall (`truePositive / gold`).
 */
export declare function computePR(truePositive: number, predicted: number, gold: number): { precision: number; recall: number };
|
|
6
|
+
/**
 * Combine precision and recall into an F1 score.
 *
 * @param precision Precision value.
 * @param recall Recall value.
 * @returns The F1 score.
 */
export declare function computeF1(precision: number, recall: number): number;
|
|
7
|
+
/**
 * Check benchmark metrics against the configured thresholds.
 *
 * @param metrics Measured values for the run.
 * @param thresholds Limits the metrics are compared with.
 * @returns `pass` plus the list of gate identifiers that failed.
 */
export declare function evaluateGates(
    metrics: {
        queryPrecision: number;
        queryRecall: number;
        contextImpactF1: number;
        smokePassRate: number;
        perfRegressionPct: number;
    },
    thresholds: Thresholds,
): { pass: boolean; failures: string[] };
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Compute precision and recall from raw counts.
 *
 * A zero denominator yields 0 for that ratio instead of `NaN`/`Infinity`.
 *
 * @param {number} truePositive Expected items that were found.
 * @param {number} predicted Items returned (precision denominator).
 * @param {number} gold Items expected (recall denominator).
 * @returns {{ precision: number, recall: number }}
 */
export function computePR(truePositive, predicted, gold) {
    const ratio = (numerator, denominator) => (denominator === 0 ? 0 : numerator / denominator);
    return {
        precision: ratio(truePositive, predicted),
        recall: ratio(truePositive, gold),
    };
}
|
|
6
|
+
/**
 * Harmonic mean of precision and recall.
 *
 * @param {number} precision
 * @param {number} recall
 * @returns {number} `2PR / (P + R)`, or 0 when `P + R` is 0.
 */
export function computeF1(precision, recall) {
    const total = precision + recall;
    if (total === 0) {
        return 0;
    }
    return (2 * precision * recall) / total;
}
|
|
9
|
+
/**
 * Compare benchmark metrics with their thresholds and list the gates that fail.
 *
 * The four quality metrics fail when strictly below their minimum; the
 * performance regression fails when strictly above its maximum. Failures are
 * reported in a fixed order: `query.precision`, `query.recall`,
 * `contextImpact.f1`, `smoke.passRate`, `performance.analyzeTimeRegression`.
 *
 * @param {object} metrics `queryPrecision`, `queryRecall`, `contextImpactF1`,
 *   `smokePassRate`, `perfRegressionPct`.
 * @param {object} thresholds Nested limits (`query`, `contextImpact`, `smoke`,
 *   `performance`).
 * @returns {{ pass: boolean, failures: string[] }}
 */
export function evaluateGates(metrics, thresholds) {
    const checks = [
        { label: 'query.precision', failed: metrics.queryPrecision < thresholds.query.precisionMin },
        { label: 'query.recall', failed: metrics.queryRecall < thresholds.query.recallMin },
        { label: 'contextImpact.f1', failed: metrics.contextImpactF1 < thresholds.contextImpact.f1Min },
        { label: 'smoke.passRate', failed: metrics.smokePassRate < thresholds.smoke.passRateMin },
        {
            label: 'performance.analyzeTimeRegression',
            failed: metrics.perfRegressionPct > thresholds.performance.analyzeTimeRegressionMaxPct,
        },
    ];
    const failures = [];
    for (const check of checks) {
        if (check.failed) {
            failures.push(check.label);
        }
    }
    return { pass: failures.length === 0, failures };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import { computePR, evaluateGates } from './scoring.js';
|
|
4
|
+
test('computePR returns precision and recall', () => {
    // 9 correct out of 10 returned and 12 expected.
    const { precision, recall } = computePR(9, 10, 12);
    assert.equal(precision.toFixed(2), '0.90');
    assert.equal(recall.toFixed(2), '0.75');
});
test('evaluateGates fails when one hard threshold fails', () => {
    // Only recall (0.84) sits below its minimum (0.85); every other metric meets its gate.
    const metrics = {
        queryPrecision: 0.9,
        queryRecall: 0.84,
        contextImpactF1: 0.82,
        smokePassRate: 1,
        perfRegressionPct: 10,
    };
    const thresholds = {
        query: { precisionMin: 0.9, recallMin: 0.85 },
        contextImpact: { f1Min: 0.8 },
        smoke: { passRateMin: 1 },
        performance: { analyzeTimeRegressionMaxPct: 15 },
    };
    const result = evaluateGates(metrics, thresholds);
    assert.equal(result.pass, false);
    assert.ok(result.failures.includes('query.recall'));
});
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { closeKuzu } from '../mcp/core/kuzu-adapter.js';
|
|
2
|
+
import { LocalBackend } from '../mcp/local/local-backend.js';
|
|
3
|
+
/**
 * Create a thin runner around a `LocalBackend` for the benchmark tools.
 *
 * @returns {Promise<{ query: Function, context: Function, impact: Function,
 *   close: () => Promise<void> }>} `query`/`context`/`impact` forward their
 *   params to `backend.callTool` under the same tool name; `close` awaits
 *   `closeKuzu()`.
 * @throws {Error} When `backend.init()` resolves to a falsy value.
 */
export async function createToolRunner() {
    const backend = new LocalBackend();
    const initialised = await backend.init();
    if (!initialised) {
        throw new Error('No indexed repositories found. Run analyze first.');
    }
    // Build a forwarder bound to one tool name.
    const forwardTo = (toolName) => (params) => backend.callTool(toolName, params);
    return {
        query: forwardTo('query'),
        context: forwardTo('context'),
        impact: forwardTo('impact'),
        close: async () => {
            await closeKuzu();
        },
    };
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
 * Pass/fail limits for a benchmark run, grouped by the metric they gate.
 * Fields ending in `Min` are lower bounds; the field ending in `MaxPct` is an
 * upper bound expressed as a percentage.
 */
export interface Thresholds {
    /** Limits for query-tool precision and recall. */
    query: {
        precisionMin: number;
        recallMin: number;
    };
    /** Limit for the combined context/impact F1 score. */
    contextImpact: {
        f1Min: number;
    };
    /** Limit for the smoke pass rate. */
    smoke: {
        passRateMin: number;
    };
    /** Limit for the analyze-time regression. */
    performance: {
        analyzeTimeRegressionMaxPct: number;
    };
}
|
|
16
|
+
/**
 * One symbol entry of a benchmark dataset.
 *
 * NOTE(review): field meanings below are inferred from the field names only —
 * confirm against the dataset loader.
 */
export interface SymbolCase {
    /** Identifier of the symbol. */
    symbol_uid: string;
    /** Path of the file the symbol belongs to. */
    file_path: string;
    /** Name of the symbol. */
    symbol_name: string;
    /** Kind of symbol; the set of allowed values is not visible here. */
    symbol_type: string;
    /** Presumably the first source line of the symbol — TODO confirm indexing base. */
    start_line: number;
    /** Presumably the last source line of the symbol — TODO confirm inclusivity. */
    end_line: number;
}
|
|
24
|
+
/**
 * One relation entry of a benchmark dataset.
 *
 * NOTE(review): field meanings below are inferred from the field names only —
 * confirm against the code that consumes relations.
 */
export interface RelationCase {
    /** Uid of the relation's source symbol. */
    src_uid: string;
    /** Edge type label; the set of allowed values is not visible here. */
    edge_type: string;
    /** Uid of the relation's destination symbol. */
    dst_uid: string;
    /** Presumably whether the edge is expected to be present (`true`) or absent (`false`) — TODO confirm. */
    must_exist: boolean;
}
|
|
30
|
+
/** One tool invocation to benchmark, together with its expected outcome. */
export interface TaskCase {
    /** Which tool the task exercises. */
    tool: 'query' | 'context' | 'impact';
    /** Parameters for the tool call. */
    input: Record<string, unknown>;
    /** Uids (or bare symbol names) that the result is required to contain. */
    must_hit_uids: string[];
    /** Uids that the result must not contain. */
    must_not_hit_uids: string[];
    /** Optional lower bound on the number of results returned. */
    min_result_count?: number;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI Context Generator
|
|
3
|
+
*
|
|
4
|
+
* Creates AGENTS.md and CLAUDE.md with full inline GitNexus context.
|
|
5
|
+
* AGENTS.md is the standard read by Cursor, Windsurf, OpenCode, Cline, etc.
|
|
6
|
+
* CLAUDE.md is for Claude Code which only reads that file.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Optional numeric statistics about a repository.
 *
 * NOTE(review): what each count measures is inferred from its name only —
 * confirm against the code that fills these in.
 */
interface RepoStats {
    files?: number;
    nodes?: number;
    edges?: number;
    communities?: number;
    clusters?: number;
    processes?: number;
}
|
|
16
|
+
/**
 * Generate AI context files after indexing.
 *
 * @param repoPath Path of the repository.
 * @param _storagePath Storage path; the leading underscore suggests it is
 *   currently unused — TODO confirm in the implementation.
 * @param projectName Name of the project.
 * @param stats Repository statistics.
 * @returns An object whose `files` array lists strings identifying the files
 *   involved — NOTE(review): whether these are names or paths is not visible here.
 */
export declare function generateAIContextFiles(
    repoPath: string,
    _storagePath: string,
    projectName: string,
    stats: RepoStats,
): Promise<{ files: string[] }>;
|
|
22
|
+
export {};
|