sigmap 2.0.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/README.md +10 -0
- package/gen-context.js +520 -1
- package/package.json +1 -1
- package/src/eval/analyzer.js +221 -0
- package/src/eval/runner.js +308 -0
- package/src/eval/scorer.js +126 -0
- package/src/mcp/server.js +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,47 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [2.2.0] — 2026-04-06
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **Diagnostics & analyze command** — `src/eval/analyzer.js`: per-file breakdown of signature count, token cost, extractor used, and test coverage status.
|
|
13
|
+
- **`--analyze` CLI flag** — prints a per-file table (File | Sigs | Tokens | Extractor | Coverage) across all srcDirs; respects `exclude` config.
|
|
14
|
+
- **`--analyze --json` flag** — outputs the same breakdown as structured JSON (`{ files, totalSigs, totalTokens, slowFiles, fileCount }`).
|
|
15
|
+
- **`--analyze --slow` flag** — re-times each extractor and flags any file whose extraction takes >50ms in the table.
|
|
16
|
+
- **`--diagnose-extractors` CLI flag** — runs all 21 language extractors against `test/fixtures/` and compares output to `test/expected/`; exits non-zero if any extractor diverges, shows first diff line per failure.
|
|
17
|
+
- **`test/integration/analyze.test.js`** — 14 integration tests covering `analyzeFiles`, `formatAnalysisTable`, `formatAnalysisJSON`, and all four CLI flags.
|
|
18
|
+
|
|
19
|
+
### Validation gate
|
|
20
|
+
- 21/21 extractor tests passed
|
|
21
|
+
- All integration suites passed (19 suites, 19 passed, 0 failed — includes 14 new analyze tests)
|
|
22
|
+
- `node gen-context.js --version` → `2.2.0`
|
|
23
|
+
- `node gen-context.js --analyze` runs without error on SigMap repo
|
|
24
|
+
- `node gen-context.js --analyze --json` → valid JSON with required keys
|
|
25
|
+
- `node gen-context.js --diagnose-extractors` → exits 0 on SigMap repo
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## [2.1.0] — 2026-04-05
|
|
30
|
+
|
|
31
|
+
### Added
|
|
32
|
+
- **Benchmark & evaluation system** — `src/eval/runner.js` and `src/eval/scorer.js`: zero-dependency retrieval quality measurement pipeline. Computes hit@5, MRR, and precision@5 against a JSONL task file.
|
|
33
|
+
- **`benchmarks/` directory structure** — `benchmarks/tasks/retrieval.jsonl` (20 tasks against SigMap's own codebase), `benchmarks/results/` (gitignored run output), `benchmarks/reports/` (human-readable summaries).
|
|
34
|
+
- **`--benchmark` CLI flag** — runs retrieval through all tasks in `benchmarks/tasks/retrieval.jsonl`, prints a markdown table (Task | Query | hit@5 | RR | Tokens) plus aggregate metrics; `--benchmark --json` for machine-readable output.
|
|
35
|
+
- **`--eval` CLI flag** — alias for `--benchmark`.
|
|
36
|
+
- **`src/eval/scorer.js`** — pure metric functions: `hitAtK(ranked, expected, k)`, `reciprocalRank(ranked, expected)`, `precisionAtK(ranked, expected, k)`, `aggregate(results)`. Never throws.
|
|
37
|
+
- **`src/eval/runner.js`** — task loader (`loadTasks`), sig-index builder (`buildSigIndex`), keyword ranker (`rank`, `tokenize`), and main `run(tasksFile, cwd)` entry point. Reads generated context file from disk; no in-memory state.
|
|
38
|
+
- **`test/integration/benchmark.test.js`** — 10 integration tests covering scorer unit tests, tokenizer, task loading, empty-file edge case, metrics shape, and `--benchmark --json` CLI output.
|
|
39
|
+
|
|
40
|
+
### Validation gate
|
|
41
|
+
- 21/21 extractor tests passed
|
|
42
|
+
- All integration suites passed (includes 10 new benchmark tests)
|
|
43
|
+
- `node gen-context.js --version` → `2.1.0`
|
|
44
|
+
- `node gen-context.js --benchmark` runs without error on SigMap repo
|
|
45
|
+
- `node gen-context.js --benchmark --json` → valid JSON with `metrics.hitAt5`, `metrics.mrr`, `tasks` array
|
|
46
|
+
- `node gen-context.js --eval --json` → same output as `--benchmark --json`
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
9
50
|
## [2.0.0] — 2026-04-04
|
|
10
51
|
|
|
11
52
|
### Added
|
package/README.md
CHANGED
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
[](CHANGELOG.md)
|
|
27
27
|
[](CONTRIBUTING.md)
|
|
28
28
|
[](https://marketplace.visualstudio.com/items?itemName=manojmallick.sigmap)
|
|
29
|
+
[](https://open-vsx.org/extension/manojmallick/sigmap)
|
|
29
30
|
|
|
30
31
|
</div>
|
|
31
32
|
|
|
@@ -69,6 +70,9 @@ gen-context.js ──► extracts signatures from 21 languages
|
|
|
69
70
|
AI agent session starts with full context
|
|
70
71
|
```
|
|
71
72
|
|
|
73
|
+
> **Dogfooding:** SigMap runs on itself — 40 JS files, 8,600 lines of code.
|
|
74
|
+
> View the generated context: [`.github/copilot-instructions.md`](.github/copilot-instructions.md)
|
|
75
|
+
|
|
72
76
|
### Token reduction at every stage
|
|
73
77
|
|
|
74
78
|
| Stage | Tokens | Reduction |
|
|
@@ -606,6 +610,12 @@ function extract(src) { // src: string → string[]
|
|
|
606
610
|
|
|
607
611
|
---
|
|
608
612
|
|
|
613
|
+
## ⭐ Support
|
|
614
|
+
|
|
615
|
+
If SigMap saves you context or API spend, a ⭐ on [GitHub](https://github.com/manojmallick/sigmap) helps others find it.
|
|
616
|
+
|
|
617
|
+
---
|
|
618
|
+
|
|
609
619
|
## 📄 License
|
|
610
620
|
|
|
611
621
|
MIT © 2026 [Manoj Mallick](https://github.com/manojmallick) · Made in Amsterdam 🇳🇱
|
package/gen-context.js
CHANGED
|
@@ -3570,6 +3570,359 @@ __factories["./src/tracking/logger"] = function(module, exports) {
|
|
|
3570
3570
|
|
|
3571
3571
|
};
|
|
3572
3572
|
|
|
3573
|
+
// ── ./src/eval/scorer ──
|
|
3574
|
+
__factories["./src/eval/scorer"] = function(module, exports) {
|
|
3575
|
+
'use strict';
|
|
3576
|
+
|
|
3577
|
+
/**
 * Locate the best-ranked expected file.
 *
 * Both lists are compared after normalizePath(), so `./a.js` and `a.js`
 * count as the same file.
 *
 * @param {string[]} ranked   Retrieved files, best first.
 * @param {string[]} expected Files that count as a correct answer.
 * @returns {number} 1-based position of the first match in `ranked`, or
 *   Infinity when nothing matches or either argument is not an array.
 */
function firstRank(ranked, expected) {
  const bothArrays = Array.isArray(ranked) && Array.isArray(expected);
  if (!bothArrays) {
    return Infinity;
  }

  const wanted = new Set(expected.map((file) => normalizePath(file)));
  const position = ranked.findIndex((candidate) => wanted.has(normalizePath(candidate)));
  return position === -1 ? Infinity : position + 1;
}
|
|
3585
|
+
|
|
3586
|
+
/**
 * Canonicalize a path for comparison: forward slashes only, no leading `./`.
 *
 * Backslashes are converted BEFORE the leading `./` is stripped; doing it the
 * other way round leaves Windows-style `.\src\a.js` as `./src/a.js`, which
 * then never matches `src/a.js`.
 *
 * @param {*} p Path-like value; coerced with String().
 * @returns {string} Normalized path.
 */
function normalizePath(p) {
  return String(p).replace(/\\/g, '/').replace(/^(?:\.\/)+/, '');
}
|
|
3589
|
+
|
|
3590
|
+
/**
 * hit@k: did any expected file appear within the first `k` ranked results?
 *
 * @param {string[]} ranked   Retrieved files, best first.
 * @param {string[]} expected Files that count as a correct answer.
 * @param {number} [k=5]      Cut-off rank (inclusive).
 * @returns {0|1} 1 when the first match is at rank <= k, otherwise 0.
 */
function hitAtK(ranked, expected, k = 5) {
  const best = firstRank(ranked, expected);
  if (best <= k) {
    return 1;
  }
  return 0;
}
|
|
3593
|
+
|
|
3594
|
+
/**
 * Reciprocal rank of the first expected file in `ranked`.
 *
 * @param {string[]} ranked   Retrieved files, best first.
 * @param {string[]} expected Files that count as a correct answer.
 * @returns {number} 1 / rank of the first match, or 0 when there is none.
 */
function reciprocalRank(ranked, expected) {
  const position = firstRank(ranked, expected);
  if (position === Infinity) {
    return 0;
  }
  return 1 / position;
}
|
|
3598
|
+
|
|
3599
|
+
/**
 * precision@k: fraction of the top-`k` ranked files that are expected files.
 *
 * The denominator is the number of results actually present in the top-k
 * window (which may be smaller than `k`), not `k` itself.
 *
 * Never throws on malformed input: a non-array `ranked` or `expected`, an
 * empty `ranked`, or an empty top-k window (e.g. `k` of 0) all yield 0
 * instead of a TypeError or NaN.
 *
 * @param {string[]} ranked   Retrieved files, best first.
 * @param {string[]} expected Files that count as a correct answer.
 * @param {number} [k=5]      Size of the window to score.
 * @returns {number} Value in [0, 1].
 */
function precisionAtK(ranked, expected, k = 5) {
  if (!Array.isArray(ranked) || !Array.isArray(expected)) return 0;

  const topK = ranked.slice(0, k);
  // Guard the division below: an empty window would produce 0 / 0 = NaN.
  if (topK.length === 0) return 0;

  const expSet = new Set(expected.map((f) => normalizePath(f)));
  const hits = topK.filter((f) => expSet.has(normalizePath(f))).length;
  return hits / topK.length;
}
|
|
3606
|
+
|
|
3607
|
+
/**
 * Average per-task retrieval metrics over a whole benchmark run.
 *
 * Malformed entries are tolerated rather than thrown on: a null/undefined
 * entry, or one whose `ranked` / `expected` is not an array, is scored as an
 * empty result, and a non-finite `tokens` value counts as 0.
 *
 * @param {Array<{ranked?: string[], expected?: string[], tokens?: number}>} results
 *   One entry per task.
 * @param {number} [k=5] Cut-off passed to hitAtK / precisionAtK. The returned
 *   keys are named `hitAt5` / `precisionAt5` regardless of `k`.
 * @returns {{hitAt5: number, mrr: number, precisionAt5: number, avgTokens: number, tasks: number}}
 *   Means rounded to 3 decimals (`avgTokens` to an integer); all zeros for an
 *   empty or non-array input.
 */
function aggregate(results, k = 5) {
  if (!Array.isArray(results) || results.length === 0) {
    return { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 };
  }

  let totalHit = 0;
  let totalRR = 0;
  let totalPrec = 0;
  let totalTokens = 0;

  for (const r of results) {
    const entry = r || {};
    const ranked = Array.isArray(entry.ranked) ? entry.ranked : [];
    const expected = Array.isArray(entry.expected) ? entry.expected : [];
    totalHit += hitAtK(ranked, expected, k);
    totalRR += reciprocalRank(ranked, expected);
    totalPrec += precisionAtK(ranked, expected, k);
    // typeof NaN === 'number', so check finiteness to keep the average sane.
    totalTokens += Number.isFinite(entry.tokens) ? entry.tokens : 0;
  }

  const n = results.length;
  const round3 = (value) => Math.round(value * 1000) / 1000;
  return {
    hitAt5: round3(totalHit / n),
    mrr: round3(totalRR / n),
    precisionAt5: round3(totalPrec / n),
    avgTokens: Math.round(totalTokens / n),
    tasks: n,
  };
}
|
|
3629
|
+
|
|
3630
|
+
module.exports = { hitAtK, reciprocalRank, precisionAtK, aggregate, firstRank };
|
|
3631
|
+
};
|
|
3632
|
+
|
|
3633
|
+
// ── ./src/eval/analyzer ──
|
|
3634
|
+
__factories["./src/eval/analyzer"] = function(module, exports) {
|
|
3635
|
+
'use strict';
|
|
3636
|
+
|
|
3637
|
+
const fs = require('fs');
|
|
3638
|
+
const path = require('path');
|
|
3639
|
+
|
|
3640
|
+
const EXT_MAP = {
|
|
3641
|
+
'.ts': 'typescript', '.tsx': 'typescript',
|
|
3642
|
+
'.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
|
|
3643
|
+
'.py': 'python', '.pyw': 'python',
|
|
3644
|
+
'.java': 'java',
|
|
3645
|
+
'.kt': 'kotlin', '.kts': 'kotlin',
|
|
3646
|
+
'.go': 'go',
|
|
3647
|
+
'.rs': 'rust',
|
|
3648
|
+
'.cs': 'csharp',
|
|
3649
|
+
'.cpp': 'cpp', '.c': 'cpp', '.h': 'cpp', '.hpp': 'cpp', '.cc': 'cpp',
|
|
3650
|
+
'.rb': 'ruby', '.rake': 'ruby',
|
|
3651
|
+
'.php': 'php',
|
|
3652
|
+
'.swift': 'swift',
|
|
3653
|
+
'.dart': 'dart',
|
|
3654
|
+
'.scala': 'scala', '.sc': 'scala',
|
|
3655
|
+
'.vue': 'vue',
|
|
3656
|
+
'.svelte': 'svelte',
|
|
3657
|
+
'.html': 'html', '.htm': 'html',
|
|
3658
|
+
'.css': 'css', '.scss': 'css', '.sass': 'css', '.less': 'css',
|
|
3659
|
+
'.yml': 'yaml', '.yaml': 'yaml',
|
|
3660
|
+
'.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', '.fish': 'shell',
|
|
3661
|
+
};
|
|
3662
|
+
|
|
3663
|
+
/**
 * @param {string} name File base name (no directory part).
 * @returns {boolean} True for exactly `Dockerfile` or any `Dockerfile.<suffix>`
 *   (case-sensitive).
 */
function isDockerfile(name) {
  if (name === 'Dockerfile') {
    return true;
  }
  return name.startsWith('Dockerfile.');
}
|
|
3664
|
+
|
|
3665
|
+
/**
 * Pick the extractor for a file.
 *
 * The lower-cased extension is looked up in EXT_MAP first; files without a
 * mapped extension are then checked against the Dockerfile naming rule.
 *
 * @param {string} filePath Path to the file (only the base name is used).
 * @returns {string|null} Extractor name, or null when no extractor applies.
 */
function getExtractorName(filePath) {
  const fileName = path.basename(filePath);
  const byExtension = EXT_MAP[path.extname(fileName).toLowerCase()];
  if (byExtension) {
    return byExtension;
  }
  return isDockerfile(fileName) ? 'dockerfile' : null;
}
|
|
3672
|
+
|
|
3673
|
+
/**
 * Rough token estimate for a list of signatures: total characters / 4,
 * rounded up. Separators between signatures are not counted.
 *
 * @param {string[]} sigs Signature strings.
 * @returns {number} Estimated token count.
 */
function tokenCount(sigs) {
  let chars = 0;
  sigs.forEach((sig) => {
    chars += sig.length;
  });
  return Math.ceil(chars / 4);
}
|
|
3676
|
+
|
|
3677
|
+
/**
 * Heuristic "is this file tested?" check.
 *
 * Looks at the direct children (one level only) of `test/`, `tests/`,
 * `__tests__/` and `spec/` under `cwd` and reports true as soon as any entry
 * name contains the source file's base name without its extension.
 * Directories that are missing or unreadable are skipped.
 *
 * @param {string} filePath Source file being analyzed.
 * @param {string} cwd      Project root that holds the test directories.
 * @returns {boolean} True when a matching entry name is found.
 */
function hasCoverage(filePath, cwd) {
  const stem = path.basename(filePath, path.extname(filePath));
  const candidateDirs = ['test', 'tests', '__tests__', 'spec'];

  return candidateDirs.some((dirName) => {
    const dirPath = path.join(cwd, dirName);
    if (!fs.existsSync(dirPath)) {
      return false;
    }
    let children;
    try {
      children = fs.readdirSync(dirPath, { withFileTypes: true });
    } catch (_) {
      return false;
    }
    return children.some((child) => child.name.includes(stem));
  });
}
|
|
3689
|
+
|
|
3690
|
+
/**
 * Build per-file diagnostic statistics.
 *
 * For every file with a known extractor the file is read, signatures are
 * extracted (capped at `maxSigs`), and a stats record is produced. Files with
 * no extractor, an extractor that fails to load, or that cannot be read are
 * skipped. An extractor that throws is treated as yielding no signatures.
 *
 * Extractor modules are loaded at most once per run; a failed load is
 * remembered too, so it is not retried for every file of that language.
 *
 * @param {string[]} files Absolute file paths to analyze.
 * @param {string} cwd     Project root; `file` in each record is relative to it.
 * @param {{slow?: boolean, slowMs?: number, maxSigs?: number}} [opts]
 *   `slow` enables timing; `slowMs` (default 50) is the threshold above which
 *   a file is flagged; `maxSigs` (default 25) caps signatures per file.
 * @returns {Array<{file: string, extractor: string, sigs: number, tokens: number,
 *   covered: boolean, elapsedMs: (number|undefined), slow: (boolean|undefined)}>}
 *   `elapsedMs` and `slow` are undefined unless `opts.slow` is set.
 */
function analyzeFiles(files, cwd, opts) {
  const slow = (opts && opts.slow) || false;
  const slowMs = (opts && opts.slowMs) || 50;
  const maxSigs = (opts && opts.maxSigs) || 25;
  const stats = [];
  // Map (not a falsy check on a plain object) so a cached `null` for a failed
  // load counts as "already tried".
  const cache = new Map();

  if (!Array.isArray(files)) return stats;

  for (const filePath of files) {
    const extractorName = getExtractorName(filePath);
    if (!extractorName) continue;

    if (!cache.has(extractorName)) {
      let loaded = null;
      try {
        loaded = __require(`./src/extractors/${extractorName}`);
      } catch (_) {
        loaded = null; // best effort: files of this language are skipped
      }
      cache.set(extractorName, loaded);
    }
    const extractor = cache.get(extractorName);
    if (!extractor || typeof extractor.extract !== 'function') continue;

    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch (_) {
      continue; // unreadable file: leave it out of the report
    }

    const startedAt = slow ? Date.now() : 0;
    let sigs;
    try {
      sigs = extractor.extract(content);
    } catch (_) {
      sigs = [];
    }
    const elapsedMs = slow ? Date.now() - startedAt : 0;
    sigs = (Array.isArray(sigs) ? sigs : []).slice(0, maxSigs);

    stats.push({
      file: path.relative(cwd, filePath),
      extractor: extractorName,
      sigs: sigs.length,
      tokens: tokenCount(sigs),
      covered: hasCoverage(filePath, cwd),
      elapsedMs: slow ? elapsedMs : undefined,
      slow: slow ? (elapsedMs > slowMs) : undefined,
    });
  }
  return stats;
}
|
|
3731
|
+
|
|
3732
|
+
/**
 * Render analysis stats as an aligned, pipe-delimited text table.
 *
 * Columns: File | Sigs | Tokens | Extractor | Coverage, plus Elapsed when
 * `showSlow` is set. A totals row is appended below a second separator.
 *
 * Every cell is padded from one shared set of column widths, so header, rows,
 * separator and footer always line up instead of depending on hand-counted
 * spaces inside string literals.
 *
 * @param {Array<{file: string, extractor: string, sigs: number, tokens: number,
 *   covered: boolean, elapsedMs?: number, slow?: boolean}>} stats
 *   Records as produced by analyzeFiles().
 * @param {boolean} [showSlow] Include the Elapsed column.
 * @returns {string} Table text ending in a newline, or a placeholder line
 *   when there is nothing to show.
 */
function formatAnalysisTable(stats, showSlow) {
  if (!stats || stats.length === 0) return '_(no files analyzed)_\n';

  const SIGS_WIDTH = 4;
  const TOKENS_WIDTH = 6;
  const EXTRACTOR_WIDTH = 11;
  const COVERAGE_WIDTH = 10;
  const ELAPSED_WIDTH = 8;

  // reduce instead of Math.max(...spread): no argument-count limit on huge inputs.
  const fileWidth = stats.reduce((widest, s) => Math.max(widest, s.file.length), 4);

  const renderRow = (file, sigs, tokens, extractor, coverage, elapsed) => {
    const cells = [
      file.padEnd(fileWidth),
      sigs.padStart(SIGS_WIDTH),
      tokens.padStart(TOKENS_WIDTH),
      extractor.padEnd(EXTRACTOR_WIDTH),
      coverage.padEnd(COVERAGE_WIDTH),
    ];
    if (showSlow) cells.push(elapsed.padEnd(ELAPSED_WIDTH));
    return `| ${cells.join(' | ')} |`;
  };

  const widths = [fileWidth, SIGS_WIDTH, TOKENS_WIDTH, EXTRACTOR_WIDTH, COVERAGE_WIDTH];
  if (showSlow) widths.push(ELAPSED_WIDTH);
  // +2 accounts for the single space on each side of every cell.
  const sep = `|${widths.map((w) => '-'.repeat(w + 2)).join('|')}|`;

  const header = renderRow('File', 'Sigs', 'Tokens', 'Extractor', 'Coverage', 'Elapsed');

  const rows = stats.map((s) => {
    const cov = s.covered ? '✓ tested' : '✗ untested';
    const ms = s.elapsedMs !== undefined ? `${s.elapsedMs}ms` : '';
    const elapsed = `${ms.padStart(6)}${s.slow ? ' ⚠️' : ''}`;
    return renderRow(s.file, String(s.sigs), String(s.tokens), s.extractor || '', cov, elapsed);
  });

  const totalSigs = stats.reduce((n, s) => n + s.sigs, 0);
  const totalTokens = stats.reduce((n, s) => n + s.tokens, 0);
  const footer = renderRow('', String(totalSigs), String(totalTokens), '**Total**', '', '');

  return [header, sep, ...rows, sep, footer].join('\n') + '\n';
}
|
|
3759
|
+
|
|
3760
|
+
/**
 * Wrap analysis stats in a summary object suitable for JSON.stringify.
 *
 * @param {Array<{file: string, sigs: number, tokens: number, slow?: boolean,
 *   elapsedMs?: number}>} stats Records as produced by analyzeFiles().
 * @returns {{files: Array, totalSigs: number, totalTokens: number,
 *   slowFiles: Array<{file: string, elapsedMs: number}>, fileCount: number}}
 *   `files` is the input array itself; `slowFiles` lists the records whose
 *   `slow` flag is truthy.
 */
function formatAnalysisJSON(stats) {
  let totalSigs = 0;
  let totalTokens = 0;
  const slowFiles = [];

  stats.forEach((entry) => {
    totalSigs += entry.sigs;
    totalTokens += entry.tokens;
    if (entry.slow) {
      slowFiles.push({ file: entry.file, elapsedMs: entry.elapsedMs });
    }
  });

  return { files: stats, totalSigs, totalTokens, slowFiles, fileCount: stats.length };
}
|
|
3766
|
+
|
|
3767
|
+
module.exports = { analyzeFiles, formatAnalysisTable, formatAnalysisJSON };
|
|
3768
|
+
};
|
|
3769
|
+
|
|
3770
|
+
// ── ./src/eval/runner ──
|
|
3771
|
+
__factories["./src/eval/runner"] = function(module, exports) {
|
|
3772
|
+
'use strict';
|
|
3773
|
+
|
|
3774
|
+
const fs = require('fs');
|
|
3775
|
+
const path = require('path');
|
|
3776
|
+
const { hitAtK, reciprocalRank, precisionAtK, aggregate } = __require('./src/eval/scorer');
|
|
3777
|
+
|
|
3778
|
+
/**
 * Parse the generated context file into a file → signatures index.
 *
 * Reads `<cwd>/.github/copilot-instructions.md`. A line of the form
 * `### <path-with-extension>` starts a new file section; every non-blank
 * line inside a fenced code block that follows is recorded (trimmed) as a
 * signature of that file. Text outside fences is ignored.
 *
 * @param {string} cwd Project root.
 * @returns {Map<string, string[]>} File path → signature lines; empty when
 *   the context file does not exist.
 */
function buildSigIndex(cwd) {
  const index = new Map();
  const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
  if (!fs.existsSync(contextPath)) {
    return index;
  }

  const fileHeading = /^###\s+(\S+\.\w+)\s*$/;
  let currentFile = null;
  let collected = [];
  let insideFence = false;

  fs.readFileSync(contextPath, 'utf8').split('\n').forEach((rawLine) => {
    const heading = fileHeading.exec(rawLine);
    if (heading) {
      // Close the previous section before opening the next one.
      if (currentFile !== null) index.set(currentFile, collected);
      currentFile = heading[1];
      collected = [];
      insideFence = false;
      return;
    }
    if (rawLine.startsWith('```')) {
      insideFence = !insideFence;
      return;
    }
    const sig = rawLine.trim();
    if (insideFence && currentFile && sig) {
      collected.push(sig);
    }
  });

  // The last section has no following heading to flush it.
  if (currentFile !== null) index.set(currentFile, collected);
  return index;
}
|
|
3802
|
+
|
|
3803
|
+
/**
 * Split free text or identifiers into lower-case keyword tokens.
 *
 * camelCase boundaries (lower-case letter followed by upper-case) are split,
 * every character that is not an ASCII letter, digit or whitespace becomes a
 * separator, and tokens shorter than two characters are dropped.
 *
 * @param {string} text Input text; a falsy value yields [].
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  if (!text) {
    return [];
  }

  const spaced = text
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/[^A-Za-z0-9\s]/g, ' ');

  const tokens = [];
  for (const piece of spaced.toLowerCase().split(/\s+/)) {
    if (piece.length > 1) {
      tokens.push(piece);
    }
  }
  return tokens;
}
|
|
3813
|
+
|
|
3814
|
+
const STOP_WORDS = new Set([
|
|
3815
|
+
'the', 'a', 'an', 'in', 'of', 'to', 'for', 'and', 'or', 'is', 'are',
|
|
3816
|
+
'that', 'this', 'it', 'with', 'from', 'by', 'be', 'as', 'on', 'at',
|
|
3817
|
+
]);
|
|
3818
|
+
|
|
3819
|
+
/**
 * Keyword-overlap relevance score of one file for a query.
 *
 * Each non-stop-word query token adds 1 when it appears verbatim among the
 * file's signature tokens, plus 0.3 for every distinct signature token that
 * merely starts with it (only for query tokens of 4+ characters).
 *
 * @param {string[]} sigs        Signature lines of the file.
 * @param {string[]} queryTokens Tokenized query (see tokenize()).
 * @returns {number} Score >= 0; 0 when the file has no signatures.
 */
function scoreFile(sigs, queryTokens) {
  if (!sigs || sigs.length === 0) {
    return 0;
  }

  const vocabulary = new Set(tokenize(sigs.join(' ')));
  let total = 0;
  for (const term of queryTokens) {
    if (STOP_WORDS.has(term)) continue;
    if (vocabulary.has(term)) total += 1;
    // Short terms would prefix-match far too many identifiers.
    if (term.length < 4) continue;
    for (const word of vocabulary) {
      if (word !== term && word.startsWith(term)) total += 0.3;
    }
  }
  return total;
}
|
|
3833
|
+
|
|
3834
|
+
/**
 * Rank every indexed file against a query.
 *
 * @param {string} query                Natural-language or identifier query.
 * @param {Map<string, string[]>} index File → signatures, from buildSigIndex().
 * @param {number} [topK=10]            Maximum number of results to return.
 * @returns {Array<{file: string, score: number, sigs: string[]}>}
 *   Best first; ties are broken by file name (localeCompare) so the order is
 *   deterministic.
 */
function rank(query, index, topK = 10) {
  const queryTokens = tokenize(query);

  const scored = Array.from(index.entries(), ([file, sigs]) => ({
    file,
    score: scoreFile(sigs, queryTokens),
    sigs,
  }));

  scored.sort((left, right) => {
    const byScore = right.score - left.score;
    if (byScore) {
      return byScore;
    }
    return left.file.localeCompare(right.file);
  });

  return scored.slice(0, topK);
}
|
|
3843
|
+
|
|
3844
|
+
/**
 * Rough token estimate for a signature list as it would be emitted: the
 * lines joined with newlines, length / 4, rounded up.
 *
 * @param {string[]} [sigs] Signature lines; a falsy value counts as empty.
 * @returns {number} Estimated token count.
 */
function estimateTokens(sigs) {
  const lines = sigs || [];
  const text = lines.join('\n');
  return Math.ceil(text.length / 4);
}
|
|
3847
|
+
|
|
3848
|
+
/**
 * Load benchmark tasks from a JSONL file (one JSON object per line).
 *
 * A line is kept only when it parses and has a truthy `query` plus an array
 * `expected_files`. Blank lines, unparsable lines and incomplete records are
 * skipped silently. A missing `id` defaults to the task's 1-based position
 * among the accepted tasks; a missing `repo` defaults to '.'.
 *
 * @param {string} tasksFile Path to the JSONL file.
 * @returns {Array<{id: string, query: string, expected: string[], repo: string}>}
 *   Accepted tasks in file order; [] when the file does not exist.
 */
function loadTasks(tasksFile) {
  if (!fs.existsSync(tasksFile)) {
    return [];
  }

  const tasks = [];
  fs.readFileSync(tasksFile, 'utf8').split('\n').forEach((rawLine) => {
    const json = rawLine.trim();
    if (json === '') {
      return;
    }
    try {
      const record = JSON.parse(json);
      const usable = record.query && Array.isArray(record.expected_files);
      if (usable) {
        tasks.push({
          id: record.id || String(tasks.length + 1),
          query: record.query,
          expected: record.expected_files,
          repo: record.repo || '.',
        });
      }
    } catch (_) {
      // Best effort: a malformed line must not abort the whole benchmark.
    }
  });
  return tasks;
}
|
|
3869
|
+
|
|
3870
|
+
/**
 * Run the retrieval benchmark.
 *
 * Loads the tasks, builds the signature index from the generated context
 * file under `cwd`, ranks the index once per task, and scores each ranking.
 *
 * @param {string} tasksFile JSONL task file; a relative path is resolved
 *   against `cwd`.
 * @param {string} cwd       Project root.
 * @param {{topK?: number}} [opts] `topK` (default 10) is how many ranked
 *   files are kept per task.
 * @returns {{tasks: Array<{id: string, query: string, expected: string[],
 *   ranked: string[], hit5: number, rr: number, precAt5: number, tokens: number}>,
 *   metrics: {hitAt5: number, mrr: number, precisionAt5: number,
 *   avgTokens: number, tasks: number}}}
 *   Per-task results plus aggregate metrics; all-zero metrics when no task
 *   could be loaded.
 */
function run(tasksFile, cwd, opts = {}) {
  // `opts = {}` only covers undefined; tolerate an explicit null as well.
  const topK = (opts && opts.topK) || 10;
  const resolvedTasksFile = path.isAbsolute(tasksFile)
    ? tasksFile
    : path.resolve(cwd, tasksFile);

  const tasks = loadTasks(resolvedTasksFile);
  if (tasks.length === 0) {
    return { tasks: [], metrics: { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 } };
  }

  const index = buildSigIndex(cwd);
  const taskResults = [];
  for (const task of tasks) {
    // Rank once and derive both the file list and the token cost from it.
    const topResults = rank(task.query, index, topK);
    const ranked = topResults.map((r) => r.file);
    const tokens = topResults.reduce((sum, r) => sum + estimateTokens(r.sigs), 0);
    taskResults.push({
      id: task.id,
      query: task.query,
      expected: task.expected,
      ranked,
      hit5: hitAtK(ranked, task.expected, 5),
      rr: reciprocalRank(ranked, task.expected),
      precAt5: precisionAtK(ranked, task.expected, 5),
      tokens,
    });
  }

  const metrics = aggregate(
    taskResults.map((r) => ({ ranked: r.ranked, expected: r.expected, tokens: r.tokens })),
  );
  return { tasks: taskResults, metrics };
}
|
|
3901
|
+
|
|
3902
|
+
/**
 * Render per-task benchmark results as a Markdown table
 * (Task | Query | hit@5 | RR | Tokens).
 *
 * Queries longer than 40 characters are cut to 37 plus `...`. Pipe
 * characters in the id or query are backslash-escaped and line breaks are
 * replaced by spaces, because either would otherwise break the table row.
 *
 * @param {Array<{id: string, query: string, hit5: number, rr: number,
 *   tokens: number}>} taskResults Per-task results as produced by run().
 * @returns {string} Markdown table without a trailing newline.
 */
function formatTable(taskResults) {
  const header = '| Task | Query | hit@5 | RR | Tokens |';
  const divider = '|---|---|:---:|:---:|---:|';
  const cell = (value) => String(value).replace(/\r?\n/g, ' ').replace(/\|/g, '\\|');

  const rows = taskResults.map((r) => {
    const query = String(r.query);
    // Truncate before escaping so an escape sequence is never cut in half.
    const q = query.length > 40 ? `${query.slice(0, 37)}...` : query;
    return `| ${cell(r.id)} | ${cell(q)} | ${r.hit5 ? '✓' : '✗'} | ${r.rr.toFixed(2)} | ${r.tokens} |`;
  });
  return [header, divider, ...rows].join('\n');
}
|
|
3911
|
+
|
|
3912
|
+
/**
 * Render aggregate benchmark metrics as a short human-readable summary.
 *
 * @param {{tasks: number, hitAt5: number, mrr: number, precisionAt5: number,
 *   avgTokens: number}} metrics Aggregate metrics as produced by aggregate().
 * @returns {string} Multi-line summary without a trailing newline; hit@5 and
 *   precision@5 are shown as percentages with one decimal, MRR with three.
 */
function formatMetrics(metrics) {
  const asPercent = (ratio) => `${(ratio * 100).toFixed(1)}%`;
  const entry = (label, value) => ` ${label} : ${value}`;

  const lines = ['[sigmap] benchmark results:'];
  lines.push(entry('tasks', metrics.tasks));
  lines.push(entry('hit@5', asPercent(metrics.hitAt5)));
  lines.push(entry('MRR', metrics.mrr.toFixed(3)));
  lines.push(entry('precision@5', asPercent(metrics.precisionAt5)));
  lines.push(entry('avg tokens', metrics.avgTokens));
  return lines.join('\n');
}
|
|
3922
|
+
|
|
3923
|
+
module.exports = { run, rank, loadTasks, buildSigIndex, formatTable, formatMetrics, tokenize };
|
|
3924
|
+
};
|
|
3925
|
+
|
|
3573
3926
|
|
|
3574
3927
|
/**
|
|
3575
3928
|
* SigMap — gen-context.js v1.2.0
|
|
@@ -3583,7 +3936,7 @@ const path = require('path');
|
|
|
3583
3936
|
const os = require('os');
|
|
3584
3937
|
const { execSync } = require('child_process');
|
|
3585
3938
|
|
|
3586
|
-
const VERSION = '2.0.1';
|
|
3939
|
+
const VERSION = '2.2.0';
|
|
3587
3940
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
3588
3941
|
|
|
3589
3942
|
function requireSourceOrBundled(key) {
|
|
@@ -4789,6 +5142,13 @@ Usage:
|
|
|
4789
5142
|
node gen-context.js --diff Generate context for git-changed files only
|
|
4790
5143
|
node gen-context.js --diff <base-ref> Generate context + structural diff vs base ref (e.g. main)
|
|
4791
5144
|
node gen-context.js --diff --staged Generate context for staged files only
|
|
5145
|
+
node gen-context.js --benchmark Run retrieval benchmark (benchmarks/tasks/retrieval.jsonl)
|
|
5146
|
+
node gen-context.js --benchmark --json Benchmark results as JSON
|
|
5147
|
+
node gen-context.js --eval Alias for --benchmark
|
|
5148
|
+
node gen-context.js --analyze Per-file breakdown: sigs, tokens, extractor, coverage
|
|
5149
|
+
node gen-context.js --analyze --json Breakdown as JSON
|
|
5150
|
+
node gen-context.js --analyze --slow Re-time each extractor; flag files >50ms
|
|
5151
|
+
node gen-context.js --diagnose-extractors Run all 21 extractors vs fixtures; show pass/fail + diff
|
|
4792
5152
|
node gen-context.js --init Write example config + .contextignore scaffold
|
|
4793
5153
|
node gen-context.js --help Show this message
|
|
4794
5154
|
node gen-context.js --version Show version
|
|
@@ -4916,6 +5276,165 @@ function main() {
|
|
|
4916
5276
|
process.exit(0);
|
|
4917
5277
|
}
|
|
4918
5278
|
|
|
5279
|
+
if (args.includes('--benchmark') || args.includes('--eval')) {
|
|
5280
|
+
try {
|
|
5281
|
+
const { run, formatTable, formatMetrics } = __require('./src/eval/runner');
|
|
5282
|
+
const tasksFile = path.join(cwd, 'benchmarks', 'tasks', 'retrieval.jsonl');
|
|
5283
|
+
const result = run(tasksFile, cwd, { topK: 10 });
|
|
5284
|
+
if (args.includes('--json')) {
|
|
5285
|
+
// Compact output: strip ranked arrays to keep payload small
|
|
5286
|
+
const compact = {
|
|
5287
|
+
metrics: result.metrics,
|
|
5288
|
+
tasks: result.tasks.map((t) => ({
|
|
5289
|
+
id: t.id,
|
|
5290
|
+
query: t.query,
|
|
5291
|
+
hit5: t.hit5,
|
|
5292
|
+
rr: t.rr,
|
|
5293
|
+
precAt5: t.precAt5,
|
|
5294
|
+
tokens: t.tokens,
|
|
5295
|
+
top1: t.ranked[0] || null,
|
|
5296
|
+
})),
|
|
5297
|
+
};
|
|
5298
|
+
process.stdout.write(JSON.stringify(compact) + '\n');
|
|
5299
|
+
} else {
|
|
5300
|
+
console.log(formatMetrics(result.metrics));
|
|
5301
|
+
if (result.tasks.length > 0) {
|
|
5302
|
+
console.log('');
|
|
5303
|
+
console.log(formatTable(result.tasks));
|
|
5304
|
+
}
|
|
5305
|
+
}
|
|
5306
|
+
} catch (err) {
|
|
5307
|
+
console.error(`[sigmap] benchmark error: ${err.message}`);
|
|
5308
|
+
process.exit(1);
|
|
5309
|
+
}
|
|
5310
|
+
process.exit(0);
|
|
5311
|
+
}
|
|
5312
|
+
|
|
5313
|
+
if (args.includes('--analyze')) {
|
|
5314
|
+
try {
|
|
5315
|
+
const { analyzeFiles, formatAnalysisTable, formatAnalysisJSON } = requireSourceOrBundled('./src/eval/analyzer');
|
|
5316
|
+
const cfg = config || {};
|
|
5317
|
+
const srcDirs = cfg.srcDirs || DEFAULTS.srcDirs;
|
|
5318
|
+
const exclude = cfg.exclude || DEFAULTS.exclude;
|
|
5319
|
+
const slow = args.includes('--slow');
|
|
5320
|
+
|
|
5321
|
+
// Collect files (reuse existing file-walker if accessible, else inline)
|
|
5322
|
+
const allFiles = [];
|
|
5323
|
+
function walkForAnalyze(dir, depth) {
|
|
5324
|
+
if (depth > (cfg.maxDepth || 6)) return;
|
|
5325
|
+
let entries;
|
|
5326
|
+
try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch (_) { return; }
|
|
5327
|
+
for (const e of entries) {
|
|
5328
|
+
if (exclude.some((x) => e.name === x || e.name.startsWith(x))) continue;
|
|
5329
|
+
const full = path.join(dir, e.name);
|
|
5330
|
+
if (e.isDirectory()) walkForAnalyze(full, depth + 1);
|
|
5331
|
+
else if (e.isFile()) allFiles.push(full);
|
|
5332
|
+
}
|
|
5333
|
+
}
|
|
5334
|
+
for (const sd of srcDirs) {
|
|
5335
|
+
const abs = path.join(cwd, sd);
|
|
5336
|
+
if (fs.existsSync(abs)) walkForAnalyze(abs, 0);
|
|
5337
|
+
}
|
|
5338
|
+
|
|
5339
|
+
const stats = analyzeFiles(allFiles, cwd, { slow, maxSigs: cfg.maxSigsPerFile || 25 });
|
|
5340
|
+
|
|
5341
|
+
if (args.includes('--json')) {
|
|
5342
|
+
process.stdout.write(JSON.stringify(formatAnalysisJSON(stats)) + '\n');
|
|
5343
|
+
} else {
|
|
5344
|
+
const table = formatAnalysisTable(stats, slow);
|
|
5345
|
+
process.stdout.write(table);
|
|
5346
|
+
}
|
|
5347
|
+
} catch (err) {
|
|
5348
|
+
console.error(`[sigmap] analyze error: ${err.message}`);
|
|
5349
|
+
process.exit(1);
|
|
5350
|
+
}
|
|
5351
|
+
process.exit(0);
|
|
5352
|
+
}
|
|
5353
|
+
|
|
5354
|
+
if (args.includes('--diagnose-extractors')) {
|
|
5355
|
+
try {
|
|
5356
|
+
const fixturesDir = path.join(cwd, 'test', 'fixtures');
|
|
5357
|
+
const expectedDir = path.join(cwd, 'test', 'expected');
|
|
5358
|
+
if (!fs.existsSync(fixturesDir) || !fs.existsSync(expectedDir)) {
|
|
5359
|
+
console.error('[sigmap] test/fixtures or test/expected not found — run from the SigMap repo root');
|
|
5360
|
+
process.exit(1);
|
|
5361
|
+
}
|
|
5362
|
+
|
|
5363
|
+
const EXT_TO_LANG = {
|
|
5364
|
+
'.ts': 'typescript', '.js': 'javascript', '.py': 'python',
|
|
5365
|
+
'.java': 'java', '.kt': 'kotlin', '.go': 'go', '.rs': 'rust',
|
|
5366
|
+
'.cs': 'csharp', '.cpp': 'cpp', '.rb': 'ruby', '.php': 'php',
|
|
5367
|
+
'.swift': 'swift', '.dart': 'dart', '.scala': 'scala',
|
|
5368
|
+
'.vue': 'vue', '.svelte': 'svelte', '.html': 'html',
|
|
5369
|
+
'.css': 'css', '.yml': 'yaml', '.sh': 'shell',
|
|
5370
|
+
};
|
|
5371
|
+
const SPECIAL = { 'Dockerfile': 'dockerfile' };
|
|
5372
|
+
|
|
5373
|
+
let passed = 0; let failed = 0;
|
|
5374
|
+
const entries = fs.readdirSync(fixturesDir).sort();
|
|
5375
|
+
|
|
5376
|
+
for (const filename of entries) {
|
|
5377
|
+
const ext = path.extname(filename).toLowerCase();
|
|
5378
|
+
const lang = EXT_TO_LANG[ext] || SPECIAL[filename];
|
|
5379
|
+
if (!lang) continue;
|
|
5380
|
+
|
|
5381
|
+
const fixturePath = path.join(fixturesDir, filename);
|
|
5382
|
+
const expectedPath = path.join(expectedDir, `${lang}.txt`);
|
|
5383
|
+
if (!fs.existsSync(expectedPath)) {
|
|
5384
|
+
console.log(` SKIP ${lang.padEnd(12)} (no expected file)`);
|
|
5385
|
+
continue;
|
|
5386
|
+
}
|
|
5387
|
+
|
|
5388
|
+
const src = fs.readFileSync(fixturePath, 'utf8');
|
|
5389
|
+
const expected = fs.readFileSync(expectedPath, 'utf8').trim();
|
|
5390
|
+
|
|
5391
|
+
let mod;
|
|
5392
|
+
try {
|
|
5393
|
+
mod = requireSourceOrBundled(`./src/extractors/${lang}`);
|
|
5394
|
+
} catch (e) {
|
|
5395
|
+
console.log(` ERROR ${lang.padEnd(12)} loader failed: ${e.message}`);
|
|
5396
|
+
failed++;
|
|
5397
|
+
continue;
|
|
5398
|
+
}
|
|
5399
|
+
|
|
5400
|
+
let actual;
|
|
5401
|
+
try {
|
|
5402
|
+
const sigs = mod.extract(src);
|
|
5403
|
+
actual = sigs.join('\n').trim();
|
|
5404
|
+
} catch (e) {
|
|
5405
|
+
console.log(` ERROR ${lang.padEnd(12)} extract() threw: ${e.message}`);
|
|
5406
|
+
failed++;
|
|
5407
|
+
continue;
|
|
5408
|
+
}
|
|
5409
|
+
|
|
5410
|
+
if (actual === expected) {
|
|
5411
|
+
console.log(` PASS ${lang}`);
|
|
5412
|
+
passed++;
|
|
5413
|
+
} else {
|
|
5414
|
+
console.log(` FAIL ${lang}`);
|
|
5415
|
+
// Show first diff line
|
|
5416
|
+
const aLines = actual.split('\n');
|
|
5417
|
+
const eLines = expected.split('\n');
|
|
5418
|
+
const maxLen = Math.max(aLines.length, eLines.length);
|
|
5419
|
+
for (let i = 0; i < maxLen; i++) {
|
|
5420
|
+
if (aLines[i] !== eLines[i]) {
|
|
5421
|
+
console.log(` expected: ${(eLines[i] || '(missing)').slice(0, 100)}`);
|
|
5422
|
+
console.log(` actual : ${(aLines[i] || '(missing)').slice(0, 100)}`);
|
|
5423
|
+
break;
|
|
5424
|
+
}
|
|
5425
|
+
}
|
|
5426
|
+
failed++;
|
|
5427
|
+
}
|
|
5428
|
+
}
|
|
5429
|
+
|
|
5430
|
+
console.log(`\n${passed} passed, ${failed} failed`);
|
|
5431
|
+
process.exit(failed > 0 ? 1 : 0);
|
|
5432
|
+
} catch (err) {
|
|
5433
|
+
console.error(`[sigmap] diagnose error: ${err.message}`);
|
|
5434
|
+
process.exit(1);
|
|
5435
|
+
}
|
|
5436
|
+
}
|
|
5437
|
+
|
|
4919
5438
|
if (args.includes('--report')) {
|
|
4920
5439
|
if (args.includes('--history')) {
|
|
4921
5440
|
try {
|
package/package.json
CHANGED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* SigMap file analyzer — per-file diagnostic statistics.
|
|
5
|
+
* Zero npm dependencies.
|
|
6
|
+
*
|
|
7
|
+
* Exports:
|
|
8
|
+
* analyzeFiles(files, cwd, opts) → stats[]
|
|
9
|
+
 * formatAnalysisTable(stats, showSlow) → markdown table string
|
|
10
|
+
* formatAnalysisJSON(stats) → plain object suitable for JSON.stringify
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
|
|
16
|
+
// Extension → extractor name (mirrors EXT_MAP in gen-context.js)
|
|
17
|
+
const EXT_MAP = {
|
|
18
|
+
'.ts': 'typescript', '.tsx': 'typescript',
|
|
19
|
+
'.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
|
|
20
|
+
'.py': 'python', '.pyw': 'python',
|
|
21
|
+
'.java': 'java',
|
|
22
|
+
'.kt': 'kotlin', '.kts': 'kotlin',
|
|
23
|
+
'.go': 'go',
|
|
24
|
+
'.rs': 'rust',
|
|
25
|
+
'.cs': 'csharp',
|
|
26
|
+
'.cpp': 'cpp', '.c': 'cpp', '.h': 'cpp', '.hpp': 'cpp', '.cc': 'cpp',
|
|
27
|
+
'.rb': 'ruby', '.rake': 'ruby',
|
|
28
|
+
'.php': 'php',
|
|
29
|
+
'.swift': 'swift',
|
|
30
|
+
'.dart': 'dart',
|
|
31
|
+
'.scala': 'scala', '.sc': 'scala',
|
|
32
|
+
'.vue': 'vue',
|
|
33
|
+
'.svelte': 'svelte',
|
|
34
|
+
'.html': 'html', '.htm': 'html',
|
|
35
|
+
'.css': 'css', '.scss': 'css', '.sass': 'css', '.less': 'css',
|
|
36
|
+
'.yml': 'yaml', '.yaml': 'yaml',
|
|
37
|
+
'.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', '.fish': 'shell',
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
/**
 * @param {string} name File base name (no directory part).
 * @returns {boolean} True for exactly `Dockerfile` or any `Dockerfile.<suffix>`
 *   (case-sensitive).
 */
function isDockerfile(name) {
  if (name === 'Dockerfile') {
    return true;
  }
  return name.startsWith('Dockerfile.');
}
|
|
43
|
+
|
|
44
|
+
/**
 * Pick the extractor for a file.
 *
 * The lower-cased extension is looked up in EXT_MAP first; files without a
 * mapped extension are then checked against the Dockerfile naming rule.
 *
 * @param {string} filePath Path to the file (only the base name is used).
 * @returns {string|null} Extractor name, or null when no extractor applies.
 */
function getExtractorName(filePath) {
  const fileName = path.basename(filePath);
  const extension = path.extname(fileName).toLowerCase();
  const mapped = EXT_MAP[extension];
  if (mapped) {
    return mapped;
  }
  if (isDockerfile(fileName)) {
    return 'dockerfile';
  }
  return null;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Approximate the token cost of a list of signatures.
 * Uses the chars / 4 rule of thumb on the summed string lengths, rounded up.
 *
 * @param {string[]} sigs - signature strings
 * @returns {number} estimated token count
 */
function tokenCount(sigs) {
  let totalChars = 0;
  for (const sig of sigs) {
    totalChars += sig.length;
  }
  return Math.ceil(totalChars / 4);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Heuristically decide whether a source file has an accompanying test.
 *
 * Looks at the direct children (one level only, for speed) of the
 * conventional test directories under cwd — test/, tests/, __tests__/ and
 * spec/ — and reports a match as soon as any entry name contains the source
 * file's base name without its extension (e.g. "python" for python.js).
 * Directories that are missing or unreadable are skipped.
 *
 * @param {string} filePath - path of the source file
 * @param {string} cwd - project root
 * @returns {boolean} true when a matching entry name was found
 */
function hasCoverage(filePath, cwd) {
  const relPath = path.relative(cwd, filePath);
  const stem = path.basename(relPath, path.extname(relPath));

  for (const dirName of ['test', 'tests', '__tests__', 'spec']) {
    const dirPath = path.join(cwd, dirName);
    if (!fs.existsSync(dirPath)) continue;

    let children;
    try {
      children = fs.readdirSync(dirPath, { withFileTypes: true });
    } catch (_) {
      continue;
    }

    if (children.some((child) => child.name.includes(stem))) return true;
  }

  return false;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Locate and load the extractor module for a language.
 *
 * Resolution order:
 *   1. <cwd>/src/extractors/<name>.js — used when the analyzed project embeds
 *      its own copy (note: this executes that project's module code);
 *   2. ../extractors/<name>.js relative to this module's directory.
 * Load failures at either step are swallowed on purpose so that a broken or
 * missing extractor only causes its files to be skipped.
 *
 * @param {string} name - extractor name (e.g. "python")
 * @param {string} cwd - project root
 * @returns {object|null} the loaded module, or null if neither location loads
 */
function loadExtractor(name, cwd) {
  const fileName = `${name}.js`;

  const embedded = path.join(cwd, 'src', 'extractors', fileName);
  if (fs.existsSync(embedded)) {
    try {
      return require(embedded);
    } catch (_) {
      // fall through to the bundled extractor
    }
  }

  try {
    return require(path.join(__dirname, '..', 'extractors', fileName));
  } catch (_) {
    return null;
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Analyze a list of absolute file paths and produce one stat record per file.
 *
 * Files are silently skipped when no extractor is mapped to them, when the
 * extractor module cannot be loaded (or has no extract() function), or when
 * the file cannot be read. An extractor that throws, or returns a non-array,
 * is treated as having produced zero signatures.
 *
 * Each extractor is resolved at most once per call — including failed
 * lookups, so a missing extractor is not re-loaded for every file of that
 * language.
 *
 * @param {string[]} files - absolute paths to analyze
 * @param {string} cwd - project root (stat paths are reported relative to it)
 * @param {object} [opts]
 * @param {boolean} [opts.slow=false] - if true, measure extraction time per file
 * @param {number} [opts.slowMs=50] - threshold (ms) above which a file is "slow";
 *   any number is honoured, including 0
 * @param {number} [opts.maxSigs=25] - max sigs counted per file
 * @returns {object[]} per-file stats: { file, extractor, sigs, tokens, covered,
 *   elapsedMs, slow } — elapsedMs and slow are undefined unless opts.slow is set
 */
function analyzeFiles(files, cwd, opts) {
  const options = opts || {};
  const slow = Boolean(options.slow);
  const slowMs = typeof options.slowMs === 'number' && !Number.isNaN(options.slowMs)
    ? options.slowMs
    : 50;
  const maxSigs = options.maxSigs || 25;

  const stats = [];
  // name -> module | null; Map#has lets failed loads be remembered too.
  const extractorCache = new Map();

  for (const filePath of files) {
    const extractorName = getExtractorName(filePath);
    if (!extractorName) continue;

    if (!extractorCache.has(extractorName)) {
      extractorCache.set(extractorName, loadExtractor(extractorName, cwd));
    }
    const extractor = extractorCache.get(extractorName);
    if (!extractor || typeof extractor.extract !== 'function') continue;

    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch (_) {
      continue; // unreadable file: leave it out of the report
    }

    // Time the extraction only when asked to; the call itself is identical.
    const startedAt = slow ? Date.now() : 0;
    let sigs;
    try {
      sigs = extractor.extract(content);
    } catch (_) {
      sigs = [];
    }
    const elapsedMs = slow ? Date.now() - startedAt : undefined;

    sigs = (Array.isArray(sigs) ? sigs : []).slice(0, maxSigs);

    stats.push({
      file: path.relative(cwd, filePath),
      extractor: extractorName,
      sigs: sigs.length,
      tokens: tokenCount(sigs),
      covered: hasCoverage(filePath, cwd),
      elapsedMs,
      slow: slow ? elapsedMs > slowMs : undefined,
    });
  }

  return stats;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Render analyzer stats as a markdown table with a totals footer.
 *
 * The File column is sized to the longest path (minimum width 4); the other
 * columns use fixed padding. When showSlow is truthy an Elapsed column is
 * appended, with a warning marker on rows flagged as slow.
 *
 * @param {object[]} stats - output of analyzeFiles()
 * @param {boolean} showSlow - whether to include the Elapsed column
 * @returns {string} the table (newline-terminated), or a placeholder line
 *   when there is nothing to show
 */
function formatAnalysisTable(stats, showSlow) {
  if (!stats || stats.length === 0) return '_(no files analyzed)_\n';

  const fileWidth = Math.max(4, ...stats.map((entry) => entry.file.length));

  let header = `| ${'File'.padEnd(fileWidth)} | Sigs | Tokens | Extractor | Coverage |`;
  let sep = `|${'-'.repeat(fileWidth + 2)}|------|--------|-------------|------------|`;
  if (showSlow) {
    header += ' Elapsed |';
    sep += '----------|';
  }

  const lines = [header, sep];
  let totalSigs = 0;
  let totalTokens = 0;

  for (const entry of stats) {
    totalSigs += entry.sigs;
    totalTokens += entry.tokens;

    const cells = [
      entry.file.padEnd(fileWidth),
      String(entry.sigs).padStart(4),
      String(entry.tokens).padStart(6),
      (entry.extractor || '').padEnd(11),
      entry.covered ? '✓ tested ' : '✗ untested',
    ];
    let row = `| ${cells.join(' | ')} |`;

    if (showSlow) {
      const elapsed = entry.elapsedMs !== undefined ? `${entry.elapsedMs}ms` : '';
      const marker = entry.slow ? ' ⚠️' : '';
      row = `${row} ${elapsed.padStart(6)}${marker} |`;
    }
    lines.push(row);
  }

  let footer = `| ${''.padEnd(fileWidth)} | ${String(totalSigs).padStart(4)} | ${String(totalTokens).padStart(6)} | **Total** | |`;
  if (showSlow) {
    footer = `${footer} ${' '.padStart(8)} |`;
  }
  lines.push(sep, footer);

  return lines.join('\n') + '\n';
}
|
|
200
|
+
|
|
201
|
+
/**
 * Summarize analyzer stats as a plain object ready for JSON.stringify.
 *
 * @param {object[]} stats - output of analyzeFiles()
 * @returns {{files: object[], totalSigs: number, totalTokens: number,
 *   slowFiles: {file: string, elapsedMs: number}[], fileCount: number}}
 *   the stats themselves plus signature/token totals, the files flagged slow
 *   and the number of files
 */
function formatAnalysisJSON(stats) {
  let totalSigs = 0;
  let totalTokens = 0;
  const slowFiles = [];

  for (const entry of stats) {
    totalSigs += entry.sigs;
    totalTokens += entry.tokens;
    if (entry.slow) {
      slowFiles.push({ file: entry.file, elapsedMs: entry.elapsedMs });
    }
  }

  return {
    files: stats,
    totalSigs,
    totalTokens,
    slowFiles,
    fileCount: stats.length,
  };
}
|
|
220
|
+
|
|
221
|
+
module.exports = { analyzeFiles, formatAnalysisTable, formatAnalysisJSON };
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* SigMap benchmark runner.
|
|
5
|
+
* Zero npm dependencies.
|
|
6
|
+
*
|
|
7
|
+
* Loads evaluation tasks from a JSONL file, runs signature-based retrieval
|
|
8
|
+
* against a target repo, and returns scored results.
|
|
9
|
+
*
|
|
10
|
+
* Usage (programmatic):
|
|
11
|
+
* const { run } = require('./src/eval/runner');
|
|
12
|
+
* const results = run('benchmarks/tasks/retrieval.jsonl', cwd);
|
|
13
|
+
* // results: { tasks: [...], metrics: { hitAt5, mrr, precisionAt5, avgTokens, tasks } }
|
|
14
|
+
*
|
|
15
|
+
* Usage (CLI via gen-context.js --benchmark):
|
|
16
|
+
* node gen-context.js --benchmark
|
|
17
|
+
* node gen-context.js --benchmark --json
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
const fs = require('fs');
|
|
21
|
+
const path = require('path');
|
|
22
|
+
const { aggregate } = require('./scorer');
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Context file reader
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
/**
 * Parse the generated context file into a signature index.
 *
 * Reads <cwd>/.github/copilot-instructions.md. A line of the form
 *   ### path/to/file.ext
 * starts a new file section; every non-blank line inside the fenced (```)
 * blocks that follow is recorded, trimmed, as one signature of that file.
 * Text outside fences, or before the first header, is ignored.
 *
 * @param {string} cwd - project root
 * @returns {Map<string, string[]>} file path → signatures; empty when the
 *   context file does not exist
 */
function buildSigIndex(cwd) {
  const index = new Map();
  const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
  if (!fs.existsSync(contextPath)) return index;

  const sectionHeader = /^###\s+(\S+\.\w+)\s*$/;
  let activeFile = null;
  let collected = [];
  let insideFence = false;

  for (const rawLine of fs.readFileSync(contextPath, 'utf8').split('\n')) {
    const header = sectionHeader.exec(rawLine);

    if (header) {
      // Close the previous section before opening the next one.
      if (activeFile !== null) index.set(activeFile, collected);
      activeFile = header[1];
      collected = [];
      insideFence = false;
    } else if (rawLine.startsWith('```')) {
      insideFence = !insideFence;
    } else if (insideFence && activeFile) {
      const sig = rawLine.trim();
      if (sig) collected.push(sig);
    }
  }

  // The final section has no following header to flush it.
  if (activeFile !== null) index.set(activeFile, collected);

  return index;
}
|
|
82
|
+
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
// Simple keyword-based ranking (pre-retrieval layer; v2.3 adds proper ranker)
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
/**
 * Break a query or signature into lower-case word tokens.
 *
 * camelCase boundaries (lower-case letter followed by upper-case), "_" and
 * "-" are treated as word breaks, all other non-word characters become
 * whitespace, and tokens shorter than two characters are dropped.
 *
 * @param {string} text
 * @returns {string[]} tokens in order of appearance ([] for empty input)
 */
function tokenize(text) {
  if (!text) return [];

  const spaced = text
    .replace(/([a-z])([A-Z])/g, '$1 $2') // split camelCase
    .replace(/[_\-]/g, ' ') // split snake/kebab
    .replace(/[^\w\s]/g, ' '); // drop non-word chars

  const tokens = [];
  for (const word of spaced.toLowerCase().split(/\s+/)) {
    if (word.length > 1) tokens.push(word);
  }
  return tokens;
}
|
|
106
|
+
|
|
107
|
+
// Common English function words that carry no retrieval signal; query tokens
// found here are ignored when scoring.
const STOP_WORDS = new Set(
  'the a an in of to for and or is are that this it with from by be as on at'.split(' '),
);
|
|
111
|
+
|
|
112
|
+
/**
 * Compute a relevance score for one file against a tokenized query.
 *
 * Every non-stop-word query token contributes 1 when it appears verbatim
 * among the file's signature tokens, plus 0.3 for each distinct signature
 * token that merely starts with it (prefix bonus, only for query tokens of
 * four or more characters).
 *
 * @param {string[]} sigs - array of signature strings for this file
 * @param {string[]} queryTokens - tokens of the query
 * @returns {number} non-negative score; higher = more relevant
 */
function scoreFile(sigs, queryTokens) {
  if (!sigs || sigs.length === 0) return 0;

  const vocabulary = new Set(tokenize(sigs.join(' ')));
  let total = 0;

  for (const term of queryTokens) {
    if (STOP_WORDS.has(term)) continue;

    if (vocabulary.has(term)) total += 1;

    // Prefix bonus is reserved for reasonably specific terms.
    if (term.length < 4) continue;
    for (const word of vocabulary) {
      if (word !== term && word.startsWith(term)) total += 0.3;
    }
  }

  return total;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Score every indexed file against a query and return the best matches.
 *
 * Results are ordered by descending score; equal scores fall back to
 * alphabetical file path order. Files scoring 0 are still included.
 *
 * @param {string} query
 * @param {Map<string, string[]>} index - file → signatures
 * @param {number} topK - maximum number of results (default 10)
 * @returns {{ file: string, score: number, sigs: string[] }[]}
 */
function rank(query, index, topK = 10) {
  const terms = tokenize(query);

  const candidates = Array.from(index.entries(), ([file, sigs]) => ({
    file,
    score: scoreFile(sigs, terms),
    sigs,
  }));

  candidates.sort(
    (left, right) => (right.score - left.score) || left.file.localeCompare(right.file),
  );

  return candidates.slice(0, topK);
}
|
|
158
|
+
|
|
159
|
+
// ---------------------------------------------------------------------------
|
|
160
|
+
// Token estimation
|
|
161
|
+
// ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
/**
 * Approximate the token cost of a set of signatures.
 * The signatures are joined with newlines and the character count is divided
 * by four (rounded up) — a rough rule of thumb, not an exact tokenizer.
 *
 * @param {string[]} sigs - signature strings (null/undefined counts as none)
 * @returns {number}
 */
function estimateTokens(sigs) {
  const lines = sigs || [];
  const charCount = lines.join('\n').length;
  return Math.ceil(charCount / 4);
}
|
|
172
|
+
|
|
173
|
+
// ---------------------------------------------------------------------------
|
|
174
|
+
// Task loader
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
/**
 * Read evaluation tasks from a JSONL file.
 *
 * Each useful line is a JSON object { id, query, expected_files, repo }.
 * A line is kept only when it parses, has a truthy query and an array
 * expected_files; blank lines and anything else are skipped without error.
 * A missing id defaults to the task's 1-based position among the accepted
 * tasks, and a missing repo defaults to ".".
 *
 * @param {string} tasksFile - absolute or relative path
 * @returns {Array<{id:string, query:string, expected:string[], repo:string}>}
 *   accepted tasks in file order; [] when the file does not exist
 */
function loadTasks(tasksFile) {
  const tasks = [];
  if (!fs.existsSync(tasksFile)) return tasks;

  const rawLines = fs.readFileSync(tasksFile, 'utf8').split('\n');
  rawLines.forEach((rawLine) => {
    const text = rawLine.trim();
    if (text === '') return;

    try {
      const record = JSON.parse(text);
      const usable = record.query && Array.isArray(record.expected_files);
      if (!usable) return;

      tasks.push({
        id: record.id || String(tasks.length + 1),
        query: record.query,
        expected: record.expected_files,
        repo: record.repo || '.',
      });
    } catch {
      // skip invalid JSON lines (and JSON values that are not objects)
    }
  });

  return tasks;
}
|
|
207
|
+
|
|
208
|
+
// ---------------------------------------------------------------------------
|
|
209
|
+
// Main runner
|
|
210
|
+
// ---------------------------------------------------------------------------
|
|
211
|
+
|
|
212
|
+
/**
 * Run all tasks in tasksFile against the repo at cwd.
 *
 * The signature index is built once from the repo's generated context file
 * and every task's query is ranked against it exactly once; the per-task
 * metrics and the token estimate are both derived from that single ranking.
 *
 * @param {string} tasksFile - path to JSONL task file (absolute or relative to cwd)
 * @param {string} cwd - project root
 * @param {object} [opts]
 * @param {number} [opts.topK=10] - how many results to rank per query
 * @returns {{
 *   tasks: Array<{id, query, expected, ranked, hit5, rr, precAt5, tokens}>,
 *   metrics: { hitAt5, mrr, precisionAt5, avgTokens, tasks }
 * }} per-task results plus aggregate metrics; all-zero metrics when the task
 *   file is missing or contains no valid task
 */
function run(tasksFile, cwd, opts = {}) {
  const topK = opts.topK || 10;

  const resolvedTasksFile = path.isAbsolute(tasksFile)
    ? tasksFile
    : path.resolve(cwd, tasksFile);

  const tasks = loadTasks(resolvedTasksFile);
  if (tasks.length === 0) {
    return {
      tasks: [],
      metrics: { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 },
    };
  }

  // Resolved once per run rather than once per task.
  const { hitAtK, reciprocalRank, precisionAtK } = require('./scorer');

  // Build index once (re-used across all tasks in the same repo)
  const index = buildSigIndex(cwd);

  const taskResults = tasks.map((task) => {
    // Rank a single time; file list and token cost both come from this result.
    const top = rank(task.query, index, topK);
    const ranked = top.map((r) => r.file);
    const tokens = top.reduce((sum, r) => sum + estimateTokens(r.sigs), 0);

    return {
      id: task.id,
      query: task.query,
      expected: task.expected,
      ranked,
      hit5: hitAtK(ranked, task.expected, 5),
      rr: reciprocalRank(ranked, task.expected),
      precAt5: precisionAtK(ranked, task.expected, 5),
      tokens,
    };
  });

  const metrics = aggregate(
    taskResults.map((r) => ({ ranked: r.ranked, expected: r.expected, tokens: r.tokens })),
  );

  return { tasks: taskResults, metrics };
}
|
|
272
|
+
|
|
273
|
+
// ---------------------------------------------------------------------------
|
|
274
|
+
// Table formatter
|
|
275
|
+
// ---------------------------------------------------------------------------
|
|
276
|
+
|
|
277
|
+
/**
 * Render per-task benchmark results as a markdown table.
 * Queries longer than 40 characters are cut to 37 characters plus "...".
 *
 * @param {Array} taskResults - the `tasks` array returned by run()
 * @returns {string} header, divider and one row per task, newline-joined
 */
function formatTable(taskResults) {
  const lines = [
    '| Task | Query | hit@5 | RR | Tokens |',
    '|---|---|:---:|:---:|---:|',
  ];

  for (const result of taskResults) {
    const shownQuery = result.query.length > 40
      ? result.query.slice(0, 37) + '...'
      : result.query;
    const mark = result.hit5 ? '✓' : '✗';
    lines.push(`| ${result.id} | ${shownQuery} | ${mark} | ${result.rr.toFixed(2)} | ${result.tokens} |`);
  }

  return lines.join('\n');
}
|
|
291
|
+
|
|
292
|
+
/**
 * Render aggregate benchmark metrics as a short multi-line report.
 * hit@5 and precision@5 are shown as percentages with one decimal, MRR with
 * three decimals.
 *
 * @param {object} metrics - from aggregate()
 * @returns {string}
 */
function formatMetrics(metrics) {
  const hitPct = (metrics.hitAt5 * 100).toFixed(1);
  const precisionPct = (metrics.precisionAt5 * 100).toFixed(1);
  const mrr = metrics.mrr.toFixed(3);

  const report = [];
  report.push(`[sigmap] benchmark results:`);
  report.push(` tasks : ${metrics.tasks}`);
  report.push(` hit@5 : ${hitPct}%`);
  report.push(` MRR : ${mrr}`);
  report.push(` precision@5 : ${precisionPct}%`);
  report.push(` avg tokens : ${metrics.avgTokens}`);
  return report.join('\n');
}
|
|
307
|
+
|
|
308
|
+
module.exports = { run, rank, loadTasks, buildSigIndex, formatTable, formatMetrics, tokenize };
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* SigMap evaluation scorer.
|
|
5
|
+
* Zero npm dependencies.
|
|
6
|
+
*
|
|
7
|
+
* Computes retrieval quality metrics:
|
|
8
|
+
* hit@k — fraction of tasks where the correct file appears in top-k results
|
|
9
|
+
* MRR — mean reciprocal rank (1/rank of first correct result)
|
|
10
|
+
* precision@k — fraction of top-k results that are correct
|
|
11
|
+
*
|
|
12
|
+
* All functions are pure and never throw.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * Find where the first acceptable file shows up in a ranked result list.
 * Paths on both sides are compared after normalizePath().
 *
 * @param {string[]} ranked - Ordered list of file paths returned by the system
 * @param {string[]} expected - List of acceptable correct files (any match counts)
 * @returns {number} 1-based position of the first match, or Infinity when
 *   nothing matches or either argument is not an array
 */
function firstRank(ranked, expected) {
  if (!Array.isArray(ranked) || !Array.isArray(expected)) return Infinity;

  const wanted = new Set(expected.map((file) => normalizePath(file)));
  const position = ranked.findIndex((candidate) => wanted.has(normalizePath(candidate)));

  return position === -1 ? Infinity : position + 1;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Normalize a file path for comparison.
 *
 * Backslashes are converted to forward slashes first, and only then is any
 * leading "./" removed — so Windows-style ".\src\a.js", "./src/a.js" and
 * "src/a.js" all normalize to the same string. Case is intentionally left
 * untouched, and no other part of the path (e.g. "../") is rewritten.
 *
 * @param {string} p - path to normalize (non-strings are stringified)
 * @returns {string}
 */
function normalizePath(p) {
  return String(p)
    .replace(/\\/g, '/')
    // Must run after the separator conversion, otherwise ".\" is never seen as "./".
    .replace(/^(?:\.\/)+/, '');
}
|
|
41
|
+
|
|
42
|
+
/**
 * hit@k for a single task: did any expected file land in the top k results?
 *
 * @param {string[]} ranked - Ordered results
 * @param {string[]} expected - Correct files
 * @param {number} k - Cut-off (default 5)
 * @returns {0|1} 1 when the first correct result is at position k or better
 */
function hitAtK(ranked, expected, k = 5) {
  const position = firstRank(ranked, expected);
  if (position <= k) return 1;
  return 0;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Reciprocal rank for a single task: 1 / (position of the first correct
 * result).
 *
 * @param {string[]} ranked
 * @param {string[]} expected
 * @returns {number} value in (0, 1] when a correct file is ranked, 0 when
 *   none is
 */
function reciprocalRank(ranked, expected) {
  const position = firstRank(ranked, expected);
  if (position === Infinity) return 0;
  return 1 / position;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Compute precision@k for one task.
 * Fraction of the top-k results that appear in expected (paths compared
 * after normalizePath()). When fewer than k results exist, the denominator
 * is the number of results actually returned.
 *
 * Never throws and never yields NaN: a non-array argument, an empty result
 * list or a cut-off that is not greater than zero all produce 0.
 *
 * @param {string[]} ranked
 * @param {string[]} expected
 * @param {number} k - cut-off (default 5)
 * @returns {number} value in [0, 1]
 */
function precisionAtK(ranked, expected, k = 5) {
  if (!Array.isArray(ranked) || !Array.isArray(expected)) return 0;
  if (!(k > 0)) return 0; // also rejects NaN, which would slice to nothing

  const topK = ranked.slice(0, k);
  if (topK.length === 0) return 0; // avoid 0 / 0

  const expSet = new Set(expected.map((f) => normalizePath(f)));
  const hits = topK.filter((f) => expSet.has(normalizePath(f))).length;
  return hits / topK.length;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Average the per-task retrieval metrics over a whole run.
 *
 * For every result the hit@k, reciprocal rank and precision@k are computed
 * and averaged (rounded to three decimals); token counts are averaged and
 * rounded to an integer. Missing ranked/expected lists count as empty and
 * non-numeric token values as 0. The result keys keep the "5" suffix even
 * when a different k is passed.
 *
 * @param {Array<{ranked: string[], expected: string[], tokens: number}>} results
 * @param {number} k - cut-off (default 5)
 * @returns {{
 *   hitAt5: number,        // fraction [0,1]
 *   mrr: number,           // mean reciprocal rank [0,1]
 *   precisionAt5: number,
 *   avgTokens: number,
 *   tasks: number
 * }} all zeros when results is empty or not an array
 */
function aggregate(results, k = 5) {
  if (!Array.isArray(results) || results.length === 0) {
    return { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 };
  }

  const sums = { hit: 0, rr: 0, precision: 0, tokens: 0 };

  results.forEach((entry) => {
    const ranked = entry.ranked || [];
    const expected = entry.expected || [];

    sums.hit += hitAtK(ranked, expected, k);
    sums.rr += reciprocalRank(ranked, expected);
    sums.precision += precisionAtK(ranked, expected, k);
    if (typeof entry.tokens === 'number') sums.tokens += entry.tokens;
  });

  const count = results.length;
  return {
    hitAt5: round(sums.hit / count),
    mrr: round(sums.rr / count),
    precisionAt5: round(sums.precision / count),
    avgTokens: Math.round(sums.tokens / count),
    tasks: count,
  };
}
|
|
121
|
+
|
|
122
|
+
/**
 * Round a number to three decimal places.
 *
 * @param {number} x
 * @returns {number}
 */
function round(x) {
  const scale = 1000;
  const scaled = Math.round(x * scale);
  return scaled / scale;
}
|
|
125
|
+
|
|
126
|
+
module.exports = { hitAtK, reciprocalRank, precisionAtK, aggregate, firstRank };
|
package/src/mcp/server.js
CHANGED