sigmap 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -6,6 +6,27 @@ Format: [Semantic Versioning](https://semver.org/)
6
6
 
7
7
  ---
8
8
 
9
+ ## [2.1.0] — 2026-04-05
10
+
11
+ ### Added
12
+ - **Benchmark & evaluation system** — `src/eval/runner.js` and `src/eval/scorer.js`: zero-dependency retrieval quality measurement pipeline. Computes hit@5, MRR, and precision@5 against a JSONL task file.
13
+ - **`benchmarks/` directory structure** — `benchmarks/tasks/retrieval.jsonl` (20 tasks against SigMap's own codebase), `benchmarks/results/` (gitignored run output), `benchmarks/reports/` (human-readable summaries).
14
+ - **`--benchmark` CLI flag** — runs retrieval through all tasks in `benchmarks/tasks/retrieval.jsonl`, prints a markdown table (Task | Query | hit@5 | RR | Tokens) plus aggregate metrics; `--benchmark --json` for machine-readable output.
15
+ - **`--eval` CLI flag** — alias for `--benchmark`.
16
+ - **`src/eval/scorer.js`** — pure metric functions: `hitAtK(ranked, expected, k)`, `reciprocalRank(ranked, expected)`, `precisionAtK(ranked, expected, k)`, `aggregate(results)`. Never throws.
17
+ - **`src/eval/runner.js`** — task loader (`loadTasks`), sig-index builder (`buildSigIndex`), keyword ranker (`rank`, `tokenize`), and main `run(tasksFile, cwd)` entry point. Reads generated context file from disk; no in-memory state.
18
+ - **`test/integration/benchmark.test.js`** — 10 integration tests covering the scorer metric functions, the tokenizer, task loading, the empty-file edge case, metrics shape, and `--benchmark --json` CLI output.
19
+
20
+ ### Validation gate
21
+ - 21/21 extractor tests passed
22
+ - All integration suites passed (includes 10 new benchmark tests)
23
+ - `node gen-context.js --version` → `2.1.0`
24
+ - `node gen-context.js --benchmark` runs without error on SigMap repo
25
+ - `node gen-context.js --benchmark --json` → valid JSON with `metrics.hitAt5`, `metrics.mrr`, `tasks` array
26
+ - `node gen-context.js --eval --json` → same output as `--benchmark --json`
27
+
28
+ ---
29
+
9
30
  ## [2.0.0] — 2026-04-04
10
31
 
11
32
  ### Added
package/README.md CHANGED
@@ -26,6 +26,7 @@
26
26
  [![Changelog](https://img.shields.io/badge/changelog-CHANGELOG.md-blue)](CHANGELOG.md)
27
27
  [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
28
28
  [![VS Code](https://img.shields.io/badge/VS%20Code-extension-0078d4?logo=visual-studio-code)](https://marketplace.visualstudio.com/items?itemName=manojmallick.sigmap)
29
+ [![Open VSX](https://img.shields.io/open-vsx/v/manojmallick/sigmap?color=a251e3&label=Open%20VSX&logo=vscodium)](https://open-vsx.org/extension/manojmallick/sigmap)
29
30
 
30
31
  </div>
31
32
 
@@ -69,6 +70,9 @@ gen-context.js ──► extracts signatures from 21 languages
69
70
  AI agent session starts with full context
70
71
  ```
71
72
 
73
+ > **Dogfooding:** SigMap runs on itself — 40 JS files, 8,600 lines of code.
74
+ > View the generated context: [`.github/copilot-instructions.md`](.github/copilot-instructions.md)
75
+
72
76
  ### Token reduction at every stage
73
77
 
74
78
  | Stage | Tokens | Reduction |
@@ -606,9 +610,15 @@ function extract(src) { // src: string → string[]
606
610
 
607
611
  ---
608
612
 
613
+ ## ⭐ Support
614
+
615
+ If SigMap saves you context or API spend, a ⭐ on [GitHub](https://github.com/manojmallick/sigmap) helps others find it.
616
+
617
+ ---
618
+
609
619
  ## 📄 License
610
620
 
611
- MIT © 2026 [Manoj Mallick](https://github.com/manojmallick)
621
+ MIT © 2026 [Manoj Mallick](https://github.com/manojmallick) · Made in Amsterdam 🇳🇱
612
622
 
613
623
  ---
614
624
 
package/gen-context.js CHANGED
@@ -3570,6 +3570,222 @@ __factories["./src/tracking/logger"] = function(module, exports) {
3570
3570
 
3571
3571
  };
3572
3572
 
3573
+ // ── ./src/eval/scorer ──
3574
__factories["./src/eval/scorer"] = function(module, exports) {
  'use strict';

  /**
   * 1-based position of the first ranked entry that matches any expected
   * file, or Infinity when there is no match or an argument is not an array.
   * @param {string[]} ranked
   * @param {string[]} expected
   * @returns {number}
   */
  function firstRank(ranked, expected) {
    if (!Array.isArray(ranked) || !Array.isArray(expected)) return Infinity;
    const expSet = new Set(expected.map((f) => normalizePath(f)));
    for (let i = 0; i < ranked.length; i++) {
      if (expSet.has(normalizePath(ranked[i]))) return i + 1;
    }
    return Infinity;
  }

  /**
   * Canonical form of a path for comparison: forward slashes, no leading "./".
   * Case is left untouched.
   * @param {string} p
   * @returns {string}
   */
  function normalizePath(p) {
    // Separators are unified first so a Windows-style ".\" prefix is stripped too.
    return String(p).replace(/\\/g, '/').replace(/^(?:\.\/)+/, '');
  }

  /**
   * 1 when an expected file appears within the first k ranked entries, else 0.
   * @param {string[]} ranked
   * @param {string[]} expected
   * @param {number} [k=5]
   * @returns {0|1}
   */
  function hitAtK(ranked, expected, k = 5) {
    return firstRank(ranked, expected) <= k ? 1 : 0;
  }

  /**
   * Reciprocal of the first matching rank; 0 when nothing matches.
   * @param {string[]} ranked
   * @param {string[]} expected
   * @returns {number}
   */
  function reciprocalRank(ranked, expected) {
    const rank = firstRank(ranked, expected);
    return rank === Infinity ? 0 : 1 / rank;
  }

  /**
   * Share of the first k ranked entries that are expected files. The
   * denominator is the number of entries actually inspected, i.e.
   * min(k, ranked.length). Returns 0 instead of throwing or yielding NaN
   * when an argument is not an array or k selects no entries.
   * @param {string[]} ranked
   * @param {string[]} expected
   * @param {number} [k=5]
   * @returns {number}
   */
  function precisionAtK(ranked, expected, k = 5) {
    if (!Array.isArray(ranked) || !Array.isArray(expected)) return 0;
    if (!(k >= 1)) return 0; // also rejects NaN
    const topK = ranked.slice(0, k);
    if (topK.length === 0) return 0;
    const expSet = new Set(expected.map((f) => normalizePath(f)));
    const hits = topK.filter((f) => expSet.has(normalizePath(f))).length;
    return hits / topK.length;
  }

  /**
   * Average the per-task metrics. Malformed entries (null, missing arrays,
   * non-finite token counts) contribute zeros rather than throwing.
   * The result keys keep their "5" suffix regardless of k.
   * @param {Array<{ranked: string[], expected: string[], tokens: number}>} results
   * @param {number} [k=5]
   * @returns {{hitAt5: number, mrr: number, precisionAt5: number, avgTokens: number, tasks: number}}
   */
  function aggregate(results, k = 5) {
    if (!Array.isArray(results) || results.length === 0) {
      return { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 };
    }
    let totalHit = 0;
    let totalRR = 0;
    let totalPrec = 0;
    let totalTokens = 0;
    for (const r of results) {
      const entry = r && typeof r === 'object' ? r : {};
      const ranked = Array.isArray(entry.ranked) ? entry.ranked : [];
      const expected = Array.isArray(entry.expected) ? entry.expected : [];
      totalHit += hitAtK(ranked, expected, k);
      totalRR += reciprocalRank(ranked, expected);
      totalPrec += precisionAtK(ranked, expected, k);
      totalTokens += Number.isFinite(entry.tokens) ? entry.tokens : 0;
    }
    const n = results.length;
    return {
      hitAt5: Math.round(totalHit / n * 1000) / 1000,
      mrr: Math.round(totalRR / n * 1000) / 1000,
      precisionAt5: Math.round(totalPrec / n * 1000) / 1000,
      avgTokens: Math.round(totalTokens / n),
      tasks: n,
    };
  }

  module.exports = { hitAtK, reciprocalRank, precisionAtK, aggregate, firstRank };
};
3632
+
3633
+ // ── ./src/eval/runner ──
3634
__factories["./src/eval/runner"] = function(module, exports) {
  'use strict';

  const fs = require('fs');
  const path = require('path');
  const { hitAtK, reciprocalRank, precisionAtK, aggregate } = __require('./src/eval/scorer');

  /**
   * Parse `<cwd>/.github/copilot-instructions.md` into a Map of
   * file path → trimmed, non-blank lines found inside the ``` fences that
   * follow that file's `### path/to/file.ext` header.
   * Returns an empty Map when the context file does not exist.
   * @param {string} cwd
   * @returns {Map<string, string[]>}
   */
  function buildSigIndex(cwd) {
    const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
    const index = new Map();
    if (!fs.existsSync(contextPath)) return index;
    const content = fs.readFileSync(contextPath, 'utf8');
    const lines = content.split('\n');
    let currentFile = null;
    let inBlock = false;
    let sigs = [];
    for (const line of lines) {
      const headerMatch = line.match(/^###\s+(\S+\.\w+)\s*$/);
      if (headerMatch) {
        if (currentFile !== null) index.set(currentFile, sigs);
        currentFile = headerMatch[1];
        sigs = [];
        inBlock = false;
        continue;
      }
      if (line.startsWith('```')) { inBlock = !inBlock; continue; }
      if (inBlock && currentFile && line.trim()) sigs.push(line.trim());
    }
    // Flush the final section, which has no following header to trigger it.
    if (currentFile !== null) index.set(currentFile, sigs);
    return index;
  }

  /**
   * Lower-case word tokens of `text`, split on camelCase boundaries,
   * `_`/`-`, punctuation and whitespace. One-character tokens are dropped.
   * @param {string} text
   * @returns {string[]}
   */
  function tokenize(text) {
    if (!text) return [];
    return text
      .replace(/([a-z])([A-Z])/g, '$1 $2')
      .replace(/[_\-]/g, ' ')
      .replace(/[^\w\s]/g, ' ')
      .toLowerCase()
      .split(/\s+/)
      .filter((t) => t.length > 1);
  }

  // Query tokens that never contribute to a score.
  const STOP_WORDS = new Set([
    'the', 'a', 'an', 'in', 'of', 'to', 'for', 'and', 'or', 'is', 'are',
    'that', 'this', 'it', 'with', 'from', 'by', 'be', 'as', 'on', 'at',
  ]);

  /**
   * Keyword score of one file: +1 per query token present among the
   * signature tokens, +0.3 per distinct signature token that merely starts
   * with a query token of four or more characters.
   * @param {string[]} sigs
   * @param {string[]} queryTokens
   * @returns {number}
   */
  function scoreFile(sigs, queryTokens) {
    if (!sigs || sigs.length === 0) return 0;
    const sigTokens = new Set(tokenize(sigs.join(' ')));
    let score = 0;
    for (const qt of queryTokens) {
      if (STOP_WORDS.has(qt)) continue;
      if (sigTokens.has(qt)) score += 1;
      // The length gate depends only on the query token, so it is tested
      // once here instead of once per signature token.
      if (qt.length >= 4) {
        for (const st of sigTokens) {
          if (st !== qt && st.startsWith(qt)) score += 0.3;
        }
      }
    }
    return score;
  }

  /**
   * Score every indexed file against `query` and return the best `topK`
   * entries, highest score first, ties ordered by file path.
   * @param {string} query
   * @param {Map<string, string[]>} index
   * @param {number} [topK=10]
   * @returns {{file: string, score: number, sigs: string[]}[]}
   */
  function rank(query, index, topK = 10) {
    const queryTokens = tokenize(query);
    const scored = [];
    for (const [file, sigs] of index.entries()) {
      scored.push({ file, score: scoreFile(sigs, queryTokens), sigs });
    }
    scored.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
    return scored.slice(0, topK);
  }

  /**
   * Rough token estimate: one token per four characters of the
   * newline-joined signatures, rounded up.
   * @param {string[]} sigs
   * @returns {number}
   */
  function estimateTokens(sigs) {
    return Math.ceil((sigs || []).join('\n').length / 4);
  }

  /**
   * Read a JSONL task file. Blank lines, unparsable lines, and records
   * without a non-empty string `query` and an `expected_files` array are
   * skipped. A missing file yields an empty list.
   * @param {string} tasksFile
   * @returns {Array<{id: string, query: string, expected: string[], repo: string}>}
   */
  function loadTasks(tasksFile) {
    if (!fs.existsSync(tasksFile)) return [];
    const lines = fs.readFileSync(tasksFile, 'utf8').split('\n');
    const tasks = [];
    for (const line of lines) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      let obj;
      try {
        obj = JSON.parse(trimmed);
      } catch (_) {
        continue; // best-effort loader: invalid JSON lines are ignored by design
      }
      // A non-string query would crash tokenize()/formatTable() later on.
      if (!obj || typeof obj.query !== 'string' || obj.query === '') continue;
      if (!Array.isArray(obj.expected_files)) continue;
      const hasId = Boolean(obj.id) || obj.id === 0;
      tasks.push({
        id: hasId ? String(obj.id) : String(tasks.length + 1),
        query: obj.query,
        expected: obj.expected_files,
        repo: obj.repo || '.',
      });
    }
    return tasks;
  }

  /**
   * Evaluate every task in `tasksFile` against the signature index of `cwd`.
   * @param {string} tasksFile - absolute, or relative to `cwd`
   * @param {string} cwd
   * @param {{topK?: number}} [opts]
   * @returns {{tasks: object[], metrics: object}}
   */
  function run(tasksFile, cwd, opts = {}) {
    const topK = opts.topK || 10;
    const resolvedTasksFile = path.isAbsolute(tasksFile)
      ? tasksFile
      : path.resolve(cwd, tasksFile);
    const tasks = loadTasks(resolvedTasksFile);
    if (tasks.length === 0) {
      return { tasks: [], metrics: { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 } };
    }
    const index = buildSigIndex(cwd);
    const taskResults = [];
    for (const task of tasks) {
      // Rank once; both the path list and the token estimate derive from it.
      const topResults = rank(task.query, index, topK);
      const ranked = topResults.map((r) => r.file);
      const tokens = topResults.reduce((sum, r) => sum + estimateTokens(r.sigs), 0);
      taskResults.push({
        id: task.id,
        query: task.query,
        expected: task.expected,
        ranked,
        hit5: hitAtK(ranked, task.expected, 5),
        rr: reciprocalRank(ranked, task.expected),
        precAt5: precisionAtK(ranked, task.expected, 5),
        tokens,
      });
    }
    const metrics = aggregate(
      taskResults.map((r) => ({ ranked: r.ranked, expected: r.expected, tokens: r.tokens })),
    );
    return { tasks: taskResults, metrics };
  }

  /**
   * Render task results as a Markdown table. Queries longer than 40
   * characters are cut to 37 plus "..."; pipes are escaped and line breaks
   * flattened so a cell can never split the row.
   * @param {Array} taskResults
   * @returns {string}
   */
  function formatTable(taskResults) {
    const header = '| Task | Query | hit@5 | RR | Tokens |';
    const divider = '|---|---|:---:|:---:|---:|';
    const cell = (value) => String(value).replace(/\r?\n|\r/g, ' ').replace(/\|/g, '\\|');
    const rows = taskResults.map((r) => {
      const q = r.query.length > 40 ? r.query.slice(0, 37) + '...' : r.query;
      return `| ${cell(r.id)} | ${cell(q)} | ${r.hit5 ? '✓' : '✗'} | ${r.rr.toFixed(2)} | ${r.tokens} |`;
    });
    return [header, divider, ...rows].join('\n');
  }

  /**
   * Render aggregate metrics as a short multi-line summary.
   * @param {{tasks: number, hitAt5: number, mrr: number, precisionAt5: number, avgTokens: number}} metrics
   * @returns {string}
   */
  function formatMetrics(metrics) {
    return [
      `[sigmap] benchmark results:`,
      ` tasks : ${metrics.tasks}`,
      ` hit@5 : ${(metrics.hitAt5 * 100).toFixed(1)}%`,
      ` MRR : ${metrics.mrr.toFixed(3)}`,
      ` precision@5 : ${(metrics.precisionAt5 * 100).toFixed(1)}%`,
      ` avg tokens : ${metrics.avgTokens}`,
    ].join('\n');
  }

  module.exports = { run, rank, loadTasks, buildSigIndex, formatTable, formatMetrics, tokenize };
};
3788
+
3573
3789
 
3574
3790
  /**
3575
3791
  * SigMap — gen-context.js v1.2.0
@@ -3583,7 +3799,7 @@ const path = require('path');
3583
3799
  const os = require('os');
3584
3800
  const { execSync } = require('child_process');
3585
3801
 
3586
- const VERSION = '2.0.0';
3802
+ const VERSION = '2.1.0';
3587
3803
  const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
3588
3804
 
3589
3805
  function requireSourceOrBundled(key) {
@@ -4789,6 +5005,9 @@ Usage:
4789
5005
  node gen-context.js --diff Generate context for git-changed files only
4790
5006
  node gen-context.js --diff <base-ref> Generate context + structural diff vs base ref (e.g. main)
4791
5007
  node gen-context.js --diff --staged Generate context for staged files only
5008
+ node gen-context.js --benchmark Run retrieval benchmark (benchmarks/tasks/retrieval.jsonl)
5009
+ node gen-context.js --benchmark --json Benchmark results as JSON
5010
+ node gen-context.js --eval Alias for --benchmark
4792
5011
  node gen-context.js --init Write example config + .contextignore scaffold
4793
5012
  node gen-context.js --help Show this message
4794
5013
  node gen-context.js --version Show version
@@ -4916,6 +5135,40 @@ function main() {
4916
5135
  process.exit(0);
4917
5136
  }
4918
5137
 
5138
+ if (args.includes('--benchmark') || args.includes('--eval')) {
5139
+ try {
5140
+ const { run, formatTable, formatMetrics } = __require('./src/eval/runner');
5141
+ const tasksFile = path.join(cwd, 'benchmarks', 'tasks', 'retrieval.jsonl');
5142
+ const result = run(tasksFile, cwd, { topK: 10 });
5143
+ if (args.includes('--json')) {
5144
+ // Compact output: strip ranked arrays to keep payload small
5145
+ const compact = {
5146
+ metrics: result.metrics,
5147
+ tasks: result.tasks.map((t) => ({
5148
+ id: t.id,
5149
+ query: t.query,
5150
+ hit5: t.hit5,
5151
+ rr: t.rr,
5152
+ precAt5: t.precAt5,
5153
+ tokens: t.tokens,
5154
+ top1: t.ranked[0] || null,
5155
+ })),
5156
+ };
5157
+ process.stdout.write(JSON.stringify(compact) + '\n');
5158
+ } else {
5159
+ console.log(formatMetrics(result.metrics));
5160
+ if (result.tasks.length > 0) {
5161
+ console.log('');
5162
+ console.log(formatTable(result.tasks));
5163
+ }
5164
+ }
5165
+ } catch (err) {
5166
+ console.error(`[sigmap] benchmark error: ${err.message}`);
5167
+ process.exit(1);
5168
+ }
5169
+ process.exit(0);
5170
+ }
5171
+
4919
5172
  if (args.includes('--report')) {
4920
5173
  if (args.includes('--history')) {
4921
5174
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "2.0.0",
3
+ "version": "2.1.0",
4
4
  "description": "Zero-dependency AI context engine — 97% token reduction. No npm install. Runs on Node 18+.",
5
5
  "main": "gen-context.js",
6
6
  "bin": {
@@ -0,0 +1,308 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * SigMap benchmark runner.
5
+ * Zero npm dependencies.
6
+ *
7
+ * Loads evaluation tasks from a JSONL file, runs signature-based retrieval
8
+ * against a target repo, and returns scored results.
9
+ *
10
+ * Usage (programmatic):
11
+ * const { run } = require('./src/eval/runner');
12
+ * const results = run('benchmarks/tasks/retrieval.jsonl', cwd);
13
+ * // results: { tasks: [...], metrics: { hitAt5, mrr, precisionAt5, avgTokens, tasks } }
14
+ *
15
+ * Usage (CLI via gen-context.js --benchmark):
16
+ * node gen-context.js --benchmark
17
+ * node gen-context.js --benchmark --json
18
+ */
19
+
20
const fs = require('fs');
const path = require('path');
const { aggregate, hitAtK, reciprocalRank, precisionAtK } = require('./scorer');
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Context file reader
26
+ // ---------------------------------------------------------------------------
27
+
28
/**
 * Parse the generated context file into a signature index.
 *
 * The file read is `<cwd>/.github/copilot-instructions.md`. A line of the
 * form `### path/to/file.ext` opens a new section; every non-blank line
 * inside the ``` fences of that section is stored, trimmed, as one
 * signature of that file.
 *
 * @param {string} cwd - directory containing `.github/`
 * @returns {Map<string, string[]>} file path → signatures; empty when the
 *   context file does not exist
 */
function buildSigIndex(cwd) {
  const index = new Map();
  const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
  if (!fs.existsSync(contextPath)) return index;

  const HEADER = /^###\s+(\S+\.\w+)\s*$/;
  let currentFile = null;
  let sigs = [];
  let insideFence = false;

  // Store the section collected so far (no-op before the first header).
  const flush = () => {
    if (currentFile !== null) index.set(currentFile, sigs);
  };

  fs.readFileSync(contextPath, 'utf8').split('\n').forEach((line) => {
    const header = HEADER.exec(line);
    if (header !== null) {
      flush();
      currentFile = header[1];
      sigs = [];
      insideFence = false;
      return;
    }

    if (line.startsWith('```')) {
      insideFence = !insideFence;
      return;
    }

    const sig = line.trim();
    if (insideFence && currentFile && sig) sigs.push(sig);
  });

  // The last section has no following header to trigger its flush.
  flush();
  return index;
}
82
+
83
+ // ---------------------------------------------------------------------------
84
+ // Simple keyword-based ranking (pre-retrieval layer; v2.3 adds proper ranker)
85
+ // ---------------------------------------------------------------------------
86
+
87
/**
 * Break a query or signature into lower-case word tokens.
 * Boundaries are camelCase humps (lower→upper), `_`, `-`, any other
 * non-word character, and whitespace. Tokens of a single character are
 * discarded.
 * @param {string} text
 * @returns {string[]} tokens in order of appearance; [] for falsy input
 */
function tokenize(text) {
  if (!text) return [];

  const humpsSplit = text.replace(/([a-z])([A-Z])/g, '$1 $2');
  const separatorsSplit = humpsSplit.replace(/[_\-]/g, ' ');
  const wordsOnly = separatorsSplit.replace(/[^\w\s]/g, ' ');

  const tokens = [];
  for (const piece of wordsOnly.toLowerCase().split(/\s+/)) {
    if (piece.length > 1) tokens.push(piece);
  }
  return tokens;
}
106
+
107
// Query tokens that carry no retrieval signal and are skipped when scoring.
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'in', 'of', 'to', 'for', 'and', 'or', 'is', 'are',
  'that', 'this', 'it', 'with', 'from', 'by', 'be', 'as', 'on', 'at',
]);

/**
 * Score a single file's signatures against a query.
 *
 * Each non-stop-word query token adds 1 when it occurs among the file's
 * signature tokens, plus 0.3 for every distinct signature token that
 * merely starts with it (prefix credit is given only to query tokens of
 * four or more characters).
 *
 * @param {string[]} sigs - signature strings for this file
 * @param {string[]} queryTokens - tokens produced by tokenize()
 * @returns {number} non-negative score; higher = more relevant
 */
function scoreFile(sigs, queryTokens) {
  if (!sigs || sigs.length === 0) return 0;

  const sigTokens = new Set(tokenize(sigs.join(' ')));

  let score = 0;
  for (const qt of queryTokens) {
    if (STOP_WORDS.has(qt)) continue;
    if (sigTokens.has(qt)) score += 1;

    // The length gate depends only on the query token, so it is decided
    // once here rather than re-tested for every signature token.
    if (qt.length >= 4) {
      for (const st of sigTokens) {
        if (st !== qt && st.startsWith(qt)) score += 0.3;
      }
    }
  }

  return score;
}
137
+
138
/**
 * Score every file in the index against a query and keep the best matches.
 * Entries are ordered by score, highest first; equal scores fall back to
 * alphabetical order of the file path.
 * @param {string} query
 * @param {Map<string, string[]>} index
 * @param {number} topK - maximum number of entries returned
 * @returns {{ file: string, score: number, sigs: string[] }[]}
 */
function rank(query, index, topK = 10) {
  const queryTokens = tokenize(query);

  const scored = Array.from(index.entries(), ([file, sigs]) => {
    const score = scoreFile(sigs, queryTokens);
    return { file, score, sigs };
  });

  scored.sort((left, right) => {
    const byScore = right.score - left.score;
    return byScore || left.file.localeCompare(right.file);
  });

  return scored.slice(0, topK);
}
158
+
159
+ // ---------------------------------------------------------------------------
160
+ // Token estimation
161
+ // ---------------------------------------------------------------------------
162
+
163
/**
 * Approximate the token cost of a set of signatures as one token per four
 * characters of the newline-joined text, rounded up.
 * @param {string[]} sigs - may be null/undefined, treated as empty
 * @returns {number}
 */
function estimateTokens(sigs) {
  const lines = sigs || [];
  const charCount = lines.join('\n').length;
  return Math.ceil(charCount / 4);
}
172
+
173
+ // ---------------------------------------------------------------------------
174
+ // Task loader
175
+ // ---------------------------------------------------------------------------
176
+
177
/**
 * Load tasks from a JSONL file.
 * Each line: { id, query, expected_files, repo }
 *
 * Skipped silently: blank lines, lines that are not valid JSON, and
 * records that lack a non-empty string `query` or an `expected_files`
 * array. A record without an id gets its 1-based position among the
 * accepted tasks; ids are always returned as strings. `repo` defaults
 * to '.'.
 *
 * @param {string} tasksFile - absolute or relative path
 * @returns {Array<{id:string, query:string, expected:string[], repo:string}>}
 *   empty when the file does not exist
 */
function loadTasks(tasksFile) {
  if (!fs.existsSync(tasksFile)) return [];

  const tasks = [];
  for (const line of fs.readFileSync(tasksFile, 'utf8').split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let obj;
    try {
      obj = JSON.parse(trimmed);
    } catch {
      continue; // best-effort loader: invalid JSON lines are ignored by design
    }

    // A non-string query would crash tokenize()/formatTable() downstream,
    // so such records are rejected here together with null/primitive lines.
    if (!obj || typeof obj.query !== 'string' || obj.query === '') continue;
    if (!Array.isArray(obj.expected_files)) continue;

    // 0 is a legitimate id; other falsy ids fall back to the position.
    const hasId = Boolean(obj.id) || obj.id === 0;
    tasks.push({
      id: hasId ? String(obj.id) : String(tasks.length + 1),
      query: obj.query,
      expected: obj.expected_files,
      repo: obj.repo || '.',
    });
  }
  return tasks;
}
207
+
208
+ // ---------------------------------------------------------------------------
209
+ // Main runner
210
+ // ---------------------------------------------------------------------------
211
+
212
/**
 * Run all tasks in tasksFile against the repo at cwd.
 *
 * The signature index is built once and shared by all tasks. Each task is
 * ranked exactly once; the ranked path list and the token estimate are
 * both derived from that single ranking.
 *
 * @param {string} tasksFile - path to JSONL task file (absolute or relative to cwd)
 * @param {string} cwd - project root
 * @param {object} [opts]
 * @param {number} [opts.topK=10] - how many results to rank per query
 * @returns {{
 *   tasks: Array<{id, query, expected, ranked, hit5, rr, precAt5, tokens}>,
 *   metrics: { hitAt5, mrr, precisionAt5, avgTokens, tasks }
 * }} zeroed metrics and an empty task list when no task could be loaded
 */
function run(tasksFile, cwd, opts = {}) {
  const topK = opts.topK || 10;

  const resolvedTasksFile = path.isAbsolute(tasksFile)
    ? tasksFile
    : path.resolve(cwd, tasksFile);

  const tasks = loadTasks(resolvedTasksFile);
  if (tasks.length === 0) {
    return {
      tasks: [],
      metrics: { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 },
    };
  }

  const index = buildSigIndex(cwd);

  const taskResults = tasks.map((task) => {
    const topResults = rank(task.query, index, topK);
    const ranked = topResults.map((r) => r.file);
    const tokens = topResults.reduce((sum, r) => sum + estimateTokens(r.sigs), 0);

    return {
      id: task.id,
      query: task.query,
      expected: task.expected,
      ranked,
      hit5: hitAtK(ranked, task.expected, 5),
      rr: reciprocalRank(ranked, task.expected),
      precAt5: precisionAtK(ranked, task.expected, 5),
      tokens,
    };
  });

  const metrics = aggregate(
    taskResults.map((r) => ({ ranked: r.ranked, expected: r.expected, tokens: r.tokens })),
  );

  return { tasks: taskResults, metrics };
}
272
+
273
+ // ---------------------------------------------------------------------------
274
+ // Table formatter
275
+ // ---------------------------------------------------------------------------
276
+
277
/**
 * Format task results as a markdown table string.
 *
 * Queries longer than 40 characters are shortened to their first 37
 * characters plus "...". In the Task and Query cells, pipes are escaped
 * and line breaks are replaced by a space so free-form text can never
 * split a row or add a column.
 *
 * @param {Array<{id, query: string, hit5, rr: number, tokens}>} taskResults - from run()
 * @returns {string} header, divider and one row per task, joined by "\n"
 */
function formatTable(taskResults) {
  const header = '| Task | Query | hit@5 | RR | Tokens |';
  const divider = '|---|---|:---:|:---:|---:|';

  // Make arbitrary text safe to place inside a single table cell.
  const cell = (value) => String(value).replace(/\r?\n|\r/g, ' ').replace(/\|/g, '\\|');

  const rows = taskResults.map((r) => {
    const q = r.query.length > 40 ? `${r.query.slice(0, 37)}...` : r.query;
    return `| ${cell(r.id)} | ${cell(q)} | ${r.hit5 ? '✓' : '✗'} | ${r.rr.toFixed(2)} | ${r.tokens} |`;
  });
  return [header, divider, ...rows].join('\n');
}
291
+
292
/**
 * Render aggregate metrics as a short multi-line summary.
 * hit@5 and precision@5 are shown as percentages with one decimal; MRR is
 * shown with three decimals.
 * @param {object} metrics - from aggregate()
 * @returns {string}
 */
function formatMetrics(metrics) {
  const asPercent = (fraction) => (fraction * 100).toFixed(1);

  const lines = [];
  lines.push(`[sigmap] benchmark results:`);
  lines.push(` tasks : ${metrics.tasks}`);
  lines.push(` hit@5 : ${asPercent(metrics.hitAt5)}%`);
  lines.push(` MRR : ${metrics.mrr.toFixed(3)}`);
  lines.push(` precision@5 : ${asPercent(metrics.precisionAt5)}%`);
  lines.push(` avg tokens : ${metrics.avgTokens}`);
  return lines.join('\n');
}
307
+
308
+ module.exports = { run, rank, loadTasks, buildSigIndex, formatTable, formatMetrics, tokenize };
@@ -0,0 +1,126 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * SigMap evaluation scorer.
5
+ * Zero npm dependencies.
6
+ *
7
+ * Computes retrieval quality metrics:
8
+ * hit@k — fraction of tasks where the correct file appears in top-k results
9
+ * MRR — mean reciprocal rank (1/rank of first correct result)
10
+ * precision@k — fraction of top-k results that are correct
11
+ *
12
+ * All functions are pure and never throw.
13
+ */
14
+
15
+ /**
16
+ * Return the 1-based rank of the first expected file found in the ranked
17
+ * result list, or Infinity if none found.
18
+ * @param {string[]} ranked - Ordered list of file paths returned by the system
19
+ * @param {string[]} expected - List of acceptable correct files (any match counts)
20
+ * @returns {number}
21
+ */
22
+ function firstRank(ranked, expected) {
23
+ if (!Array.isArray(ranked) || !Array.isArray(expected)) return Infinity;
24
+ const expSet = new Set(expected.map((f) => normalizePath(f)));
25
+ for (let i = 0; i < ranked.length; i++) {
26
+ if (expSet.has(normalizePath(ranked[i]))) return i + 1;
27
+ }
28
+ return Infinity;
29
+ }
30
+
31
/**
 * Normalize a file path for comparison: backslashes become forward
 * slashes and any leading "./" is removed. Nothing else is touched —
 * case is preserved and absolute or "../" prefixes are kept as they are.
 * @param {string} p - any value; it is converted with String()
 * @returns {string}
 */
function normalizePath(p) {
  // Separators are unified first: stripping "./" before the conversion
  // would leave a Windows-style ".\src\a.js" as "./src/a.js".
  return String(p).replace(/\\/g, '/').replace(/^(?:\.\/)+/, '');
}
41
+
42
/**
 * hit@k for a single task: did any expected file land in the first k
 * ranked results?
 * @param {string[]} ranked - Ordered results
 * @param {string[]} expected - Correct files
 * @param {number} k - Cut-off (default 5)
 * @returns {0|1}
 */
function hitAtK(ranked, expected, k = 5) {
  const position = firstRank(ranked, expected);
  if (position <= k) return 1;
  return 0;
}
52
+
53
/**
 * Reciprocal rank for a single task: 1 divided by the position of the
 * first expected file in the ranked list.
 * @param {string[]} ranked
 * @param {string[]} expected
 * @returns {number} 0 when no expected file is ranked, otherwise a value in (0, 1]
 */
function reciprocalRank(ranked, expected) {
  const position = firstRank(ranked, expected);
  if (position === Infinity) return 0;
  return 1 / position;
}
63
+
64
/**
 * Compute precision@k for one task: the fraction of the inspected top
 * results that appear in expected.
 *
 * The denominator is the number of results actually inspected, i.e.
 * min(k, ranked.length). In keeping with this module's never-throw
 * contract, 0 is returned when either list is not an array, when ranked
 * is empty, or when k selects no results (k < 1 or NaN).
 *
 * @param {string[]} ranked
 * @param {string[]} expected
 * @param {number} k - cut-off (default 5)
 * @returns {number} value in [0, 1]
 */
function precisionAtK(ranked, expected, k = 5) {
  if (!Array.isArray(ranked) || !Array.isArray(expected)) return 0;
  if (!(k >= 1)) return 0; // negated comparison so NaN is rejected as well

  const topK = ranked.slice(0, k);
  if (topK.length === 0) return 0;

  const expSet = new Set(expected.map((f) => normalizePath(f)));
  const hits = topK.filter((f) => expSet.has(normalizePath(f))).length;
  return hits / topK.length;
}
79
+
80
/**
 * Aggregate metrics across all task results.
 *
 * Every entry counts towards the task total. Malformed entries (null,
 * non-objects, missing or non-array lists, non-finite token counts)
 * contribute zeros instead of throwing or turning an average into NaN.
 * The result keys keep their "5" suffix whatever k is passed.
 *
 * @param {Array<{ranked: string[], expected: string[], tokens: number}>} results
 * @param {number} k - cut-off (default 5)
 * @returns {{
 *   hitAt5: number,       // fraction [0,1]
 *   mrr: number,          // mean reciprocal rank [0,1]
 *   precisionAt5: number, // mean precision@k [0,1]
 *   avgTokens: number,    // rounded to an integer
 *   tasks: number
 * }}
 */
function aggregate(results, k = 5) {
  if (!Array.isArray(results) || results.length === 0) {
    return { hitAt5: 0, mrr: 0, precisionAt5: 0, avgTokens: 0, tasks: 0 };
  }

  let totalHit = 0;
  let totalRR = 0;
  let totalPrec = 0;
  let totalTokens = 0;

  for (const r of results) {
    const entry = r && typeof r === 'object' ? r : {};
    const ranked = Array.isArray(entry.ranked) ? entry.ranked : [];
    const expected = Array.isArray(entry.expected) ? entry.expected : [];
    totalHit += hitAtK(ranked, expected, k);
    totalRR += reciprocalRank(ranked, expected);
    totalPrec += precisionAtK(ranked, expected, k);
    // typeof NaN is 'number'; isFinite keeps one bad count from poisoning the mean.
    totalTokens += Number.isFinite(entry.tokens) ? entry.tokens : 0;
  }

  const n = results.length;
  return {
    hitAt5: round(totalHit / n),
    mrr: round(totalRR / n),
    precisionAt5: round(totalPrec / n),
    avgTokens: Math.round(totalTokens / n),
    tasks: n,
  };
}
121
+
122
/**
 * Round a number to three decimal places.
 * @param {number} x
 * @returns {number}
 */
function round(x) {
  const SCALE = 1000;
  const scaled = Math.round(x * SCALE);
  return scaled / SCALE;
}
125
+
126
+ module.exports = { hitAtK, reciprocalRank, precisionAtK, aggregate, firstRank };
package/src/mcp/server.js CHANGED
@@ -18,7 +18,7 @@ const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, exp
18
18
 
19
19
  const SERVER_INFO = {
20
20
  name: 'sigmap',
21
- version: '2.0.0',
21
+ version: '2.1.0',
22
22
  description: 'SigMap MCP server — code signatures on demand',
23
23
  };
24
24