sigmap 6.8.0 → 6.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +1 -1
- package/gen-context.js +2 -2
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/eval/usefulness-scorer.js +66 -0
- package/src/mcp/server.js +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,16 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [6.9.0] — 2026-05-03
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- **Task metadata for segmentation** — All 18 benchmark repositories now tagged with language, repo type (framework/library/tool/application), and size class (small/medium/large) to enable segmented benchmark analysis.
|
|
18
|
+
- **Benchmark methodology documentation** — Comprehensive guide explaining what SigMap measures (retrieval accuracy, task success, prompt reduction, token reduction), why these metrics matter, and how the 90-task test set was selected and evaluated.
|
|
19
|
+
- **Answer usefulness evaluation** — New metric tracking whether retrieved context actually enabled correct answers, scored in three tiers: fully-useful (rank 1), partially-useful (ranks 2-5), not-useful (not retrieved). Complements task success proxy with granular answer quality assessment.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
13
23
|
## [6.8.0] — 2026-05-03
|
|
14
24
|
|
|
15
25
|
### Added
|
package/README.md
CHANGED
|
@@ -52,7 +52,7 @@ Works with Copilot, Claude, Cursor, Windsurf, and any LLM.
|
|
|
52
52
|
|
|
53
53
|
| Without SigMap | With SigMap |
|
|
54
54
|
|---|---|
|
|
55
|
-
| ❌ Guessing which files are relevant | ✅ Right file in context —
|
|
55
|
+
| ❌ Guessing which files are relevant | ✅ Right file in context — 80% of the time |
|
|
56
56
|
| ❌ Sending the full repo to your AI | ✅ Minimal context — only what matters |
|
|
57
57
|
| ❌ Embeddings / vector DB required | ✅ Grounded answers, no infra needed |
|
|
58
58
|
|
package/gen-context.js
CHANGED
|
@@ -5387,7 +5387,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
5387
5387
|
|
|
5388
5388
|
const SERVER_INFO = {
|
|
5389
5389
|
name: 'sigmap',
|
|
5390
|
-
version: '6.8.0',
|
|
5390
|
+
version: '6.9.0',
|
|
5391
5391
|
description: 'SigMap MCP server — code signatures on demand',
|
|
5392
5392
|
};
|
|
5393
5393
|
|
|
@@ -7855,7 +7855,7 @@ const path = require('path');
|
|
|
7855
7855
|
const os = require('os');
|
|
7856
7856
|
const { execSync } = require('child_process');
|
|
7857
7857
|
|
|
7858
|
-
const VERSION = '6.8.0';
|
|
7858
|
+
const VERSION = '6.9.0';
|
|
7859
7859
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
7860
7860
|
|
|
7861
7861
|
function requireSourceOrBundled(key) {
|
package/package.json
CHANGED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Public API of this module. Both names are function declarations defined
// further down in the file; declarations are hoisted, so referencing them
// here, before their definitions, is safe.
module.exports = { scoreUsefulness, computeUsefulnessStats };
|
|
4
|
+
|
|
5
|
+
/**
 * Score answer usefulness for a single benchmark task.
 *
 * The tier is decided solely by `taskResult.hitRank`, the 1-based position of
 * the expected file in the retrieval ranking (-1 means it was not retrieved):
 *   - rank 1     -> 'fully-useful',     score = rankingScore
 *   - ranks 2-5  -> 'partially-useful', score = rankingScore * 0.5
 *   - otherwise  -> 'not-useful',       score = 0
 *
 * A missing or malformed rank (undefined, null, NaN, non-number, or below 1)
 * is treated as "not retrieved" rather than falling through to the top tier.
 *
 * @param {{ hitRank?: number }} taskResult - Result record for one task.
 * @param {number} [rankingScore=1.0] - Confidence weight applied to the tier
 *   score; when omitted (null/undefined) it defaults to 1.0.
 * @returns {{ tier: string, score: number, reason: string }} Tier label,
 *   weighted score and a short human-readable explanation.
 */
function scoreUsefulness(taskResult, rankingScore) {
  const hitRank = taskResult == null ? undefined : taskResult.hitRank;
  // An omitted score counts as full confidence, matching the 1.0 fallback
  // that computeUsefulnessStats applies; without it the score would be NaN.
  const weight = rankingScore == null ? 1.0 : rankingScore;

  // Tier 1: file not retrieved (or no usable rank) - context cannot be useful.
  // The typeof/>= 1 check also rejects NaN, since every comparison with NaN is false.
  const hasValidRank = typeof hitRank === 'number' && hitRank >= 1;
  if (!hasValidRank || hitRank > 5) {
    return {
      tier: 'not-useful',
      score: 0.0,
      reason: 'expected file not in top 5'
    };
  }

  // Tier 2: file retrieved but not top ranking - partially useful
  if (hitRank > 1) {
    return {
      tier: 'partially-useful',
      score: weight * 0.5, // Partial usefulness
      reason: `file ranked #${hitRank}`
    };
  }

  // Tier 3: file at top of ranking - fully useful
  return {
    tier: 'fully-useful',
    score: weight, // Full usefulness
    reason: 'file ranked first'
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Aggregate usefulness tiers and scores over a collection of task results.
 *
 * Each result is scored with scoreUsefulness; a result without a usable
 * numeric `rankingScore` is scored with a weight of 1.0.
 *
 * Note on return types: when at least one result was processed, the `*_pct`
 * fields and `average_usefulness_score` are strings produced by `toFixed`
 * (1 and 3 decimals respectively); for an empty input they are the number 0.
 *
 * @param {Array<{ hitRank?: number, rankingScore?: number }>} taskResults -
 *   Results to summarise; null/undefined is treated as an empty collection.
 * @returns {{
 *   fully_useful: number, partially_useful: number, not_useful: number,
 *   fully_useful_pct: (string|number), partially_useful_pct: (string|number),
 *   not_useful_pct: (string|number), average_usefulness_score: (string|number)
 * }} Per-tier counts, per-tier percentages and the mean usefulness score.
 */
function computeUsefulnessStats(taskResults) {
  const results = taskResults == null ? [] : taskResults;

  const tiers = {
    'fully-useful': 0,
    'partially-useful': 0,
    'not-useful': 0
  };

  let totalScore = 0;
  let count = 0;

  results.forEach((result) => {
    // Fall back to 1.0 only when no usable score was recorded. A plain `||`
    // would also replace a genuine score of 0 and inflate the average.
    const rankingScore = Number.isFinite(result.rankingScore) ? result.rankingScore : 1.0;
    const usefulness = scoreUsefulness(result, rankingScore);
    tiers[usefulness.tier]++;
    totalScore += usefulness.score;
    count++;
  });

  // Percentage of processed results in a tier; guards the division for empty input.
  const percentOf = (tierCount) => (count > 0 ? (tierCount / count * 100).toFixed(1) : 0);

  return {
    fully_useful: tiers['fully-useful'],
    partially_useful: tiers['partially-useful'],
    not_useful: tiers['not-useful'],
    fully_useful_pct: percentOf(tiers['fully-useful']),
    partially_useful_pct: percentOf(tiers['partially-useful']),
    not_useful_pct: percentOf(tiers['not-useful']),
    average_usefulness_score: count > 0 ? (totalScore / count).toFixed(3) : 0
  };
}
|
package/src/mcp/server.js
CHANGED