sigmap 8.4.0 → 8.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +1 -1
- package/gen-context.js +78 -5
- package/llms-full.txt +2 -2
- package/llms.txt +2 -2
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/mcp/server.js +1 -1
- package/src/retrieval/bm25.js +76 -3
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,13 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [8.5.0] — 2026-07-05
|
|
14
|
+
|
|
15
|
+
Minor release — **deterministic query expansion (a vocabulary-mismatch recall aid).** The BM25 ranker now bridges common code-domain synonyms/abbreviations so a query for `authentication` can still surface a file whose signatures only say `auth`. Zero-dependency, deterministic. **Honest framing:** measured on the retrieval benchmark, this is **benchmark-neutral** (hit@5 unchanged within the harness's 86.7–87.8% noise band at the shipped weight) — not a hit@5 improvement. The benefit is for real users whose query vocabulary differs from the code, a case the curated benchmark doesn't exercise.
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- **Query expansion (#421, PR #422):** `src/retrieval/bm25.js` gains a curated, high-precision synonym/abbreviation table (`auth`↔`authentication`/`login`, `db`↔`database`, `ctx`↔`context`, `config`↔`configuration`, `req`/`res`, `init`, `impl`, …). `expandQuery()` adds synonyms to the query tokens at a **discount weight (0.15)** so an exact-term match always outranks a synonym-only match; documents are unchanged. Wired through the ranker, so `sigmap ask`, `--query`, and MCP `query_context` all benefit. A weight sweep confirmed higher weights regress retrieval, so 0.15 (benchmark-neutral) is the shipped setting.
|
|
19
|
+
|
|
13
20
|
## [8.4.0] — 2026-07-05
|
|
14
21
|
|
|
15
22
|
Minor release — **PR Evidence Report (v9.0 G3): a branded, deterministic review artifact.** SigMap already had the pieces — `review-pr` findings and `get_diff_context` — but no single Markdown comment an agent or CI could post on a PR. This adds it: one report that answers *"what changed, what it touches, and what to test"*, with no LLM.
|
package/README.md
CHANGED
|
@@ -98,7 +98,7 @@ Ask → Rank → Context → Validate → Judge → Learn
|
|
|
98
98
|
|
|
99
99
|
<!--SM:benchmarkBlock-->
|
|
100
100
|
```
|
|
101
|
-
Benchmark : sigmap-v8.
|
|
101
|
+
Benchmark : sigmap-v8.5-main (21 repositories, including R language)
|
|
102
102
|
Date : 2026-07-04
|
|
103
103
|
|
|
104
104
|
Hit@5 : 86.7% (baseline 13.6% — 6.4× lift)
|
package/gen-context.js
CHANGED
|
@@ -13108,7 +13108,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
13108
13108
|
|
|
13109
13109
|
const SERVER_INFO = {
|
|
13110
13110
|
name: 'sigmap',
|
|
13111
|
-
version: '8.4.0',
|
|
13111
|
+
version: '8.5.0',
|
|
13112
13112
|
description: 'SigMap MCP server — code signatures on demand',
|
|
13113
13113
|
};
|
|
13114
13114
|
|
|
@@ -13923,6 +13923,78 @@ __factories["./src/retrieval/bm25"] = function(module, exports) {
|
|
|
13923
13923
|
// are counted PATH_BOOST times when building the document term-frequency map.
|
|
13924
13924
|
const PATH_BOOST = 3;
|
|
13925
13925
|
|
|
13926
|
+
// Curated, high-precision code-domain synonym / abbreviation expansions. A query
|
|
13927
|
+
// for `authentication` should still surface a file whose signatures only say
|
|
13928
|
+
// `auth`. Kept deliberately tight — over-broad synonyms hurt precision. Groups
|
|
13929
|
+
// are expanded bidirectionally (every member maps to the others). Values are
|
|
13930
|
+
// tokenized+stemmed at load, so entries are written in natural form.
|
|
13931
|
+
const EXPANSION_GROUPS = [
|
|
13932
|
+
['auth', 'authenticate', 'authentication', 'login', 'signin', 'credential'],
|
|
13933
|
+
['authorize', 'authorization', 'permission', 'access'],
|
|
13934
|
+
['config', 'configuration', 'settings', 'options'],
|
|
13935
|
+
['db', 'database'],
|
|
13936
|
+
['ctx', 'context'],
|
|
13937
|
+
['req', 'request'],
|
|
13938
|
+
['res', 'response'],
|
|
13939
|
+
['err', 'error'],
|
|
13940
|
+
['msg', 'message'],
|
|
13941
|
+
['init', 'initialize', 'initialization', 'setup'],
|
|
13942
|
+
['async', 'asynchronous'],
|
|
13943
|
+
['sync', 'synchronize', 'synchronous'],
|
|
13944
|
+
['repo', 'repository'],
|
|
13945
|
+
['impl', 'implementation'],
|
|
13946
|
+
['util', 'utility', 'helper'],
|
|
13947
|
+
['param', 'parameter', 'argument'],
|
|
13948
|
+
['fn', 'func', 'function'],
|
|
13949
|
+
['btn', 'button'],
|
|
13950
|
+
['calc', 'calculate', 'calculation'],
|
|
13951
|
+
['gen', 'generate', 'generator'],
|
|
13952
|
+
['val', 'validate', 'validation'],
|
|
13953
|
+
['del', 'delete', 'remove'],
|
|
13954
|
+
['dir', 'directory', 'folder'],
|
|
13955
|
+
['env', 'environment'],
|
|
13956
|
+
['doc', 'document', 'documentation'],
|
|
13957
|
+
['id', 'identifier'],
|
|
13958
|
+
['num', 'number'],
|
|
13959
|
+
['str', 'string'],
|
|
13960
|
+
];
|
|
13961
|
+
|
|
13962
|
+
// The weight applied to an expanded (synonym) query term, so an exact match on
|
|
13963
|
+
// the literal query token always outranks a synonym-only match.
|
|
13964
|
+
const EXPANSION_WEIGHT = 0.15;
|
|
13965
|
+
|
|
13966
|
+
// Build a stemmed lookup: stem(member) → Set of the group's other stemmed members.
|
|
13967
|
+
const EXPANSIONS = (() => {
|
|
13968
|
+
const map = new Map();
|
|
13969
|
+
for (const group of EXPANSION_GROUPS) {
|
|
13970
|
+
const stemmed = [...new Set(group.map((w) => tokenize(w).join('')).filter(Boolean))];
|
|
13971
|
+
for (const s of stemmed) {
|
|
13972
|
+
if (!map.has(s)) map.set(s, new Set());
|
|
13973
|
+
for (const other of stemmed) if (other !== s) map.get(s).add(other);
|
|
13974
|
+
}
|
|
13975
|
+
}
|
|
13976
|
+
return map;
|
|
13977
|
+
})();
|
|
13978
|
+
|
|
13979
|
+
/**
|
|
13980
|
+
* Expand stemmed query tokens with curated synonyms. Returns a Map of
|
|
13981
|
+
* token → weight (1 for the original query tokens, EXPANSION_WEIGHT for
|
|
13982
|
+
* synonyms). Original tokens always keep full weight even if also a synonym.
|
|
13983
|
+
*
|
|
13984
|
+
* @param {string[]} qToks stemmed, de-duplicated query tokens
|
|
13985
|
+
* @returns {Map<string, number>}
|
|
13986
|
+
*/
|
|
13987
|
+
function expandQuery(qToks) {
|
|
13988
|
+
const weights = new Map();
|
|
13989
|
+
for (const t of qToks) weights.set(t, 1);
|
|
13990
|
+
for (const t of qToks) {
|
|
13991
|
+
const syns = EXPANSIONS.get(t);
|
|
13992
|
+
if (!syns) continue;
|
|
13993
|
+
for (const s of syns) if (!weights.has(s)) weights.set(s, EXPANSION_WEIGHT);
|
|
13994
|
+
}
|
|
13995
|
+
return weights;
|
|
13996
|
+
}
|
|
13997
|
+
|
|
13926
13998
|
/**
|
|
13927
13999
|
* BM25 re-rank of candidates against a query. Each candidate is
|
|
13928
14000
|
* `{ file, sigs }`; the returned objects preserve all original candidate
|
|
@@ -13958,23 +14030,24 @@ __factories["./src/retrieval/bm25"] = function(module, exports) {
|
|
|
13958
14030
|
}
|
|
13959
14031
|
|
|
13960
14032
|
const qToks = [...new Set(tokenize(query))];
|
|
14033
|
+
const qWeights = expandQuery(qToks); // token → weight (1 exact, <1 synonym)
|
|
13961
14034
|
|
|
13962
14035
|
return docs
|
|
13963
14036
|
.map((d) => {
|
|
13964
14037
|
let score = 0;
|
|
13965
|
-
for (const t of qToks) {
|
|
14038
|
+
for (const [t, w] of qWeights) {
|
|
13966
14039
|
const f = d.tf.get(t);
|
|
13967
14040
|
if (!f) continue;
|
|
13968
14041
|
const dfT = df.get(t);
|
|
13969
14042
|
const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
|
|
13970
|
-
score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
|
|
14043
|
+
score += w * ((idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl)));
|
|
13971
14044
|
}
|
|
13972
14045
|
return Object.assign({}, d.cand, { score });
|
|
13973
14046
|
})
|
|
13974
14047
|
.sort((a, c) => c.score - a.score || String(a.file).localeCompare(String(c.file)));
|
|
13975
14048
|
}
|
|
13976
14049
|
|
|
13977
|
-
module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
|
|
14050
|
+
module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP, expandQuery, EXPANSIONS, EXPANSION_WEIGHT };
|
|
13978
14051
|
|
|
13979
14052
|
};
|
|
13980
14053
|
|
|
@@ -17574,7 +17647,7 @@ function __tryGit(args, opts = {}) {
|
|
|
17574
17647
|
catch (_) { return ''; }
|
|
17575
17648
|
}
|
|
17576
17649
|
|
|
17577
|
-
const VERSION = '8.4.0';
|
|
17650
|
+
const VERSION = '8.5.0';
|
|
17578
17651
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
17579
17652
|
|
|
17580
17653
|
function requireSourceOrBundled(key) {
|
package/llms-full.txt
CHANGED
|
@@ -11,13 +11,13 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
|
|
|
11
11
|
effect), with no LLM calls, embeddings, or vector database. Works with Claude,
|
|
12
12
|
Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
13
13
|
|
|
14
|
-
# Version: 8.
|
|
14
|
+
# Version: 8.5.0 | Benchmark: sigmap-v8.5-main (2026-07-04)
|
|
15
15
|
# Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
|
|
16
16
|
# Regenerate: npm run generate:llms | Validate: npm run validate:llms
|
|
17
17
|
|
|
18
18
|
---
|
|
19
19
|
|
|
20
|
-
## Core metrics (benchmark: sigmap-v8.
|
|
20
|
+
## Core metrics (benchmark: sigmap-v8.5-main, 2026-07-04)
|
|
21
21
|
|
|
22
22
|
| Metric | Without SigMap | With SigMap |
|
|
23
23
|
|--------|----------------|-------------|
|
package/llms.txt
CHANGED
|
@@ -11,7 +11,7 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
|
|
|
11
11
|
effect), with no LLM calls, embeddings, or vector database. Works with Claude,
|
|
12
12
|
Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
13
13
|
|
|
14
|
-
# Version: 8.
|
|
14
|
+
# Version: 8.5.0 | Benchmark: sigmap-v8.5-main (2026-07-04)
|
|
15
15
|
# Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
|
|
16
16
|
# Regenerate: npm run generate:llms | Validate: npm run validate:llms
|
|
17
17
|
|
|
@@ -23,7 +23,7 @@ Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
|
23
23
|
- No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
|
|
24
24
|
- Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
|
|
25
25
|
|
|
26
|
-
## Core metrics (benchmark: sigmap-v8.
|
|
26
|
+
## Core metrics (benchmark: sigmap-v8.5-main, 2026-07-04)
|
|
27
27
|
|
|
28
28
|
- hit@5 retrieval: 86.7% vs 13.6% random baseline (6.4× lift)
|
|
29
29
|
- Token reduction: 97.0% average across benchmark repos
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sigmap",
|
|
3
|
-
"version": "8.4.0",
|
|
3
|
+
"version": "8.5.0",
|
|
4
4
|
"description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
|
|
5
5
|
"main": "packages/core/index.js",
|
|
6
6
|
"exports": {
|
package/src/mcp/server.js
CHANGED
package/src/retrieval/bm25.js
CHANGED
|
@@ -68,6 +68,78 @@ function tokenize(text) {
|
|
|
68
68
|
// are counted PATH_BOOST times when building the document term-frequency map.
|
|
69
69
|
const PATH_BOOST = 3;
|
|
70
70
|
|
|
71
|
+
// Curated, high-precision code-domain synonym / abbreviation expansions. A query
|
|
72
|
+
// for `authentication` should still surface a file whose signatures only say
|
|
73
|
+
// `auth`. Kept deliberately tight — over-broad synonyms hurt precision. Groups
|
|
74
|
+
// are expanded bidirectionally (every member maps to the others). Values are
|
|
75
|
+
// tokenized+stemmed at load, so entries are written in natural form.
|
|
76
|
+
const EXPANSION_GROUPS = [
|
|
77
|
+
['auth', 'authenticate', 'authentication', 'login', 'signin', 'credential'],
|
|
78
|
+
['authorize', 'authorization', 'permission', 'access'],
|
|
79
|
+
['config', 'configuration', 'settings', 'options'],
|
|
80
|
+
['db', 'database'],
|
|
81
|
+
['ctx', 'context'],
|
|
82
|
+
['req', 'request'],
|
|
83
|
+
['res', 'response'],
|
|
84
|
+
['err', 'error'],
|
|
85
|
+
['msg', 'message'],
|
|
86
|
+
['init', 'initialize', 'initialization', 'setup'],
|
|
87
|
+
['async', 'asynchronous'],
|
|
88
|
+
['sync', 'synchronize', 'synchronous'],
|
|
89
|
+
['repo', 'repository'],
|
|
90
|
+
['impl', 'implementation'],
|
|
91
|
+
['util', 'utility', 'helper'],
|
|
92
|
+
['param', 'parameter', 'argument'],
|
|
93
|
+
['fn', 'func', 'function'],
|
|
94
|
+
['btn', 'button'],
|
|
95
|
+
['calc', 'calculate', 'calculation'],
|
|
96
|
+
['gen', 'generate', 'generator'],
|
|
97
|
+
['val', 'validate', 'validation'],
|
|
98
|
+
['del', 'delete', 'remove'],
|
|
99
|
+
['dir', 'directory', 'folder'],
|
|
100
|
+
['env', 'environment'],
|
|
101
|
+
['doc', 'document', 'documentation'],
|
|
102
|
+
['id', 'identifier'],
|
|
103
|
+
['num', 'number'],
|
|
104
|
+
['str', 'string'],
|
|
105
|
+
];
|
|
106
|
+
|
|
107
|
+
// The weight applied to an expanded (synonym) query term, so an exact match on
|
|
108
|
+
// the literal query token always outranks a synonym-only match.
|
|
109
|
+
const EXPANSION_WEIGHT = 0.15;
|
|
110
|
+
|
|
111
|
+
// Build a stemmed lookup: stem(member) → Set of the group's other stemmed members.
|
|
112
|
+
const EXPANSIONS = (() => {
|
|
113
|
+
const map = new Map();
|
|
114
|
+
for (const group of EXPANSION_GROUPS) {
|
|
115
|
+
const stemmed = [...new Set(group.map((w) => tokenize(w).join('')).filter(Boolean))];
|
|
116
|
+
for (const s of stemmed) {
|
|
117
|
+
if (!map.has(s)) map.set(s, new Set());
|
|
118
|
+
for (const other of stemmed) if (other !== s) map.get(s).add(other);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
return map;
|
|
122
|
+
})();
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Expand stemmed query tokens with curated synonyms. Returns a Map of
|
|
126
|
+
* token → weight (1 for the original query tokens, EXPANSION_WEIGHT for
|
|
127
|
+
* synonyms). Original tokens always keep full weight even if also a synonym.
|
|
128
|
+
*
|
|
129
|
+
* @param {string[]} qToks stemmed, de-duplicated query tokens
|
|
130
|
+
* @returns {Map<string, number>}
|
|
131
|
+
*/
|
|
132
|
+
function expandQuery(qToks) {
|
|
133
|
+
const weights = new Map();
|
|
134
|
+
for (const t of qToks) weights.set(t, 1);
|
|
135
|
+
for (const t of qToks) {
|
|
136
|
+
const syns = EXPANSIONS.get(t);
|
|
137
|
+
if (!syns) continue;
|
|
138
|
+
for (const s of syns) if (!weights.has(s)) weights.set(s, EXPANSION_WEIGHT);
|
|
139
|
+
}
|
|
140
|
+
return weights;
|
|
141
|
+
}
|
|
142
|
+
|
|
71
143
|
/**
|
|
72
144
|
* BM25 re-rank of candidates against a query. Each candidate is
|
|
73
145
|
* `{ file, sigs }`; the returned objects preserve all original candidate
|
|
@@ -103,20 +175,21 @@ function bm25rank(query, candidates) {
|
|
|
103
175
|
}
|
|
104
176
|
|
|
105
177
|
const qToks = [...new Set(tokenize(query))];
|
|
178
|
+
const qWeights = expandQuery(qToks); // token → weight (1 exact, <1 synonym)
|
|
106
179
|
|
|
107
180
|
return docs
|
|
108
181
|
.map((d) => {
|
|
109
182
|
let score = 0;
|
|
110
|
-
for (const t of qToks) {
|
|
183
|
+
for (const [t, w] of qWeights) {
|
|
111
184
|
const f = d.tf.get(t);
|
|
112
185
|
if (!f) continue;
|
|
113
186
|
const dfT = df.get(t);
|
|
114
187
|
const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
|
|
115
|
-
score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
|
|
188
|
+
score += w * ((idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl)));
|
|
116
189
|
}
|
|
117
190
|
return Object.assign({}, d.cand, { score });
|
|
118
191
|
})
|
|
119
192
|
.sort((a, c) => c.score - a.score || String(a.file).localeCompare(String(c.file)));
|
|
120
193
|
}
|
|
121
194
|
|
|
122
|
-
module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
|
|
195
|
+
module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP, expandQuery, EXPANSIONS, EXPANSION_WEIGHT };
|