sigmap 2.2.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/README.md +60 -16
- package/gen-context.js +193 -3
- package/package.json +7 -1
- package/packages/cli/index.js +63 -0
- package/packages/cli/package.json +26 -0
- package/packages/core/README.md +133 -0
- package/packages/core/index.js +215 -0
- package/packages/core/package.json +28 -0
- package/src/config/defaults.js +8 -0
- package/src/mcp/handlers.js +28 -1
- package/src/mcp/server.js +3 -2
- package/src/mcp/tools.js +24 -0
- package/src/retrieval/ranker.js +242 -0
- package/src/retrieval/tokenizer.js +54 -0
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,54 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [2.4.0] — 2026-04-05
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **`packages/core/`** — new `sigmap-core` package exposing a stable programmatic API: `{ extract, rank, buildSigIndex, scan, score }`. Third-party tools can now `require('sigmap')` and use all extraction/retrieval/security/health APIs without spawning a CLI process.
|
|
13
|
+
- **`packages/cli/`** — new `sigmap-cli` thin wrapper that exposes `{ CLI_ENTRY, run }` for programmatic CLI invocation and forward-compat with the v3.0 adapter architecture.
|
|
14
|
+
- **`packages/core/README.md`** — full programmatic API reference with usage examples for all five exported functions.
|
|
15
|
+
- **`exports` field in `package.json`** — `require('sigmap')` resolves to `packages/core/index.js`; `require('sigmap/cli')` resolves to `packages/cli/index.js`.
|
|
16
|
+
- **`test/integration/core-api.test.js`** — 15 integration tests covering: all exports present, `extract` for JS/TS/Python, file-path extension detection, unknown language returns `[]`, never throws on bad input, `rank` with empty map, `rank` sorted shape, `scan` clean/redact, `score` shape, `buildSigIndex` returns Map, CLI `--version` backward compat, CLI `--help` no crash.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
- `package.json` `"version"` bumped to `2.4.0`.
|
|
20
|
+
- `package.json` `"files"` — added `"packages/"` so `sigmap-core` and `sigmap-cli` are published with the root package.
|
|
21
|
+
- `gen-context.js` `VERSION` constant bumped to `2.4.0`.
|
|
22
|
+
- `src/mcp/server.js` `SERVER_INFO.version` bumped to `2.4.0`.
|
|
23
|
+
|
|
24
|
+
### Validation gate
|
|
25
|
+
- 21/21 extractor unit tests passed
|
|
26
|
+
- 21/21 integration suites passed (0 failures, including new `core-api.test.js`)
|
|
27
|
+
- `node gen-context.js --version` → `2.4.0`
|
|
28
|
+
- `node -e "const { extract } = require('.'); console.log(extract('function hello(){}', 'javascript').length > 0 ? 'OK' : 'FAIL')"` → `OK`
|
|
29
|
+
- `require('sigmap')` works from any directory
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## [2.3.0] — 2026-04-07
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
- **Query-aware retrieval** — `src/retrieval/tokenizer.js` and `src/retrieval/ranker.js`: zero-dependency relevance ranker that scores every file against a free-text query by exact token, symbol, prefix, path, and recency signals.
|
|
37
|
+
- **`--query "<text>"` CLI flag** — ranks all context files by relevance and prints a scored table (Rank | File | Score | Sigs | Tokens) plus the top-3 signature blocks; `--query "<text>" --json` for machine-readable output; `--query "<text>" --top <n>` to limit result set.
|
|
38
|
+
- **`query_context` MCP tool** — 8th MCP tool; accepts `{ query: string, topK?: number }` and returns the same ranked table as the `--query` CLI flag; live within any running MCP session.
|
|
39
|
+
- **Retrieval config** — `config.retrieval.topK` (default 10) and `config.retrieval.recencyBoost` (default 1.5×) added to `src/config/defaults.js`.
|
|
40
|
+
- **`test/integration/retrieval.test.js`** — 23 integration tests covering tokenizer unit tests, ranker sorting/scoring/topK/empty-query, `formatRankTable`, `formatRankJSON`, CLI `--query` flags, and MCP `query_context`.
|
|
41
|
+
|
|
42
|
+
### Changed
|
|
43
|
+
- `src/mcp/server.js` version bumped to `2.3.0`.
|
|
44
|
+
- `test/integration/mcp-server.test.js` and `mcp-v14.test.js` updated to assert 8 tools.
|
|
45
|
+
- `test/integration/analyze.test.js` version assertion updated to `2.3.0`.
|
|
46
|
+
|
|
47
|
+
### Validation gate
|
|
48
|
+
- 21/21 extractor unit tests passed
|
|
49
|
+
- 20/20 integration suites passed (0 failures)
|
|
50
|
+
- `node gen-context.js --version` → `2.3.0`
|
|
51
|
+
- `node gen-context.js --query "python extractor"` → `src/extractors/python.js` in top-3
|
|
52
|
+
- `node gen-context.js --query "fix secret scanning" --json` → valid JSON
|
|
53
|
+
- MCP `tools/list` → 8 tools including `query_context`
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
9
57
|
## [2.2.0] — 2026-04-06
|
|
10
58
|
|
|
11
59
|
### Added
|
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
<!-- Status -->
|
|
13
13
|
[](https://www.npmjs.com/package/sigmap)
|
|
14
|
-
[](https://github.com/manojmallick/sigmap/tree/main/test)
|
|
15
15
|
[](package.json)
|
|
16
16
|
[](https://github.com/manojmallick/sigmap/commits/main)
|
|
17
17
|
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
| [VS Code extension](#-vs-code-extension) | Status bar, stale alerts, commands |
|
|
42
42
|
| [Languages supported](#-languages-supported) | 21 languages |
|
|
43
43
|
| [Context strategies](#-context-strategies) | full / per-module / hot-cold |
|
|
44
|
-
| [MCP server](#-mcp-server) |
|
|
44
|
+
| [MCP server](#-mcp-server) | 8 on-demand tools |
|
|
45
45
|
| [CLI reference](#-cli-reference) | All flags |
|
|
46
46
|
| [Configuration](#-configuration) | Config file + .contextignore |
|
|
47
47
|
| [Observability](#-observability) | Health score, reports, CI |
|
|
@@ -86,20 +86,18 @@ AI agent session starts with full context
|
|
|
86
86
|
|
|
87
87
|
---
|
|
88
88
|
|
|
89
|
-
## 🆕 What's new in 2.
|
|
89
|
+
## 🆕 What's new in 2.3
|
|
90
90
|
|
|
91
91
|
| Feature | Description |
|
|
92
92
|
|---|---|
|
|
93
|
-
|
|
|
94
|
-
|
|
|
95
|
-
|
|
|
96
|
-
|
|
|
97
|
-
|
|
|
98
|
-
|
|
|
99
|
-
| **Impact radius hints** | Reverse dependency annotations (used by: ...) |
|
|
100
|
-
| **New helper extractors** | `deps.js`, `todos.js`, `coverage.js`, `prdiff.js` |
|
|
93
|
+
| **`--query "<text>"` CLI** | Rank all context files by relevance to a free-text query — scored table + top-3 signature blocks |
|
|
94
|
+
| **`--query --json`** | Machine-readable ranked results (`{ query, results[], totalResults }`) |
|
|
95
|
+
| **`--query --top <n>`** | Limit results (default 10, configurable via `retrieval.topK`) |
|
|
96
|
+
| **`query_context` MCP tool** | 8th MCP tool — `{ query, topK? }` returns ranked file list, usable live in any MCP session |
|
|
97
|
+
| **`--analyze` / `--diagnose-extractors`** | Per-file breakdown of sigs/tokens/extractor/coverage; self-tests all 21 extractors (v2.2) |
|
|
98
|
+
| **`--benchmark` / `--eval`** | Measure hit@5 and MRR retrieval quality against a JSONL task file (v2.1) |
|
|
101
99
|
|
|
102
|
-
|
|
100
|
+
> **Previous v2.0 additions:** enriched signatures, dependency map, TODO/FIXME section, test coverage markers, structural diff mode, impact radius hints. See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
103
101
|
|
|
104
102
|
---
|
|
105
103
|
|
|
@@ -258,7 +256,7 @@ Recently committed files are **hot** (auto-injected). Everything else is **cold*
|
|
|
258
256
|
|
|
259
257
|
## 🔌 MCP server
|
|
260
258
|
|
|
261
|
-
> Introduced in v0.3, expanded to
|
|
259
|
+
> Introduced in v0.3, expanded to 8 tools through v2.3.
|
|
262
260
|
|
|
263
261
|
Start the MCP server on stdio:
|
|
264
262
|
|
|
@@ -277,6 +275,7 @@ node gen-context.js --mcp
|
|
|
277
275
|
| `list_modules` | — | Token-count table of all top-level module directories |
|
|
278
276
|
| `create_checkpoint` | `{ summary: string }` | Write a session checkpoint to `.context/` |
|
|
279
277
|
| `get_routing` | — | Full model routing table |
|
|
278
|
+
| `query_context` | `{ query: string, topK?: number }` | Files ranked by relevance to the query (v2.3) |
|
|
280
279
|
|
|
281
280
|
Reads files on every call — no stale state, no restart needed.
|
|
282
281
|
|
|
@@ -296,6 +295,19 @@ node gen-context.js --diff Generate context for git-changed f
|
|
|
296
295
|
node gen-context.js --diff --staged Staged files only (pre-commit check)
|
|
297
296
|
node gen-context.js --mcp Start MCP server on stdio
|
|
298
297
|
|
|
298
|
+
node gen-context.js --query "<text>" Rank files by relevance to a query
|
|
299
|
+
node gen-context.js --query "<text>" --json Ranked results as JSON
|
|
300
|
+
node gen-context.js --query "<text>" --top <n> Limit results to top N files (default 10)
|
|
301
|
+
|
|
302
|
+
node gen-context.js --analyze Per-file breakdown (sigs / tokens / extractor / coverage)
|
|
303
|
+
node gen-context.js --analyze --json Analysis as JSON
|
|
304
|
+
node gen-context.js --analyze --slow Include extraction timing per file
|
|
305
|
+
node gen-context.js --diagnose-extractors Self-test all 21 extractors against fixtures
|
|
306
|
+
|
|
307
|
+
node gen-context.js --benchmark Run retrieval quality benchmark (hit@5 / MRR)
|
|
308
|
+
node gen-context.js --benchmark --json Benchmark results as JSON
|
|
309
|
+
node gen-context.js --eval Alias for --benchmark
|
|
310
|
+
|
|
299
311
|
node gen-context.js --report Token reduction stats
|
|
300
312
|
node gen-context.js --report --json Structured JSON report (exits 1 if over budget)
|
|
301
313
|
node gen-context.js --report --history Usage log summary
|
|
@@ -435,6 +447,31 @@ node gen-context.js --format cache
|
|
|
435
447
|
|
|
436
448
|
---
|
|
437
449
|
|
|
450
|
+
## 📦 Programmatic API (v2.4+)
|
|
451
|
+
|
|
452
|
+
Use SigMap as a library — no CLI subprocess needed:
|
|
453
|
+
|
|
454
|
+
```js
|
|
455
|
+
const { extract, rank, buildSigIndex, scan, score } = require('sigmap');
|
|
456
|
+
|
|
457
|
+
// Extract signatures from source code
|
|
458
|
+
const sigs = extract('function hello() {}', 'javascript');
|
|
459
|
+
|
|
460
|
+
// Build an index and rank files by query
|
|
461
|
+
const index = buildSigIndex('/path/to/project');
|
|
462
|
+
const results = rank('authentication middleware', index);
|
|
463
|
+
|
|
464
|
+
// Scan signatures for secrets before storing
|
|
465
|
+
const { safe, redacted } = scan(sigs, 'src/config.ts');
|
|
466
|
+
|
|
467
|
+
// Get a composite health score for a project
|
|
468
|
+
const health = score('/path/to/project');
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
📖 Full API reference: [packages/core/README.md](packages/core/README.md)
|
|
472
|
+
|
|
473
|
+
---
|
|
474
|
+
|
|
438
475
|
## 🧪 Testing
|
|
439
476
|
|
|
440
477
|
```bash
|
|
@@ -464,7 +501,7 @@ grep "require(" gen-context.js | grep -v "^.*//.*require"
|
|
|
464
501
|
|
|
465
502
|
# Gate 3 — MCP server responds correctly
|
|
466
503
|
echo '{"jsonrpc":"2.0","method":"tools/list","id":1}' | node gen-context.js --mcp
|
|
467
|
-
# Expected: valid JSON with
|
|
504
|
+
# Expected: valid JSON with 8 tools
|
|
468
505
|
|
|
469
506
|
# Gate 4 — npm artifact is clean
|
|
470
507
|
npm pack --dry-run
|
|
@@ -481,9 +518,16 @@ sigmap/
|
|
|
481
518
|
├── gen-context.js ← PRIMARY ENTRY POINT — single file, zero deps
|
|
482
519
|
├── gen-project-map.js ← import graph, class hierarchy, route table
|
|
483
520
|
│
|
|
521
|
+
├── packages/
|
|
522
|
+
│ ├── core/ ← programmatic API — require('sigmap') (v2.4)
|
|
523
|
+
│ │ └── index.js ← extract, rank, buildSigIndex, scan, score
|
|
524
|
+
│ └── cli/ ← thin CLI wrapper / v3 compat shim (v2.4)
|
|
525
|
+
│
|
|
484
526
|
├── src/
|
|
485
527
|
│ ├── extractors/ ← 21 language extractors (one file per language)
|
|
486
|
-
│ ├──
|
|
528
|
+
│ ├── retrieval/ ← query-aware ranker + tokenizer (v2.3)
|
|
529
|
+
│ ├── eval/ ← benchmark runner + scorer (v2.1), analyzer (v2.2)
|
|
530
|
+
│ ├── mcp/ ← MCP stdio server — 8 tools
|
|
487
531
|
│ ├── security/ ← secret scanner — 10 patterns
|
|
488
532
|
│ ├── routing/ ← model routing hints
|
|
489
533
|
│ ├── tracking/ ← NDJSON usage logger
|
|
@@ -499,7 +543,7 @@ sigmap/
|
|
|
499
543
|
│ ├── fixtures/ ← one source file per language
|
|
500
544
|
│ ├── expected/ ← expected extractor output
|
|
501
545
|
│ ├── run.js ← zero-dep test runner
|
|
502
|
-
│ └── integration/ ←
|
|
546
|
+
│ └── integration/ ← 20 integration test files (304 tests)
|
|
503
547
|
│
|
|
504
548
|
├── docs/ ← documentation site (GitHub Pages)
|
|
505
549
|
│ ├── index.html ← homepage
|
package/gen-context.js
CHANGED
|
@@ -2879,7 +2879,23 @@ __factories["./src/mcp/handlers"] = function(module, exports) {
|
|
|
2879
2879
|
].join('\n');
|
|
2880
2880
|
}
|
|
2881
2881
|
|
|
2882
|
-
|
|
2882
|
+
/**
 * MCP handler for the `query_context` tool.
 *
 * Ranks the indexed files against `args.query` and returns the result as a
 * Markdown table. Every outcome, including failures, is reported as a
 * string; this function does not throw.
 *
 * @param {{ query: string, topK?: number }} args - Tool arguments.
 * @param {string} cwd - Project root containing the context file.
 * @returns {string} Ranked table, or a human-readable error message.
 */
function queryContext(args, cwd) {
  const hasQuery = Boolean(args && args.query);
  if (!hasQuery) {
    return 'Missing required argument: query';
  }

  const contextPath = path.join(cwd, CONTEXT_FILE);
  if (!fs.existsSync(contextPath)) {
    return 'No context file found. Run: node gen-context.js';
  }

  try {
    const {
      rank: rankFiles,
      buildSigIndex: loadIndex,
      formatRankTable: renderTable,
    } = __require('./src/retrieval/ranker');

    const sigIndex = loadIndex(cwd);
    if (sigIndex.size === 0) {
      return 'No signatures indexed. Run: node gen-context.js';
    }

    // Missing, zero or unparsable topK falls back to 10; the result is then
    // clamped to the 1..25 range.
    const requested = parseInt(args.topK, 10) || 10;
    const topK = Math.min(Math.max(1, requested), 25);

    const ranked = rankFiles(args.query, sigIndex, { topK });
    return renderTable(ranked, args.query);
  } catch (err) {
    return `_query_context failed: ${err.message}_`;
  }
}
|
|
2897
|
+
|
|
2898
|
+
module.exports = { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules, queryContext };
|
|
2883
2899
|
};
|
|
2884
2900
|
|
|
2885
2901
|
// ── ./src/mcp/server ──
|
|
@@ -2899,7 +2915,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
2899
2915
|
|
|
2900
2916
|
const readline = require('readline');
|
|
2901
2917
|
const { TOOLS } = __require('./src/mcp/tools');
|
|
2902
|
-
const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules } = __require('./src/mcp/handlers');
|
|
2918
|
+
const { readContext, searchSignatures, getMap, createCheckpoint, getRouting, explainFile, listModules, queryContext } = __require('./src/mcp/handlers');
|
|
2903
2919
|
|
|
2904
2920
|
const SERVER_INFO = {
|
|
2905
2921
|
name: 'sigmap',
|
|
@@ -2958,6 +2974,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
2958
2974
|
else if (name === 'get_routing') text = getRouting(args, cwd);
|
|
2959
2975
|
else if (name === 'explain_file') text = explainFile(args, cwd);
|
|
2960
2976
|
else if (name === 'list_modules') text = listModules(args, cwd);
|
|
2977
|
+
else if (name === 'query_context') text = queryContext(args, cwd);
|
|
2961
2978
|
else {
|
|
2962
2979
|
respondError(id, -32601, `Unknown tool: ${name}`);
|
|
2963
2980
|
return;
|
|
@@ -3137,6 +3154,30 @@ __factories["./src/mcp/tools"] = function(module, exports) {
|
|
|
3137
3154
|
required: [],
|
|
3138
3155
|
},
|
|
3139
3156
|
},
|
|
3157
|
+
{
|
|
3158
|
+
name: 'query_context',
|
|
3159
|
+
description:
|
|
3160
|
+
'Rank and return the most relevant files for a specific task or question. ' +
|
|
3161
|
+
'Uses keyword + symbol + path scoring to surface only the top-K files relevant ' +
|
|
3162
|
+
'to the query — much cheaper than reading all context. ' +
|
|
3163
|
+
'Returns ranked file list with signatures and relevance scores.',
|
|
3164
|
+
inputSchema: {
|
|
3165
|
+
type: 'object',
|
|
3166
|
+
properties: {
|
|
3167
|
+
query: {
|
|
3168
|
+
type: 'string',
|
|
3169
|
+
description:
|
|
3170
|
+
'Natural language task description or keyword(s) to rank files against. ' +
|
|
3171
|
+
'E.g. "add a new language extractor", "fix secret scanning", "auth module".',
|
|
3172
|
+
},
|
|
3173
|
+
topK: {
|
|
3174
|
+
type: 'number',
|
|
3175
|
+
description: 'Maximum number of files to return (default: 10, max: 25).',
|
|
3176
|
+
},
|
|
3177
|
+
},
|
|
3178
|
+
required: ['query'],
|
|
3179
|
+
},
|
|
3180
|
+
},
|
|
3140
3181
|
];
|
|
3141
3182
|
|
|
3142
3183
|
module.exports = { TOOLS };
|
|
@@ -3570,6 +3611,120 @@ __factories["./src/tracking/logger"] = function(module, exports) {
|
|
|
3570
3611
|
|
|
3571
3612
|
};
|
|
3572
3613
|
|
|
3614
|
+
// ── ./src/retrieval/tokenizer ──
|
|
3615
|
+
__factories["./src/retrieval/tokenizer"] = function(module, exports) {
|
|
3616
|
+
'use strict';
|
|
3617
|
+
const STOP_WORDS = new Set([
|
|
3618
|
+
'the', 'a', 'an', 'in', 'of', 'to', 'for', 'and', 'or', 'is', 'are',
|
|
3619
|
+
'that', 'this', 'it', 'with', 'from', 'by', 'be', 'as', 'on', 'at',
|
|
3620
|
+
'do', 'not', 'use', 'get', 'set', 'up', 'if', 'no', 'so', 'we',
|
|
3621
|
+
]);
|
|
3622
|
+
/**
 * Split free text, identifiers or file paths into unique lowercase tokens.
 *
 * Processing order:
 *   1. drop a trailing `.xxx` segment of 1-6 word characters when it is
 *      followed by whitespace, `/` or end of input;
 *   2. split camelCase and acronym boundaries (`getUser` -> `get User`,
 *      `HTTPServer` -> `HTTP Server`);
 *   3. turn `_ - . /` and any remaining non-word characters into spaces;
 *   4. lowercase, split on whitespace, drop tokens shorter than the minimum
 *      length, optionally drop stop words, and de-duplicate keeping first
 *      occurrence order.
 *
 * @param {string} text - Input text; non-strings and '' yield [].
 * @param {{ removeStopWords?: boolean, minLength?: number }} [opts]
 *   `removeStopWords` defaults to true (only an explicit `false` disables
 *   it); `minLength` defaults to 2 when missing or falsy.
 * @returns {string[]} Unique tokens in first-seen order.
 */
function tokenize(text, opts) {
  if (typeof text !== 'string' || text === '') return [];

  const keepStopWords = Boolean(opts) && opts.removeStopWords === false;
  const minLen = (opts && opts.minLength) || 2;

  const normalized = text
    .replace(/\.\w{1,6}(?=\s|\/|$)/g, ' ')
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/[_\-\.\/]/g, ' ')
    .replace(/[^\w\s]/g, ' ')
    .toLowerCase();

  const unique = new Set();
  for (const word of normalized.split(/\s+/)) {
    if (!(word.length >= minLen)) continue;
    if (!keepStopWords && STOP_WORDS.has(word)) continue;
    unique.add(word);
  }
  return Array.from(unique);
}
|
|
3638
|
+
module.exports = { tokenize, STOP_WORDS };
|
|
3639
|
+
};
|
|
3640
|
+
|
|
3641
|
+
// ── ./src/retrieval/ranker ──
|
|
3642
|
+
__factories["./src/retrieval/ranker"] = function(module, exports) {
|
|
3643
|
+
'use strict';
|
|
3644
|
+
const { tokenize, STOP_WORDS } = __require('./src/retrieval/tokenizer');
|
|
3645
|
+
const DEFAULT_WEIGHTS = {
|
|
3646
|
+
exactToken: 1.0, symbolMatch: 0.5, prefixMatch: 0.3, pathMatch: 0.8, recencyBoost: 1.5,
|
|
3647
|
+
};
|
|
3648
|
+
/**
 * Score one file's signatures against an already-tokenized query.
 *
 * For every query token that is not a stop word the score accumulates:
 *   - `exactToken`  when the token occurs in the tokenized signatures;
 *   - `symbolMatch` additionally, when it occurs in at least one signature
 *     after punctuation has been replaced by spaces;
 *   - `prefixMatch` (at most once per token) when the token has 4 or more
 *     characters and is a proper prefix of some signature token;
 *   - `pathMatch`   when it occurs in the tokenized file path.
 *
 * @param {string} filePath - Path of the file being scored.
 * @param {string[]} sigs - Signature lines extracted for the file.
 * @param {Iterable<string>} queryTokens - Tokens produced by `tokenize(query)`.
 * @param {object} [weights] - Full or partial override of DEFAULT_WEIGHTS.
 * @returns {number} Relevance score; 0 when there are no signatures or no
 *   query tokens.
 */
function scoreFile(filePath, sigs, queryTokens, weights) {
  if (!sigs || sigs.length === 0 || !queryTokens) return 0;

  // Merge with the defaults so a partial override such as { pathMatch: 2 }
  // cannot add `undefined` to the score and turn it into NaN.
  const w = weights ? Object.assign({}, DEFAULT_WEIGHTS, weights) : DEFAULT_WEIGHTS;

  const sigTokenSet = new Set(tokenize(sigs.join(' ')));
  const pathTokenSet = new Set(tokenize(filePath));

  // Union of the per-signature tokens. It does not depend on the query
  // token, so it is built once (and only if an exact match occurs) instead
  // of re-tokenizing every signature for every matching query token.
  let symbolTokens = null;
  const getSymbolTokens = () => {
    if (symbolTokens === null) {
      symbolTokens = new Set();
      for (const sig of sigs) {
        for (const t of tokenize(sig.replace(/[^a-zA-Z0-9_\s]/g, ' '))) symbolTokens.add(t);
      }
    }
    return symbolTokens;
  };

  let score = 0;
  for (const qt of queryTokens) {
    if (STOP_WORDS.has(qt)) continue;

    if (sigTokenSet.has(qt)) {
      score += w.exactToken;
      if (getSymbolTokens().has(qt)) score += w.symbolMatch;
    }

    if (qt.length >= 4) {
      for (const st of sigTokenSet) {
        // Award the prefix bonus once per query token, hence the break.
        if (st !== qt && st.startsWith(qt)) { score += w.prefixMatch; break; }
      }
    }

    if (pathTokenSet.has(qt)) score += w.pathMatch;
  }
  return score;
}
|
|
3669
|
+
/**
 * Rank every file in a signature index against a free-text query.
 *
 * When the query yields no tokens (for example only stop words or
 * one-character words) files are ordered by signature count instead.
 * Ties are broken by file path via `localeCompare`.
 *
 * NOTE(review): files that score 0 are not filtered out; they sort last
 * and can still appear when fewer than `topK` files match. Confirm whether
 * callers rely on that before changing it.
 *
 * @param {string} query - Free-text query; non-strings and '' yield [].
 * @param {Map<string, string[]>} sigIndex - File path -> signature lines.
 * @param {object} [opts]
 * @param {number} [opts.topK=10] - Maximum number of results. Values that
 *   are not a number >= 1 fall back to 10; fractions are floored.
 * @param {number} [opts.recencyBoost] - Multiplier applied to matching
 *   files that are in `recencySet` (default: DEFAULT_WEIGHTS.recencyBoost).
 * @param {Set<string>} [opts.recencySet] - Recently changed file paths.
 * @param {object} [opts.weights] - Partial override of DEFAULT_WEIGHTS.
 * @returns {{file: string, score: number, sigs: string[], tokens: number}[]}
 *   Entries sorted by descending score.
 */
function rank(query, sigIndex, opts) {
  if (!query || typeof query !== 'string') return [];
  if (!sigIndex || !(sigIndex instanceof Map) || sigIndex.size === 0) return [];

  const options = opts || {};

  // A negative topK used to reach slice(0, topK) and silently drop results
  // from the END of the list; anything that is not >= 1 now means "default".
  const requestedTopK = Number(options.topK);
  const topK = requestedTopK >= 1 ? Math.floor(requestedTopK) : 10;

  const recencyMultiplier = options.recencyBoost || DEFAULT_WEIGHTS.recencyBoost;
  const recencySet = options.recencySet || null;
  const weights = options.weights
    ? Object.assign({}, DEFAULT_WEIGHTS, options.weights)
    : DEFAULT_WEIGHTS;

  const queryTokens = tokenize(query);
  const hasTokens = queryTokens.length > 0;

  const entries = [];
  for (const [file, sigs] of sigIndex.entries()) {
    let score;
    if (hasTokens) {
      score = scoreFile(file, sigs, queryTokens, weights);
      // Recency only amplifies files that already matched something.
      if (recencySet && recencySet.has(file) && score > 0) score *= recencyMultiplier;
    } else {
      score = sigs.length;
    }
    // Token estimate: roughly four characters per token.
    entries.push({ file, score, sigs, tokens: Math.ceil(sigs.join('\n').length / 4) });
  }

  entries.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
  return entries.slice(0, topK);
}
|
|
3692
|
+
/**
 * Build a file -> signatures index by parsing the generated context file
 * at `<cwd>/.github/copilot-instructions.md`.
 *
 * A line of the form `### <path>` (a single whitespace-free token) starts a
 * new file entry. Lines starting with three backticks toggle "inside code
 * block"; every non-blank line inside a block is trimmed and recorded as a
 * signature of the current file.
 *
 * @param {string} cwd - Project root directory.
 * @returns {Map<string, string[]>} Index keyed by file path; empty when the
 *   context file does not exist.
 */
function buildSigIndex(cwd) {
  const fs = require('fs');
  const path = require('path');

  const index = new Map();
  const contextPath = path.join(cwd, '.github', 'copilot-instructions.md');
  if (!fs.existsSync(contextPath)) return index;

  let currentFile = null;
  let sigs = [];
  let inBlock = false;

  // Store the signatures collected so far for the file being parsed.
  const flush = () => {
    if (currentFile !== null) index.set(currentFile, sigs);
  };

  fs.readFileSync(contextPath, 'utf8').split('\n').forEach((line) => {
    const header = line.match(/^###\s+(\S+)\s*$/);
    if (header) {
      flush();
      currentFile = header[1];
      sigs = [];
      inBlock = false;
      return;
    }

    if (line.startsWith('```')) {
      inBlock = !inBlock;
      return;
    }

    if (inBlock && currentFile && line.trim()) sigs.push(line.trim());
  });

  flush();
  return index;
}
|
|
3709
|
+
/**
 * Render ranked results as Markdown: a summary table of every result
 * followed by the signature blocks of the top three files (at most ten
 * signatures each, with a "... (N more)" marker when truncated).
 *
 * @param {{file: string, score: number, sigs: string[], tokens: number}[]} results
 *   Output of `rank()`.
 * @param {string} query - The query the results were ranked for.
 * @returns {string} Markdown text, or a "No matching files" line when
 *   `results` is empty or missing.
 */
function formatRankTable(results, query) {
  if (!results || results.length === 0) return `No matching files found for query: "${query}"\n`;

  // A literal "|" inside a cell would split the Markdown table row.
  const cell = (value) => String(value).replace(/\|/g, '\\|');
  // Tolerate entries that were not produced by rank() and lack these fields.
  const sigsOf = (r) => (Array.isArray(r.sigs) ? r.sigs : []);
  const scoreOf = (r) => (typeof r.score === 'number' ? r.score : 0);

  const lines = [
    `## Query: ${query}`,
    '',
    '| Rank | File | Score | Sigs | Tokens |',
    '|------|------|-------|------|--------|',
  ];
  results.forEach((r, i) => {
    lines.push(`| ${i + 1} | ${cell(r.file)} | ${scoreOf(r).toFixed(2)} | ${sigsOf(r).length} | ${r.tokens} |`);
  });
  lines.push('');

  for (const r of results.slice(0, 3)) {
    const sigs = sigsOf(r);
    if (sigs.length === 0) continue;
    lines.push(`### ${r.file}`, '```', ...sigs.slice(0, 10));
    if (sigs.length > 10) lines.push(`... (${sigs.length - 10} more)`);
    lines.push('```', '');
  }
  return lines.join('\n');
}
|
|
3722
|
+
/**
 * Shape ranked results for machine-readable output.
 *
 * @param {{file: string, score: number, sigs: string[], tokens: number}[]} results
 *   Output of `rank()`; a missing value is treated as an empty list.
 * @param {string} query - The query the results were ranked for.
 * @returns {{query: string, results: object[], totalResults: number}}
 *   Each result carries a 1-based `rank` plus file, score, sigs and tokens.
 */
function formatRankJSON(results, query) {
  const list = results || [];
  const ranked = list.map((entry, position) => ({
    rank: position + 1,
    file: entry.file,
    score: entry.score,
    sigs: entry.sigs,
    tokens: entry.tokens,
  }));
  return { query, results: ranked, totalResults: list.length };
}
|
|
3725
|
+
module.exports = { rank, buildSigIndex, scoreFile, formatRankTable, formatRankJSON, DEFAULT_WEIGHTS };
|
|
3726
|
+
};
|
|
3727
|
+
|
|
3573
3728
|
// ── ./src/eval/scorer ──
|
|
3574
3729
|
__factories["./src/eval/scorer"] = function(module, exports) {
|
|
3575
3730
|
'use strict';
|
|
@@ -3936,7 +4091,7 @@ const path = require('path');
|
|
|
3936
4091
|
const os = require('os');
|
|
3937
4092
|
const { execSync } = require('child_process');
|
|
3938
4093
|
|
|
3939
|
-
const VERSION = '2.
|
|
4094
|
+
const VERSION = '2.4.0';
|
|
3940
4095
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
3941
4096
|
|
|
3942
4097
|
function requireSourceOrBundled(key) {
|
|
@@ -5149,6 +5304,9 @@ Usage:
|
|
|
5149
5304
|
node gen-context.js --analyze --json Breakdown as JSON
|
|
5150
5305
|
node gen-context.js --analyze --slow Re-time each extractor; flag files >50ms
|
|
5151
5306
|
node gen-context.js --diagnose-extractors Run all 21 extractors vs fixtures; show pass/fail + diff
|
|
5307
|
+
node gen-context.js --query "<text>" Rank files by relevance to a query
|
|
5308
|
+
node gen-context.js --query "<text>" --json Ranked results as JSON
|
|
5309
|
+
node gen-context.js --query "<text>" --top <n> Limit results to top N files (default 10)
|
|
5152
5310
|
node gen-context.js --init Write example config + .contextignore scaffold
|
|
5153
5311
|
node gen-context.js --help Show this message
|
|
5154
5312
|
node gen-context.js --version Show version
|
|
@@ -5435,6 +5593,38 @@ function main() {
|
|
|
5435
5593
|
}
|
|
5436
5594
|
}
|
|
5437
5595
|
|
|
5596
|
+
if (args.includes('--query')) {
|
|
5597
|
+
try {
|
|
5598
|
+
const qIdx = args.indexOf('--query');
|
|
5599
|
+
const query = (args[qIdx + 1] || '').trim();
|
|
5600
|
+
if (!query || query.startsWith('--')) {
|
|
5601
|
+
console.error('[sigmap] --query requires a search string');
|
|
5602
|
+
console.error(' Example: node gen-context.js --query "add a new language extractor"');
|
|
5603
|
+
process.exit(1);
|
|
5604
|
+
}
|
|
5605
|
+
const { rank, buildSigIndex, formatRankTable, formatRankJSON } = requireSourceOrBundled('./src/retrieval/ranker');
|
|
5606
|
+
const index = buildSigIndex(cwd);
|
|
5607
|
+
if (index.size === 0) {
|
|
5608
|
+
console.error('[sigmap] no context file found. Run: node gen-context.js');
|
|
5609
|
+
process.exit(1);
|
|
5610
|
+
}
|
|
5611
|
+
const topIdx = args.indexOf('--top');
|
|
5612
|
+
const topK = topIdx >= 0 ? Math.min(Math.max(1, parseInt(args[topIdx + 1], 10) || 10), 25)
|
|
5613
|
+
: ((config && config.retrieval && config.retrieval.topK) || 10);
|
|
5614
|
+
const recencyBoost = (config && config.retrieval && config.retrieval.recencyBoost) || 1.5;
|
|
5615
|
+
const results = rank(query, index, { topK, recencyBoost });
|
|
5616
|
+
if (args.includes('--json')) {
|
|
5617
|
+
process.stdout.write(JSON.stringify(formatRankJSON(results, query)) + '\n');
|
|
5618
|
+
} else {
|
|
5619
|
+
process.stdout.write(formatRankTable(results, query));
|
|
5620
|
+
}
|
|
5621
|
+
} catch (err) {
|
|
5622
|
+
console.error(`[sigmap] query error: ${err.message}`);
|
|
5623
|
+
process.exit(1);
|
|
5624
|
+
}
|
|
5625
|
+
process.exit(0);
|
|
5626
|
+
}
|
|
5627
|
+
|
|
5438
5628
|
if (args.includes('--report')) {
|
|
5439
5629
|
if (args.includes('--history')) {
|
|
5440
5630
|
try {
|
package/package.json
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sigmap",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.4.0",
|
|
4
4
|
"description": "Zero-dependency AI context engine — 97% token reduction. No npm install. Runs on Node 18+.",
|
|
5
5
|
"main": "gen-context.js",
|
|
6
|
+
"exports": {
|
|
7
|
+
".": "./packages/core/index.js",
|
|
8
|
+
"./cli": "./packages/cli/index.js",
|
|
9
|
+
"./core": "./packages/core/index.js"
|
|
10
|
+
},
|
|
6
11
|
"bin": {
|
|
7
12
|
"sigmap": "./gen-context.js",
|
|
8
13
|
"gen-context": "./gen-context.js",
|
|
@@ -26,6 +31,7 @@
|
|
|
26
31
|
"gen-context.js",
|
|
27
32
|
"gen-project-map.js",
|
|
28
33
|
"src/",
|
|
34
|
+
"packages/",
|
|
29
35
|
"README.md",
|
|
30
36
|
"LICENSE",
|
|
31
37
|
"CHANGELOG.md",
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* sigmap-cli — thin CLI wrapper around sigmap-core.
|
|
5
|
+
*
|
|
6
|
+
* This module loads the root gen-context.js entry point (see CLI_ENTRY and run() below).
|
|
7
|
+
* All --flag handling stays in gen-context.js; business logic lives in src/ or packages/core.
|
|
8
|
+
*
|
|
9
|
+
* NOTE: This file intentionally does NOT duplicate business logic.
|
|
10
|
+
* It exposes the gen-context.js path (CLI_ENTRY) and a run() helper so that
|
|
11
|
+
* `require('sigmap-cli')` can be used by tooling that wraps SigMap.
|
|
12
|
+
*
|
|
13
|
+
* In v2.4 the root gen-context.js is kept fully intact for backward compat.
|
|
14
|
+
* packages/cli is a forward-compat shim for the v3.0 adapter architecture.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const path = require('path');
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* The CLI entry point path.
|
|
21
|
+
* External tools can use this to spawn the CLI as a child process.
|
|
22
|
+
*/
|
|
23
|
+
const CLI_ENTRY = path.resolve(__dirname, '..', '..', 'gen-context.js');
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Run the SigMap CLI programmatically with the given argv array.
|
|
27
|
+
*
|
|
28
|
+
* @param {string[]} [argv] - Arguments to pass (default: process.argv)
|
|
29
|
+
* @param {string} [cwd] - Working directory (default: process.cwd())
|
|
30
|
+
* @returns {void}
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* const { run } = require('sigmap-cli');
|
|
34
|
+
* run(['--report'], '/path/to/project');
|
|
35
|
+
*/
|
|
36
|
+
function run(argv, cwd) {
  // Executes gen-context.js inside the current process by temporarily
  // swapping process.argv (and, when `cwd` is given, the working directory)
  // and then loading the entry point. Both are restored afterwards, also
  // when the CLI throws.
  //
  // NOTE(review): several CLI paths in gen-context.js call process.exit(),
  // which ends the host process before the `finally` block below can run.
  // Use CLI_ENTRY with child_process when that is not acceptable.
  const origArgv = process.argv;
  const origCwd = process.cwd();
  let changedDir = false;

  if (cwd) {
    try {
      process.chdir(cwd);
      changedDir = true;
    } catch (err) {
      // Still best-effort, but no longer silent: otherwise the CLI would run
      // against the caller's current directory without any indication.
      console.error(`[sigmap] run(): could not change directory to ${cwd}: ${err.message}`);
    }
  }

  if (argv) {
    process.argv = [process.argv[0], CLI_ENTRY, ...argv];
  }

  try {
    // require() caches modules, so without this every call after the first
    // would return the cached exports and never execute the CLI again.
    delete require.cache[require.resolve(CLI_ENTRY)];
    require(CLI_ENTRY);
  } finally {
    process.argv = origArgv;
    if (changedDir) {
      try {
        process.chdir(origCwd);
      } catch (err) {
        console.error(`[sigmap] run(): could not restore directory ${origCwd}: ${err.message}`);
      }
    }
  }
}
|
|
57
|
+
|
|
58
|
+
module.exports = {
|
|
59
|
+
/** Absolute path to the gen-context.js entry point */
|
|
60
|
+
CLI_ENTRY,
|
|
61
|
+
/** Run the SigMap CLI programmatically */
|
|
62
|
+
run,
|
|
63
|
+
};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "sigmap-cli",
|
|
3
|
+
"version": "2.4.0",
|
|
4
|
+
"description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"sigmap",
|
|
8
|
+
"cli",
|
|
9
|
+
"ai-context",
|
|
10
|
+
"code-signatures"
|
|
11
|
+
],
|
|
12
|
+
"author": {
|
|
13
|
+
"name": "Manoj Mallick",
|
|
14
|
+
"url": "https://github.com/manojmallick"
|
|
15
|
+
},
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "https://github.com/manojmallick/sigmap.git",
|
|
19
|
+
"directory": "packages/cli"
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://manojmallick.github.io/sigmap/",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=18.0.0"
|
|
25
|
+
}
|
|
26
|
+
}
|