sigmap 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/gen-context.js +267 -1
- package/package.json +1 -1
- package/src/eval/analyzer.js +221 -0
- package/src/mcp/server.js +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,26 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [2.2.0] — 2026-04-06
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **Diagnostics & analyze command** — `src/eval/analyzer.js`: per-file breakdown of signature count, token cost, extractor used, and test coverage status.
|
|
13
|
+
- **`--analyze` CLI flag** — prints a per-file table (File | Sigs | Tokens | Extractor | Coverage) across all srcDirs; respects `exclude` config.
|
|
14
|
+
- **`--analyze --json` flag** — outputs the same breakdown as structured JSON (`{ files, totalSigs, totalTokens, slowFiles, fileCount }`).
|
|
15
|
+
- **`--analyze --slow` flag** — re-times each extractor and flags any file whose extraction takes >50ms in the table.
|
|
16
|
+
- **`--diagnose-extractors` CLI flag** — runs all 21 language extractors against `test/fixtures/` and compares output to `test/expected/`; exits non-zero if any extractor diverges, shows first diff line per failure.
|
|
17
|
+
- **`test/integration/analyze.test.js`** — 14 integration tests covering `analyzeFiles`, `formatAnalysisTable`, `formatAnalysisJSON`, and all four CLI flags.
|
|
18
|
+
|
|
19
|
+
### Validation gate
|
|
20
|
+
- 21/21 extractor tests passed
|
|
21
|
+
- All integration suites passed (19 suites, 19 passed, 0 failed — includes 14 new analyze tests)
|
|
22
|
+
- `node gen-context.js --version` → `2.2.0`
|
|
23
|
+
- `node gen-context.js --analyze` runs without error on SigMap repo
|
|
24
|
+
- `node gen-context.js --analyze --json` → valid JSON with required keys
|
|
25
|
+
- `node gen-context.js --diagnose-extractors` → exits 0 on SigMap repo
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
9
29
|
## [2.1.0] — 2026-04-05
|
|
10
30
|
|
|
11
31
|
### Added
|
package/gen-context.js
CHANGED
|
@@ -3630,6 +3630,143 @@ __factories["./src/eval/scorer"] = function(module, exports) {
|
|
|
3630
3630
|
module.exports = { hitAtK, reciprocalRank, precisionAtK, aggregate, firstRank };
|
|
3631
3631
|
};
|
|
3632
3632
|
|
|
3633
|
+
// ── ./src/eval/analyzer ──
|
|
3634
|
+
__factories["./src/eval/analyzer"] = function(module, exports) {
  'use strict';

  // Per-file diagnostics: signature count, rough token estimate, extractor used,
  // and a heuristic "has a test" flag. Exports analyzeFiles, formatAnalysisTable
  // and formatAnalysisJSON.

  const fs = require('fs');
  const path = require('path');

  // Lower-cased file extension → extractor module name.
  const EXT_MAP = {
    '.ts': 'typescript', '.tsx': 'typescript',
    '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
    '.py': 'python', '.pyw': 'python',
    '.java': 'java',
    '.kt': 'kotlin', '.kts': 'kotlin',
    '.go': 'go',
    '.rs': 'rust',
    '.cs': 'csharp',
    '.cpp': 'cpp', '.c': 'cpp', '.h': 'cpp', '.hpp': 'cpp', '.cc': 'cpp',
    '.rb': 'ruby', '.rake': 'ruby',
    '.php': 'php',
    '.swift': 'swift',
    '.dart': 'dart',
    '.scala': 'scala', '.sc': 'scala',
    '.vue': 'vue',
    '.svelte': 'svelte',
    '.html': 'html', '.htm': 'html',
    '.css': 'css', '.scss': 'css', '.sass': 'css', '.less': 'css',
    '.yml': 'yaml', '.yaml': 'yaml',
    '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', '.fish': 'shell',
  };

  /** True for `Dockerfile` and for names that start with `Dockerfile.`. */
  function isDockerfile(name) {
    return name === 'Dockerfile' || name.startsWith('Dockerfile.');
  }

  /** Extractor name for a path: extension lookup first, then the Dockerfile check; null when unknown. */
  function getExtractorName(filePath) {
    const base = path.basename(filePath);
    const ext = path.extname(base).toLowerCase();
    if (EXT_MAP[ext]) return EXT_MAP[ext];
    if (isDockerfile(base)) return 'dockerfile';
    return null;
  }

  /** Rough token estimate: total signature characters / 4, rounded up. */
  function tokenCount(sigs) {
    return Math.ceil(sigs.reduce((sum, s) => sum + s.length, 0) / 4);
  }

  /**
   * Heuristic coverage check: true when any direct child of <cwd>/test, tests,
   * __tests__ or spec has a name containing the file's extension-less basename.
   * Only one directory level is read; the match is a plain substring test.
   */
  function hasCoverage(filePath, cwd) {
    const base = path.basename(filePath, path.extname(filePath));
    const testDirs = ['test', 'tests', '__tests__', 'spec'];
    for (const d of testDirs) {
      const abs = path.join(cwd, d);
      if (!fs.existsSync(abs)) continue;
      let entries;
      try { entries = fs.readdirSync(abs, { withFileTypes: true }); } catch (_) { continue; }
      for (const e of entries) { if (e.name.includes(base)) return true; }
    }
    return false;
  }

  /**
   * Run the matching extractor over each file and collect per-file stats.
   *
   * @param {string[]} files - paths to analyze
   * @param {string} cwd - root used for relative paths and the coverage check
   * @param {object} [opts]
   * @param {boolean} [opts.slow=false] - time each extraction
   * @param {number} [opts.slowMs=50] - threshold (ms) above which a file is flagged slow; 0 is honoured
   * @param {number} [opts.maxSigs=25] - signatures kept per file
   * @returns {object[]} one { file, extractor, sigs, tokens, covered, elapsedMs, slow } per analyzed file
   */
  function analyzeFiles(files, cwd, opts) {
    const options = opts || {};
    const slow = Boolean(options.slow);
    // An explicit 0 is a valid threshold; only other falsy values fall back to 50.
    const slowMs = options.slowMs === 0 ? 0 : (options.slowMs || 50);
    const maxSigs = options.maxSigs || 25;
    const stats = [];
    // Map (not a truthiness check) so a failed load is remembered as null
    // instead of being retried for every file of that language.
    const loaded = new Map();

    for (const filePath of files) {
      const extractorName = getExtractorName(filePath);
      if (!extractorName) continue;

      if (!loaded.has(extractorName)) {
        let mod = null;
        try { mod = __require(`./src/extractors/${extractorName}`); } catch (_) { mod = null; }
        loaded.set(extractorName, mod);
      }
      const extractor = loaded.get(extractorName);
      if (!extractor || typeof extractor.extract !== 'function') continue;

      let content;
      try { content = fs.readFileSync(filePath, 'utf8'); } catch (_) { continue; }

      const startedAt = slow ? Date.now() : 0;
      let sigs;
      // Best effort: an extractor that throws contributes zero signatures.
      try { sigs = extractor.extract(content); } catch (_) { sigs = []; }
      const elapsedMs = slow ? Date.now() - startedAt : 0;

      sigs = (Array.isArray(sigs) ? sigs : []).slice(0, maxSigs);

      stats.push({
        file: path.relative(cwd, filePath),
        extractor: extractorName,
        sigs: sigs.length,
        tokens: tokenCount(sigs),
        covered: hasCoverage(filePath, cwd),
        elapsedMs: slow ? elapsedMs : undefined,
        slow: slow ? (elapsedMs > slowMs) : undefined,
      });
    }
    return stats;
  }

  /**
   * Render stats as a markdown table with a totals footer.
   *
   * @param {object[]} stats - output of analyzeFiles()
   * @param {boolean} showSlow - append the Elapsed column
   * @returns {string} table text ending in a newline, or a placeholder line when there are no stats
   */
  function formatAnalysisTable(stats, showSlow) {
    if (!stats || stats.length === 0) return '_(no files analyzed)_\n';

    // reduce rather than Math.max(...spread): one argument per file can exceed
    // the engine's argument limit on very large file lists.
    const fileWidth = stats.reduce((w, s) => Math.max(w, s.file.length), 4);

    // Widths live in one place so header, separator, rows and footer cannot drift apart.
    const columns = [
      { title: 'File', width: fileWidth },
      { title: 'Sigs', width: 4 },
      { title: 'Tokens', width: 6 },
      { title: 'Extractor', width: 11 },
      { title: 'Coverage', width: 10 },
    ];
    if (showSlow) columns.push({ title: 'Elapsed', width: 8 });

    const line = (cells) => `| ${cells.map((c, i) => String(c).padEnd(columns[i].width)).join(' | ')} |`;

    const header = line(columns.map((c) => c.title));
    const sep = `|${columns.map((c) => '-'.repeat(c.width + 2)).join('|')}|`;

    const rows = stats.map((s) => {
      const cells = [
        s.file,
        String(s.sigs).padStart(4),
        String(s.tokens).padStart(6),
        s.extractor || '',
        s.covered ? '✓ tested' : '✗ untested',
      ];
      if (showSlow) {
        const ms = s.elapsedMs !== undefined ? `${s.elapsedMs}ms` : '';
        cells.push(`${ms.padStart(6)}${s.slow ? ' ⚠️' : ''}`);
      }
      return line(cells);
    });

    const totalSigs = stats.reduce((n, s) => n + s.sigs, 0);
    const totalTokens = stats.reduce((n, s) => n + s.tokens, 0);
    const footCells = ['', String(totalSigs).padStart(4), String(totalTokens).padStart(6), '**Total**', ''];
    if (showSlow) footCells.push('');
    const footer = line(footCells);

    return [header, sep, ...rows, sep, footer].join('\n') + '\n';
  }

  /**
   * Summarise stats as a plain object for JSON.stringify.
   * Missing stats are treated as an empty list, as formatAnalysisTable does.
   *
   * @param {object[]} stats
   * @returns {{files: object[], totalSigs: number, totalTokens: number, slowFiles: object[], fileCount: number}}
   */
  function formatAnalysisJSON(stats) {
    const files = stats || [];
    let totalSigs = 0;
    let totalTokens = 0;
    const slowFiles = [];
    for (const entry of files) {
      totalSigs += entry.sigs;
      totalTokens += entry.tokens;
      if (entry.slow) slowFiles.push({ file: entry.file, elapsedMs: entry.elapsedMs });
    }
    return { files, totalSigs, totalTokens, slowFiles, fileCount: files.length };
  }

  module.exports = { analyzeFiles, formatAnalysisTable, formatAnalysisJSON };
};
|
|
3769
|
+
|
|
3633
3770
|
// ── ./src/eval/runner ──
|
|
3634
3771
|
__factories["./src/eval/runner"] = function(module, exports) {
|
|
3635
3772
|
'use strict';
|
|
@@ -3799,7 +3936,7 @@ const path = require('path');
|
|
|
3799
3936
|
const os = require('os');
|
|
3800
3937
|
const { execSync } = require('child_process');
|
|
3801
3938
|
|
|
3802
|
-
const VERSION = '2.1.0';
|
|
3939
|
+
const VERSION = '2.2.0';
|
|
3803
3940
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
3804
3941
|
|
|
3805
3942
|
function requireSourceOrBundled(key) {
|
|
@@ -5008,6 +5145,10 @@ Usage:
|
|
|
5008
5145
|
node gen-context.js --benchmark Run retrieval benchmark (benchmarks/tasks/retrieval.jsonl)
|
|
5009
5146
|
node gen-context.js --benchmark --json Benchmark results as JSON
|
|
5010
5147
|
node gen-context.js --eval Alias for --benchmark
|
|
5148
|
+
node gen-context.js --analyze Per-file breakdown: sigs, tokens, extractor, coverage
|
|
5149
|
+
node gen-context.js --analyze --json Breakdown as JSON
|
|
5150
|
+
node gen-context.js --analyze --slow Re-time each extractor; flag files >50ms
|
|
5151
|
+
node gen-context.js --diagnose-extractors Run all 21 extractors vs fixtures; show pass/fail + diff
|
|
5011
5152
|
node gen-context.js --init Write example config + .contextignore scaffold
|
|
5012
5153
|
node gen-context.js --help Show this message
|
|
5013
5154
|
node gen-context.js --version Show version
|
|
@@ -5169,6 +5310,131 @@ function main() {
|
|
|
5169
5310
|
process.exit(0);
|
|
5170
5311
|
}
|
|
5171
5312
|
|
|
5313
|
+
if (args.includes('--analyze')) {
|
|
5314
|
+
try {
|
|
5315
|
+
const { analyzeFiles, formatAnalysisTable, formatAnalysisJSON } = requireSourceOrBundled('./src/eval/analyzer');
|
|
5316
|
+
const cfg = config || {};
|
|
5317
|
+
const srcDirs = cfg.srcDirs || DEFAULTS.srcDirs;
|
|
5318
|
+
const exclude = cfg.exclude || DEFAULTS.exclude;
|
|
5319
|
+
const slow = args.includes('--slow');
|
|
5320
|
+
|
|
5321
|
+
// Collect files (reuse existing file-walker if accessible, else inline)
|
|
5322
|
+
const allFiles = [];
|
|
5323
|
+
// Recursively push every regular file under `dir` onto allFiles. Entries whose
// name equals or starts with an `exclude` item are skipped, and recursion stops
// once `depth` exceeds cfg.maxDepth (6 when unset).
function walkForAnalyze(dir, depth) {
  const depthLimit = cfg.maxDepth || 6;
  if (depth > depthLimit) return;

  let children;
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch (_) {
    return; // unreadable directory: skip it
  }

  const isExcluded = (name) => exclude.some((x) => name === x || name.startsWith(x));
  for (const child of children) {
    if (isExcluded(child.name)) continue;
    const childPath = path.join(dir, child.name);
    if (child.isDirectory()) {
      walkForAnalyze(childPath, depth + 1);
    } else if (child.isFile()) {
      allFiles.push(childPath);
    }
  }
}
|
|
5334
|
+
for (const sd of srcDirs) {
|
|
5335
|
+
const abs = path.join(cwd, sd);
|
|
5336
|
+
if (fs.existsSync(abs)) walkForAnalyze(abs, 0);
|
|
5337
|
+
}
|
|
5338
|
+
|
|
5339
|
+
const stats = analyzeFiles(allFiles, cwd, { slow, maxSigs: cfg.maxSigsPerFile || 25 });
|
|
5340
|
+
|
|
5341
|
+
if (args.includes('--json')) {
|
|
5342
|
+
process.stdout.write(JSON.stringify(formatAnalysisJSON(stats)) + '\n');
|
|
5343
|
+
} else {
|
|
5344
|
+
const table = formatAnalysisTable(stats, slow);
|
|
5345
|
+
process.stdout.write(table);
|
|
5346
|
+
}
|
|
5347
|
+
} catch (err) {
|
|
5348
|
+
console.error(`[sigmap] analyze error: ${err.message}`);
|
|
5349
|
+
process.exit(1);
|
|
5350
|
+
}
|
|
5351
|
+
process.exit(0);
|
|
5352
|
+
}
|
|
5353
|
+
|
|
5354
|
+
if (args.includes('--diagnose-extractors')) {
|
|
5355
|
+
try {
|
|
5356
|
+
const fixturesDir = path.join(cwd, 'test', 'fixtures');
|
|
5357
|
+
const expectedDir = path.join(cwd, 'test', 'expected');
|
|
5358
|
+
if (!fs.existsSync(fixturesDir) || !fs.existsSync(expectedDir)) {
|
|
5359
|
+
console.error('[sigmap] test/fixtures or test/expected not found — run from the SigMap repo root');
|
|
5360
|
+
process.exit(1);
|
|
5361
|
+
}
|
|
5362
|
+
|
|
5363
|
+
const EXT_TO_LANG = {
|
|
5364
|
+
'.ts': 'typescript', '.js': 'javascript', '.py': 'python',
|
|
5365
|
+
'.java': 'java', '.kt': 'kotlin', '.go': 'go', '.rs': 'rust',
|
|
5366
|
+
'.cs': 'csharp', '.cpp': 'cpp', '.rb': 'ruby', '.php': 'php',
|
|
5367
|
+
'.swift': 'swift', '.dart': 'dart', '.scala': 'scala',
|
|
5368
|
+
'.vue': 'vue', '.svelte': 'svelte', '.html': 'html',
|
|
5369
|
+
'.css': 'css', '.yml': 'yaml', '.sh': 'shell',
|
|
5370
|
+
};
|
|
5371
|
+
const SPECIAL = { 'Dockerfile': 'dockerfile' };
|
|
5372
|
+
|
|
5373
|
+
let passed = 0; let failed = 0;
|
|
5374
|
+
const entries = fs.readdirSync(fixturesDir).sort();
|
|
5375
|
+
|
|
5376
|
+
for (const filename of entries) {
|
|
5377
|
+
const ext = path.extname(filename).toLowerCase();
|
|
5378
|
+
const lang = EXT_TO_LANG[ext] || SPECIAL[filename];
|
|
5379
|
+
if (!lang) continue;
|
|
5380
|
+
|
|
5381
|
+
const fixturePath = path.join(fixturesDir, filename);
|
|
5382
|
+
const expectedPath = path.join(expectedDir, `${lang}.txt`);
|
|
5383
|
+
if (!fs.existsSync(expectedPath)) {
|
|
5384
|
+
console.log(` SKIP ${lang.padEnd(12)} (no expected file)`);
|
|
5385
|
+
continue;
|
|
5386
|
+
}
|
|
5387
|
+
|
|
5388
|
+
const src = fs.readFileSync(fixturePath, 'utf8');
|
|
5389
|
+
const expected = fs.readFileSync(expectedPath, 'utf8').trim();
|
|
5390
|
+
|
|
5391
|
+
let mod;
|
|
5392
|
+
try {
|
|
5393
|
+
mod = requireSourceOrBundled(`./src/extractors/${lang}`);
|
|
5394
|
+
} catch (e) {
|
|
5395
|
+
console.log(` ERROR ${lang.padEnd(12)} loader failed: ${e.message}`);
|
|
5396
|
+
failed++;
|
|
5397
|
+
continue;
|
|
5398
|
+
}
|
|
5399
|
+
|
|
5400
|
+
let actual;
|
|
5401
|
+
try {
|
|
5402
|
+
const sigs = mod.extract(src);
|
|
5403
|
+
actual = sigs.join('\n').trim();
|
|
5404
|
+
} catch (e) {
|
|
5405
|
+
console.log(` ERROR ${lang.padEnd(12)} extract() threw: ${e.message}`);
|
|
5406
|
+
failed++;
|
|
5407
|
+
continue;
|
|
5408
|
+
}
|
|
5409
|
+
|
|
5410
|
+
if (actual === expected) {
|
|
5411
|
+
console.log(` PASS ${lang}`);
|
|
5412
|
+
passed++;
|
|
5413
|
+
} else {
|
|
5414
|
+
console.log(` FAIL ${lang}`);
|
|
5415
|
+
// Show first diff line
|
|
5416
|
+
const aLines = actual.split('\n');
|
|
5417
|
+
const eLines = expected.split('\n');
|
|
5418
|
+
const maxLen = Math.max(aLines.length, eLines.length);
|
|
5419
|
+
for (let i = 0; i < maxLen; i++) {
|
|
5420
|
+
if (aLines[i] !== eLines[i]) {
|
|
5421
|
+
console.log(` expected: ${(eLines[i] || '(missing)').slice(0, 100)}`);
|
|
5422
|
+
console.log(` actual : ${(aLines[i] || '(missing)').slice(0, 100)}`);
|
|
5423
|
+
break;
|
|
5424
|
+
}
|
|
5425
|
+
}
|
|
5426
|
+
failed++;
|
|
5427
|
+
}
|
|
5428
|
+
}
|
|
5429
|
+
|
|
5430
|
+
console.log(`\n${passed} passed, ${failed} failed`);
|
|
5431
|
+
process.exit(failed > 0 ? 1 : 0);
|
|
5432
|
+
} catch (err) {
|
|
5433
|
+
console.error(`[sigmap] diagnose error: ${err.message}`);
|
|
5434
|
+
process.exit(1);
|
|
5435
|
+
}
|
|
5436
|
+
}
|
|
5437
|
+
|
|
5172
5438
|
if (args.includes('--report')) {
|
|
5173
5439
|
if (args.includes('--history')) {
|
|
5174
5440
|
try {
|
package/package.json
CHANGED
|
(package.json hunk, +1 -1, is missing from this capture)
package/src/eval/analyzer.js
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* SigMap file analyzer — per-file diagnostic statistics.
|
|
5
|
+
* Zero npm dependencies.
|
|
6
|
+
*
|
|
7
|
+
* Exports:
|
|
8
|
+
* analyzeFiles(files, cwd, opts) → stats[]
|
|
9
|
+
* formatAnalysisTable(stats, showSlow) → markdown table string
|
|
10
|
+
* formatAnalysisJSON(stats) → plain object suitable for JSON.stringify
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
|
|
16
|
+
// Extension → extractor name (mirrors EXT_MAP in gen-context.js).
// Declared per extractor and flattened, so each language's extensions sit together.
const EXT_MAP = {};
for (const [extractor, extensions] of [
  ['typescript', ['.ts', '.tsx']],
  ['javascript', ['.js', '.jsx', '.mjs', '.cjs']],
  ['python', ['.py', '.pyw']],
  ['java', ['.java']],
  ['kotlin', ['.kt', '.kts']],
  ['go', ['.go']],
  ['rust', ['.rs']],
  ['csharp', ['.cs']],
  ['cpp', ['.cpp', '.c', '.h', '.hpp', '.cc']],
  ['ruby', ['.rb', '.rake']],
  ['php', ['.php']],
  ['swift', ['.swift']],
  ['dart', ['.dart']],
  ['scala', ['.scala', '.sc']],
  ['vue', ['.vue']],
  ['svelte', ['.svelte']],
  ['html', ['.html', '.htm']],
  ['css', ['.css', '.scss', '.sass', '.less']],
  ['yaml', ['.yml', '.yaml']],
  ['shell', ['.sh', '.bash', '.zsh', '.fish']],
]) {
  for (const extension of extensions) {
    EXT_MAP[extension] = extractor;
  }
}
|
|
39
|
+
|
|
40
|
+
/** True for `Dockerfile` itself and for any name beginning with `Dockerfile.`. */
function isDockerfile(name) {
  if (name === 'Dockerfile') return true;
  return name.startsWith('Dockerfile.');
}
|
|
43
|
+
|
|
44
|
+
/**
 * Resolve which extractor handles a file, judging by its basename only.
 * A lower-cased extension present in EXT_MAP wins; otherwise Dockerfile-style
 * names map to 'dockerfile'.
 *
 * @param {string} filePath
 * @returns {string|null} extractor name, or null when the file type is not recognised
 */
function getExtractorName(filePath) {
  const fileName = path.basename(filePath);
  const mapped = EXT_MAP[path.extname(fileName).toLowerCase()];
  if (mapped) return mapped;
  return isDockerfile(fileName) ? 'dockerfile' : null;
}
|
|
51
|
+
|
|
52
|
+
/** Rough token estimate: total characters across all signatures, divided by 4 and rounded up. */
function tokenCount(sigs) {
  let chars = 0;
  sigs.forEach((sig) => {
    chars += sig.length;
  });
  return Math.ceil(chars / 4);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Heuristic "does this file have a test?" check.
 *
 * Takes the file's basename without its extension and returns true as soon as
 * any direct child (one level only, not recursive) of <cwd>/test, <cwd>/tests,
 * <cwd>/__tests__ or <cwd>/spec has a name containing that basename.
 * NOTE(review): this is a plain substring match, so a short basename can match
 * an unrelated entry.
 *
 * @param {string} filePath - source file to check
 * @param {string} cwd - project root holding the test directories
 * @returns {boolean}
 */
function hasCoverage(filePath, cwd) {
  const relative = path.relative(cwd, filePath);
  const stem = path.basename(relative, path.extname(relative));

  return ['test', 'tests', '__tests__', 'spec'].some((dirName) => {
    const testDir = path.join(cwd, dirName);
    if (!fs.existsSync(testDir)) return false;

    let children;
    try {
      children = fs.readdirSync(testDir, { withFileTypes: true });
    } catch (_) {
      return false; // unreadable directory: no match here, try the next one
    }
    return children.some((child) => child.name.includes(stem));
  });
}
|
|
77
|
+
|
|
78
|
+
/**
 * Load the extractor module called `name`.
 *
 * Lookup order:
 *   1. <cwd>/src/extractors/<name>.js, when that file exists (projects that embed sigmap)
 *   2. ../extractors/<name>.js relative to this module
 * A candidate that fails to load is skipped silently.
 * NOTE(review): step 1 executes code from the project being analysed — confirm
 * that is acceptable when running against untrusted repositories.
 *
 * @param {string} name - extractor name, e.g. 'python'
 * @param {string} cwd - project root
 * @returns {object|null} the loaded module, or null when neither candidate loads
 */
function loadExtractor(name, cwd) {
  const fileName = `${name}.js`;

  const projectCopy = path.join(cwd, 'src', 'extractors', fileName);
  if (fs.existsSync(projectCopy)) {
    try {
      return require(projectCopy);
    } catch (_) {
      // broken project copy: fall through to the packaged extractor
    }
  }

  try {
    return require(path.join(__dirname, '..', 'extractors', fileName));
  } catch (_) {
    return null;
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Analyze a list of absolute file paths.
 *
 * Files with no known extractor, with an extractor that fails to load or has no
 * extract() function, or that cannot be read are skipped. An extractor that
 * throws contributes zero signatures for that file.
 *
 * @param {string[]} files - absolute paths to analyze
 * @param {string} cwd - project root
 * @param {object} [opts]
 * @param {boolean} [opts.slow=false] - if true, measure extraction time per file
 * @param {number} [opts.slowMs=50] - threshold (ms) before a file is "slow"; an explicit 0 is honoured
 * @param {number} [opts.maxSigs=25] - max sigs per file
 * @returns {object[]} array of per-file stat objects
 *   ({ file, extractor, sigs, tokens, covered, elapsedMs, slow }); elapsedMs and
 *   slow are undefined unless opts.slow is set
 */
function analyzeFiles(files, cwd, opts) {
  const options = opts || {};
  const slow = Boolean(options.slow);
  // `||` alone would turn a deliberate threshold of 0 into 50.
  const slowMs = options.slowMs === 0 ? 0 : (options.slowMs || 50);
  const maxSigs = options.maxSigs || 25;

  const stats = [];
  // Map + has(): a failed load (null) is remembered too, instead of being
  // retried for every file that maps to the same extractor.
  const extractorCache = new Map();

  for (const filePath of files) {
    const extractorName = getExtractorName(filePath);
    if (!extractorName) continue;

    if (!extractorCache.has(extractorName)) {
      extractorCache.set(extractorName, loadExtractor(extractorName, cwd));
    }
    const extractor = extractorCache.get(extractorName);
    if (!extractor || typeof extractor.extract !== 'function') continue;

    let content;
    try { content = fs.readFileSync(filePath, 'utf8'); } catch (_) { continue; }

    // Only touch the clock when timing was requested.
    const startedAt = slow ? Date.now() : 0;
    let sigs;
    try { sigs = extractor.extract(content); } catch (_) { sigs = []; }
    const elapsedMs = slow ? Date.now() - startedAt : 0;

    sigs = (Array.isArray(sigs) ? sigs : []).slice(0, maxSigs);

    stats.push({
      file: path.relative(cwd, filePath),
      extractor: extractorName,
      sigs: sigs.length,
      tokens: tokenCount(sigs),
      covered: hasCoverage(filePath, cwd),
      elapsedMs: slow ? elapsedMs : undefined,
      slow: slow ? elapsedMs > slowMs : undefined,
    });
  }

  return stats;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Format stats as a markdown table with a totals footer.
 *
 * Column widths are declared once and every line (header, separator, rows,
 * footer) is padded from them, so the columns stay aligned — including the
 * optional Elapsed column.
 *
 * @param {object[]} stats - output of analyzeFiles()
 * @param {boolean} showSlow - whether to include the Elapsed column
 * @returns {string} table text ending in a newline, or '_(no files analyzed)_\n' when stats is empty or missing
 */
function formatAnalysisTable(stats, showSlow) {
  if (!stats || stats.length === 0) return '_(no files analyzed)_\n';

  // reduce rather than Math.max(...spread): spreading one argument per file
  // can exceed the engine's argument limit on very large file lists.
  const fileWidth = stats.reduce((w, s) => Math.max(w, s.file.length), 4);

  const columns = [
    { title: 'File', width: fileWidth },
    { title: 'Sigs', width: 4 },
    { title: 'Tokens', width: 6 },
    { title: 'Extractor', width: 11 },
    { title: 'Coverage', width: 10 },
  ];
  if (showSlow) columns.push({ title: 'Elapsed', width: 8 });

  // Numeric cells arrive already right-aligned; padEnd fills the rest of the column.
  const line = (cells) => `| ${cells.map((c, i) => String(c).padEnd(columns[i].width)).join(' | ')} |`;

  const header = line(columns.map((c) => c.title));
  const sep = `|${columns.map((c) => '-'.repeat(c.width + 2)).join('|')}|`;

  const rows = stats.map((s) => {
    const cells = [
      s.file,
      String(s.sigs).padStart(4),
      String(s.tokens).padStart(6),
      s.extractor || '',
      s.covered ? '✓ tested' : '✗ untested',
    ];
    if (showSlow) {
      const ms = s.elapsedMs !== undefined ? `${s.elapsedMs}ms` : '';
      const flag = s.slow ? ' ⚠️' : '';
      cells.push(`${ms.padStart(6)}${flag}`);
    }
    return line(cells);
  });

  const totalSigs = stats.reduce((n, s) => n + s.sigs, 0);
  const totalTokens = stats.reduce((n, s) => n + s.tokens, 0);
  const footCells = ['', String(totalSigs).padStart(4), String(totalTokens).padStart(6), '**Total**', ''];
  if (showSlow) footCells.push('');
  const footer = line(footCells);

  return [header, sep, ...rows, sep, footer].join('\n') + '\n';
}
|
|
200
|
+
|
|
201
|
+
/**
 * Summarise stats as a plain object suitable for JSON.stringify.
 *
 * Missing stats (null/undefined) are treated as an empty list, matching how
 * formatAnalysisTable tolerates them.
 *
 * @param {object[]} stats - output of analyzeFiles()
 * @returns {{files: object[], totalSigs: number, totalTokens: number, slowFiles: {file: string, elapsedMs: number}[], fileCount: number}}
 */
function formatAnalysisJSON(stats) {
  const files = stats || [];
  let totalSigs = 0;
  let totalTokens = 0;
  const slowFiles = [];

  // Single pass: accumulate totals and pick out the entries flagged slow.
  for (const entry of files) {
    totalSigs += entry.sigs;
    totalTokens += entry.tokens;
    if (entry.slow) slowFiles.push({ file: entry.file, elapsedMs: entry.elapsedMs });
  }

  return {
    files,
    totalSigs,
    totalTokens,
    slowFiles,
    fileCount: files.length,
  };
}
|
|
220
|
+
|
|
221
|
+
module.exports = { analyzeFiles, formatAnalysisTable, formatAnalysisJSON };
|
package/src/mcp/server.js
CHANGED