sigmap 7.30.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +9 -9
- package/gen-context.js +581 -73
- package/gen-project-map.js +14 -6
- package/llms-full.txt +5 -5
- package/llms.txt +5 -5
- package/package.json +2 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/eval/runner.js +9 -61
- package/src/evidence/pack.js +42 -8
- package/src/map/build-ci.js +91 -0
- package/src/map/config-manifest.js +101 -0
- package/src/map/env-schema.js +90 -0
- package/src/map/migrations.js +84 -0
- package/src/mcp/handlers.js +5 -1
- package/src/mcp/server.js +1 -1
- package/src/retrieval/bm25.js +122 -0
- package/src/retrieval/ranker.js +15 -1
package/gen-context.js
CHANGED
|
@@ -4136,6 +4136,7 @@ __factories["./src/eval/runner"] = function(module, exports) {
|
|
|
4136
4136
|
const fs = require('fs');
|
|
4137
4137
|
const path = require('path');
|
|
4138
4138
|
const { aggregate } = __require('./src/eval/scorer');
|
|
4139
|
+
const { bm25rank } = __require('./src/retrieval/bm25');
|
|
4139
4140
|
|
|
4140
4141
|
// ---------------------------------------------------------------------------
|
|
4141
4142
|
// Context file reader
|
|
@@ -4197,79 +4198,26 @@ __factories["./src/eval/runner"] = function(module, exports) {
|
|
|
4197
4198
|
}
|
|
4198
4199
|
|
|
4199
4200
|
// ---------------------------------------------------------------------------
|
|
4200
|
-
//
|
|
4201
|
+
// Identifier-aware BM25 ranking (v7.31; see src/retrieval/bm25.js and #395)
|
|
4201
4202
|
// ---------------------------------------------------------------------------
|
|
4202
4203
|
|
|
4203
|
-
|
|
4204
|
-
* Tokenize a query or signature into lower-case word tokens.
|
|
4205
|
-
* Splits on whitespace, punctuation, camelCase, and snake_case.
|
|
4206
|
-
* @param {string} text
|
|
4207
|
-
* @returns {string[]}
|
|
4208
|
-
*/
|
|
4209
|
-
function tokenize(text) {
|
|
4210
|
-
if (!text) return [];
|
|
4211
|
-
return text
|
|
4212
|
-
// split camelCase
|
|
4213
|
-
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
|
4214
|
-
// split snake/kebab
|
|
4215
|
-
.replace(/[_\-]/g, ' ')
|
|
4216
|
-
// drop non-word chars
|
|
4217
|
-
.replace(/[^\w\s]/g, ' ')
|
|
4218
|
-
.toLowerCase()
|
|
4219
|
-
.split(/\s+/)
|
|
4220
|
-
.filter((t) => t.length > 1);
|
|
4221
|
-
}
|
|
4222
|
-
|
|
4223
|
-
const STOP_WORDS = new Set([
|
|
4224
|
-
'the', 'a', 'an', 'in', 'of', 'to', 'for', 'and', 'or', 'is', 'are',
|
|
4225
|
-
'that', 'this', 'it', 'with', 'from', 'by', 'be', 'as', 'on', 'at',
|
|
4226
|
-
]);
|
|
4204
|
+
const { tokenize } = __require('./src/retrieval/bm25');
|
|
4227
4205
|
|
|
4228
4206
|
/**
|
|
4229
|
-
*
|
|
4230
|
-
* Returns
|
|
4231
|
-
*
|
|
4232
|
-
* @param {string[]} queryTokens
|
|
4233
|
-
* @returns {number}
|
|
4234
|
-
*/
|
|
4235
|
-
function scoreFile(sigs, queryTokens) {
|
|
4236
|
-
if (!sigs || sigs.length === 0) return 0;
|
|
4237
|
-
|
|
4238
|
-
const sigText = sigs.join(' ');
|
|
4239
|
-
const sigTokens = new Set(tokenize(sigText));
|
|
4240
|
-
|
|
4241
|
-
let score = 0;
|
|
4242
|
-
for (const qt of queryTokens) {
|
|
4243
|
-
if (STOP_WORDS.has(qt)) continue;
|
|
4244
|
-
if (sigTokens.has(qt)) score += 1;
|
|
4245
|
-
// Partial match (prefix)
|
|
4246
|
-
for (const st of sigTokens) {
|
|
4247
|
-
if (st !== qt && st.startsWith(qt) && qt.length >= 4) score += 0.3;
|
|
4248
|
-
}
|
|
4249
|
-
}
|
|
4250
|
-
|
|
4251
|
-
return score;
|
|
4252
|
-
}
|
|
4253
|
-
|
|
4254
|
-
/**
|
|
4255
|
-
* Rank all files in the index against a query. Returns file paths sorted
|
|
4256
|
-
* by relevance score descending. Ties are broken by file path alphabetically.
|
|
4207
|
+
* Rank all files in the index against a query with the identifier-aware BM25
|
|
4208
|
+
* re-ranker. Returns file entries sorted by relevance score descending; ties
|
|
4209
|
+
* are broken by file path alphabetically (deterministic).
|
|
4257
4210
|
* @param {string} query
|
|
4258
4211
|
* @param {Map<string, string[]>} index
|
|
4259
4212
|
* @param {number} topK
|
|
4260
4213
|
* @returns {{ file: string, score: number, sigs: string[] }[]}
|
|
4261
4214
|
*/
|
|
4262
4215
|
function rank(query, index, topK = 10) {
|
|
4263
|
-
const
|
|
4264
|
-
const scored = [];
|
|
4265
|
-
|
|
4216
|
+
const candidates = [];
|
|
4266
4217
|
for (const [file, sigs] of index.entries()) {
|
|
4267
|
-
|
|
4268
|
-
scored.push({ file, score, sigs });
|
|
4218
|
+
candidates.push({ file, sigs });
|
|
4269
4219
|
}
|
|
4270
|
-
|
|
4271
|
-
scored.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
|
|
4272
|
-
return scored.slice(0, topK);
|
|
4220
|
+
return bm25rank(query, candidates).slice(0, topK);
|
|
4273
4221
|
}
|
|
4274
4222
|
|
|
4275
4223
|
// ---------------------------------------------------------------------------
|
|
@@ -4661,7 +4609,14 @@ __factories["./src/evidence/pack"] = function(module, exports) {
|
|
|
4661
4609
|
const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|\.(pb|_pb)\.|\.pb\.go$|_pb2\.py$/;
|
|
4662
4610
|
const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
|
|
4663
4611
|
const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
|
|
4664
|
-
|
|
4612
|
+
// DB migrations: framework dirs (Rails/Alembic/Prisma), Flyway `V1__x.sql`,
|
|
4613
|
+
// timestamped migration files, and `*_migration.*` naming.
|
|
4614
|
+
const MIGRATION_RE = /(^|\/)(migrations?|alembic\/versions|prisma\/migrations)(\/|$)|(^|\/)db\/migrate\/|(^|\/)V\d+(_\d+)*__[^/]+\.(sql|java)$|(^|\/)\d{8,}[_-][^/]+\.(sql|rb|py|js|ts)$|[._-]migration[s]?[._-]/i;
|
|
4615
|
+
const PAYMENT_RE = /(^|\/|[._-])(payment|payments|billing|checkout|invoice|invoicing|subscription|stripe|paypal|braintree|charge|refund|payout)([._-]|\/|$)/i;
|
|
4616
|
+
const AUTH_RE = /(^|\/|[._-])(auth|authn|authz|login|logout|signin|signup|password|passwd|session|oauth|jwt|permission|permissions|acl|rbac|credential|credentials)([._-]|\/|$)/i;
|
|
4617
|
+
const SECURITY_RE = /(^|\/|[._-])(secret|secrets|crypto|cipher|encrypt|decrypt|token|signing|keystore|vault)([._-]|\/|$)/i;
|
|
4618
|
+
// Public API surface: `api/` dirs, `public-api`, and module barrel entrypoints.
|
|
4619
|
+
const PUBLIC_API_RE = /(^|\/)api(\/|$)|(^|\/)public[-_]?api(\/|$)|(^|\/)index\.(js|ts|mjs|cjs)$/i;
|
|
4665
4620
|
|
|
4666
4621
|
/**
|
|
4667
4622
|
* Split a signature's ` :start-end` line anchor from its symbol text.
|
|
@@ -4679,17 +4634,25 @@ __factories["./src/evidence/pack"] = function(module, exports) {
|
|
|
4679
4634
|
}
|
|
4680
4635
|
|
|
4681
4636
|
/**
|
|
4682
|
-
* Classify a file into a
|
|
4683
|
-
*
|
|
4637
|
+
* Classify a file into a risk label (C3, v8.5). Path-based, deterministic.
|
|
4638
|
+
* Precedence is strict, most-specific-risk first: a migration touching payments
|
|
4639
|
+
* is labeled `migration` (a schema change is the dominant risk), payment/auth
|
|
4640
|
+
* outrank the generic `security` bucket, and `config`/`public-api` resolve
|
|
4641
|
+
* before the `source` fallback. `test`/`generated` semantics are preserved so
|
|
4642
|
+
* existing consumers (findRelatedTests, verifier) keep working.
|
|
4684
4643
|
* @param {string} relPath
|
|
4685
|
-
* @returns {'generated'|'test'|'
|
|
4644
|
+
* @returns {'generated'|'test'|'migration'|'payment'|'auth'|'security'|'config'|'public-api'|'source'}
|
|
4686
4645
|
*/
|
|
4687
4646
|
function riskLabelFor(relPath) {
  // Normalise Windows separators so every pattern can assume `/`.
  const normalized = relPath.replace(/\\/g, '/');

  // Ordered by precedence: the first pattern that matches decides the label.
  const precedence = [
    ['generated', GENERATED_RE],
    ['test', TEST_RE],
    ['migration', MIGRATION_RE],
    ['payment', PAYMENT_RE],
    ['auth', AUTH_RE],
    ['security', SECURITY_RE],
    ['config', CONFIG_RE],
    ['public-api', PUBLIC_API_RE],
  ];

  const match = precedence.find(([, pattern]) => pattern.test(normalized));
  return match ? match[0] : 'source';
}
|
|
4695
4658
|
|
|
@@ -4700,9 +4663,28 @@ __factories["./src/evidence/pack"] = function(module, exports) {
|
|
|
4700
4663
|
}
|
|
4701
4664
|
|
|
4702
4665
|
/**
|
|
4703
|
-
*
|
|
4704
|
-
*
|
|
4705
|
-
*
|
|
4666
|
+
* Infer the implementation stem a test file targets, by stripping the
|
|
4667
|
+
* conventional test affixes across languages (measured in the C2 benchmark):
|
|
4668
|
+
* foo.test.js / foo.spec.ts → foo (JS/TS)
|
|
4669
|
+
* test_foo.py → foo (Python / pytest)
|
|
4670
|
+
* foo_test.go / foo_test.py → foo (Go, unittest)
|
|
4671
|
+
* FooTest.java / BarSpec.scala → Foo (JVM, PascalCase)
|
|
4672
|
+
* @param {string} relPath
|
|
4673
|
+
* @returns {string}
|
|
4674
|
+
*/
|
|
4675
|
+
function testTargetStem(relPath) {
  // Applied in this order to the stem returned by stemOf().
  const affixes = [
    /^test[_-]/i, // Python: test_foo
    /[_-]test$/i, // Go / unittest: foo_test
    /(Tests?|Specs?)$/, // JVM PascalCase: FooTest, BarSpec
  ];
  return affixes.reduce((stem, affix) => stem.replace(affix, ''), stemOf(relPath));
}
|
|
4682
|
+
|
|
4683
|
+
/**
|
|
4684
|
+
* Impl→test discovery (C2, v8.5). Matches test files back to their
|
|
4685
|
+
* implementation by normalizing conventional test affixes, so JS/TS, Python,
|
|
4686
|
+
* Go, and JVM naming conventions all resolve. Deterministic; accuracy is
|
|
4687
|
+
* measured by `scripts/run-test-discovery-benchmark.mjs`.
|
|
4706
4688
|
* @param {string} relPath
|
|
4707
4689
|
* @param {string[]} allFiles - universe of indexed files (relative paths)
|
|
4708
4690
|
* @returns {string[]}
|
|
@@ -4715,7 +4697,7 @@ __factories["./src/evidence/pack"] = function(module, exports) {
|
|
|
4715
4697
|
for (const f of allFiles) {
|
|
4716
4698
|
if (f === relPath) continue;
|
|
4717
4699
|
if (riskLabelFor(f) !== 'test') continue;
|
|
4718
|
-
if (
|
|
4700
|
+
if (testTargetStem(f).toLowerCase() === stem) out.push(f);
|
|
4719
4701
|
}
|
|
4720
4702
|
return out.sort();
|
|
4721
4703
|
}
|
|
@@ -11231,6 +11213,101 @@ __factories["./src/learning/weights"] = function(module, exports) {
|
|
|
11231
11213
|
|
|
11232
11214
|
};
|
|
11233
11215
|
|
|
11216
|
+
// ── ./src/map/build-ci ──
|
|
11217
|
+
__factories["./src/map/build-ci"] = function(module, exports) {
|
|
11218
|
+
|
|
11219
|
+
/**
|
|
11220
|
+
* Build & CI extractor (v8.5 C1).
|
|
11221
|
+
*
|
|
11222
|
+
* Surfaces how the project is built and validated: npm/pnpm/yarn scripts
|
|
11223
|
+
* (package.json), GitHub Actions workflows (.github/workflows/*.yml), and
|
|
11224
|
+
* Makefile targets. Pure, zero-dependency, deterministic.
|
|
11225
|
+
*
|
|
11226
|
+
* @param {string[]} files — absolute file paths (unused; roots are read directly)
|
|
11227
|
+
* @param {string} cwd — project root
|
|
11228
|
+
* @returns {string} formatted markdown table (empty string if none found)
|
|
11229
|
+
*/
|
|
11230
|
+
|
|
11231
|
+
const fs = require('fs');
|
|
11232
|
+
const path = require('path');
|
|
11233
|
+
|
|
11234
|
+
const MAX_ROWS = 120;
|
|
11235
|
+
|
|
11236
|
+
/**
 * Read and parse a JSON file, best-effort.
 *
 * A leading UTF-8 byte-order mark is removed before parsing: `JSON.parse`
 * rejects U+FEFF, so a BOM-prefixed package.json would otherwise be treated
 * as if it did not exist.
 *
 * @param {string} p - path of the JSON file
 * @returns {*} the parsed value, or null when the file is unreadable or invalid
 */
function readJson(p) {
  try {
    const raw = fs.readFileSync(p, 'utf8');
    return JSON.parse(raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw);
  } catch (_) {
    // Deliberate best-effort: a missing or malformed manifest just yields null.
    return null;
  }
}
|
|
11239
|
+
|
|
11240
|
+
/**
 * Append one `script` row per entry of the root package.json `scripts` map,
 * in sorted name order. Does nothing when the manifest or its scripts are
 * missing.
 *
 * @param {string} cwd - project root
 * @param {{kind: string, name: string, detail: string}[]} rows - accumulator, mutated in place
 * @returns {void}
 */
function npmScripts(cwd, rows) {
  const manifest = readJson(path.join(cwd, 'package.json'));
  const scripts = manifest ? manifest.scripts : null;
  if (!scripts || typeof scripts !== 'object') return;

  const sortedNames = Object.keys(scripts).sort();
  sortedNames.forEach((name) => {
    rows.push({ kind: 'script', name, detail: 'npm run ' + name });
  });
}
|
|
11247
|
+
|
|
11248
|
+
/**
 * Extract the trigger events declared by a workflow's top-level `on:` key.
 *
 * Supports the inline forms (`on: push`, `on: [push, pull_request]`) and the
 * block forms (a mapping of events, or a `- event` sequence). For block forms
 * only the direct children of `on:` are reported, so nested filters such as
 * `branches:` and keys of later sections such as `jobs:` are not mistaken for
 * events.
 *
 * @param {string} content - raw workflow YAML
 * @returns {string} comma-separated events (at most 6 for block forms), or ''
 */
function workflowTriggers(content) {
  const lines = content.split(/\r?\n/);
  const start = lines.findIndex((line) => /^(?:on|'on'|"on")\s*:/.test(line));
  if (start === -1) return '';

  // Anything after the colon on the same line (minus a trailing comment) is
  // the inline form.
  const inline = lines[start]
    .slice(lines[start].indexOf(':') + 1)
    .replace(/(^|\s)#.*$/, '')
    .trim();
  if (inline) return inline.replace(/[[\]{}'"]/g, '').trim();

  const events = [];
  let childIndent = -1;
  let sequenceForm = false;
  for (let i = start + 1; i < lines.length; i += 1) {
    const line = lines[i];
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const indent = line.length - line.trimStart().length;
    const isItem = trimmed.startsWith('-');
    // A non-indented key starts the next top-level section and ends `on:`.
    if (indent === 0 && !isItem) break;

    if (childIndent === -1) {
      childIndent = indent;
      sequenceForm = isItem;
    }
    // Keep direct children only; deeper lines are per-event filters.
    if (indent !== childIndent || isItem !== sequenceForm) continue;

    const m = trimmed.match(/^(?:-\s*)?['"]?([a-z_]+)['"]?\s*(?::|#|$)/);
    if (m) events.push(m[1]);
  }
  return [...new Set(events)].slice(0, 6).join(', ');
}

/**
 * Append one `ci` row per GitHub Actions workflow in `.github/workflows`.
 *
 * The row name is the workflow's top-level `name:` (falling back to the file
 * name when absent or empty); the detail is the file name followed by its
 * trigger events when any were found. Unreadable directories and files are
 * skipped silently.
 *
 * @param {string} cwd - project root
 * @param {{kind: string, name: string, detail: string}[]} rows - accumulator, mutated in place
 * @returns {void}
 */
function ciWorkflows(cwd, rows) {
  const dir = path.join(cwd, '.github', 'workflows');
  let entries;
  try { entries = fs.readdirSync(dir); } catch (_) { return; }

  for (const file of entries.sort()) {
    if (!/\.ya?ml$/i.test(file)) continue;
    let content;
    try { content = fs.readFileSync(path.join(dir, file), 'utf8'); } catch (_) { continue; }

    // `[ \t]*` rather than `\s*`: the value must sit on the `name:` line itself.
    const nameMatch = content.match(/^name:[ \t]*(.+)$/m);
    const declared = nameMatch ? nameMatch[1].trim().replace(/^['"]|['"]$/g, '') : '';
    const name = declared || file;

    const triggers = workflowTriggers(content);
    rows.push({ kind: 'ci', name, detail: `${file}${triggers ? ' — ' + triggers : ''}` });
  }
}
|
|
11273
|
+
|
|
11274
|
+
/**
 * Append one `make` row per explicit target declared in the root Makefile,
 * de-duplicated and sorted by name.
 *
 * A line counts as a target when it starts with a plain name followed by `:`
 * (or `::` for double-colon rules). Variable assignments written `:=`, `::=`
 * or `:::=` are excluded. Special targets such as `.PHONY` never match because
 * the name may not start with a dot.
 *
 * @param {string} cwd - project root
 * @param {{kind: string, name: string, detail: string}[]} rows - accumulator, mutated in place
 * @returns {void}
 */
function makeTargets(cwd, rows) {
  let content;
  try { content = fs.readFileSync(path.join(cwd, 'Makefile'), 'utf8'); } catch (_) { return; }

  const targets = new Set();
  for (const line of content.split('\n')) {
    // After the first colon, reject `=`, `:=` and `::=` (assignment operators).
    const m = line.match(/^([a-zA-Z0-9_][a-zA-Z0-9_.-]*)\s*:(?!:{0,2}=)/);
    if (m) targets.add(m[1]);
  }
  for (const t of [...targets].sort()) {
    rows.push({ kind: 'make', name: t, detail: 'make ' + t });
  }
}
|
|
11286
|
+
|
|
11287
|
+
/**
 * Build the "Build & CI" markdown table from npm scripts, GitHub Actions
 * workflows and Makefile targets, in that order.
 *
 * Cell text is escaped so that a literal `|` (for instance in a workflow
 * name) cannot split a row into extra columns. Output is capped at MAX_ROWS
 * rows, followed by a "+N more" summary row when truncated.
 *
 * @param {string[]} files - unused; the project roots are read directly
 * @param {string} cwd - project root
 * @returns {string} markdown table, or '' when nothing was found
 */
function analyze(files, cwd) {
  const rows = [];
  npmScripts(cwd, rows);
  ciWorkflows(cwd, rows);
  makeTargets(cwd, rows);
  if (rows.length === 0) return '';

  // A bare pipe inside a cell would be read as a column separator.
  const cell = (value) => String(value).replace(/\|/g, '\\|');

  const lines = [
    '| Kind | Name | Detail |',
    '|------|------|--------|',
  ];
  for (const r of rows.slice(0, MAX_ROWS)) {
    lines.push(`| ${cell(r.kind)} | ${cell(r.name)} | ${cell(r.detail)} |`);
  }
  if (rows.length > MAX_ROWS) {
    lines.push(`| … | | +${rows.length - MAX_ROWS} more |`);
  }
  return lines.join('\n');
}
|
|
11306
|
+
|
|
11307
|
+
module.exports = { analyze };
|
|
11308
|
+
|
|
11309
|
+
};
|
|
11310
|
+
|
|
11234
11311
|
// ── ./src/map/class-hierarchy ──
|
|
11235
11312
|
__factories["./src/map/class-hierarchy"] = function(module, exports) {
|
|
11236
11313
|
|
|
@@ -11352,6 +11429,205 @@ __factories["./src/map/class-hierarchy"] = function(module, exports) {
|
|
|
11352
11429
|
|
|
11353
11430
|
};
|
|
11354
11431
|
|
|
11432
|
+
// ── ./src/map/config-manifest ──
|
|
11433
|
+
__factories["./src/map/config-manifest"] = function(module, exports) {
|
|
11434
|
+
|
|
11435
|
+
/**
|
|
11436
|
+
* Config & package-manifest extractor (v8.5 C1).
|
|
11437
|
+
*
|
|
11438
|
+
* Surfaces the project's package manifests (name / version / dependency counts)
|
|
11439
|
+
* across ecosystems and the notable root config files present. Pure,
|
|
11440
|
+
* zero-dependency, deterministic.
|
|
11441
|
+
*
|
|
11442
|
+
* @param {string[]} files — absolute file paths (unused; roots are read directly)
|
|
11443
|
+
* @param {string} cwd — project root
|
|
11444
|
+
* @returns {string} formatted markdown table (empty string if none found)
|
|
11445
|
+
*/
|
|
11446
|
+
|
|
11447
|
+
const fs = require('fs');
|
|
11448
|
+
const path = require('path');
|
|
11449
|
+
|
|
11450
|
+
const CONFIG_FILES = [
|
|
11451
|
+
'tsconfig.json', 'jsconfig.json', '.eslintrc', '.eslintrc.json', '.eslintrc.js',
|
|
11452
|
+
'.prettierrc', 'babel.config.js', 'jest.config.js', 'vitest.config.ts',
|
|
11453
|
+
'webpack.config.js', 'vite.config.ts', 'rollup.config.js', 'tailwind.config.js',
|
|
11454
|
+
'docker-compose.yml', 'docker-compose.yaml', 'Dockerfile', '.editorconfig',
|
|
11455
|
+
];
|
|
11456
|
+
|
|
11457
|
+
/**
 * Read a file as UTF-8 text, best-effort.
 *
 * @param {string} p - file path
 * @returns {string|null} the file content, or null when it cannot be read
 */
function readText(p) {
  let text = null;
  try {
    text = fs.readFileSync(p, 'utf8');
  } catch (_) {
    // Missing or unreadable file: fall through with null.
  }
  return text;
}
|
|
11458
|
+
/**
 * Read and parse a JSON file, best-effort.
 *
 * A leading UTF-8 byte-order mark is removed before parsing, because
 * `JSON.parse` rejects U+FEFF and a BOM-prefixed manifest would otherwise be
 * reported as absent.
 *
 * @param {string} p - path of the JSON file
 * @returns {*} the parsed value, or null when the file is unreadable or invalid
 */
function readJson(p) {
  try {
    const raw = fs.readFileSync(p, 'utf8');
    return JSON.parse(raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw);
  } catch (_) {
    // Deliberate best-effort: a missing or malformed manifest just yields null.
    return null;
  }
}
|
|
11459
|
+
/**
 * Number of own enumerable keys of an object-like value.
 *
 * @param {*} obj - value to inspect
 * @returns {number} key count, or 0 for anything that is not a non-null object
 */
function count(obj) {
  if (!obj || typeof obj !== 'object') return 0;
  return Object.keys(obj).length;
}
|
|
11460
|
+
|
|
11461
|
+
/**
 * Append one row per package manifest found in the project root.
 *
 * Ecosystems are probed in a fixed order: npm, Python (pyproject.toml, else
 * setup.py, plus requirements.txt), Rust, Go, Maven, Gradle, Ruby, PHP.
 * Manifests that are only reported as "present" are detected with a file stat
 * rather than by reading them, so an empty file still counts and large files
 * are not loaded just to test existence.
 *
 * @param {string} cwd - project root
 * @param {{manifest: string, detail: string}[]} rows - accumulator, mutated in place
 * @returns {void}
 */
function manifests(cwd, rows) {
  const at = (name) => path.join(cwd, name);
  const isFile = (name) => {
    try { return fs.statSync(at(name)).isFile(); } catch (_) { return false; }
  };
  // First `key = "value"` assignment in a TOML document, if any.
  const tomlString = (text, key) =>
    (text.match(new RegExp(`^\\s*${key}\\s*=\\s*["']([^"']+)["']`, 'm')) || [])[1];
  const nameAtVersion = (name, version) => [name, version].filter(Boolean).join('@');

  const pkg = readJson(at('package.json'));
  if (pkg) {
    const deps = count(pkg.dependencies);
    const dev = count(pkg.devDependencies);
    const id = nameAtVersion(pkg.name, pkg.version) || 'package.json';
    rows.push({ manifest: 'package.json (npm)', detail: `${id} · ${deps} deps, ${dev} devDeps` });
  }

  // readText() returns '' for an empty file, so compare against null.
  const pyproject = readText(at('pyproject.toml'));
  if (pyproject !== null) {
    const id = nameAtVersion(tomlString(pyproject, 'name'), tomlString(pyproject, 'version'));
    rows.push({ manifest: 'pyproject.toml (python)', detail: id || 'present' });
  } else if (isFile('setup.py')) {
    rows.push({ manifest: 'setup.py (python)', detail: 'present' });
  }
  if (isFile('requirements.txt')) {
    rows.push({ manifest: 'requirements.txt (python)', detail: 'present' });
  }

  const cargo = readText(at('Cargo.toml'));
  if (cargo !== null) {
    const id = nameAtVersion(tomlString(cargo, 'name'), tomlString(cargo, 'version'));
    rows.push({ manifest: 'Cargo.toml (rust)', detail: id || 'present' });
  }

  const gomod = readText(at('go.mod'));
  if (gomod !== null) {
    const mod = (gomod.match(/^module\s+(\S+)/m) || [])[1];
    const go = (gomod.match(/^go\s+(\S+)/m) || [])[1];
    rows.push({ manifest: 'go.mod (go)', detail: [mod, go && 'go ' + go].filter(Boolean).join(' · ') || 'present' });
  }

  if (isFile('pom.xml')) rows.push({ manifest: 'pom.xml (maven)', detail: 'present' });
  if (isFile('build.gradle') || isFile('build.gradle.kts')) {
    rows.push({ manifest: 'build.gradle (gradle)', detail: 'present' });
  }
  if (isFile('Gemfile')) rows.push({ manifest: 'Gemfile (ruby)', detail: 'present' });

  const composer = readJson(at('composer.json'));
  if (composer) {
    rows.push({ manifest: 'composer.json (php)', detail: `${composer.name || 'present'} · ${count(composer.require)} deps` });
  }
}
|
|
11506
|
+
|
|
11507
|
+
/**
 * List the entries of CONFIG_FILES that exist in the project root, in
 * CONFIG_FILES order.
 *
 * @param {string} cwd - project root
 * @returns {string[]} names of the config files that are present
 */
function configFiles(cwd) {
  return CONFIG_FILES.filter((fileName) => fs.existsSync(path.join(cwd, fileName)));
}
|
|
11514
|
+
|
|
11515
|
+
/**
 * Render the "Config & manifests" section: a manifest table followed, after a
 * blank line, by a one-line list of the root config files that are present.
 * Either part is omitted when it has no content.
 *
 * @param {string[]} files - unused; the project root is read directly
 * @param {string} cwd - project root
 * @returns {string} markdown, or '' when neither manifests nor configs exist
 */
function analyze(files, cwd) {
  const manifestRows = [];
  manifests(cwd, manifestRows);
  const present = configFiles(cwd);

  const sections = [];
  if (manifestRows.length > 0) {
    const table = [
      '| Manifest | Detail |',
      '|----------|--------|',
      ...manifestRows.map((row) => `| ${row.manifest} | ${row.detail} |`),
    ];
    sections.push(table.join('\n'));
  }
  if (present.length > 0) {
    const quoted = present.map((c) => '`' + c + '`').join(', ');
    sections.push(`**Config files:** ${quoted}`);
  }
  // Sections are separated by exactly one blank line.
  return sections.join('\n\n');
}
|
|
11532
|
+
|
|
11533
|
+
module.exports = { analyze };
|
|
11534
|
+
|
|
11535
|
+
};
|
|
11536
|
+
|
|
11537
|
+
// ── ./src/map/env-schema ──
|
|
11538
|
+
__factories["./src/map/env-schema"] = function(module, exports) {
|
|
11539
|
+
|
|
11540
|
+
/**
|
|
11541
|
+
* Environment-variable schema extractor (v8.5 C1).
|
|
11542
|
+
*
|
|
11543
|
+
* Surfaces the environment the project actually reads — from source across
|
|
11544
|
+
* JS/TS, Python, Ruby, and Go, plus keys declared in a committed `.env.example`
|
|
11545
|
+
* / `.env.sample` / `.env.template`. Pure, zero-dependency, deterministic.
|
|
11546
|
+
*
|
|
11547
|
+
* @param {string[]} files — absolute file paths to analyze (srcDirs-scoped)
|
|
11548
|
+
* @param {string} cwd — project root
|
|
11549
|
+
* @returns {string} formatted markdown table (empty string if none found)
|
|
11550
|
+
*/
|
|
11551
|
+
|
|
11552
|
+
const fs = require('fs');
|
|
11553
|
+
const path = require('path');
|
|
11554
|
+
|
|
11555
|
+
const SCAN_EXTS = new Set(['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.go']);
|
|
11556
|
+
const EXAMPLE_FILES = ['.env.example', '.env.sample', '.env.template', '.env.dist'];
|
|
11557
|
+
|
|
11558
|
+
// process.env.X / process.env['X'] / import.meta.env.X / Deno.env.get('X')
|
|
11559
|
+
const JS_RE = /(?:process\.env|import\.meta\.env)(?:\.([A-Z_][A-Z0-9_]*)|\[\s*['"]([A-Z_][A-Z0-9_]*)['"]\s*\])|Deno\.env\.get\(\s*['"]([A-Z_][A-Z0-9_]*)['"]/g;
|
|
11560
|
+
// os.environ['X'] / os.environ.get('X') / os.getenv('X') / getenv('X')
// After `environ` the accessor must be a subscript `[` or a `.get(` call; the
// opening parenthesis of `.get(` has to be consumed explicitly, otherwise
// `os.environ.get('X')` can never match.
const PY_RE = /(?:os\.)?(?:environ(?:\.get\(|\[)\s*['"]([A-Z_][A-Z0-9_]*)['"]|getenv\(\s*['"]([A-Z_][A-Z0-9_]*)['"])/g;
|
|
11562
|
+
const RB_RE = /ENV\[\s*['"]([A-Z_][A-Z0-9_]*)['"]\s*\]/g;
|
|
11563
|
+
const GO_RE = /os\.(?:Getenv|LookupEnv)\(\s*["`']([A-Z_][A-Z0-9_]*)["`']/g;
|
|
11564
|
+
|
|
11565
|
+
const MAX_ROWS = 200;
|
|
11566
|
+
|
|
11567
|
+
/**
 * Run a global regex over `content` and add the first non-empty capture group
 * (of groups 1–3) of every match to `into`.
 *
 * @param {RegExp} re - global pattern; its lastIndex is reset before scanning
 * @param {string} content - text to scan
 * @param {Set<string>} into - accumulator, mutated in place
 * @returns {void}
 */
function collectMatches(re, content, into) {
  // The pattern objects are shared, so always start from the beginning.
  re.lastIndex = 0;
  for (let hit = re.exec(content); hit !== null; hit = re.exec(content)) {
    const captured = hit[1] || hit[2] || hit[3];
    if (captured) into.add(captured);
  }
}
|
|
11575
|
+
|
|
11576
|
+
/**
 * Collect the variable names declared in the example env files listed in
 * EXAMPLE_FILES that exist in the project root.
 *
 * Blank lines and `#` comment lines are ignored; a declaration is an
 * upper-case name followed by `=`, optionally prefixed with `export`.
 *
 * @param {string} cwd - project root
 * @returns {Set<string>} declared variable names
 */
function readExampleKeys(cwd) {
  const found = new Set();
  EXAMPLE_FILES.forEach((fileName) => {
    let text;
    try { text = fs.readFileSync(path.join(cwd, fileName), 'utf8'); } catch (_) { return; }

    text
      .split('\n')
      .map((raw) => raw.trim())
      .filter((entry) => entry !== '' && !entry.startsWith('#'))
      .forEach((entry) => {
        const decl = entry.match(/^(?:export\s+)?([A-Z_][A-Z0-9_]*)\s*=/);
        if (decl) found.add(decl[1]);
      });
  });
  return found;
}
|
|
11590
|
+
|
|
11591
|
+
/**
 * Render the "Environment variables" table: every variable referenced in the
 * scanned source files or declared in an example env file, sorted by name,
 * with the origin(s) of each.
 *
 * @param {string[]} files - absolute paths of the source files to scan
 * @param {string} cwd - project root (where example env files are looked up)
 * @returns {string} markdown table, or '' when no variable was found
 */
function analyze(files, cwd) {
  // Extensions without a dedicated pattern fall back to the JS/TS one.
  const patternByExt = new Map([['.py', PY_RE], ['.rb', RB_RE], ['.go', GO_RE]]);

  const codeVars = new Set();
  for (const filePath of files) {
    const ext = path.extname(filePath).toLowerCase();
    if (!SCAN_EXTS.has(ext)) continue;
    let source;
    try { source = fs.readFileSync(filePath, 'utf8'); } catch (_) { continue; }
    collectMatches(patternByExt.get(ext) || JS_RE, source, codeVars);
  }

  const exampleVars = readExampleKeys(cwd);
  const sorted = [...new Set([...codeVars, ...exampleVars])].sort();
  if (sorted.length === 0) return '';

  const origins = (name) => {
    const labels = [];
    if (codeVars.has(name)) labels.push('code');
    if (exampleVars.has(name)) labels.push('.env.example');
    return labels.join(', ');
  };

  const body = sorted.slice(0, MAX_ROWS).map((name) => `| ${name} | ${origins(name)} |`);
  const overflow = sorted.length > MAX_ROWS
    ? [`| … | +${sorted.length - MAX_ROWS} more |`]
    : [];

  return [
    '| Variable | Source |',
    '|----------|--------|',
    ...body,
    ...overflow,
  ].join('\n');
}
|
|
11626
|
+
|
|
11627
|
+
module.exports = { analyze };
|
|
11628
|
+
|
|
11629
|
+
};
|
|
11630
|
+
|
|
11355
11631
|
// ── ./src/map/import-graph ──
|
|
11356
11632
|
__factories["./src/map/import-graph"] = function(module, exports) {
|
|
11357
11633
|
|
|
@@ -11541,6 +11817,94 @@ __factories["./src/map/import-graph"] = function(module, exports) {
|
|
|
11541
11817
|
|
|
11542
11818
|
};
|
|
11543
11819
|
|
|
11820
|
+
// ── ./src/map/migrations ──
|
|
11821
|
+
__factories["./src/map/migrations"] = function(module, exports) {
|
|
11822
|
+
|
|
11823
|
+
/**
|
|
11824
|
+
* Database-migration extractor (v8.5 C1).
|
|
11825
|
+
*
|
|
11826
|
+
* Detects schema-migration files across the common frameworks — Rails
|
|
11827
|
+
* (db/migrate), Django/Alembic, Prisma, Flyway (`V1__name.sql`), knex/Sequelize,
|
|
11828
|
+
* and timestamped SQL — and surfaces them with a parsed version + name. Pure,
|
|
11829
|
+
* zero-dependency, deterministic.
|
|
11830
|
+
*
|
|
11831
|
+
* @param {string[]} files — absolute file paths (unused; the tree is walked)
|
|
11832
|
+
* @param {string} cwd — project root
|
|
11833
|
+
* @returns {string} formatted markdown table (empty string if none found)
|
|
11834
|
+
*/
|
|
11835
|
+
|
|
11836
|
+
const fs = require('fs');
|
|
11837
|
+
const path = require('path');
|
|
11838
|
+
|
|
11839
|
+
const MAX_DEPTH = 6;
|
|
11840
|
+
const MAX_ROWS = 200;
|
|
11841
|
+
const SKIP_DIR = new Set(['.git', 'node_modules', 'vendor', 'dist', 'build', 'target', '.venv', 'venv', '__pycache__']);
|
|
11842
|
+
// `.java` is listed because FLYWAY_RE accepts Java-based Flyway migrations
// (`V1__name.java`); without it they are dropped by the extension filter
// before the filename is ever tested.
const MIG_EXT = new Set(['.sql', '.rb', '.py', '.js', '.ts', '.java']);
|
|
11843
|
+
|
|
11844
|
+
// A directory whose path marks its children as migrations.
|
|
11845
|
+
const MIG_DIR_RE = /(^|\/)(db\/migrate|migrations?|alembic\/versions|prisma\/migrations)$/i;
|
|
11846
|
+
// A filename that is itself a migration regardless of directory.
|
|
11847
|
+
const FLYWAY_RE = /^V\d+(?:[._]\d+)*__(.+)\.(sql|java)$/;
|
|
11848
|
+
const TIMESTAMP_RE = /^(\d{8,})[_-](.+)\.(sql|rb|py|js|ts)$/;
|
|
11849
|
+
const NAMED_RE = /[._-]migrations?[._-]/i;
|
|
11850
|
+
|
|
11851
|
+
/**
 * Recursively collect migration files below `dir` into `out`.
 *
 * A file is reported when, in order of precedence:
 *   1. its name follows the Flyway convention (`V1_2__add_index.sql`);
 *   2. its name starts with a timestamp (`20230505101010_create_users.rb`);
 *   3. it sits directly in a migration directory (MIG_DIR_RE);
 *   4. it sits in a `<timestamp>_<name>/` sub-directory of a migration
 *      directory — the Prisma layout, where the file is just `migration.sql`
 *      and the version/name are taken from the directory;
 *   5. its name contains a `migration` segment (NAMED_RE).
 *
 * Directories in SKIP_DIR are not entered, recursion stops below MAX_DEPTH,
 * and unreadable directories are skipped silently. Entries are visited in
 * name order so the output is deterministic.
 *
 * @param {string} dir - directory to scan
 * @param {string} cwd - project root; reported paths are relative to it
 * @param {number} depth - current recursion depth (0 for the root)
 * @param {{version: string, name: string, file: string}[]} out - accumulator, mutated in place
 * @returns {void}
 */
function walk(dir, cwd, depth, out) {
  if (depth > MAX_DEPTH) return;
  let entries;
  try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch (_) { return; }
  entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  const relDir = path.relative(cwd, dir).replace(/\\/g, '/');
  const dirIsMigration = MIG_DIR_RE.test(relDir);
  // Prisma keeps each migration in its own timestamped folder one level below
  // the migrations directory.
  const parentIsMigration = MIG_DIR_RE.test(path.posix.dirname(relDir));
  const dirStamp = parentIsMigration
    ? path.posix.basename(relDir).match(/^(\d{8,})[_-](.+)$/)
    : null;

  for (const e of entries) {
    if (e.isDirectory()) {
      if (SKIP_DIR.has(e.name)) continue;
      walk(path.join(dir, e.name), cwd, depth + 1, out);
      continue;
    }
    // Python package marker, present in every Django migrations package.
    if (e.name === '__init__.py') continue;

    const rawExt = path.extname(e.name);
    if (!MIG_EXT.has(rawExt.toLowerCase())) continue;
    // Strip the real trailing extension, whatever its letter case.
    const stem = path.basename(e.name, rawExt);

    const rel = path.relative(cwd, path.join(dir, e.name)).replace(/\\/g, '/');
    let version = null;
    let name = null;

    let m;
    if ((m = e.name.match(FLYWAY_RE))) { version = e.name.split('__')[0]; name = m[1].replace(/_/g, ' '); }
    else if ((m = e.name.match(TIMESTAMP_RE))) { version = m[1]; name = m[2].replace(/[_-]/g, ' '); }
    else if (dirIsMigration) { version = '—'; name = stem; }
    else if (dirStamp) { version = dirStamp[1]; name = dirStamp[2].replace(/[_-]/g, ' '); }
    else if (NAMED_RE.test(e.name)) { version = '—'; name = stem; }
    else continue;

    out.push({ version, name, file: rel });
  }
}
|
|
11883
|
+
|
|
11884
|
+
/**
 * Render the "Database migrations" table for the project tree, sorted by
 * file path and capped at MAX_ROWS rows (with a "+N more" summary row when
 * truncated).
 *
 * @param {string[]} files - unused; the tree under `cwd` is walked instead
 * @param {string} cwd - project root
 * @returns {string} markdown table, or '' when no migration was found
 */
function analyze(files, cwd) {
  const migrations = [];
  walk(cwd, cwd, 0, migrations);
  if (migrations.length === 0) return '';

  // Plain code-unit comparison keeps the order locale-independent.
  const byFile = (a, b) => {
    if (a.file < b.file) return -1;
    return a.file > b.file ? 1 : 0;
  };
  migrations.sort(byFile);

  const body = migrations
    .slice(0, MAX_ROWS)
    .map((row) => `| ${row.version} | ${row.name} | ${row.file} |`);
  const overflow = migrations.length > MAX_ROWS
    ? [`| … | +${migrations.length - MAX_ROWS} more | |`]
    : [];

  return [
    '| Version | Migration | File |',
    '|---------|-----------|------|',
    ...body,
    ...overflow,
  ].join('\n');
}
|
|
11903
|
+
|
|
11904
|
+
module.exports = { analyze };
|
|
11905
|
+
|
|
11906
|
+
};
|
|
11907
|
+
|
|
11544
11908
|
// ── ./src/map/route-table ──
|
|
11545
11909
|
__factories["./src/map/route-table"] = function(module, exports) {
|
|
11546
11910
|
|
|
@@ -11696,6 +12060,10 @@ __factories["./src/mcp/handlers"] = function(module, exports) {
|
|
|
11696
12060
|
imports: '### Import graph',
|
|
11697
12061
|
classes: '### Class hierarchy',
|
|
11698
12062
|
routes: '### Route table',
|
|
12063
|
+
env: '### Environment variables',
|
|
12064
|
+
buildci: '### Build & CI',
|
|
12065
|
+
manifests: '### Config & manifests',
|
|
12066
|
+
migrations: '### Database migrations',
|
|
11699
12067
|
};
|
|
11700
12068
|
|
|
11701
12069
|
/**
|
|
@@ -11781,7 +12149,7 @@ __factories["./src/mcp/handlers"] = function(module, exports) {
|
|
|
11781
12149
|
|
|
11782
12150
|
const header = MAP_SECTIONS[args.type];
|
|
11783
12151
|
if (!header) {
|
|
11784
|
-
return `Unknown map type: "${args.type}". Use:
|
|
12152
|
+
return `Unknown map type: "${args.type}". Use: ${Object.keys(MAP_SECTIONS).join(', ')}`;
|
|
11785
12153
|
}
|
|
11786
12154
|
|
|
11787
12155
|
const mapPath = path.join(cwd, 'PROJECT_MAP.md');
|
|
@@ -12695,7 +13063,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
12695
13063
|
|
|
12696
13064
|
const SERVER_INFO = {
|
|
12697
13065
|
name: 'sigmap',
|
|
12698
|
-
version: '
|
|
13066
|
+
version: '8.0.0',
|
|
12699
13067
|
description: 'SigMap MCP server — code signatures on demand',
|
|
12700
13068
|
};
|
|
12701
13069
|
|
|
@@ -13418,6 +13786,132 @@ __factories["./src/plan/verify-plan"] = function(module, exports) {
|
|
|
13418
13786
|
|
|
13419
13787
|
};
|
|
13420
13788
|
|
|
13789
|
+
// ── ./src/retrieval/bm25 ──
|
|
13790
|
+
__factories["./src/retrieval/bm25"] = function(module, exports) {
|
|
13791
|
+
|
|
13792
|
+
/**
|
|
13793
|
+
* SigMap identifier-aware BM25 re-ranker (zero dependencies, deterministic).
|
|
13794
|
+
*
|
|
13795
|
+
* Plain exact-token TF-IDF misses queries whose terms live *inside* code
|
|
13796
|
+
* identifiers — e.g. `component emit` never surfaces `componentEmits.ts`,
|
|
13797
|
+
* because "componentEmits" is one token that shares no exact term with the
|
|
13798
|
+
* query. This module fixes that with four small additions:
|
|
13799
|
+
*
|
|
13800
|
+
* 1. Identifier-aware tokenization — split camelCase and snake_case.
|
|
13801
|
+
* 2. Light stemming — plurals / common suffixes (`emits` → `emit`).
|
|
13802
|
+
* 3. Path-token boost — file path / basename tokens weigh PATH_BOOST× more.
|
|
13803
|
+
* 4. BM25 scoring instead of raw TF-IDF (length-normalized).
|
|
13804
|
+
*
|
|
13805
|
+
* On 85 curated tasks across 17 repos this lifted hit@5 from 75.3% → 82.4%
|
|
13806
|
+
* (MRR +16% relative). See issue #395.
|
|
13807
|
+
*/
|
|
13808
|
+
|
|
13809
|
+
// Stop words dropped during tokenization: common English function words plus
// a few low-signal code verbs/nouns (get, set, add, new, return, value, test)
// that show up in nearly every signature and therefore carry little
// retrieval signal.
const STOP = new Set([
  'a', 'an', 'the', 'of', 'to', 'in', 'on', 'for', 'and', 'or',
  'is', 'are', 'be', 'by', 'with', 'as', 'at', 'from', 'that', 'this',
  'it', 'its',
  'into', 'get', 'set', 'add', 'new', 'return', 'value', 'test',
]);
|
|
13816
|
+
|
|
13817
|
+
/**
 * Light suffix stemmer — conservative, tuned for code identifiers rather than
 * prose. The suffix rules only match lower-case letters.
 *
 * Two passes are applied in order:
 *   1. Plural normalization: `ies` → `y`, `es` dropped after `ss/sh/ch/x/z`,
 *      and a trailing `s` dropped unless it follows another `s`.
 *   2. Derivational stripping: `ization` → `ize`, then one common suffix
 *      (`ing`, `ed`, `er`, `ment`, `ize`, …) is removed.
 *
 * Tokens of 3 chars or fewer pass through unchanged. If pass 2 would leave
 * fewer than 3 chars, the plural-normalized form from pass 1 is returned
 * instead (or the original token if that is also shorter than 3 chars), so a
 * plural and its singular always reduce to the same stem: both `users` and
 * `user` yield `user`.
 *
 * @param {string} w
 * @returns {string}
 */
function stem(w) {
  if (w.length <= 3) return w;

  // Pass 1 — plural normalization.
  const singular = w
    .replace(/ies$/, 'y')
    .replace(/(sses|shes|ches|xes|zes)$/, (m) => m.slice(0, -2))
    .replace(/([^s])s$/, '$1');

  // Pass 2 — derivational suffix stripping.
  const stripped = singular
    .replace(/(ization|izations)$/, 'ize')
    .replace(/(ing|edly|ed|er|ers|ation|ations|ment|ness|ity|ive|able|ible|ize|ise|al)$/, '');

  if (stripped.length >= 3) return stripped;

  // Over-stripped: keep the plural normalization rather than discarding it,
  // otherwise `users` would stay `users` while `user` stays `user`.
  return singular.length >= 3 ? singular : w;
}
|
|
13835
|
+
|
|
13836
|
+
/**
 * Break free text or code into normalized search tokens.
 *
 * Runs of non-alphanumeric characters (which covers snake_case underscores)
 * become separators, camelCase / PascalCase / acronym boundaries are split,
 * and everything is lower-cased. Single-character words and stop words are
 * discarded; each remaining word is stemmed.
 *
 * @param {string} text
 * @returns {string[]} tokens in source order (duplicates preserved); empty
 *   for empty or non-string input
 */
function tokenize(text) {
  if (typeof text !== 'string' || text === '') return [];

  const spaced = text
    .replace(/[^A-Za-z0-9]+/g, ' ')
    // fooBar → foo Bar, v2Parser → v2 Parser
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    // XMLHttp → XML Http (acronym followed by a capitalized word)
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');

  const tokens = [];
  for (const word of spaced.toLowerCase().split(/\s+/)) {
    if (word.length <= 1 || STOP.has(word)) continue;
    const stemmed = stem(word);
    if (stemmed) tokens.push(stemmed);
  }
  return tokens;
}
|
|
13855
|
+
|
|
13856
|
+
// The file path / basename is highly indicative of relevance, so its tokens
// are counted PATH_BOOST times when building the document term-frequency map.
const PATH_BOOST = 3;

/**
 * BM25 re-rank of candidates against a query.
 *
 * Each candidate is `{ file, sigs }`. Its document is the tokenized
 * signatures plus the tokenized file path repeated PATH_BOOST times. The
 * returned objects are shallow copies that keep every original candidate
 * field and add a numeric `score` (higher = more relevant). A `score` of 0
 * means no query token matched — callers typically drop those.
 *
 * Results are sorted best-first. Ties are broken by comparing the file paths
 * by UTF-16 code unit, which does not depend on the host locale or ICU data
 * and never reports two distinct paths as equal, so the order is reproducible
 * across machines and independent of the input order.
 *
 * @param {string} query
 * @param {{ file: string, sigs: string[] }[]} candidates
 * @returns {Array<object & { score: number }>} empty when `candidates` is
 *   empty or not an array
 */
function bm25rank(query, candidates) {
  if (!Array.isArray(candidates) || candidates.length === 0) return [];

  // BM25 parameters: k1 controls term-frequency saturation, b controls how
  // strongly scores are normalized by document length.
  const k1 = 1.5;
  const b = 0.75;

  // Build a term-frequency map per candidate.
  const docs = candidates.map((c) => {
    const pathToks = tokenize(c.file || '');
    const toks = tokenize((c.sigs || []).join(' '));
    for (let i = 0; i < PATH_BOOST; i++) toks.push(...pathToks);
    const tf = new Map();
    for (const t of toks) tf.set(t, (tf.get(t) || 0) + 1);
    return { cand: c, tf, len: toks.length };
  });

  const N = docs.length || 1;
  // `|| 1` keeps the length normalization finite when every document is empty.
  const avgdl = docs.reduce((s, d) => s + d.len, 0) / N || 1;

  // Document frequency: number of candidates containing each term.
  const df = new Map();
  for (const d of docs) {
    for (const t of d.tf.keys()) df.set(t, (df.get(t) || 0) + 1);
  }

  // Each distinct query term contributes at most once per document.
  const qToks = [...new Set(tokenize(query))];

  return docs
    .map((d) => {
      let score = 0;
      for (const t of qToks) {
        const f = d.tf.get(t);
        if (!f) continue;
        const dfT = df.get(t);
        // The `1 +` inside the log keeps idf positive even for terms that
        // occur in most documents.
        const idf = Math.log(1 + (N - dfT + 0.5) / (dfT + 0.5));
        score += (idf * (f * (k1 + 1))) / (f + k1 * (1 - b + (b * d.len) / avgdl));
      }
      return Object.assign({}, d.cand, { score });
    })
    .sort((a, c) => {
      const byScore = c.score - a.score;
      if (byScore) return byScore;
      // Locale-independent tie-break (see the function comment).
      const fa = String(a.file);
      const fc = String(c.file);
      if (fa < fc) return -1;
      return fa > fc ? 1 : 0;
    });
}
|
|
13910
|
+
|
|
13911
|
+
module.exports = { tokenize, stem, bm25rank, PATH_BOOST, STOP };
|
|
13912
|
+
|
|
13913
|
+
};
|
|
13914
|
+
|
|
13421
13915
|
// ── ./src/retrieval/ranker ──
|
|
13422
13916
|
__factories["./src/retrieval/ranker"] = function(module, exports) {
|
|
13423
13917
|
|
|
@@ -13440,6 +13934,7 @@ __factories["./src/retrieval/ranker"] = function(module, exports) {
|
|
|
13440
13934
|
|
|
13441
13935
|
const { loadWeights } = __require('./src/learning/weights');
|
|
13442
13936
|
const { tokenize, STOP_WORDS } = __require('./src/retrieval/tokenizer');
|
|
13937
|
+
const { bm25rank } = __require('./src/retrieval/bm25');
|
|
13443
13938
|
|
|
13444
13939
|
// ---------------------------------------------------------------------------
|
|
13445
13940
|
// Default weights
|
|
@@ -13618,11 +14113,24 @@ __factories["./src/retrieval/ranker"] = function(module, exports) {
|
|
|
13618
14113
|
return all.slice(0, topK);
|
|
13619
14114
|
}
|
|
13620
14115
|
|
|
14116
|
+
// Identifier-aware BM25 base relevance over the whole index (#395). BM25
|
|
14117
|
+
// splits camelCase/snake_case, stems, and boosts path tokens, so queries
|
|
14118
|
+
// whose terms live inside identifiers (e.g. "component emit" → componentEmits)
|
|
14119
|
+
// are matched. The existing negative-signal penalty and recency/graph/learned
|
|
14120
|
+
// boosts are layered on top; the per-token signals stay for the explain table.
|
|
14121
|
+
const bm25Scores = new Map();
|
|
14122
|
+
for (const c of bm25rank(query, [...sigIndex.entries()].map(([file, sigs]) => ({ file, sigs })))) {
|
|
14123
|
+
bm25Scores.set(c.file, c.score);
|
|
14124
|
+
}
|
|
14125
|
+
|
|
13621
14126
|
const scored = [];
|
|
13622
14127
|
for (const [file, sigs] of sigIndex.entries()) {
|
|
13623
14128
|
const result = scoreFile(file, sigs, queryTokens, weights);
|
|
13624
|
-
|
|
14129
|
+
const penalty = result.signals.penalty;
|
|
14130
|
+
const base = bm25Scores.get(file) || 0;
|
|
14131
|
+
let score = base * penalty;
|
|
13625
14132
|
const signals = result.signals;
|
|
14133
|
+
signals.bm25 = base;
|
|
13626
14134
|
|
|
13627
14135
|
// Recency boost
|
|
13628
14136
|
if (recencySet && recencySet.has(file) && score > 0) {
|
|
@@ -16524,7 +17032,7 @@ function __tryGit(args, opts = {}) {
|
|
|
16524
17032
|
catch (_) { return ''; }
|
|
16525
17033
|
}
|
|
16526
17034
|
|
|
16527
|
-
const VERSION = '7.30.0';
|
|
17035
|
+
const VERSION = '8.0.0';
|
|
16528
17036
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
16529
17037
|
|
|
16530
17038
|
function requireSourceOrBundled(key) {
|