sigmap 8.2.0 → 8.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +1 -1
- package/gen-context.js +162 -22
- package/llms-full.txt +2 -2
- package/llms.txt +2 -2
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/core/package.json +1 -1
- package/src/mcp/server.js +1 -1
- package/src/verify/lib-index.js +160 -20
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,13 @@ Format: [Semantic Versioning](https://semver.org/)
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [8.3.0] — 2026-07-05
|
|
14
|
+
|
|
15
|
+
Minor release — **Python site-packages grounding: the moat now spans both major ecosystems.** v8.1/v8.2 built local-library grounding for JS/TS (`node_modules` `.d.ts`); this extends it to **Python**, so `verify-ai-output` and the `verify_suggestion` MCP tool ground AI-suggested Python code against the libraries actually installed in the project's venv — with pinned versions (D8). Zero-dependency, no Python runtime, deterministic.
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- **Python site-packages grounding (#413, PR #414):** `buildLibraryIndex` (`src/verify/lib-index.js`) gains a Python pass alongside the JS/TS one. It reads direct deps from `requirements.txt` / `pyproject.toml` (PEP 621 `[project].dependencies` + Poetry), discovers the venv `site-packages` (`.venv|venv|env|.env` → `lib/python*/site-packages`, or `Lib/site-packages` on Windows) **without spawning Python**, resolves each dep's installed module + version (`*.dist-info`, D8) with PEP 503 import-name normalization, and extracts exported names from the package's `__init__.py`/`.pyi` (`__all__`, top-level `def`/`class`, public assignments, and `from … import` re-exports). Both ecosystems merge into one symbol index — genuine installed-Python-library calls stop being false-flagged as `fake-symbol`. Byte-stable given a fixed installed tree; cached via `src/cache/sig-cache.js`; graceful on missing venv / unresolved deps.
|
|
19
|
+
|
|
13
20
|
## [8.2.0] — 2026-07-04
|
|
14
21
|
|
|
15
22
|
Minor release — **`verify_suggestion` MCP tool: the grounding moat, made consumable by agents.** v8.1.0 built local-library grounding inside the `verify-ai-output` CLI; this exposes it as the **18th MCP tool**, so a coding agent can verify its own generated code against the repo **and the libraries actually installed** in `node_modules` — *before it writes* — and get back the flagged issues plus the pinned versions it verified against (D8).
|
package/README.md
CHANGED
|
@@ -98,7 +98,7 @@ Ask → Rank → Context → Validate → Judge → Learn
|
|
|
98
98
|
|
|
99
99
|
<!--SM:benchmarkBlock-->
|
|
100
100
|
```
|
|
101
|
-
Benchmark : sigmap-v8.
|
|
101
|
+
Benchmark : sigmap-v8.3-main (21 repositories, including R language)
|
|
102
102
|
Date : 2026-07-04
|
|
103
103
|
|
|
104
104
|
Hit@5 : 86.7% (baseline 13.6% — 6.4× lift)
|
package/gen-context.js
CHANGED
|
@@ -13108,7 +13108,7 @@ __factories["./src/mcp/server"] = function(module, exports) {
|
|
|
13108
13108
|
|
|
13109
13109
|
const SERVER_INFO = {
|
|
13110
13110
|
name: 'sigmap',
|
|
13111
|
-
version: '8.
|
|
13111
|
+
version: '8.3.0',
|
|
13112
13112
|
description: 'SigMap MCP server — code signatures on demand',
|
|
13113
13113
|
};
|
|
13114
13114
|
|
|
@@ -16805,12 +16805,17 @@ __factories["./src/verify/lib-index"] = function(module, exports) {
|
|
|
16805
16805
|
* in `node_modules` and verify AI suggestions against repo + private +
|
|
16806
16806
|
* installed-lib symbols. This module builds the installed-lib half.
|
|
16807
16807
|
*
|
|
16808
|
-
*
|
|
16809
|
-
*
|
|
16810
|
-
*
|
|
16811
|
-
*
|
|
16812
|
-
*
|
|
16813
|
-
*
|
|
16808
|
+
* Two ecosystems, one index:
|
|
16809
|
+
* - **JS/TS** — each **direct** dependency in `package.json` resolved under
|
|
16810
|
+
* `node_modules/<dep>`; exports read from its TypeScript declaration entry
|
|
16811
|
+
* (`types`/`typings`, else `index.d.ts`).
|
|
16812
|
+
* - **Python** — each direct dependency in `requirements.txt`/`pyproject.toml`
|
|
16813
|
+
* resolved in the project's venv `site-packages`; exports read from the
|
|
16814
|
+
* package's `__init__.py`/`.pyi`. No Python runtime is spawned (North-Star #1).
|
|
16815
|
+
*
|
|
16816
|
+
* Pure, zero-dependency, deterministic: byte-stable given a fixed installed
|
|
16817
|
+
* tree. Bounded (per-file read cap + dep cap) and cached via
|
|
16818
|
+
* `src/cache/sig-cache.js` so repeat builds are near-free.
|
|
16814
16819
|
*/
|
|
16815
16820
|
|
|
16816
16821
|
const fs = require('fs');
|
|
@@ -16820,6 +16825,7 @@ __factories["./src/verify/lib-index"] = function(module, exports) {
|
|
|
16820
16825
|
const MAX_DTS_BYTES = 512 * 1024; // per-file read cap
|
|
16821
16826
|
const MAX_DEPS = 1000; // dep count cap
|
|
16822
16827
|
const DEP_KEYS = ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'];
|
|
16828
|
+
const VENV_DIRS = ['.venv', 'venv', 'env', '.env'];
|
|
16823
16829
|
|
|
16824
16830
|
/**
|
|
16825
16831
|
* Extract exported symbol names from a `.d.ts` declaration file. Deterministic,
|
|
@@ -16895,6 +16901,130 @@ __factories["./src/verify/lib-index"] = function(module, exports) {
|
|
|
16895
16901
|
return { version, dtsPath: null }; // installed but untyped
|
|
16896
16902
|
}
|
|
16897
16903
|
|
|
16904
|
+
// ── Python ──────────────────────────────────────────────────────────────────
|
|
16905
|
+
|
|
16906
|
+
/**
 * Extract exported symbol names from a Python module's `__init__.py`/`.pyi`.
 *
 * Deterministic, regex-based, column-0 only. Collected sources:
 *   - every `__all__ = [...]`, `__all__ += [...]` and annotated
 *     `__all__: list[str] = [...]` literal (names listed there are kept even
 *     when they start with `_`);
 *   - top-level `def` / `async def` / `class`;
 *   - top-level public assignments (`NAME = ...`, `NAME: type = ...`);
 *   - `from ... import ...` re-exports, including parenthesised multi-line
 *     lists and backslash continuations.
 * Names with a leading `_` are skipped unless listed in `__all__`.
 *
 * @param {string} src Python source text (falsy input yields `[]`)
 * @returns {string[]} sorted unique exported names
 */
function extractPyExports(src) {
  const names = new Set();
  if (!src) return [];

  // `__all__` may be built up in several statements, so read every literal
  // list/tuple assigned or appended to it, not just the first one.
  for (const all of src.matchAll(/^__all__\s*(?::[^=\n]*)?\+?=\s*[\[(]([\s\S]*?)[\])]/gm)) {
    for (const m of all[1].matchAll(/['"]([A-Za-z_]\w*)['"]/g)) names.add(m[1]);
  }

  // top-level def / class (column 0)
  for (const m of src.matchAll(/^(?:async\s+)?def\s+([A-Za-z_]\w*)/gm)) if (!m[1].startsWith('_')) names.add(m[1]);
  for (const m of src.matchAll(/^class\s+([A-Za-z_]\w*)/gm)) if (!m[1].startsWith('_')) names.add(m[1]);

  // top-level public assignments: NAME = … / NAME: type = … (not ==, +=, etc.)
  // A one-line suite such as `else: x = 1` has the same textual shape as an
  // annotated assignment, so the keywords that can directly precede `:` are excluded.
  const suiteKeywords = new Set(['else', 'try', 'finally', 'except']);
  for (const m of src.matchAll(/^([A-Za-z_]\w*)\s*(?::[^=\n]+)?=(?!=)/gm)) {
    if (!m[1].startsWith('_') && !suiteKeywords.has(m[1])) names.add(m[1]);
  }

  // re-exports: `from .mod import Name, Other as Alias`, in three layouts:
  //   group 1 — parenthesised list, possibly spanning several lines;
  //   group 2 — single logical line, possibly continued with a trailing `\`.
  const fromImport = /^from[ \t]+[^\n]+?[ \t]+import(?=[ \t(\\])[ \t]*(?:\(([^)]*)\)|((?:\\\r?\n|[^\n])*))/gm;
  for (const m of src.matchAll(fromImport)) {
    const list = (m[1] !== undefined ? m[1] : m[2])
      .replace(/\\\r?\n/g, ' ') // join backslash continuations
      .replace(/#[^\n]*/g, ''); // drop trailing / interleaved comments
    for (const part of list.split(',')) {
      const name = part.trim().replace(/[()]/g, '').split(/\s+as\s+/).pop().trim();
      if (/^[A-Za-z_]\w*$/.test(name) && !name.startsWith('_')) names.add(name);
    }
  }

  return [...names].sort();
}
|
|
16944
|
+
|
|
16945
|
+
/**
 * Collect the string items of a TOML array, starting just after its `[`.
 * Quote-aware, so a `]` inside an item (e.g. the extras in
 * `"uvicorn[standard]"`) does not end the array; `#` comments between items
 * are skipped. Scanning stops at the first `]` outside a string.
 * @param {string} text full TOML text
 * @param {number} from index of the first character after the opening `[`
 * @returns {string[]} item values in file order
 */
function readTomlStringArray(text, from) {
  const items = [];
  let i = from;
  while (i < text.length) {
    const ch = text[i];
    if (ch === ']') break;
    if (ch === '#') {
      const nl = text.indexOf('\n', i);
      if (nl === -1) break;
      i = nl + 1;
    } else if (ch === '"' || ch === "'") {
      let value = '';
      let j = i + 1;
      // Single-line strings only: an unterminated quote ends at the newline.
      while (j < text.length && text[j] !== ch && text[j] !== '\n') {
        if (ch === '"' && text[j] === '\\' && j + 1 < text.length) {
          value += text[j + 1];
          j += 2;
        } else {
          value += text[j];
          j += 1;
        }
      }
      items.push(value);
      i = j + 1;
    } else {
      i += 1;
    }
  }
  return items;
}

/**
 * Read direct Python dependency names from `requirements.txt` and
 * `pyproject.toml` in `cwd`.
 *
 * - `requirements.txt`: one requirement per line; blank lines, `#` comments,
 *   option lines (leading `-`) and bare URL / VCS lines are ignored.
 * - `pyproject.toml`: the first `dependencies = [ ... ]` array (PEP 621) and
 *   the keys of `[tool.poetry.dependencies]` (except `python`).
 *
 * Only the leading distribution name of each requirement is kept; version
 * specifiers, extras and environment markers are discarded. A missing or
 * unreadable file contributes nothing.
 *
 * @param {string} cwd project root
 * @returns {string[]} sorted unique dependency names as written in the files
 */
function pythonDirectDeps(cwd) {
  const names = new Set();
  const addSpec = (spec) => {
    const m = /^\s*([A-Za-z0-9][A-Za-z0-9._-]*)/.exec(spec);
    if (m) names.add(m[1]);
  };

  try {
    const req = fs.readFileSync(path.join(cwd, 'requirements.txt'), 'utf8');
    for (const line of req.split('\n')) {
      const t = line.trim();
      if (!t || t.startsWith('#') || t.startsWith('-')) continue;
      // A bare URL / VCS requirement has no distribution name to read; without
      // this guard its scheme (`https`, `git`) would be recorded as a dependency.
      if (/^[A-Za-z][A-Za-z0-9+.-]*:\/\//.test(t)) continue;
      addSpec(t);
    }
  } catch (_) { /* no readable requirements.txt */ }

  try {
    const py = fs.readFileSync(path.join(cwd, 'pyproject.toml'), 'utf8');
    // PEP 621: [project] dependencies = ["foo>=1", "bar[extra]; python_version < '3.12'"]
    // Items are read one string at a time so extras brackets and quoted marker
    // values inside a requirement are never mistaken for array syntax or names.
    const head = /^\s*dependencies\s*=\s*\[/m.exec(py);
    if (head) {
      for (const spec of readTomlStringArray(py, head.index + head[0].length)) addSpec(spec);
    }
    // Poetry: [tool.poetry.dependencies]\n foo = "^1"
    const poetry = py.match(/\[tool\.poetry\.dependencies\]([\s\S]*?)(?:\n\[|$)/);
    if (poetry) for (const m of poetry[1].matchAll(/^([A-Za-z0-9][A-Za-z0-9._-]*)\s*=/gm)) {
      if (m[1] !== 'python') names.add(m[1]);
    }
  } catch (_) { /* no readable pyproject.toml */ }

  return [...names].sort();
}
|
|
16970
|
+
|
|
16971
|
+
/** Locate the project's venv `site-packages` directories (no Python runtime). */
|
|
16972
|
+
function findSitePackages(cwd) {
|
|
16973
|
+
const out = [];
|
|
16974
|
+
for (const v of VENV_DIRS) {
|
|
16975
|
+
const base = path.join(cwd, v);
|
|
16976
|
+
const libDir = path.join(base, 'lib'); // POSIX: <venv>/lib/pythonX.Y/site-packages
|
|
16977
|
+
let pyDirs = [];
|
|
16978
|
+
try { pyDirs = fs.readdirSync(libDir).filter((d) => /^python\d/.test(d)).sort(); } catch (_) { /* none */ }
|
|
16979
|
+
for (const py of pyDirs) {
|
|
16980
|
+
const sp = path.join(libDir, py, 'site-packages');
|
|
16981
|
+
try { if (fs.statSync(sp).isDirectory()) out.push(sp); } catch (_) { /* next */ }
|
|
16982
|
+
}
|
|
16983
|
+
const winSp = path.join(base, 'Lib', 'site-packages'); // Windows
|
|
16984
|
+
try { if (fs.statSync(winSp).isDirectory()) out.push(winSp); } catch (_) { /* next */ }
|
|
16985
|
+
}
|
|
16986
|
+
return out;
|
|
16987
|
+
}
|
|
16988
|
+
|
|
16989
|
+
/**
 * Normalize a Python distribution name for comparison (PEP 503 style):
 * the value is stringified, lower-cased, and every run of `-`, `_` or `.`
 * is collapsed into a single `-`.
 * @param {*} name distribution name (coerced with `String`)
 * @returns {string} normalized name
 */
function normalizePy(name) {
  const lowered = String(name).toLowerCase();
  return lowered.replace(/[-_.]+/g, '-');
}
|
|
16993
|
+
|
|
16994
|
+
/**
 * Find an installed distribution's version from its metadata entry in a
 * `site-packages` directory.
 *
 * Entries are scanned in sorted order and matched as
 * `name-version.dist-info`, `name-version.egg-info`, or the setuptools form
 * `name-version-pyX.Y.egg-info`; the Python-version tag is not part of the
 * returned version. Names are compared after `normalizePy`, so `Foo_Bar`
 * and `foo-bar` are treated as the same distribution.
 *
 * @param {string} sitePkgsDir site-packages directory to scan
 * @param {string} dep dependency name as written in the project files
 * @returns {string|null} version of the first matching entry, or null when
 *   the directory is unreadable or no entry matches
 */
function findPyVersion(sitePkgsDir, dep) {
  const norm = normalizePy(dep);
  let entries;
  try { entries = fs.readdirSync(sitePkgsDir); } catch (_) { return null; }
  for (const e of entries.sort()) {
    // The version is matched lazily so an optional `-pyX.Y` tag (egg-info
    // only) is consumed by its own group instead of making the match fail.
    const m = e.match(/^(.+?)-(\d[^-]*?)(?:-py\d[^-]*)?\.(?:dist-info|egg-info)$/);
    if (m && normalizePy(m[1]) === norm) return m[2];
  }
  return null;
}
|
|
17005
|
+
|
|
17006
|
+
/**
 * Resolve a Python dependency to its installed module entry file + version.
 *
 * Import-name candidates are derived from `dep`: as written, with `-`
 * replaced by `_`, lower-cased, and lower-cased with `-` replaced by `_`.
 * For each site-packages directory, in order, each candidate is probed as a
 * package (`__init__.pyi`, then `__init__.py`) and then as a single module
 * (`<name>.pyi`, then `<name>.py`); the first existing file wins. The version
 * comes from `findPyVersion` for that same directory and may be null.
 *
 * @param {string[]} sitePkgsDirs site-packages directories to search
 * @param {string} dep dependency name as written in the project files
 * @returns {{ version: string|null, sourcePath: string|null }|null} null when
 *   no candidate file exists in any directory
 */
function resolvePyEntry(sitePkgsDirs, dep) {
  const lowered = dep.toLowerCase();
  const underscored = (s) => s.replace(/-/g, '_');
  const candidates = Array.from(new Set([dep, underscored(dep), lowered, underscored(lowered)]));

  // stat failures (missing path, permissions) count as "not a file".
  const isFile = (candidate) => {
    try {
      return fs.statSync(candidate).isFile();
    } catch (_) {
      return false;
    }
  };

  for (const sp of sitePkgsDirs) {
    const version = findPyVersion(sp, dep);
    for (const cand of candidates) {
      // Probe order: stub before source, package before single-file module.
      const probes = [
        path.join(sp, cand, '__init__.pyi'),
        path.join(sp, cand, '__init__.py'),
        path.join(sp, cand + '.pyi'),
        path.join(sp, cand + '.py'),
      ];
      const hit = probes.find(isFile);
      if (hit !== undefined) return { version, sourcePath: hit };
    }
  }
  return null;
}
|
|
17027
|
+
|
|
16898
17028
|
/**
|
|
16899
17029
|
* Build the installed-library signature index for `cwd`.
|
|
16900
17030
|
*
|
|
@@ -16907,17 +17037,24 @@ __factories["./src/verify/lib-index"] = function(module, exports) {
|
|
|
16907
17037
|
function buildLibraryIndex(cwd, opts = {}) {
|
|
16908
17038
|
const version = opts.version || '0';
|
|
16909
17039
|
const useCache = opts.cache !== false;
|
|
16910
|
-
const deps = directDeps(cwd).slice(0, MAX_DEPS);
|
|
16911
17040
|
|
|
16912
|
-
|
|
16913
|
-
|
|
17041
|
+
// Collect entries from both ecosystems; each carries its extractor kind.
|
|
17042
|
+
const entries = []; // { name, version, sourcePath, kind: 'dts'|'py' }
|
|
17043
|
+
for (const dep of directDeps(cwd).slice(0, MAX_DEPS)) {
|
|
16914
17044
|
const r = resolveEntry(cwd, dep);
|
|
16915
|
-
if (r) entries.push({ dep, version: r.version,
|
|
17045
|
+
if (r) entries.push({ name: dep, version: r.version, sourcePath: r.dtsPath, kind: 'dts' });
|
|
17046
|
+
}
|
|
17047
|
+
const sitePkgs = findSitePackages(cwd);
|
|
17048
|
+
if (sitePkgs.length) {
|
|
17049
|
+
for (const dep of pythonDirectDeps(cwd).slice(0, MAX_DEPS)) {
|
|
17050
|
+
const r = resolvePyEntry(sitePkgs, dep);
|
|
17051
|
+
if (r) entries.push({ name: dep, version: r.version, sourcePath: r.sourcePath, kind: 'py' });
|
|
17052
|
+
}
|
|
16916
17053
|
}
|
|
16917
17054
|
|
|
16918
17055
|
const cache = useCache ? loadCache(cwd, version) : new Map();
|
|
16919
|
-
const
|
|
16920
|
-
const { unchanged } = getChangedFiles(
|
|
17056
|
+
const files = entries.filter((e) => e.sourcePath).map((e) => e.sourcePath);
|
|
17057
|
+
const { unchanged } = getChangedFiles(files, cache);
|
|
16921
17058
|
const unchangedSet = new Set(unchanged);
|
|
16922
17059
|
|
|
16923
17060
|
const symbols = new Set();
|
|
@@ -16926,20 +17063,20 @@ __factories["./src/verify/lib-index"] = function(module, exports) {
|
|
|
16926
17063
|
|
|
16927
17064
|
for (const e of entries) {
|
|
16928
17065
|
let names;
|
|
16929
|
-
if (!e.
|
|
17066
|
+
if (!e.sourcePath) {
|
|
16930
17067
|
names = [];
|
|
16931
|
-
} else if (unchangedSet.has(e.
|
|
16932
|
-
names = cache.get(e.
|
|
17068
|
+
} else if (unchangedSet.has(e.sourcePath) && cache.get(e.sourcePath)) {
|
|
17069
|
+
names = cache.get(e.sourcePath).sigs || [];
|
|
16933
17070
|
} else {
|
|
16934
17071
|
let src = '';
|
|
16935
17072
|
try {
|
|
16936
|
-
if (fs.statSync(e.
|
|
17073
|
+
if (fs.statSync(e.sourcePath).size <= MAX_DTS_BYTES) src = fs.readFileSync(e.sourcePath, 'utf8');
|
|
16937
17074
|
} catch (_) { /* unreadable → empty */ }
|
|
16938
|
-
names = extractDtsExports(src);
|
|
16939
|
-
fresh.push({ file: e.
|
|
17075
|
+
names = e.kind === 'py' ? extractPyExports(src) : extractDtsExports(src);
|
|
17076
|
+
fresh.push({ file: e.sourcePath, sigs: names });
|
|
16940
17077
|
}
|
|
16941
17078
|
for (const n of names) symbols.add(n);
|
|
16942
|
-
libraries.push({ name: e.
|
|
17079
|
+
libraries.push({ name: e.name, version: e.version, symbols: names.length, typed: !!e.sourcePath });
|
|
16943
17080
|
}
|
|
16944
17081
|
|
|
16945
17082
|
if (useCache && fresh.length) {
|
|
@@ -16958,7 +17095,10 @@ __factories["./src/verify/lib-index"] = function(module, exports) {
|
|
|
16958
17095
|
.map((l) => `${l.name}@${l.version}`);
|
|
16959
17096
|
}
|
|
16960
17097
|
|
|
16961
|
-
module.exports = {
|
|
17098
|
+
module.exports = {
|
|
17099
|
+
buildLibraryIndex, extractDtsExports, directDeps, resolveEntry, formatVersionPins,
|
|
17100
|
+
extractPyExports, pythonDirectDeps, findSitePackages, resolvePyEntry,
|
|
17101
|
+
};
|
|
16962
17102
|
|
|
16963
17103
|
};
|
|
16964
17104
|
|
|
@@ -17291,7 +17431,7 @@ function __tryGit(args, opts = {}) {
|
|
|
17291
17431
|
catch (_) { return ''; }
|
|
17292
17432
|
}
|
|
17293
17433
|
|
|
17294
|
-
const VERSION = '8.
|
|
17434
|
+
const VERSION = '8.3.0';
|
|
17295
17435
|
const MARKER = '\n\n## Auto-generated signatures\n<!-- Updated by gen-context.js -->\n';
|
|
17296
17436
|
|
|
17297
17437
|
function requireSourceOrBundled(key) {
|
package/llms-full.txt
CHANGED
|
@@ -11,13 +11,13 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
|
|
|
11
11
|
effect), with no LLM calls, embeddings, or vector database. Works with Claude,
|
|
12
12
|
Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
13
13
|
|
|
14
|
-
# Version: 8.
|
|
14
|
+
# Version: 8.3.0 | Benchmark: sigmap-v8.3-main (2026-07-04)
|
|
15
15
|
# Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
|
|
16
16
|
# Regenerate: npm run generate:llms | Validate: npm run validate:llms
|
|
17
17
|
|
|
18
18
|
---
|
|
19
19
|
|
|
20
|
-
## Core metrics (benchmark: sigmap-v8.
|
|
20
|
+
## Core metrics (benchmark: sigmap-v8.3-main, 2026-07-04)
|
|
21
21
|
|
|
22
22
|
| Metric | Without SigMap | With SigMap |
|
|
23
23
|
|--------|----------------|-------------|
|
package/llms.txt
CHANGED
|
@@ -11,7 +11,7 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
|
|
|
11
11
|
effect), with no LLM calls, embeddings, or vector database. Works with Claude,
|
|
12
12
|
Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
13
13
|
|
|
14
|
-
# Version: 8.
|
|
14
|
+
# Version: 8.3.0 | Benchmark: sigmap-v8.3-main (2026-07-04)
|
|
15
15
|
# Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
|
|
16
16
|
# Regenerate: npm run generate:llms | Validate: npm run validate:llms
|
|
17
17
|
|
|
@@ -23,7 +23,7 @@ Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
|
|
|
23
23
|
- No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
|
|
24
24
|
- Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
|
|
25
25
|
|
|
26
|
-
## Core metrics (benchmark: sigmap-v8.
|
|
26
|
+
## Core metrics (benchmark: sigmap-v8.3-main, 2026-07-04)
|
|
27
27
|
|
|
28
28
|
- hit@5 retrieval: 86.7% vs 13.6% random baseline (6.4× lift)
|
|
29
29
|
- Token reduction: 97.0% average across benchmark repos
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sigmap",
|
|
3
|
-
"version": "8.
|
|
3
|
+
"version": "8.3.0",
|
|
4
4
|
"description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
|
|
5
5
|
"main": "packages/core/index.js",
|
|
6
6
|
"exports": {
|
package/src/mcp/server.js
CHANGED
package/src/verify/lib-index.js
CHANGED
|
@@ -8,12 +8,17 @@
|
|
|
8
8
|
* in `node_modules` and verify AI suggestions against repo + private +
|
|
9
9
|
* installed-lib symbols. This module builds the installed-lib half.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
11
|
+
* Two ecosystems, one index:
|
|
12
|
+
* - **JS/TS** — each **direct** dependency in `package.json` resolved under
|
|
13
|
+
* `node_modules/<dep>`; exports read from its TypeScript declaration entry
|
|
14
|
+
* (`types`/`typings`, else `index.d.ts`).
|
|
15
|
+
* - **Python** — each direct dependency in `requirements.txt`/`pyproject.toml`
|
|
16
|
+
* resolved in the project's venv `site-packages`; exports read from the
|
|
17
|
+
* package's `__init__.py`/`.pyi`. No Python runtime is spawned (North-Star #1).
|
|
18
|
+
*
|
|
19
|
+
* Pure, zero-dependency, deterministic: byte-stable given a fixed installed
|
|
20
|
+
* tree. Bounded (per-file read cap + dep cap) and cached via
|
|
21
|
+
* `src/cache/sig-cache.js` so repeat builds are near-free.
|
|
17
22
|
*/
|
|
18
23
|
|
|
19
24
|
const fs = require('fs');
|
|
@@ -23,6 +28,7 @@ const { loadCache, saveCache, getChangedFiles, updateCacheEntries } = require('.
|
|
|
23
28
|
const MAX_DTS_BYTES = 512 * 1024; // per-file read cap
|
|
24
29
|
const MAX_DEPS = 1000; // dep count cap
|
|
25
30
|
const DEP_KEYS = ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'];
|
|
31
|
+
const VENV_DIRS = ['.venv', 'venv', 'env', '.env'];
|
|
26
32
|
|
|
27
33
|
/**
|
|
28
34
|
* Extract exported symbol names from a `.d.ts` declaration file. Deterministic,
|
|
@@ -98,6 +104,130 @@ function resolveEntry(cwd, dep) {
|
|
|
98
104
|
return { version, dtsPath: null }; // installed but untyped
|
|
99
105
|
}
|
|
100
106
|
|
|
107
|
+
// ── Python ──────────────────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
/**
 * Extract exported symbol names from a Python module's `__init__.py`/`.pyi`.
 *
 * Deterministic, regex-based, column-0 only. Collected sources:
 *   - every `__all__ = [...]`, `__all__ += [...]` and annotated
 *     `__all__: list[str] = [...]` literal (names listed there are kept even
 *     when they start with `_`);
 *   - top-level `def` / `async def` / `class`;
 *   - top-level public assignments (`NAME = ...`, `NAME: type = ...`);
 *   - `from ... import ...` re-exports, including parenthesised multi-line
 *     lists and backslash continuations.
 * Names with a leading `_` are skipped unless listed in `__all__`.
 *
 * @param {string} src Python source text (falsy input yields `[]`)
 * @returns {string[]} sorted unique exported names
 */
function extractPyExports(src) {
  const names = new Set();
  if (!src) return [];

  // `__all__` may be built up in several statements, so read every literal
  // list/tuple assigned or appended to it, not just the first one.
  for (const all of src.matchAll(/^__all__\s*(?::[^=\n]*)?\+?=\s*[\[(]([\s\S]*?)[\])]/gm)) {
    for (const m of all[1].matchAll(/['"]([A-Za-z_]\w*)['"]/g)) names.add(m[1]);
  }

  // top-level def / class (column 0)
  for (const m of src.matchAll(/^(?:async\s+)?def\s+([A-Za-z_]\w*)/gm)) if (!m[1].startsWith('_')) names.add(m[1]);
  for (const m of src.matchAll(/^class\s+([A-Za-z_]\w*)/gm)) if (!m[1].startsWith('_')) names.add(m[1]);

  // top-level public assignments: NAME = … / NAME: type = … (not ==, +=, etc.)
  // A one-line suite such as `else: x = 1` has the same textual shape as an
  // annotated assignment, so the keywords that can directly precede `:` are excluded.
  const suiteKeywords = new Set(['else', 'try', 'finally', 'except']);
  for (const m of src.matchAll(/^([A-Za-z_]\w*)\s*(?::[^=\n]+)?=(?!=)/gm)) {
    if (!m[1].startsWith('_') && !suiteKeywords.has(m[1])) names.add(m[1]);
  }

  // re-exports: `from .mod import Name, Other as Alias`, in three layouts:
  //   group 1 — parenthesised list, possibly spanning several lines;
  //   group 2 — single logical line, possibly continued with a trailing `\`.
  const fromImport = /^from[ \t]+[^\n]+?[ \t]+import(?=[ \t(\\])[ \t]*(?:\(([^)]*)\)|((?:\\\r?\n|[^\n])*))/gm;
  for (const m of src.matchAll(fromImport)) {
    const list = (m[1] !== undefined ? m[1] : m[2])
      .replace(/\\\r?\n/g, ' ') // join backslash continuations
      .replace(/#[^\n]*/g, ''); // drop trailing / interleaved comments
    for (const part of list.split(',')) {
      const name = part.trim().replace(/[()]/g, '').split(/\s+as\s+/).pop().trim();
      if (/^[A-Za-z_]\w*$/.test(name) && !name.startsWith('_')) names.add(name);
    }
  }

  return [...names].sort();
}
|
|
147
|
+
|
|
148
|
+
/**
 * Collect the string items of a TOML array, starting just after its `[`.
 * Quote-aware, so a `]` inside an item (e.g. the extras in
 * `"uvicorn[standard]"`) does not end the array; `#` comments between items
 * are skipped. Scanning stops at the first `]` outside a string.
 * @param {string} text full TOML text
 * @param {number} from index of the first character after the opening `[`
 * @returns {string[]} item values in file order
 */
function readTomlStringArray(text, from) {
  const items = [];
  let i = from;
  while (i < text.length) {
    const ch = text[i];
    if (ch === ']') break;
    if (ch === '#') {
      const nl = text.indexOf('\n', i);
      if (nl === -1) break;
      i = nl + 1;
    } else if (ch === '"' || ch === "'") {
      let value = '';
      let j = i + 1;
      // Single-line strings only: an unterminated quote ends at the newline.
      while (j < text.length && text[j] !== ch && text[j] !== '\n') {
        if (ch === '"' && text[j] === '\\' && j + 1 < text.length) {
          value += text[j + 1];
          j += 2;
        } else {
          value += text[j];
          j += 1;
        }
      }
      items.push(value);
      i = j + 1;
    } else {
      i += 1;
    }
  }
  return items;
}

/**
 * Read direct Python dependency names from `requirements.txt` and
 * `pyproject.toml` in `cwd`.
 *
 * - `requirements.txt`: one requirement per line; blank lines, `#` comments,
 *   option lines (leading `-`) and bare URL / VCS lines are ignored.
 * - `pyproject.toml`: the first `dependencies = [ ... ]` array (PEP 621) and
 *   the keys of `[tool.poetry.dependencies]` (except `python`).
 *
 * Only the leading distribution name of each requirement is kept; version
 * specifiers, extras and environment markers are discarded. A missing or
 * unreadable file contributes nothing.
 *
 * @param {string} cwd project root
 * @returns {string[]} sorted unique dependency names as written in the files
 */
function pythonDirectDeps(cwd) {
  const names = new Set();
  const addSpec = (spec) => {
    const m = /^\s*([A-Za-z0-9][A-Za-z0-9._-]*)/.exec(spec);
    if (m) names.add(m[1]);
  };

  try {
    const req = fs.readFileSync(path.join(cwd, 'requirements.txt'), 'utf8');
    for (const line of req.split('\n')) {
      const t = line.trim();
      if (!t || t.startsWith('#') || t.startsWith('-')) continue;
      // A bare URL / VCS requirement has no distribution name to read; without
      // this guard its scheme (`https`, `git`) would be recorded as a dependency.
      if (/^[A-Za-z][A-Za-z0-9+.-]*:\/\//.test(t)) continue;
      addSpec(t);
    }
  } catch (_) { /* no readable requirements.txt */ }

  try {
    const py = fs.readFileSync(path.join(cwd, 'pyproject.toml'), 'utf8');
    // PEP 621: [project] dependencies = ["foo>=1", "bar[extra]; python_version < '3.12'"]
    // Items are read one string at a time so extras brackets and quoted marker
    // values inside a requirement are never mistaken for array syntax or names.
    const head = /^\s*dependencies\s*=\s*\[/m.exec(py);
    if (head) {
      for (const spec of readTomlStringArray(py, head.index + head[0].length)) addSpec(spec);
    }
    // Poetry: [tool.poetry.dependencies]\n foo = "^1"
    const poetry = py.match(/\[tool\.poetry\.dependencies\]([\s\S]*?)(?:\n\[|$)/);
    if (poetry) for (const m of poetry[1].matchAll(/^([A-Za-z0-9][A-Za-z0-9._-]*)\s*=/gm)) {
      if (m[1] !== 'python') names.add(m[1]);
    }
  } catch (_) { /* no readable pyproject.toml */ }

  return [...names].sort();
}
|
|
173
|
+
|
|
174
|
+
/** Locate the project's venv `site-packages` directories (no Python runtime). */
|
|
175
|
+
function findSitePackages(cwd) {
|
|
176
|
+
const out = [];
|
|
177
|
+
for (const v of VENV_DIRS) {
|
|
178
|
+
const base = path.join(cwd, v);
|
|
179
|
+
const libDir = path.join(base, 'lib'); // POSIX: <venv>/lib/pythonX.Y/site-packages
|
|
180
|
+
let pyDirs = [];
|
|
181
|
+
try { pyDirs = fs.readdirSync(libDir).filter((d) => /^python\d/.test(d)).sort(); } catch (_) { /* none */ }
|
|
182
|
+
for (const py of pyDirs) {
|
|
183
|
+
const sp = path.join(libDir, py, 'site-packages');
|
|
184
|
+
try { if (fs.statSync(sp).isDirectory()) out.push(sp); } catch (_) { /* next */ }
|
|
185
|
+
}
|
|
186
|
+
const winSp = path.join(base, 'Lib', 'site-packages'); // Windows
|
|
187
|
+
try { if (fs.statSync(winSp).isDirectory()) out.push(winSp); } catch (_) { /* next */ }
|
|
188
|
+
}
|
|
189
|
+
return out;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/**
 * Normalize a Python distribution name for comparison (PEP 503 style):
 * the value is stringified, lower-cased, and every run of `-`, `_` or `.`
 * is collapsed into a single `-`.
 * @param {*} name distribution name (coerced with `String`)
 * @returns {string} normalized name
 */
function normalizePy(name) {
  const lowered = String(name).toLowerCase();
  return lowered.replace(/[-_.]+/g, '-');
}
|
|
196
|
+
|
|
197
|
+
/**
 * Find an installed distribution's version from its metadata entry in a
 * `site-packages` directory.
 *
 * Entries are scanned in sorted order and matched as
 * `name-version.dist-info`, `name-version.egg-info`, or the setuptools form
 * `name-version-pyX.Y.egg-info`; the Python-version tag is not part of the
 * returned version. Names are compared after `normalizePy`, so `Foo_Bar`
 * and `foo-bar` are treated as the same distribution.
 *
 * @param {string} sitePkgsDir site-packages directory to scan
 * @param {string} dep dependency name as written in the project files
 * @returns {string|null} version of the first matching entry, or null when
 *   the directory is unreadable or no entry matches
 */
function findPyVersion(sitePkgsDir, dep) {
  const norm = normalizePy(dep);
  let entries;
  try { entries = fs.readdirSync(sitePkgsDir); } catch (_) { return null; }
  for (const e of entries.sort()) {
    // The version is matched lazily so an optional `-pyX.Y` tag (egg-info
    // only) is consumed by its own group instead of making the match fail.
    const m = e.match(/^(.+?)-(\d[^-]*?)(?:-py\d[^-]*)?\.(?:dist-info|egg-info)$/);
    if (m && normalizePy(m[1]) === norm) return m[2];
  }
  return null;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Resolve a Python dependency to its installed module entry file + version.
 *
 * Import-name candidates are derived from `dep`: as written, with `-`
 * replaced by `_`, lower-cased, and lower-cased with `-` replaced by `_`.
 * For each site-packages directory, in order, each candidate is probed as a
 * package (`__init__.pyi`, then `__init__.py`) and then as a single module
 * (`<name>.pyi`, then `<name>.py`); the first existing file wins. The version
 * comes from `findPyVersion` for that same directory and may be null.
 *
 * @param {string[]} sitePkgsDirs site-packages directories to search
 * @param {string} dep dependency name as written in the project files
 * @returns {{ version: string|null, sourcePath: string|null }|null} null when
 *   no candidate file exists in any directory
 */
function resolvePyEntry(sitePkgsDirs, dep) {
  const lowered = dep.toLowerCase();
  const underscored = (s) => s.replace(/-/g, '_');
  const candidates = Array.from(new Set([dep, underscored(dep), lowered, underscored(lowered)]));

  // stat failures (missing path, permissions) count as "not a file".
  const isFile = (candidate) => {
    try {
      return fs.statSync(candidate).isFile();
    } catch (_) {
      return false;
    }
  };

  for (const sp of sitePkgsDirs) {
    const version = findPyVersion(sp, dep);
    for (const cand of candidates) {
      // Probe order: stub before source, package before single-file module.
      const probes = [
        path.join(sp, cand, '__init__.pyi'),
        path.join(sp, cand, '__init__.py'),
        path.join(sp, cand + '.pyi'),
        path.join(sp, cand + '.py'),
      ];
      const hit = probes.find(isFile);
      if (hit !== undefined) return { version, sourcePath: hit };
    }
  }
  return null;
}
|
|
230
|
+
|
|
101
231
|
/**
|
|
102
232
|
* Build the installed-library signature index for `cwd`.
|
|
103
233
|
*
|
|
@@ -110,17 +240,24 @@ function resolveEntry(cwd, dep) {
|
|
|
110
240
|
function buildLibraryIndex(cwd, opts = {}) {
|
|
111
241
|
const version = opts.version || '0';
|
|
112
242
|
const useCache = opts.cache !== false;
|
|
113
|
-
const deps = directDeps(cwd).slice(0, MAX_DEPS);
|
|
114
243
|
|
|
115
|
-
|
|
116
|
-
|
|
244
|
+
// Collect entries from both ecosystems; each carries its extractor kind.
|
|
245
|
+
const entries = []; // { name, version, sourcePath, kind: 'dts'|'py' }
|
|
246
|
+
for (const dep of directDeps(cwd).slice(0, MAX_DEPS)) {
|
|
117
247
|
const r = resolveEntry(cwd, dep);
|
|
118
|
-
if (r) entries.push({ dep, version: r.version,
|
|
248
|
+
if (r) entries.push({ name: dep, version: r.version, sourcePath: r.dtsPath, kind: 'dts' });
|
|
249
|
+
}
|
|
250
|
+
const sitePkgs = findSitePackages(cwd);
|
|
251
|
+
if (sitePkgs.length) {
|
|
252
|
+
for (const dep of pythonDirectDeps(cwd).slice(0, MAX_DEPS)) {
|
|
253
|
+
const r = resolvePyEntry(sitePkgs, dep);
|
|
254
|
+
if (r) entries.push({ name: dep, version: r.version, sourcePath: r.sourcePath, kind: 'py' });
|
|
255
|
+
}
|
|
119
256
|
}
|
|
120
257
|
|
|
121
258
|
const cache = useCache ? loadCache(cwd, version) : new Map();
|
|
122
|
-
const
|
|
123
|
-
const { unchanged } = getChangedFiles(
|
|
259
|
+
const files = entries.filter((e) => e.sourcePath).map((e) => e.sourcePath);
|
|
260
|
+
const { unchanged } = getChangedFiles(files, cache);
|
|
124
261
|
const unchangedSet = new Set(unchanged);
|
|
125
262
|
|
|
126
263
|
const symbols = new Set();
|
|
@@ -129,20 +266,20 @@ function buildLibraryIndex(cwd, opts = {}) {
|
|
|
129
266
|
|
|
130
267
|
for (const e of entries) {
|
|
131
268
|
let names;
|
|
132
|
-
if (!e.
|
|
269
|
+
if (!e.sourcePath) {
|
|
133
270
|
names = [];
|
|
134
|
-
} else if (unchangedSet.has(e.
|
|
135
|
-
names = cache.get(e.
|
|
271
|
+
} else if (unchangedSet.has(e.sourcePath) && cache.get(e.sourcePath)) {
|
|
272
|
+
names = cache.get(e.sourcePath).sigs || [];
|
|
136
273
|
} else {
|
|
137
274
|
let src = '';
|
|
138
275
|
try {
|
|
139
|
-
if (fs.statSync(e.
|
|
276
|
+
if (fs.statSync(e.sourcePath).size <= MAX_DTS_BYTES) src = fs.readFileSync(e.sourcePath, 'utf8');
|
|
140
277
|
} catch (_) { /* unreadable → empty */ }
|
|
141
|
-
names = extractDtsExports(src);
|
|
142
|
-
fresh.push({ file: e.
|
|
278
|
+
names = e.kind === 'py' ? extractPyExports(src) : extractDtsExports(src);
|
|
279
|
+
fresh.push({ file: e.sourcePath, sigs: names });
|
|
143
280
|
}
|
|
144
281
|
for (const n of names) symbols.add(n);
|
|
145
|
-
libraries.push({ name: e.
|
|
282
|
+
libraries.push({ name: e.name, version: e.version, symbols: names.length, typed: !!e.sourcePath });
|
|
146
283
|
}
|
|
147
284
|
|
|
148
285
|
if (useCache && fresh.length) {
|
|
@@ -161,4 +298,7 @@ function formatVersionPins(libraries) {
|
|
|
161
298
|
.map((l) => `${l.name}@${l.version}`);
|
|
162
299
|
}
|
|
163
300
|
|
|
164
|
-
module.exports = {
|
|
301
|
+
module.exports = {
|
|
302
|
+
buildLibraryIndex, extractDtsExports, directDeps, resolveEntry, formatVersionPins,
|
|
303
|
+
extractPyExports, pythonDirectDeps, findSitePackages, resolvePyEntry,
|
|
304
|
+
};
|