sigmap 7.30.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,9 +127,13 @@ function formatOutput(sections) {
127
127
  ];
128
128
 
129
129
  const parts = [
130
- { key: 'imports', header: '### Import graph', content: sections.imports },
131
- { key: 'classes', header: '### Class hierarchy', content: sections.classes },
132
- { key: 'routes', header: '### Route table', content: sections.routes },
130
+ { key: 'imports', header: '### Import graph', content: sections.imports },
131
+ { key: 'classes', header: '### Class hierarchy', content: sections.classes },
132
+ { key: 'routes', header: '### Route table', content: sections.routes },
133
+ { key: 'env', header: '### Environment variables', content: sections.env },
134
+ { key: 'buildci', header: '### Build & CI', content: sections.buildci },
135
+ { key: 'manifests', header: '### Config & manifests', content: sections.manifests },
136
+ { key: 'migrations', header: '### Database migrations', content: sections.migrations },
133
137
  ];
134
138
 
135
139
  for (const { header, content } of parts) {
@@ -165,9 +169,13 @@ function main() {
165
169
  }
166
170
 
167
171
  const sections = {
168
- imports: runAnalyzer('import-graph', files, cwd),
169
- classes: runAnalyzer('class-hierarchy', files, cwd),
170
- routes: runAnalyzer('route-table', files, cwd),
172
+ imports: runAnalyzer('import-graph', files, cwd),
173
+ classes: runAnalyzer('class-hierarchy', files, cwd),
174
+ routes: runAnalyzer('route-table', files, cwd),
175
+ env: runAnalyzer('env-schema', files, cwd),
176
+ buildci: runAnalyzer('build-ci', files, cwd),
177
+ manifests: runAnalyzer('config-manifest', files, cwd),
178
+ migrations: runAnalyzer('migrations', files, cwd),
171
179
  };
172
180
 
173
181
  const output = formatOutput(sections);
package/llms-full.txt CHANGED
@@ -11,20 +11,20 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
11
11
  effect), with no LLM calls, embeddings, or vector database. Works with Claude,
12
12
  Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
13
13
 
14
- # Version: 7.30.0 | Benchmark: sigmap-v7.30-main (2026-06-23)
14
+ # Version: 8.0.0 | Benchmark: sigmap-v8.0-main (2026-07-04)
15
15
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
16
16
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
17
17
 
18
18
  ---
19
19
 
20
- ## Core metrics (benchmark: sigmap-v7.30-main, 2026-06-23)
20
+ ## Core metrics (benchmark: sigmap-v8.0-main, 2026-07-04)
21
21
 
22
22
  | Metric | Without SigMap | With SigMap |
23
23
  |--------|----------------|-------------|
24
- | Retrieval hit@5 | 13.6% (random) | 75.6% (5.6× lift) |
24
+ | Retrieval hit@5 | 13.6% (random) | 86.7% (6.4× lift) |
25
25
  | Token reduction | — | 97.0% average |
26
- | Task success proxy | 10% | 52.2% |
27
- | Prompts per task | 2.84 | 1.72 (39.4% fewer) |
26
+ | Task success proxy | 10% | 67.8% |
27
+ | Prompts per task | 2.84 | 1.46 (48.8% fewer) |
28
28
  | Supported languages | — | 33 |
29
29
  | MCP tools | — | 17 |
30
30
  | npm runtime dependencies | — | 0 |
package/llms.txt CHANGED
@@ -11,7 +11,7 @@ ranking keeps the relevant context in scope (cutting tokens ~97% as a side
11
11
  effect), with no LLM calls, embeddings, or vector database. Works with Claude,
12
12
  Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
13
13
 
14
- # Version: 7.30.0 | Benchmark: sigmap-v7.30-main (2026-06-23)
14
+ # Version: 8.0.0 | Benchmark: sigmap-v8.0-main (2026-07-04)
15
15
  # Source: auto-generated from package.json, version.json, benchmarks/latest.json, src/mcp/tools.js, src/config/defaults.js
16
16
  # Regenerate: npm run generate:llms | Validate: npm run validate:llms
17
17
 
@@ -23,12 +23,12 @@ Cursor, GitHub Copilot, Aider, Windsurf, local LLMs, and MCP.
23
23
  - No blast-radius awareness before editing a hub file — `--impact` shows every file a change touches.
24
24
  - Pasted stack traces, CI logs, and JSON bloat the prompt — `squeeze` minimizes them and enriches the top frame from the symbol index.
25
25
 
26
- ## Core metrics (benchmark: sigmap-v7.30-main, 2026-06-23)
26
+ ## Core metrics (benchmark: sigmap-v8.0-main, 2026-07-04)
27
27
 
28
- - hit@5 retrieval: 75.6% vs 13.6% random baseline (5.6× lift)
28
+ - hit@5 retrieval: 86.7% vs 13.6% random baseline (6.4× lift)
29
29
  - Token reduction: 97.0% average across benchmark repos
30
- - Task success: 52.2% vs 10% without SigMap
31
- - Prompts per task: 1.72 vs 2.84 baseline (39.4% fewer)
30
+ - Task success: 67.8% vs 10% without SigMap
31
+ - Prompts per task: 1.46 vs 2.84 baseline (48.8% fewer)
32
32
  - Languages: 33 supported · MCP tools: 17
33
33
  - Dependencies: zero npm runtime dependencies · fully offline
34
34
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap",
3
- "version": "7.30.0",
3
+ "version": "8.0.0",
4
4
  "description": "97% token reduction for AI coding. Extracts function & class signatures with TF-IDF ranking to feed only the right files to Claude, Cursor, Copilot, Aider, Windsurf, local LLMs & MCP. Zero dependencies, runs offline via npx.",
5
5
  "main": "packages/core/index.js",
6
6
  "exports": {
@@ -27,6 +27,7 @@
27
27
  "benchmark:matrix": "node scripts/run-benchmark-matrix.mjs --save --skip-clone",
28
28
  "benchmark:verify": "node scripts/run-verify-benchmark.mjs",
29
29
  "benchmark:squeeze": "node scripts/run-squeeze-benchmark.mjs --save",
30
+ "benchmark:test-discovery": "node scripts/run-test-discovery-benchmark.mjs --save",
30
31
  "validate:squeeze": "node scripts/run-squeeze-benchmark.mjs --gate",
31
32
  "health": "node gen-context.js --health",
32
33
  "map": "node gen-project-map.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-cli",
3
- "version": "7.30.0",
3
+ "version": "8.0.0",
4
4
  "description": "SigMap CLI wrapper — thin adapter for programmatic CLI invocation",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sigmap-core",
3
- "version": "7.30.0",
3
+ "version": "8.0.0",
4
4
  "description": "SigMap core library — zero-dependency code signature extraction, retrieval, and security scanning",
5
5
  "main": "index.js",
6
6
  "keywords": [
@@ -20,6 +20,7 @@
20
20
  const fs = require('fs');
21
21
  const path = require('path');
22
22
  const { aggregate } = require('./scorer');
23
+ const { bm25rank } = require('../retrieval/bm25');
23
24
 
24
25
  // ---------------------------------------------------------------------------
25
26
  // Context file reader
@@ -81,79 +82,26 @@ function buildSigIndex(cwd) {
81
82
  }
82
83
 
83
84
  // ---------------------------------------------------------------------------
84
- // Simple keyword-based ranking (pre-retrieval layer; v2.3 adds proper ranker)
85
+ // Identifier-aware BM25 ranking (v7.31; see src/retrieval/bm25.js and #395)
85
86
  // ---------------------------------------------------------------------------
86
87
 
87
- /**
88
- * Tokenize a query or signature into lower-case word tokens.
89
- * Splits on whitespace, punctuation, camelCase, and snake_case.
90
- * @param {string} text
91
- * @returns {string[]}
92
- */
93
- function tokenize(text) {
94
- if (!text) return [];
95
- return text
96
- // split camelCase
97
- .replace(/([a-z])([A-Z])/g, '$1 $2')
98
- // split snake/kebab
99
- .replace(/[_\-]/g, ' ')
100
- // drop non-word chars
101
- .replace(/[^\w\s]/g, ' ')
102
- .toLowerCase()
103
- .split(/\s+/)
104
- .filter((t) => t.length > 1);
105
- }
106
-
107
- const STOP_WORDS = new Set([
108
- 'the', 'a', 'an', 'in', 'of', 'to', 'for', 'and', 'or', 'is', 'are',
109
- 'that', 'this', 'it', 'with', 'from', 'by', 'be', 'as', 'on', 'at',
110
- ]);
111
-
112
- /**
113
- * Score a single file's signatures against a query.
114
- * Returns a non-negative number; higher = more relevant.
115
- * @param {string[]} sigs - array of signature strings for this file
116
- * @param {string[]} queryTokens
117
- * @returns {number}
118
- */
119
- function scoreFile(sigs, queryTokens) {
120
- if (!sigs || sigs.length === 0) return 0;
121
-
122
- const sigText = sigs.join(' ');
123
- const sigTokens = new Set(tokenize(sigText));
124
-
125
- let score = 0;
126
- for (const qt of queryTokens) {
127
- if (STOP_WORDS.has(qt)) continue;
128
- if (sigTokens.has(qt)) score += 1;
129
- // Partial match (prefix)
130
- for (const st of sigTokens) {
131
- if (st !== qt && st.startsWith(qt) && qt.length >= 4) score += 0.3;
132
- }
133
- }
134
-
135
- return score;
136
- }
88
+ const { tokenize } = require('../retrieval/bm25');
137
89
 
138
90
  /**
139
- * Rank all files in the index against a query. Returns file paths sorted
140
- * by relevance score descending. Ties are broken by file path alphabetically.
91
+ * Rank all files in the index against a query with the identifier-aware BM25
92
+ * re-ranker. Returns file entries sorted by relevance score descending; ties
93
+ * are broken by file path alphabetically (deterministic).
141
94
  * @param {string} query
142
95
  * @param {Map<string, string[]>} index
143
96
  * @param {number} topK
144
97
  * @returns {{ file: string, score: number, sigs: string[] }[]}
145
98
  */
146
99
  function rank(query, index, topK = 10) {
147
- const queryTokens = tokenize(query);
148
- const scored = [];
149
-
100
+ const candidates = [];
150
101
  for (const [file, sigs] of index.entries()) {
151
- const score = scoreFile(sigs, queryTokens);
152
- scored.push({ file, score, sigs });
102
+ candidates.push({ file, sigs });
153
103
  }
154
-
155
- scored.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
156
- return scored.slice(0, topK);
104
+ return bm25rank(query, candidates).slice(0, topK);
157
105
  }
158
106
 
159
107
  // ---------------------------------------------------------------------------
@@ -33,7 +33,14 @@ const DEFAULT_TOP = 12;
33
33
  const GENERATED_RE = /(^|\/)(dist|build|out|vendor|node_modules)\/|\.(generated|min|bundle)\.|\.(pb|_pb)\.|\.pb\.go$|_pb2\.py$/;
34
34
  const TEST_RE = /(^|\/)(tests?|__tests__|spec|specs)\/|\.(test|spec)\.[a-z]+$|(^|\/)test_[^/]+\.py$|_test\.(go|py|rb)$/;
35
35
  const CONFIG_RE = /\.(json|ya?ml|toml|ini|conf|config|properties|env)$|(^|\/)(\.?[a-z]+rc)$|\.config\.[a-z]+$/i;
36
- const SECURITY_RE = /(^|\/|[._-])(auth|authn|authz|login|password|passwd|secret|credential|token|session|crypto|cipher|payment|billing|checkout|oauth|jwt|permission|acl|rbac)([._-]|\/|$)/i;
36
+ // DB migrations: framework dirs (Rails/Alembic/Prisma), Flyway `V1__x.sql`,
37
+ // timestamped migration files, and `*_migration.*` naming.
38
+ const MIGRATION_RE = /(^|\/)(migrations?|alembic\/versions|prisma\/migrations)(\/|$)|(^|\/)db\/migrate\/|(^|\/)V\d+(_\d+)*__[^/]+\.(sql|java)$|(^|\/)\d{8,}[_-][^/]+\.(sql|rb|py|js|ts)$|[._-]migration[s]?[._-]/i;
39
+ const PAYMENT_RE = /(^|\/|[._-])(payment|payments|billing|checkout|invoice|invoicing|subscription|stripe|paypal|braintree|charge|refund|payout)([._-]|\/|$)/i;
40
+ const AUTH_RE = /(^|\/|[._-])(auth|authn|authz|login|logout|signin|signup|password|passwd|session|oauth|jwt|permission|permissions|acl|rbac|credential|credentials)([._-]|\/|$)/i;
41
+ const SECURITY_RE = /(^|\/|[._-])(secret|secrets|crypto|cipher|encrypt|decrypt|token|signing|keystore|vault)([._-]|\/|$)/i;
42
+ // Public API surface: `api/` dirs, `public-api`, and module barrel entrypoints.
43
+ const PUBLIC_API_RE = /(^|\/)api(\/|$)|(^|\/)public[-_]?api(\/|$)|(^|\/)index\.(js|ts|mjs|cjs)$/i;
37
44
 
38
45
  /**
39
46
  * Split a signature's ` :start-end` line anchor from its symbol text.
@@ -51,17 +58,25 @@ function parseAnchor(sig) {
51
58
  }
52
59
 
53
60
  /**
54
- * Classify a file into a coarse risk label. Path-based heuristic (v1) — the
55
- * richer label set (C3) lands in v8.5.
61
+ * Classify a file into a risk label (C3, v8.5). Path-based, deterministic.
62
+ * Precedence is strict, most-specific-risk first: a migration touching payments
63
+ * is labeled `migration` (a schema change is the dominant risk), payment/auth
64
+ * outrank the generic `security` bucket, and `config`/`public-api` resolve
65
+ * before the `source` fallback. `test`/`generated` semantics are preserved so
66
+ * existing consumers (findRelatedTests, verifier) keep working.
56
67
  * @param {string} relPath
57
- * @returns {'generated'|'test'|'config'|'security'|'source'}
68
+ * @returns {'generated'|'test'|'migration'|'payment'|'auth'|'security'|'config'|'public-api'|'source'}
58
69
  */
59
70
function riskLabelFor(relPath) {
  // Normalize Windows separators so every pattern can assume forward slashes.
  const normalized = relPath.replace(/\\/g, '/');

  // Ordered rule table: the first pattern that matches decides the label.
  const rules = [
    [GENERATED_RE, 'generated'],
    [TEST_RE, 'test'],
    [MIGRATION_RE, 'migration'],
    [PAYMENT_RE, 'payment'],
    [AUTH_RE, 'auth'],
    [SECURITY_RE, 'security'],
    [CONFIG_RE, 'config'],
    [PUBLIC_API_RE, 'public-api'],
  ];

  for (const [pattern, label] of rules) {
    if (pattern.test(normalized)) return label;
  }
  // Nothing matched: plain source file.
  return 'source';
}
67
82
 
@@ -72,9 +87,28 @@ function stemOf(relPath) {
72
87
  }
73
88
 
74
89
  /**
75
- * Best-effort impl→test discovery (v1). Matches test files whose stem equals
76
- * the implementation file's stem, by common convention. Deterministic. The
77
- * accuracy-measured discovery (C2) lands in v8.5.
90
+ * Infer the implementation stem a test file targets, by stripping the
91
+ * conventional test affixes across languages (measured in the C2 benchmark):
92
+ * foo.test.js / foo.spec.ts → foo (JS/TS)
93
+ * test_foo.py → foo (Python / pytest)
94
+ * foo_test.go / foo_test.py → foo (Go, unittest)
95
+ * FooTest.java / BarSpec.scala → Foo (JVM, PascalCase)
96
+ * @param {string} relPath
97
+ * @returns {string}
98
+ */
99
function testTargetStem(relPath) {
  // Affixes are stripped in this fixed order, each applied to the result of
  // the previous one, starting from the stem produced by stemOf().
  const affixes = [
    /^test[_-]/i, // leading test_ / test-
    /[_-]test$/i, // trailing _test / -test
    /(Tests?|Specs?)$/, // trailing PascalCase Test(s) / Spec(s), case-sensitive
  ];
  return affixes.reduce((stem, affix) => stem.replace(affix, ''), stemOf(relPath));
}
106
+
107
+ /**
108
+ * Impl→test discovery (C2, v8.5). Matches test files back to their
109
+ * implementation by normalizing conventional test affixes, so JS/TS, Python,
110
+ * Go, and JVM naming conventions all resolve. Deterministic; accuracy is
111
+ * measured by `scripts/run-test-discovery-benchmark.mjs`.
78
112
  * @param {string} relPath
79
113
  * @param {string[]} allFiles - universe of indexed files (relative paths)
80
114
  * @returns {string[]}
@@ -87,7 +121,7 @@ function findRelatedTests(relPath, allFiles) {
87
121
  for (const f of allFiles) {
88
122
  if (f === relPath) continue;
89
123
  if (riskLabelFor(f) !== 'test') continue;
90
- if (stemOf(f).toLowerCase() === stem) out.push(f);
124
+ if (testTargetStem(f).toLowerCase() === stem) out.push(f);
91
125
  }
92
126
  return out.sort();
93
127
  }
@@ -0,0 +1,91 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Build & CI extractor (v8.5 C1).
5
+ *
6
+ * Surfaces how the project is built and validated: npm/pnpm/yarn scripts
7
+ * (package.json), GitHub Actions workflows (.github/workflows/*.yml), and
8
+ * Makefile targets. Pure, zero-dependency, deterministic.
9
+ *
10
+ * @param {string[]} files — absolute file paths (unused; roots are read directly)
11
+ * @param {string} cwd — project root
12
+ * @returns {string} formatted markdown table (empty string if none found)
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ const MAX_ROWS = 120;
19
+
20
/**
 * Read a file and parse it as JSON, treating every failure as "no data".
 * @param {string} p - path of the JSON file
 * @returns {*} the parsed value, or null when the file cannot be read or parsed
 */
function readJson(p) {
  try {
    const raw = fs.readFileSync(p, 'utf8');
    return JSON.parse(raw);
  } catch (_) {
    // Best-effort reader: unreadable and malformed files both yield null.
    return null;
  }
}
23
+
24
/**
 * Append one row per script declared in the root package.json, sorted by name.
 * Does nothing when package.json is missing/invalid or has no `scripts` object.
 * @param {string} cwd - project root
 * @param {{kind: string, name: string, detail: string}[]} rows - output rows (mutated)
 */
function npmScripts(cwd, rows) {
  const manifest = readJson(path.join(cwd, 'package.json'));
  const scripts = manifest ? manifest.scripts : null;
  if (!scripts || typeof scripts !== 'object') return;

  const names = Object.keys(scripts).sort();
  names.forEach((name) => {
    rows.push({ kind: 'script', name, detail: 'npm run ' + name });
  });
}
31
+
32
/**
 * Extract the trigger events of a GitHub Actions workflow from its `on:` key.
 *
 * Handles the inline forms (`on: push`, `on: [push, pull_request]`) and the
 * block forms (a mapping of events or a `- event` sequence). In block form only
 * the direct children of `on:` are reported: deeper keys such as `branches`
 * are skipped and scanning stops at the next top-level key (e.g. `jobs:`).
 *
 * @param {string} content - raw workflow YAML
 * @returns {string} comma-separated events (at most 6 in block form), '' if none
 */
function workflowTriggers(content) {
  const lines = content.split(/\r?\n/);
  // `on` may be quoted, since YAML 1.1 parsers read a bare `on` as a boolean.
  const start = lines.findIndex((l) => /^(?:on|'on'|"on")[ \t]*:/.test(l));
  if (start === -1) return '';

  // Inline form: whatever follows the colon on the same line (minus comments).
  const inline = lines[start].replace(/^[^:]*:/, '').replace(/\s+#.*$/, '').trim();
  if (inline) return inline.replace(/[[\]{}'"]/g, '').trim();

  // Block form: collect keys / sequence items at the first child indentation.
  const events = [];
  let childIndent = -1;
  for (let i = start + 1; i < lines.length; i++) {
    const line = lines[i];
    if (!line.trim() || /^\s*#/.test(line)) continue; // blank or comment line
    const indent = line.match(/^[ \t]*/)[0].length;
    // A non-indented line that is not a sequence item starts the next key.
    if (indent === 0 && !line.startsWith('-')) break;
    if (childIndent === -1) childIndent = indent;
    if (indent !== childIndent) continue; // nested under an event
    const m = line.match(/^[ \t]*(?:-[ \t]+([a-z_]+)|([a-z_]+)[ \t]*:)/);
    if (m) events.push(m[1] || m[2]);
  }
  return [...new Set(events)].slice(0, 6).join(', ');
}

/**
 * Append one row per GitHub Actions workflow found in `.github/workflows`.
 *
 * Files are visited in sorted order; only `.yml` / `.yaml` files are read.
 * The row name is the workflow's top-level `name:` (surrounding quotes
 * stripped) or the file name when there is none; the detail is the file name
 * followed by its trigger events. A missing directory or unreadable file is
 * skipped silently (best-effort extraction).
 *
 * @param {string} cwd - project root
 * @param {{kind: string, name: string, detail: string}[]} rows - output rows (mutated)
 */
function ciWorkflows(cwd, rows) {
  const dir = path.join(cwd, '.github', 'workflows');
  let entries;
  try { entries = fs.readdirSync(dir); } catch (_) { return; }

  for (const file of entries.sort()) {
    if (!/\.ya?ml$/i.test(file)) continue;
    let content;
    try { content = fs.readFileSync(path.join(dir, file), 'utf8'); } catch (_) { continue; }

    // `[ \t]*` (not `\s*`) keeps the match on the `name:` line itself.
    const nameMatch = content.match(/^name:[ \t]*(\S.*)$/m);
    const name = nameMatch ? nameMatch[1].trim().replace(/^['"]|['"]$/g, '') : file;
    const triggers = workflowTriggers(content);
    rows.push({ kind: 'ci', name, detail: `${file}${triggers ? ' — ' + triggers : ''}` });
  }
}
57
+
58
/**
 * Append one row per target declared in the root `Makefile`, de-duplicated and
 * sorted by name. A missing or unreadable Makefile is skipped silently.
 *
 * A target is a line that starts (column 0) with a name made of letters,
 * digits, `_`, `.` or `-` followed by `:`. Names starting with `.` (such as
 * `.PHONY`) never match. Variable assignments written `:=`, `::=` or `:::=`
 * are excluded, while double-colon rules (`target:: dep`) are still reported.
 *
 * @param {string} cwd - project root
 * @param {{kind: string, name: string, detail: string}[]} rows - output rows (mutated)
 */
function makeTargets(cwd, rows) {
  let content;
  try { content = fs.readFileSync(path.join(cwd, 'Makefile'), 'utf8'); } catch (_) { return; }

  const targets = new Set();
  for (const line of content.split(/\r?\n/)) {
    // The lookahead rejects every colon-based assignment operator (:=, ::=, :::=).
    const m = line.match(/^([a-zA-Z0-9_][a-zA-Z0-9_.-]*)\s*:(?!:{0,2}=)/);
    if (m) targets.add(m[1]);
  }
  for (const t of [...targets].sort()) {
    rows.push({ kind: 'make', name: t, detail: 'make ' + t });
  }
}
70
+
71
/**
 * Build the "Build & CI" markdown table from npm scripts, GitHub Actions
 * workflows and Makefile targets (in that order).
 *
 * Cell text is escaped so that a `|` or a line break inside a script or
 * workflow name cannot break the table layout. At most MAX_ROWS rows are
 * rendered; any remainder is summarized in a final "+N more" row.
 *
 * @param {string[]} files - not used by this function
 * @param {string} cwd - project root
 * @returns {string} markdown table, or '' when nothing was found
 */
function analyze(files, cwd) {
  const rows = [];
  npmScripts(cwd, rows);
  ciWorkflows(cwd, rows);
  makeTargets(cwd, rows);
  if (rows.length === 0) return '';

  // Escape pipes and flatten line breaks so each value stays inside its cell.
  const cell = (text) => String(text).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');

  const lines = [
    '| Kind | Name | Detail |',
    '|------|------|--------|',
  ];
  for (const r of rows.slice(0, MAX_ROWS)) {
    lines.push(`| ${cell(r.kind)} | ${cell(r.name)} | ${cell(r.detail)} |`);
  }
  if (rows.length > MAX_ROWS) {
    lines.push(`| … | | +${rows.length - MAX_ROWS} more |`);
  }
  return lines.join('\n');
}
90
+
91
+ module.exports = { analyze };
@@ -0,0 +1,101 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Config & package-manifest extractor (v8.5 C1).
5
+ *
6
+ * Surfaces the project's package manifests (name / version / dependency counts)
7
+ * across ecosystems and the notable root config files present. Pure,
8
+ * zero-dependency, deterministic.
9
+ *
10
+ * @param {string[]} files — absolute file paths (unused; roots are read directly)
11
+ * @param {string} cwd — project root
12
+ * @returns {string} formatted markdown table (empty string if none found)
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ const CONFIG_FILES = [
19
+ 'tsconfig.json', 'jsconfig.json', '.eslintrc', '.eslintrc.json', '.eslintrc.js',
20
+ '.prettierrc', 'babel.config.js', 'jest.config.js', 'vitest.config.ts',
21
+ 'webpack.config.js', 'vite.config.ts', 'rollup.config.js', 'tailwind.config.js',
22
+ 'docker-compose.yml', 'docker-compose.yaml', 'Dockerfile', '.editorconfig',
23
+ ];
24
+
25
/**
 * Read a file as UTF-8 text.
 * @param {string} p - file path
 * @returns {string|null} file content, or null when it cannot be read
 */
function readText(p) {
  try {
    return fs.readFileSync(p, 'utf8');
  } catch (_) {
    return null;
  }
}

/**
 * Read a file and parse it as JSON.
 * @param {string} p - file path
 * @returns {*} parsed value, or null when the file cannot be read or parsed
 */
function readJson(p) {
  try {
    const raw = fs.readFileSync(p, 'utf8');
    return JSON.parse(raw);
  } catch (_) {
    return null;
  }
}

/**
 * Number of own enumerable keys of an object-like value.
 * @param {*} obj
 * @returns {number} key count; 0 for falsy or non-object values
 */
function count(obj) {
  if (!obj || typeof obj !== 'object') return 0;
  return Object.keys(obj).length;
}
28
+
29
/**
 * Append one row per package manifest found in the project root, in a fixed
 * order: npm, Python (pyproject.toml, else setup.py; plus requirements.txt),
 * Rust, Go, Maven, Gradle, Ruby, PHP.
 *
 * Presence is decided by whether the file can be read, not by whether its
 * content is non-empty, so an existing but empty manifest is still reported.
 *
 * @param {string} cwd - project root
 * @param {{manifest: string, detail: string}[]} rows - output rows (mutated)
 */
function manifests(cwd, rows) {
  const at = (name) => path.join(cwd, name);
  const isReadable = (name) => readText(at(name)) !== null;
  // First `name = "…"` / `version = "…"` pair of a TOML file, joined as name@version.
  const tomlIdentity = (toml) => {
    const name = (toml.match(/^\s*name\s*=\s*["']([^"']+)["']/m) || [])[1];
    const ver = (toml.match(/^\s*version\s*=\s*["']([^"']+)["']/m) || [])[1];
    return [name, ver].filter(Boolean).join('@') || 'present';
  };

  const pkg = readJson(at('package.json'));
  if (pkg) {
    const deps = count(pkg.dependencies);
    const dev = count(pkg.devDependencies);
    const id = [pkg.name, pkg.version].filter(Boolean).join('@') || 'package.json';
    rows.push({ manifest: 'package.json (npm)', detail: `${id} · ${deps} deps, ${dev} devDeps` });
  }

  // setup.py is only reported when there is no pyproject.toml.
  const pyproject = readText(at('pyproject.toml'));
  if (pyproject !== null) {
    rows.push({ manifest: 'pyproject.toml (python)', detail: tomlIdentity(pyproject) });
  } else if (isReadable('setup.py')) {
    rows.push({ manifest: 'setup.py (python)', detail: 'present' });
  }
  if (isReadable('requirements.txt')) {
    rows.push({ manifest: 'requirements.txt (python)', detail: 'present' });
  }

  const cargo = readText(at('Cargo.toml'));
  if (cargo !== null) {
    rows.push({ manifest: 'Cargo.toml (rust)', detail: tomlIdentity(cargo) });
  }

  const gomod = readText(at('go.mod'));
  if (gomod !== null) {
    const mod = (gomod.match(/^module\s+(\S+)/m) || [])[1];
    const go = (gomod.match(/^go\s+(\S+)/m) || [])[1];
    rows.push({ manifest: 'go.mod (go)', detail: [mod, go && 'go ' + go].filter(Boolean).join(' · ') || 'present' });
  }

  if (isReadable('pom.xml')) rows.push({ manifest: 'pom.xml (maven)', detail: 'present' });
  if (isReadable('build.gradle') || isReadable('build.gradle.kts')) {
    rows.push({ manifest: 'build.gradle (gradle)', detail: 'present' });
  }
  if (isReadable('Gemfile')) rows.push({ manifest: 'Gemfile (ruby)', detail: 'present' });

  const composer = readJson(at('composer.json'));
  if (composer) {
    rows.push({ manifest: 'composer.json (php)', detail: `${composer.name || 'present'} · ${count(composer.require)} deps` });
  }
}
74
+
75
/**
 * List the entries of CONFIG_FILES that exist in the project root, keeping
 * the order in which CONFIG_FILES declares them.
 * @param {string} cwd - project root
 * @returns {string[]} names of the config files that are present
 */
function configFiles(cwd) {
  return CONFIG_FILES.filter((name) => fs.existsSync(path.join(cwd, name)));
}
82
+
83
/**
 * Render the "Config & manifests" section: a manifest table followed, after a
 * blank line, by a one-line list of the config files that are present.
 * @param {string[]} files - not used by this function
 * @param {string} cwd - project root
 * @returns {string} markdown, or '' when neither manifests nor configs exist
 */
function analyze(files, cwd) {
  const rows = [];
  manifests(cwd, rows);
  const configs = configFiles(cwd);

  const hasRows = rows.length > 0;
  const hasConfigs = configs.length > 0;
  if (!hasRows && !hasConfigs) return '';

  const out = hasRows
    ? [
      '| Manifest | Detail |',
      '|----------|--------|',
      ...rows.map((r) => `| ${r.manifest} | ${r.detail} |`),
    ]
    : [];

  if (hasConfigs) {
    // Separate the list from the table only when a table was emitted.
    if (out.length) out.push('');
    const quoted = configs.map((c) => '`' + c + '`').join(', ');
    out.push(`**Config files:** ${quoted}`);
  }
  return out.join('\n');
}
100
+
101
+ module.exports = { analyze };
@@ -0,0 +1,90 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Environment-variable schema extractor (v8.5 C1).
5
+ *
6
+ * Surfaces the environment the project actually reads — from source across
7
+ * JS/TS, Python, Ruby, and Go, plus keys declared in a committed `.env.example`
8
+ * / `.env.sample` / `.env.template` / `.env.dist`. Pure, zero-dependency, deterministic.
9
+ *
10
+ * @param {string[]} files — absolute file paths to analyze (srcDirs-scoped)
11
+ * @param {string} cwd — project root
12
+ * @returns {string} formatted markdown table (empty string if none found)
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
+ const SCAN_EXTS = new Set(['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs', '.py', '.rb', '.go']);
19
+ const EXAMPLE_FILES = ['.env.example', '.env.sample', '.env.template', '.env.dist'];
20
+
21
// Every pattern is global (scanned with exec/matchAll) and captures the
// variable name in one of its groups. Names must be UPPER_SNAKE_CASE.

// process.env.X / process.env['X'] / import.meta.env.X / Deno.env.get('X')
// The lookahead on the dotted form rejects names that continue with other
// identifier characters, so `process.env.Path` is skipped instead of yielding `P`.
const JS_RE = /(?:process\.env|import\.meta\.env)(?:\.([A-Z_][A-Z0-9_]*)(?![A-Za-z0-9_$])|\[\s*['"]([A-Z_][A-Z0-9_]*)['"]\s*\])|Deno\.env\.get\(\s*['"]([A-Z_][A-Z0-9_]*)['"]/g;
// os.environ['X'] / os.environ.get('X') / os.getenv('X') / getenv('X')
// `environ` must be followed by `[` or `.get(` for the name to be captured.
const PY_RE = /(?:os\.)?(?:environ(?:\.get\(|\[)\s*['"]([A-Z_][A-Z0-9_]*)['"]|getenv\(\s*['"]([A-Z_][A-Z0-9_]*)['"])/g;
// ENV['X'] / ENV.fetch('X') / ENV.fetch('X', default)
const RB_RE = /ENV(?:\[|\.fetch\()\s*['"]([A-Z_][A-Z0-9_]*)['"]/g;
// os.Getenv("X") / os.LookupEnv("X")
const GO_RE = /os\.(?:Getenv|LookupEnv)\(\s*["`']([A-Z_][A-Z0-9_]*)["`']/g;
27
+
28
+ const MAX_ROWS = 200;
29
+
30
/**
 * Run a global regex over `content` and add the first non-empty capture
 * (group 1, 2 or 3) of every match to `into`.
 * @param {RegExp} re - global pattern; its lastIndex is reset before scanning
 * @param {string} content - text to scan
 * @param {Set<string>} into - receives the captured names (mutated)
 */
function collectMatches(re, content, into) {
  // The patterns are shared module-level objects, so rewind before each scan.
  re.lastIndex = 0;
  for (let hit = re.exec(content); hit !== null; hit = re.exec(content)) {
    const [, first, second, third] = hit;
    const captured = first || second || third;
    if (captured) into.add(captured);
  }
}
38
+
39
/**
 * Collect the variable names declared in the env example files listed in
 * EXAMPLE_FILES. Files that cannot be read are skipped.
 * @param {string} cwd - project root
 * @returns {Set<string>} names found on `KEY=` / `export KEY=` lines
 */
function readExampleKeys(cwd) {
  const keys = new Set();
  EXAMPLE_FILES.forEach((fileName) => {
    let text;
    try {
      text = fs.readFileSync(path.join(cwd, fileName), 'utf8');
    } catch (_) {
      return; // file absent or unreadable: move on to the next candidate
    }
    text
      .split('\n')
      .map((line) => line.trim())
      .filter((entry) => entry && !entry.startsWith('#')) // drop blanks and comments
      .forEach((entry) => {
        const declared = entry.match(/^(?:export\s+)?([A-Z_][A-Z0-9_]*)\s*=/);
        if (declared) keys.add(declared[1]);
      });
  });
  return keys;
}
53
+
54
/**
 * Render the environment-variable table: names read from the given source
 * files plus names declared in the env example files, sorted, each tagged
 * with where it was found.
 * @param {string[]} files - paths of the files to scan
 * @param {string} cwd - project root
 * @returns {string} markdown table, or '' when no variable was found
 */
function analyze(files, cwd) {
  // Pick the pattern by extension; anything that is not .py/.rb/.go uses JS_RE.
  const patternFor = (ext) => {
    switch (ext) {
      case '.py': return PY_RE;
      case '.rb': return RB_RE;
      case '.go': return GO_RE;
      default: return JS_RE;
    }
  };

  const fromCode = new Set();
  for (const filePath of files) {
    const ext = path.extname(filePath).toLowerCase();
    if (!SCAN_EXTS.has(ext)) continue;
    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch (_) {
      continue; // unreadable file: skip it
    }
    collectMatches(patternFor(ext), content, fromCode);
  }

  const fromExample = readExampleKeys(cwd);
  const names = [...new Set([...fromCode, ...fromExample])].sort();
  if (names.length === 0) return '';

  const table = ['| Variable | Source |', '|----------|--------|'];
  names.slice(0, MAX_ROWS).forEach((name) => {
    const sources = [];
    if (fromCode.has(name)) sources.push('code');
    if (fromExample.has(name)) sources.push('.env.example');
    table.push(`| ${name} | ${sources.join(', ')} |`);
  });
  const hidden = names.length - MAX_ROWS;
  if (hidden > 0) {
    table.push(`| … | +${hidden} more |`);
  }
  return table.join('\n');
}
89
+
90
+ module.exports = { analyze };