gitlumen-screen-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,86 @@
1
+ # gitlumen-screen-sdk
2
+
3
+ JavaScript SDK for screening public GitHub repositories and pull requests with GitLumen heuristic analysis.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install gitlumen-screen-sdk
9
+ ```
10
+
11
+ For local development from a sibling folder:
12
+
13
+ ```bash
14
+ npm install ../gitlumen-screen-sdk
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ```js
20
+ import { GitLumenScreenSDK } from 'gitlumen-screen-sdk';
21
+
22
+ const sdk = new GitLumenScreenSDK({
23
+ githubToken: process.env.GITHUB_TOKEN,
24
+ dataDir: '.gitlumen-screen-sdk'
25
+ });
26
+
27
+ const report = await sdk.screenRepository(
28
+ {
29
+ repoUrl: 'https://github.com/owner/repo',
30
+ scope: 'standard'
31
+ },
32
+ 'compact'
33
+ );
34
+
35
+ console.log(report);
36
+ ```
37
+
38
+ ## API
39
+
40
+ ### new GitLumenScreenSDK(options)
41
+
42
+ Options:
43
+ - githubToken: optional GitHub token
44
+ - dataDir: local directory for report storage
45
+ - maxFileBytes: max file bytes fetched from raw GitHub file content
46
+ - githubApiBase: override GitHub API base URL
47
+ - rawBase: override raw content base URL
48
+ - userAgent: custom user agent string
49
+
50
+ ### screenRepository(input, output)
51
+
52
+ - input.repoUrl: required, repository or PR URL
53
+ - input.scope: quick | standard | deep
54
+ - input.branch: optional branch/ref
55
+ - input.maxFiles: optional cap for downloaded files
56
+ - output: compact | markdown | json
57
+
58
+ Returns:
59
+ - compact: reduced JSON object
60
+ - markdown: markdown string
61
+ - json: full report object
62
+
63
+ ### getReviewReport(reportId, output)
64
+
65
+ Read a previously generated report by its ID.
66
+
67
+ ### listReviewReports(limit)
68
+
69
+ List stored reports from the local data directory.
70
+
71
+ ### getRepositoryStructure(input)
72
+
73
+ Get a tree structure summary without generating a full report.
74
+
75
+ ## Low-level exports
76
+
77
+ The package also exports:
78
+ - GitLumenService
79
+ - GitHubClient
80
+ - ReportStore
81
+ - reportCompact
82
+ - formatReportOutput
83
+
84
+ ## Requirements
85
+
86
+ - Node.js 20+
@@ -0,0 +1,16 @@
1
+ import { GitLumenScreenSDK } from '../src/index.js';
2
+
3
// Basic usage: screen a public repository with the "quick" scope and print
// the compact report. Add `githubToken` (e.g. process.env.GITHUB_TOKEN) to
// the options below to pass a GitHub token; it is optional.
const sdkOptions = { dataDir: '.gitlumen-screen-sdk' };
const sdk = new GitLumenScreenSDK(sdkOptions);

const screeningInput = {
  repoUrl: 'https://github.com/modelcontextprotocol/typescript-sdk',
  scope: 'quick'
};
const result = await sdk.screenRepository(screeningInput, 'compact');

console.log(result);
package/package.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "gitlumen-screen-sdk",
3
+ "version": "0.1.0",
4
+ "description": "JavaScript SDK for GitLumen repository and pull request screening.",
5
+ "type": "module",
6
+ "main": "./src/index.js",
7
+ "exports": {
8
+ ".": "./src/index.js"
9
+ },
10
+ "keywords": [
11
+ "gitlumen",
12
+ "github",
13
+ "screening",
14
+ "sdk",
15
+ "risk-analysis"
16
+ ],
17
+ "author": "GitLumen",
18
+ "license": "MIT",
19
+ "engines": {
20
+ "node": ">=20.0.0"
21
+ },
22
+ "scripts": {
23
+ "example": "node ./examples/basic-usage.js"
24
+ }
25
+ }
package/src/config.js ADDED
@@ -0,0 +1,20 @@
1
+ import path from 'node:path';
2
+
3
/**
 * Build the effective SDK configuration.
 *
 * Each setting is taken from `overrides` when it is not null/undefined.
 * `githubToken`, `dataDir` and `maxFileBytes` otherwise fall back to the
 * GITHUB_TOKEN, GITLUMEN_SCREEN_DATA_DIR and GITLUMEN_MAX_FILE_BYTES
 * environment variables, and finally to built-in defaults.
 *
 * @param {object} [overrides] - Optional explicit settings; null is treated like {}.
 * @returns {{githubToken: string, dataDir: string, maxFileBytes: number,
 *   githubApiBase: string, rawBase: string, userAgent: string}}
 */
export function createConfig(overrides = {}) {
  const options = overrides ?? {};
  const defaultMaxFileBytes = 120000;

  // A malformed or non-positive GITLUMEN_MAX_FILE_BYTES must not leak NaN/0
  // into the config, so it is ignored in favour of the default.
  const parsedEnvLimit = Number.parseInt(process.env.GITLUMEN_MAX_FILE_BYTES ?? '', 10);
  const envMaxFileBytes = Number.isFinite(parsedEnvLimit) && parsedEnvLimit > 0
    ? parsedEnvLimit
    : defaultMaxFileBytes;

  return {
    githubToken: options.githubToken ?? process.env.GITHUB_TOKEN ?? '',
    dataDir: options.dataDir ?? path.resolve(process.cwd(), process.env.GITLUMEN_SCREEN_DATA_DIR || '.gitlumen-screen-sdk'),
    maxFileBytes: options.maxFileBytes ?? envMaxFileBytes,
    githubApiBase: options.githubApiBase ?? 'https://api.github.com',
    rawBase: options.rawBase ?? 'https://raw.githubusercontent.com',
    userAgent: options.userAgent ?? 'gitlumen-screen-sdk/0.1.0'
  };
}
13
+
14
/**
 * Default numeric caps exported for the screening services.
 * NOTE(review): the quick/standard/deep entries presumably correspond to the
 * `scope` input values — confirm against the consumer.
 */
export const DEFAULT_LIMITS = {
  // Cap on repository tree entries.
  maxTreeEntries: 2500,
  // File caps, one per scope name.
  quickMaxFiles: 40,
  standardMaxFiles: 90,
  deepMaxFiles: 180,
  // Cap on pull-request files.
  maxPrFiles: 300
};
@@ -0,0 +1,21 @@
1
+ import { truncateText } from './utils/text.js';
2
+
3
/**
 * Reduce a full report to its compact form: the identifying fields, the risk
 * block, at most the first 20 findings, and the markdown passed through
 * truncateText with a 12000 limit.
 *
 * @param {object} report - Full report object.
 * @returns {object} Compact report.
 */
export function reportCompact(report) {
  const {
    reportId,
    generatedAt,
    target,
    risk,
    summary,
    findings,
    decisionQuestions,
    recommendations,
    markdown
  } = report;

  return {
    reportId,
    generatedAt,
    target,
    risk,
    summary,
    findings: findings.slice(0, 20),
    decisionQuestions,
    recommendations,
    markdown: truncateText(markdown, 12000)
  };
}
16
+
17
/**
 * Select the representation of a report requested by the caller.
 *
 * @param {object} report - Full report object.
 * @param {string} [output='compact'] - 'markdown' returns the markdown string,
 *   'json' returns the report itself, anything else returns the compact form.
 * @returns {string|object}
 */
export function formatReportOutput(report, output = 'compact') {
  switch (output) {
    case 'markdown':
      return report.markdown;
    case 'json':
      return report;
    default:
      return reportCompact(report);
  }
}
package/src/index.js ADDED
@@ -0,0 +1,62 @@
1
+ import { GitLumenService } from './services/gitlumen.js';
2
+ import { GitHubClient } from './services/github.js';
3
+ import { ReportStore } from './services/reportStore.js';
4
+ import { formatReportOutput, reportCompact } from './formatters.js';
5
+
6
/**
 * High-level facade over GitLumenService.
 *
 * A ready-made `options.service` is used as-is; otherwise a GitLumenService
 * is created from the remaining options (optionally with an injected
 * `githubClient` and/or `store`).
 */
export class GitLumenScreenSDK {
  /**
   * @param {object} [options] - githubToken, dataDir, maxFileBytes,
   *   githubApiBase, rawBase, userAgent, plus optional service / githubClient /
   *   store instances.
   */
  constructor(options = {}) {
    if (options.service) {
      this.service = options.service;
      return;
    }

    const { githubToken, dataDir, maxFileBytes, githubApiBase, rawBase, userAgent } = options;

    this.service = new GitLumenService({
      githubClient: options.githubClient,
      store: options.store,
      // NOTE(review): the token is forwarded under the key `token`; confirm
      // this is the key the GitHub client reads.
      githubOptions: { token: githubToken, dataDir, maxFileBytes, githubApiBase, rawBase, userAgent },
      storeOptions: { dataDir, config: { dataDir } }
    });
  }

  /** Screen a repository or PR and return the report in the requested format. */
  async screenRepository(input, output = 'compact') {
    return formatReportOutput(await this.service.screenRepository(input), output);
  }

  /** Load a report by id via the service and return it in the requested format. */
  async getReviewReport(reportId, output = 'compact') {
    return formatReportOutput(await this.service.getReport(reportId), output);
  }

  /** List reports via the service (limit defaults to 20). */
  async listReviewReports(limit = 20) {
    return this.service.listReports(limit);
  }

  /** Delegate to the service's repository-structure lookup. */
  async getRepositoryStructure(input) {
    return this.service.getRepositoryStructure(input);
  }
}
50
+
51
/**
 * One-shot convenience wrapper: build an SDK from `options` and screen `input`.
 *
 * @param {object} input - Screening input forwarded to the SDK.
 * @param {object} [options] - SDK options; `options.output` selects the
 *   output format and falls back to 'compact' when falsy.
 * @returns {Promise<string|object>}
 */
export async function screenRepository(input, options = {}) {
  const sdk = new GitLumenScreenSDK(options);
  const outputFormat = options.output || 'compact';
  return sdk.screenRepository(input, outputFormat);
}
55
+
56
+ export {
57
+ GitLumenService,
58
+ GitHubClient,
59
+ ReportStore,
60
+ reportCompact,
61
+ formatReportOutput
62
+ };
@@ -0,0 +1,589 @@
1
+ import { CategoryNames, SeverityWeights } from '../types.js';
2
+ import { createReportId, stableHash } from '../utils/ids.js';
3
+ import { unique } from '../utils/text.js';
4
+
5
/**
 * Lower-case a path and convert backslashes to forward slashes.
 *
 * @param {string} path
 * @returns {string}
 */
function normalizePath(path) {
  return path.toLowerCase().replace(/\\/g, '/');
}

/**
 * Lower-cased extension (including the leading dot) of the path's final
 * segment, or '' when that segment contains no dot.
 *
 * The dot is searched for in the file name only, so a dotted directory with
 * an extensionless file (e.g. ".github/CODEOWNERS") yields '' rather than a
 * bogus value spanning a path separator.
 *
 * @param {string} path
 * @returns {string}
 */
function ext(path) {
  const fileName = base(path);
  const idx = fileName.lastIndexOf('.');
  return idx >= 0 ? fileName.slice(idx) : '';
}

/**
 * Final segment of the normalized path.
 *
 * @param {string} path
 * @returns {string}
 */
function base(path) {
  return normalizePath(path).split('/').pop();
}
18
+
19
/**
 * Report whether any path satisfies `matcher`; each path is normalized
 * before being handed to the matcher, and iteration stops at the first hit.
 *
 * @param {string[]} paths
 * @param {(normalizedPath: string) => *} matcher
 * @returns {boolean}
 */
function hasPath(paths, matcher) {
  for (const candidate of paths) {
    if (matcher(normalizePath(candidate))) return true;
  }
  return false;
}
22
+
23
/**
 * Count the paths whose normalized form satisfies `matcher`.
 *
 * @param {string[]} paths
 * @param {(normalizedPath: string) => *} matcher
 * @returns {number}
 */
function countBy(paths, matcher) {
  return paths.reduce(
    (total, candidate) => (matcher(normalizePath(candidate)) ? total + 1 : total),
    0
  );
}
26
+
27
/**
 * Locate the package.json to analyze among the downloaded files.
 *
 * When several package.json files were downloaded (monorepos, examples,
 * fixtures), the one with the fewest path segments is chosen so that a
 * root-level manifest wins over nested ones regardless of list order. Ties
 * keep the first match.
 *
 * @param {{files: Array<{path: string}>}} snapshot
 * @returns {object|undefined} The matching file entry, or undefined if none.
 */
function findPackageJson(snapshot) {
  let shallowest;
  let shallowestDepth = Infinity;

  for (const file of snapshot.files) {
    if (base(file.path) !== 'package.json') continue;
    const depth = normalizePath(file.path).split('/').length;
    if (depth < shallowestDepth) {
      shallowest = file;
      shallowestDepth = depth;
    }
  }

  return shallowest;
}
30
+
31
/**
 * Parse JSON text without throwing.
 *
 * @param {string} text
 * @returns {*} The parsed value, or null when the text is not valid JSON.
 */
function parseJsonSafe(text) {
  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Malformed input is an expected case; callers check for null.
  }
  return parsed;
}
38
+
39
/**
 * Append a finding record to `findings`.
 *
 * The id is "glf_" plus stableHash of "category|severity|title|file".
 * Falsy evidence entries are dropped and at most six are kept; a missing
 * file is stored as null.
 *
 * @param {object[]} findings - Array to append to (mutated).
 * @param {object} details - category, title, recommendation, plus optional
 *   severity ('medium'), evidence ([]), file and confidence ('medium').
 */
function addFinding(findings, details) {
  const {
    category,
    severity = 'medium',
    title,
    evidence = [],
    recommendation,
    file,
    confidence = 'medium'
  } = details;

  const fingerprint = `${category}|${severity}|${title}|${file || ''}`;
  const finding = {
    id: `glf_${stableHash(fingerprint, 10)}`,
    category,
    severity,
    title,
    evidence: evidence.filter(Boolean).slice(0, 6),
    recommendation,
    file: file || null,
    confidence
  };

  findings.push(finding);
}
51
+
52
/**
 * Tally files per language based on file extension.
 *
 * @param {string[]} paths
 * @returns {Array<{language: string, count: number}>} Sorted by count, highest first.
 */
function detectLanguages(paths) {
  const languageByExtension = new Map([
    ['.js', 'JavaScript'], ['.jsx', 'JavaScript'], ['.mjs', 'JavaScript'], ['.cjs', 'JavaScript'],
    ['.ts', 'TypeScript'], ['.tsx', 'TypeScript'],
    ['.py', 'Python'], ['.go', 'Go'], ['.rs', 'Rust'], ['.java', 'Java'], ['.kt', 'Kotlin'],
    ['.rb', 'Ruby'], ['.php', 'PHP'], ['.cs', 'C#'], ['.sol', 'Solidity'], ['.vy', 'Vyper'],
    ['.sh', 'Shell'], ['.tf', 'Terraform'], ['.yaml', 'YAML'], ['.yml', 'YAML']
  ]);

  const tally = new Map();
  for (const filePath of paths) {
    const language = languageByExtension.get(ext(filePath));
    if (!language) continue;
    tally.set(language, (tally.get(language) || 0) + 1);
  }

  const ranked = [...tally.entries()];
  ranked.sort(([, countA], [, countB]) => countB - countA);
  return ranked.map(([language, count]) => ({ language, count }));
}
67
+
68
/**
 * Infer frameworks/tooling from declared dependencies and well-known paths.
 *
 * @param {{tree: Array<{path: string}>}} snapshot
 * @param {object|null} packageJson - Parsed package.json, or null.
 * @returns {string[]} De-duplicated labels in detection order.
 */
function detectFrameworks(snapshot, packageJson) {
  const paths = snapshot.tree.map((entry) => normalizePath(entry.path));

  let deps = {};
  if (packageJson) {
    deps = {
      ...packageJson.dependencies,
      ...packageJson.devDependencies,
      ...packageJson.peerDependencies
    };
  }
  const names = new Set(Object.keys(deps || {}));

  const pathIncludes = (fragment) => hasPath(paths, (p) => p.includes(fragment));
  const pathEquals = (name) => hasPath(paths, (p) => p === name);

  // [label, detector] pairs, evaluated in this order.
  const detectors = [
    ['Next.js', () => names.has('next') || pathIncludes('next.config')],
    ['React', () => names.has('react')],
    ['Vite', () => names.has('vite') || pathIncludes('vite.config')],
    ['Express', () => names.has('express')],
    ['Fastify', () => names.has('fastify')],
    ['Hardhat', () => names.has('hardhat') || pathIncludes('hardhat.config')],
    ['Foundry', () => pathEquals('foundry.toml')],
    ['Go modules', () => pathEquals('go.mod')],
    ['Python/pyproject', () => pathEquals('pyproject.toml')],
    ['Docker', () => hasPath(paths, (p) => p === 'dockerfile' || p.endsWith('/dockerfile'))],
    ['GitHub Actions', () => pathIncludes('.github/workflows/')]
  ];

  const frameworks = detectors.filter(([, detect]) => detect()).map(([label]) => label);
  return unique(frameworks);
}
92
+
93
/**
 * Inspect the repository tree for baseline hygiene signals (README, license,
 * env template, CI, Docker, tests) and record a finding for each missing one.
 *
 * Compared with plain substring checks, detection here also recognizes:
 * - root-level test directories ("test/", "tests/", "__tests__/", "spec/"),
 *   additional JS/TS test suffixes and pytest-style "test_*.py" files;
 * - README files with any extension (README.rst, README.txt, ...);
 * - the ".yaml" spellings of Compose files.
 *
 * @param {{tree: Array<{path: string, type: string}>, files: Array<{content?: string}>, limits: object}} snapshot
 * @param {object[]} findings - Array that findings are appended to (mutated).
 * @returns {{hasReadme: boolean, hasLicense: boolean, hasEnvExample: boolean,
 *   hasCI: boolean, hasDocker: boolean, hasTests: boolean, fileCount: number,
 *   dirCount: number}}
 */
function analyzeStructure(snapshot, findings) {
  // hasPath normalizes every path before invoking the matcher.
  const paths = snapshot.tree.map((entry) => entry.path);
  const countEntries = (type) => snapshot.tree.filter((entry) => entry.type === type).length;
  const fileCount = countEntries('blob');
  const dirCount = countEntries('tree');

  const envTemplates = ['.env.example', '.env.sample', '.env.template'];
  const dockerFiles = ['dockerfile', 'docker-compose.yml', 'docker-compose.yaml', 'compose.yml', 'compose.yaml'];
  // A test directory may sit at the repository root, so anchor on ^ or '/'.
  const testDirPattern = /(^|\/)(tests?|__tests__|specs?)\//;
  const testFilePattern = /(\.(test|spec)\.[cm]?[jt]sx?|_test\.(go|py)|(^|\/)test_[^/]*\.py)$/;

  const hasReadme = hasPath(paths, (p) => base(p) === 'readme' || base(p).startsWith('readme.'));
  const hasLicense = hasPath(paths, (p) => base(p) === 'license' || base(p).startsWith('license.'));
  const hasEnvExample = hasPath(paths, (p) => envTemplates.includes(base(p)));
  const hasCI = hasPath(paths, (p) => p.startsWith('.github/workflows/') || p.includes('/.github/workflows/'));
  const hasDocker = hasPath(paths, (p) => dockerFiles.includes(base(p)));
  const hasTests = hasPath(paths, (p) => testDirPattern.test(p) || testFilePattern.test(p));

  if (!hasReadme) {
    addFinding(findings, {
      category: 'maintainability', severity: 'medium', title: 'README not detected',
      evidence: ['No README file found in the repository root tree.'],
      recommendation: 'Add a README that explains project purpose, setup, environment variables, run commands, and review surface.'
    });
  }

  if (!hasLicense) {
    addFinding(findings, {
      category: 'maintainability', severity: 'low', title: 'License file not detected',
      evidence: ['No LICENSE file found in the repository root tree.'],
      recommendation: 'Add a license file so repository usage and contribution terms are clear.'
    });
  }

  const envUsagePattern = /process\.env|import\.meta\.env|os\.environ|getenv\(/;
  if (!hasEnvExample && snapshot.files.some((f) => envUsagePattern.test(f.content || ''))) {
    addFinding(findings, {
      category: 'operations', severity: 'medium', title: 'Environment variables used without an env template',
      evidence: ['Code uses environment variables, but .env.example/.env.sample was not detected.'],
      recommendation: 'Add .env.example with variable names only and no real secrets.'
    });
  }

  if (!hasCI) {
    addFinding(findings, {
      category: 'operations', severity: 'medium', title: 'CI workflow not detected',
      evidence: ['No .github/workflows directory found in the analyzed tree.'],
      recommendation: 'Add a minimal CI pipeline for lint, test, build, and dependency/security checks.'
    });
  }

  if (!hasTests) {
    addFinding(findings, {
      category: 'tests', severity: 'high', title: 'Test surface not detected',
      evidence: ['No common test folders/files were found, such as tests/, __tests__, *.spec.ts, *.test.ts, or *_test.go.'],
      recommendation: 'Add unit tests for core logic and integration tests for key endpoints/tools.'
    });
  }

  if (fileCount > 1200 && snapshot.limits.treeTruncated) {
    addFinding(findings, {
      category: 'architecture', severity: 'medium', title: 'Large repository with truncated tree during screening',
      evidence: [`Tree returned ${snapshot.limits.treeEntriesReturned} entries; the original tree is likely larger.`],
      recommendation: 'Use deep scope or PR-based screening to focus review on changed files.'
    });
  }

  return { hasReadme, hasLicense, hasEnvExample, hasCI, hasDocker, hasTests, fileCount, dirCount };
}
155
+
156
/**
 * Analyze the repository's package.json for dependency and install-time risks.
 *
 * Checks performed: invalid JSON, missing lockfile, install-time lifecycle
 * scripts that invoke an interpreter/downloader, a short list of packages
 * that warrant manual review, and dotenv usage without an env template.
 *
 * Notes on matching:
 * - Only the hooks npm runs on install (preinstall, install, postinstall,
 *   prepare) are inspected, by exact name.
 * - Commands are matched on word boundaries so that e.g. "refresh" or
 *   "node_modules" do not count as "sh" / "node".
 *
 * @param {{tree: Array<{path: string}>}} snapshot
 * @param {object[]} findings - Array that findings are appended to (mutated).
 * @param {{path: string, content: string}|undefined} packageJsonFile
 * @returns {{packageManager: string|null, scripts: object, dependencies: object,
 *   packageName: string|null, packageVersion: string|null}}
 */
function analyzePackageJson(snapshot, findings, packageJsonFile) {
  // Every exit path returns the same shape so consumers never read undefined.
  const emptyResult = () => ({
    packageManager: null,
    scripts: {},
    dependencies: {},
    packageName: null,
    packageVersion: null
  });

  if (!packageJsonFile) return emptyResult();

  const pkg = parseJsonSafe(packageJsonFile.content);
  if (!pkg) {
    addFinding(findings, {
      category: 'maintainability', severity: 'medium', title: 'package.json is not valid JSON',
      evidence: [`File: ${packageJsonFile.path}`],
      recommendation: 'Fix package.json so dependency/build tooling can process it safely.',
      file: packageJsonFile.path
    });
    return emptyResult();
  }

  const paths = snapshot.tree.map((entry) => entry.path);
  const treeHasFile = (...fileNames) => hasPath(paths, (p) => fileNames.includes(base(p)));

  const hasLock = treeHasFile('package-lock.json', 'npm-shrinkwrap.json', 'pnpm-lock.yaml', 'yarn.lock', 'bun.lockb', 'bun.lock');
  const deps = { ...(pkg.dependencies || {}), ...(pkg.devDependencies || {}) };
  const scripts = pkg.scripts || {};

  if (!hasLock && Object.keys(deps).length > 0) {
    addFinding(findings, {
      category: 'dependencies', severity: 'medium', title: 'Dependency lockfile not detected',
      evidence: ['package.json declares dependencies, but no npm/pnpm/yarn/bun lockfile was found.'],
      recommendation: 'Commit a lockfile to make installs reproducible and reduce supply-chain risk.',
      file: packageJsonFile.path
    });
  }

  const installHooks = new Set(['preinstall', 'install', 'postinstall', 'prepare']);
  const executorPattern = /\b(curl|wget|node|bash|sh|python\d*|powershell|pwsh)\b/i;
  for (const [scriptName, command] of Object.entries(scripts)) {
    const commandText = String(command);
    if (installHooks.has(scriptName) && executorPattern.test(commandText)) {
      addFinding(findings, {
        category: 'dependencies', severity: 'high', title: `Risky lifecycle script: ${scriptName}`,
        evidence: [`${scriptName}: ${commandText.slice(0, 180)}`],
        recommendation: 'Review lifecycle scripts. Avoid downloading/executing scripts at install time unless strictly necessary.',
        file: packageJsonFile.path
      });
    }
  }

  const riskyPackages = ['request', 'node-sass', 'event-stream', 'colors', 'faker'];
  const riskyFound = riskyPackages.filter((name) => deps[name]);
  if (riskyFound.length) {
    addFinding(findings, {
      category: 'dependencies', severity: 'medium', title: 'Dependencies requiring manual review detected',
      evidence: riskyFound.map((name) => `${name}@${deps[name]}`),
      recommendation: 'Verify dependency status, maintainer activity, and modern alternatives. Run npm audit/pnpm audit.',
      file: packageJsonFile.path
    });
  }

  const directSecrets = ['dotenv'].filter((name) => deps[name]);
  // Accept the same env-template names as the structure analysis does.
  if (directSecrets.length && !treeHasFile('.env.example', '.env.sample', '.env.template')) {
    addFinding(findings, {
      category: 'operations', severity: 'low', title: 'dotenv used without an env sample',
      evidence: [`Dependency: ${directSecrets.join(', ')}`],
      recommendation: 'Add .env.example and environment variable documentation.',
      file: packageJsonFile.path
    });
  }

  let packageManager = 'npm';
  if (treeHasFile('pnpm-lock.yaml')) packageManager = 'pnpm';
  else if (treeHasFile('yarn.lock')) packageManager = 'yarn';
  else if (treeHasFile('bun.lockb', 'bun.lock')) packageManager = 'bun';

  return {
    packageManager,
    scripts,
    dependencies: deps,
    packageName: pkg.name || null,
    packageVersion: pkg.version || null
  };
}
223
+
224
/**
 * Run regex heuristics over every downloaded file and record findings for
 * hardcoded secrets, dynamic code execution, command execution near request
 * input, weak hashes, SQL interpolation, dangerouslySetInnerHTML, risky
 * GitHub Actions usage and Dockerfile hygiene.
 *
 * All checks are file-level co-occurrence heuristics, not data-flow
 * analysis; matched secret values are never copied into the findings.
 *
 * Dockerfile USER handling: instruction keywords are matched
 * case-insensitively and with optional leading whitespace, and the LAST
 * USER instruction decides — a file ending on "USER root" (or uid 0) is
 * reported just like a file without any USER instruction.
 *
 * @param {{files: Array<{path: string, content?: string}>}} snapshot
 * @param {object[]} findings - Array that findings are appended to (mutated).
 */
function analyzeCodeContent(snapshot, findings) {
  const secretPatterns = [
    { name: 'Private key', regex: /-----BEGIN (RSA |EC |OPENSSH |PGP )?PRIVATE KEY-----/i, severity: 'critical' },
    { name: 'GitHub token', regex: /gh[pousr]_[A-Za-z0-9_]{20,}/, severity: 'critical' },
    { name: 'AWS access key', regex: /AKIA[0-9A-Z]{16}/, severity: 'critical' },
    { name: 'Generic secret assignment', regex: /(api[_-]?key|secret|password|private[_-]?key)\s*[:=]\s*['"][^'"\n]{16,}['"]/i, severity: 'high' }
  ];
  // Template env files hold placeholder values, so they are not scanned for secrets.
  const envTemplateNames = ['.env.example', '.env.sample', '.env.template'];
  const userInstructionPattern = /^[ \t]*USER[ \t]+([^\s#]+)/gim;

  for (const file of snapshot.files) {
    const content = file.content || '';
    const lowerPath = normalizePath(file.path);
    const isWorkflow = lowerPath.includes('.github/workflows/');
    const isEnvTemplate = envTemplateNames.some((name) => lowerPath.includes(name));
    // Every finding produced in this loop is attributed to the current file.
    const report = (details) => addFinding(findings, { ...details, file: file.path });

    if (!isEnvTemplate) {
      for (const pattern of secretPatterns) {
        if (!pattern.regex.test(content)) continue;
        report({
          category: 'security', severity: pattern.severity, title: `Potential hardcoded secret: ${pattern.name}`,
          evidence: [`Pattern detected in ${file.path}. Secret value is intentionally not shown.`],
          recommendation: 'Rotate the secret if valid, remove it from git history, and use a secret manager/environment variables.',
          confidence: pattern.name === 'Generic secret assignment' ? 'medium' : 'high'
        });
      }
    }

    if (/\beval\s*\(|new Function\s*\(/.test(content)) {
      report({
        category: 'security', severity: 'high', title: 'Dynamic code execution detected',
        evidence: ['Contains eval() or new Function().'],
        recommendation: 'Avoid dynamic code execution. Use explicit parsers/validators.'
      });
    }

    if (/child_process|exec\s*\(|execSync\s*\(|spawn\s*\(/.test(content) && /req\.|request|params|query|body|argv/.test(content)) {
      report({
        category: 'security', severity: 'high', title: 'Command execution potentially influenced by user input',
        evidence: ['child_process/exec/spawn is present along with request/argv input references in the same file.'],
        recommendation: 'Validate argument allowlists, use execFile/spawn with argument arrays, and avoid shell interpolation.',
        confidence: 'medium'
      });
    }

    if (/(md5|sha1)\s*\(/i.test(content) && /(password|token|secret|hash)/i.test(content)) {
      report({
        category: 'security', severity: 'medium', title: 'Weak hash algorithm near secret/password context',
        evidence: ['MD5/SHA1 found in a password/token/secret/hash context.'],
        recommendation: 'Use bcrypt/argon2 for passwords, and SHA-256/HMAC for integrity where appropriate.'
      });
    }

    if (/(SELECT|INSERT|UPDATE|DELETE)\s+.*\$\{|`.*(SELECT|INSERT|UPDATE|DELETE)/is.test(content) && /(req\.|params|query|body)/.test(content)) {
      report({
        category: 'security', severity: 'high', title: 'Possible SQL query string interpolation',
        evidence: ['SQL statements and template interpolation/request input appear in the same file.'],
        recommendation: 'Use parameterized queries/ORM query builders and validate inputs.',
        confidence: 'medium'
      });
    }

    if (/dangerouslySetInnerHTML/.test(content)) {
      report({
        category: 'security', severity: 'medium', title: 'dangerouslySetInnerHTML detected',
        evidence: ['React dangerouslySetInnerHTML is used.'],
        recommendation: 'Ensure HTML is sanitized with a trusted library and does not come directly from untrusted user input.'
      });
    }

    if (isWorkflow && /pull_request_target/.test(content)) {
      report({
        category: 'operations', severity: 'high', title: 'GitHub Actions uses pull_request_target',
        evidence: ['pull_request_target can be dangerous if checkout/head code is not properly isolated.'],
        recommendation: 'Audit permissions and token scopes, and avoid running untrusted PR code with privileged tokens.'
      });
    }

    if (isWorkflow && /uses:\s+[^@\s]+\/[^@\s]+@(main|master|latest|v\d+)/i.test(content)) {
      report({
        category: 'dependencies', severity: 'medium', title: 'GitHub Action not pinned to a commit SHA',
        evidence: ['Workflow uses action tags/branches instead of immutable commit SHAs.'],
        recommendation: 'Pin third-party GitHub Actions to commit SHAs to reduce supply-chain risk.'
      });
    }

    if (base(file.path) === 'dockerfile') {
      if (/curl .*\|\s*(sh|bash)|wget .*\|\s*(sh|bash)/i.test(content)) {
        report({
          category: 'operations', severity: 'high', title: 'Dockerfile executes remote scripts directly',
          evidence: ['Detected curl/wget piped directly to a shell.'],
          recommendation: 'Download artifacts with verified checksum/signature and avoid piping directly to shell.'
        });
      }

      // The last USER instruction is the one in effect at the end of the file.
      const declaredUsers = [...String(content).matchAll(userInstructionPattern)].map((match) => match[1].toLowerCase());
      const finalUser = declaredUsers.at(-1);
      const endsAsRoot = finalUser !== undefined && /^(root|0)(:.*)?$/.test(finalUser);
      if (finalUser === undefined || endsAsRoot) {
        report({
          category: 'operations', severity: 'medium', title: 'Dockerfile does not set a non-root USER',
          evidence: [endsAsRoot ? `Final USER instruction is "${finalUser}".` : 'No USER instruction detected.'],
          recommendation: 'Add a non-root user for runtime containers.'
        });
      }
    }
  }
}
333
+
334
/**
 * Derive pull-request signals and record findings for oversized PRs and for
 * PRs that remove test files.
 *
 * Test files are recognized by path structure (a test/tests/__tests__/spec
 * directory, a ".test."/".spec."/"_test." file name, or a "test_" prefix)
 * rather than by the bare substring "test", which also matched unrelated
 * names such as "latest.js". Missing size counters are treated as 0 so the
 * thresholds and the reported churn never become NaN.
 *
 * @param {{mode: string, pullRequest?: object, files: Array<{path: string, status?: string}>}} snapshot
 * @param {object[]} findings - Array that findings are appended to (mutated).
 * @returns {{changedFiles: number, additions: number, deletions: number, churn: number}|null}
 *   null when the snapshot is not a pull request.
 */
function analyzePullRequest(snapshot, findings) {
  if (snapshot.mode !== 'pull_request' || !snapshot.pullRequest) return null;

  const { changedFiles = 0, additions = 0, deletions = 0 } = snapshot.pullRequest;
  const churn = additions + deletions;

  if (changedFiles > 40 || churn > 2500) {
    addFinding(findings, {
      category: 'architecture', severity: 'high', title: 'PR is too large for safe review',
      evidence: [`Changed files: ${changedFiles}`, `Additions: ${additions}`, `Deletions: ${deletions}`],
      recommendation: 'Split the PR into reviewable chunks or require targeted reviewers per domain.'
    });
  }

  const testPathPattern = /(^|\/)(tests?|__tests__|specs?)\/|[._](test|spec)\.[^/]+$|(^|\/)test_[^/]+$/;
  const removedTests = snapshot.files.some(
    (file) => file.status === 'removed' && testPathPattern.test(normalizePath(file.path))
  );
  if (removedTests) {
    addFinding(findings, {
      category: 'tests', severity: 'high', title: 'PR removes test files',
      evidence: ['A changed file under test/spec paths has status removed.'],
      recommendation: 'Ensure replacement tests exist or explain why test removal is safe.'
    });
  }

  return { changedFiles, additions, deletions, churn };
}
361
+
362
/**
 * Aggregate findings into an overall risk score, level and merge readiness.
 *
 * The total is the sum of the severity weights, rounded and capped at 100.
 * Each category accumulates weight * 1.6, also capped at 100. Levels:
 * >= 75 critical, >= 50 high, >= 25 medium, otherwise low.
 *
 * @param {Array<{severity: string, category: string}>} findings
 * @returns {{score: number, level: string, mergeReadiness: string, categoryScores: object}}
 */
function computeRisk(findings) {
  const categoryScores = Object.fromEntries(CategoryNames.map((category) => [category, 0]));

  let total = 0;
  findings.forEach(({ severity, category }) => {
    const weight = SeverityWeights[severity] || 0;
    total += weight;
    const previous = categoryScores[category] || 0;
    categoryScores[category] = Math.min(100, previous + weight * 1.6);
  });

  const score = Math.min(100, Math.round(total));

  // Bands are ordered from most to least severe; the first match wins.
  const bands = [
    { min: 75, level: 'critical', mergeReadiness: 'blocked_until_remediation' },
    { min: 50, level: 'high', mergeReadiness: 'needs_senior_review' },
    { min: 25, level: 'medium', mergeReadiness: 'review_required' }
  ];
  const fallback = { level: 'low', mergeReadiness: 'ready_with_standard_review' };
  const { level, mergeReadiness } = bands.find((band) => score >= band.min) ?? fallback;

  const roundedScores = {};
  for (const [category, value] of Object.entries(categoryScores)) {
    roundedScores[category] = Math.round(Math.min(100, value));
  }

  return { score, level, mergeReadiness, categoryScores: roundedScores };
}
393
+
394
/**
 * Assemble the five review chapters of a report: context, risk map,
 * architecture/surface, top findings (the eight highest-weighted) and
 * suggested reviewer questions.
 *
 * @param {object} params - snapshot, findings, signals, packageInfo,
 *   frameworks, languages and risk, as produced during analysis.
 * @returns {Array<{title: string, summary: string, bullets: string[]}>}
 */
function buildChapters({ snapshot, findings, signals, packageInfo, frameworks, languages, risk }) {
  const weightOf = (finding) => SeverityWeights[finding.severity] || 0;
  const topFindings = [...findings].sort((a, b) => weightOf(b) - weightOf(a)).slice(0, 8);

  let contextSummary;
  if (snapshot.mode === 'pull_request') {
    contextSummary = `Screening PR #${snapshot.pullRequest.number}: ${snapshot.pullRequest.title}`;
  } else {
    contextSummary = `Screening repository ${snapshot.owner}/${snapshot.repo} at ref ${snapshot.ref}`;
  }
  const truncatedNote = snapshot.limits.treeTruncated ? ' (truncated)' : '';
  const contextChapter = {
    title: 'Repository / PR Context',
    summary: contextSummary,
    bullets: [
      `Mode: ${snapshot.mode}`,
      `Primary language: ${snapshot.language || languages[0]?.language || 'unknown'}`,
      `Downloaded files: ${snapshot.limits.filesDownloaded}`,
      `Tree entries: ${snapshot.limits.treeEntriesReturned}${truncatedNote}`
    ]
  };

  const riskChapter = {
    title: 'Risk Map',
    summary: `Overall risk is ${risk.level.toUpperCase()} with score ${risk.score}/100. Merge readiness: ${risk.mergeReadiness}.`,
    bullets: Object.entries(risk.categoryScores).map(([category, score]) => `${category}: ${score}/100`)
  };

  const languageList = languages.slice(0, 6).map((l) => `${l.language}(${l.count})`).join(', ');
  const surfaceChapter = {
    title: 'Architecture & Surface',
    summary: frameworks.length
      ? `Detected framework/surface: ${frameworks.join(', ')}`
      : 'Primary framework is not clear from the analyzed files.',
    bullets: [
      `Files: ${signals.fileCount}`,
      `Directories: ${signals.dirCount}`,
      `Package manager: ${packageInfo.packageManager || 'not detected'}`,
      `Languages: ${languageList || 'unknown'}`
    ]
  };

  const hasTopFindings = Boolean(topFindings.length);
  const findingsChapter = {
    title: 'Top Findings',
    summary: hasTopFindings
      ? 'Review these findings first.'
      : 'No significant findings were detected by the heuristic pass.',
    bullets: hasTopFindings
      ? topFindings.map((f) => `[${f.severity}] ${f.title}${f.file ? ` — ${f.file}` : ''}`)
      : ['Continue with manual review for business logic and edge cases.']
  };

  const questionsChapter = {
    title: 'Suggested Reviewer Questions',
    summary: 'These questions help reviewers decide merge readiness.',
    bullets: buildDecisionQuestions({ snapshot, findings, risk })
  };

  return [contextChapter, riskChapter, surfaceChapter, findingsChapter, questionsChapter];
}
438
+
439
/**
 * Pick the reviewer questions that apply to this screening result.
 *
 * @param {{snapshot: object, findings: Array<{category: string}>, risk: {level: string}}} params
 * @returns {string[]} Up to eight de-duplicated questions; the rollback
 *   question is always included.
 */
function buildDecisionQuestions({ snapshot, findings, risk }) {
  const hasCategory = (category) => findings.some((f) => f.category === category);

  // [applies, question] pairs in presentation order.
  const candidates = [
    [risk.level === 'critical' || risk.level === 'high', 'Which high/critical findings must be resolved before merge, and who owns each one?'],
    [hasCategory('security'), 'Have all user inputs, secrets, tokens, and permission boundaries been validated?'],
    [hasCategory('tests'), 'Which tests prove critical paths, error paths, and regression cases are covered?'],
    [snapshot.mode === 'pull_request', 'Is this PR still reviewable as-is, or should it be split by change domain?'],
    [hasCategory('dependencies'), 'Have new dependencies/lifecycle scripts/GitHub Actions been audited for supply-chain risk?'],
    [true, 'What is the rollback plan if this change causes an incident after deployment?']
  ];

  const questions = candidates.filter(([applies]) => applies).map(([, question]) => question);
  return unique(questions).slice(0, 8);
}
459
+
460
/**
 * Build the "next actions" list: finding recommendations ordered by
 * severity weight (highest first), followed by three standing items.
 *
 * @param {Array<{severity: string, recommendation?: string}>} findings
 * @param {{score: number}} risk
 * @returns {string[]} Up to twelve de-duplicated recommendations.
 */
function buildRecommendations(findings, risk) {
  const weightOf = (finding) => SeverityWeights[finding.severity] || 0;
  const fromFindings = [...findings]
    .sort((a, b) => weightOf(b) - weightOf(a))
    .map((finding) => finding.recommendation)
    .filter(Boolean);

  let mergeGate;
  if (risk.score >= 50) {
    mergeGate = 'Do not merge until high/critical findings have explicit owners and resolutions.';
  } else {
    mergeGate = 'Continue manual review for business logic, data flow, and edge cases.';
  }

  const standing = [
    mergeGate,
    'Run local lint/test/build and CI before declaring merge-ready.',
    'Use this MCP output as initial screening, not as a replacement for human security review.'
  ];

  return unique(fromFindings.concat(standing)).slice(0, 12);
}
470
+
471
/**
 * Render a report object as a markdown document.
 *
 * Sections, in order: header fields, Summary, Category Scores, Findings,
 * Review Chapters, Next Actions and a closing disclaimer.
 *
 * @param {object} report - Needs reportId, target, risk, summary, findings,
 *   chapters and recommendations.
 * @returns {string} Markdown joined with "\n".
 */
function buildMarkdownReport(report) {
  const { target, risk } = report;

  const header = [
    `# GitLumen MCP Screening Report`,
    '',
    `**Report ID:** ${report.reportId}`,
    `**Target:** ${target.repoUrl}`,
    `**Mode:** ${target.mode}`,
    `**Ref:** ${target.ref}`,
    `**Risk:** ${risk.level.toUpperCase()} (${risk.score}/100)`,
    `**Merge readiness:** ${risk.mergeReadiness}`
  ];

  const categoryLines = Object.entries(risk.categoryScores).map(
    ([category, score]) => `- ${category}: ${score}/100`
  );

  let findingLines;
  if (report.findings.length) {
    findingLines = report.findings.flatMap((finding) => [
      `- **[${finding.severity}] ${finding.title}**${finding.file ? ` — \`${finding.file}\`` : ''}`,
      ...(finding.evidence || []).map((evidence) => ` - Evidence: ${evidence}`),
      ` - Recommendation: ${finding.recommendation}`
    ]);
  } else {
    findingLines = ['- No significant heuristic findings. Continue manual review.'];
  }

  // Each chapter is followed by a blank separator line.
  const chapterLines = report.chapters.flatMap((chapter) => [
    `### ${chapter.title}`,
    chapter.summary,
    ...chapter.bullets.map((bullet) => `- ${bullet}`),
    ''
  ]);

  const actionLines = report.recommendations.map((item) => `- ${item}`);

  return [
    ...header,
    '',
    `## Summary`,
    report.summary,
    '',
    `## Category Scores`,
    ...categoryLines,
    '',
    `## Findings`,
    ...findingLines,
    '',
    `## Review Chapters`,
    ...chapterLines,
    `## Next Actions`,
    ...actionLines,
    '',
    '> Generated by GitLumen MCP Server prototype. This is a heuristic screening layer, not a replacement for human code/security review.'
  ].join('\n');
}
514
+
515
/**
 * Run the full heuristic analysis over a repository/PR snapshot and build
 * the report object, including its markdown rendering.
 *
 * Analysis passes run in this order: package.json, structure, code content,
 * pull request. Findings are then sorted in place by severity weight
 * (highest first).
 *
 * @param {object} snapshot - Snapshot with tree, files, limits and repository metadata.
 * @param {{scope?: string, maxFiles?: number}} [input] - Original screening input.
 * @returns {object} Full report.
 */
export function analyzeSnapshot(snapshot, input = {}) {
  const findings = [];

  const languages = detectLanguages(snapshot.tree.map((entry) => entry.path));
  const packageJsonFile = findPackageJson(snapshot);
  const packageInfo = analyzePackageJson(snapshot, findings, packageJsonFile);
  const parsedPackageJson = packageJsonFile ? parseJsonSafe(packageJsonFile.content) : null;
  const frameworks = detectFrameworks(snapshot, parsedPackageJson);
  const signals = analyzeStructure(snapshot, findings);
  analyzeCodeContent(snapshot, findings);
  const prSignals = analyzePullRequest(snapshot, findings);

  const risk = computeRisk(findings);
  const reportId = createReportId({ repoUrl: snapshot.repoUrl, scope: input.scope });

  let summary = 'The repository/PR appears relatively low risk based on initial heuristic screening. Continue manual review for business logic.';
  if (risk.score >= 75) {
    summary = 'The repository/PR shows critical risk signals. Remediation is required before it can be considered merge/deploy-ready.';
  } else if (risk.score >= 50) {
    summary = 'The repository/PR has high risk. Senior review and remediation of key findings are recommended before merge.';
  } else if (risk.score >= 25) {
    summary = 'The repository/PR has medium risk. Continue manual review with focus on the listed findings.';
  }

  const generatedAt = new Date().toISOString();
  const weightOf = (finding) => SeverityWeights[finding.severity] || 0;
  const sortedFindings = findings.sort((a, b) => weightOf(b) - weightOf(a));

  const scriptNames = packageInfo.scripts ? Object.keys(packageInfo.scripts) : [];
  const dependencyCount = packageInfo.dependencies ? Object.keys(packageInfo.dependencies).length : 0;

  const decisionQuestions = buildDecisionQuestions({ snapshot, findings: sortedFindings, risk });
  const recommendations = buildRecommendations(sortedFindings, risk);
  const chapters = buildChapters({ snapshot, findings: sortedFindings, signals, packageInfo, frameworks, languages, risk });

  const report = {
    reportId,
    generatedAt,
    generator: {
      name: 'gitlumen-screen-sdk',
      version: '0.1.0',
      mode: 'local-heuristic-screening'
    },
    target: {
      mode: snapshot.mode,
      owner: snapshot.owner,
      repo: snapshot.repo,
      repoUrl: snapshot.repoUrl,
      ref: snapshot.ref,
      defaultBranch: snapshot.defaultBranch,
      pullRequest: snapshot.pullRequest || null
    },
    input: {
      scope: input.scope || 'standard',
      maxFiles: input.maxFiles || null
    },
    repositorySignals: {
      description: snapshot.description,
      stars: snapshot.stars,
      forks: snapshot.forks,
      primaryLanguage: snapshot.language,
      detectedLanguages: languages,
      frameworks,
      packageInfo: {
        packageManager: packageInfo.packageManager,
        packageName: packageInfo.packageName,
        packageVersion: packageInfo.packageVersion,
        scripts: scriptNames,
        dependencyCount
      },
      structure: signals,
      pullRequest: prSignals,
      limits: snapshot.limits
    },
    risk,
    summary,
    findings: sortedFindings,
    chapters,
    decisionQuestions,
    recommendations
  };

  // The markdown rendering reads the finished report, so it is attached last.
  report.markdown = buildMarkdownReport(report);

  return report;
}
@@ -0,0 +1,269 @@
1
+ import { createConfig, DEFAULT_LIMITS } from '../config.js';
2
+ import { parseGitHubUrl } from '../utils/githubUrl.js';
3
+
4
/**
 * Lower-cased file extensions (leading dot included) that `isTextLike`
 * accepts as text content.
 */
const TEXT_EXTENSIONS = new Set([
  // JavaScript / TypeScript, data and docs
  '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs',
  '.json', '.jsonc', '.md', '.mdx',
  '.yml', '.yaml', '.toml', '.ini', '.env',
  // Other programming, shell and query languages
  '.py', '.rb', '.go', '.rs', '.java', '.kt', '.cs', '.php',
  '.sol', '.vy', '.sh', '.bash', '.zsh', '.sql', '.graphql',
  // Web, infrastructure and miscellaneous text
  '.css', '.scss', '.html', '.vue', '.svelte',
  '.dockerfile', '.tf', '.tfvars', '.gradle', '.xml', '.properties', '.txt'
]);
9
+
10
/**
 * Lower-cased base names that are always treated as text by `isTextLike`
 * and receive the largest boost in `scorePathPriority`.
 */
const IMPORTANT_FILENAMES = new Set([
  // Node.js manifests and lockfiles
  'package.json', 'package-lock.json', 'pnpm-lock.yaml', 'yarn.lock', 'bun.lockb',
  // Python
  'requirements.txt', 'pyproject.toml', 'poetry.lock', 'pipfile', 'pipfile.lock',
  // Go, Rust, JVM
  'go.mod', 'go.sum', 'cargo.toml', 'cargo.lock',
  'pom.xml', 'build.gradle', 'gradle.properties',
  // Containers
  'dockerfile', 'docker-compose.yml', 'compose.yml', '.dockerignore',
  // Environment templates
  '.env.example', '.env.sample', '.env.template',
  // Project governance files
  'readme.md', 'license', 'security.md', 'codeowners',
  // Build / framework configuration
  'tsconfig.json', 'vite.config.js', 'vite.config.ts', 'next.config.js', 'next.config.mjs',
  // Smart-contract tooling
  'hardhat.config.js', 'hardhat.config.ts', 'foundry.toml', 'truffle-config.js'
]);
20
+
21
/**
 * Return the lower-cased extension of a path's final segment, including the
 * leading dot, or '' when that segment contains no dot.
 *
 * Only the last path segment is inspected, so a dot inside a directory name
 * (e.g. `releases/v1.2/Makefile`) is never mistaken for a file extension.
 * A dotfile such as `.env` yields its whole name (`.env`).
 *
 * @param {string} path - Slash-separated file path.
 * @returns {string} Extension such as `.js`, or an empty string.
 */
function extnameLower(path) {
  const fileName = path.slice(path.lastIndexOf('/') + 1).toLowerCase();
  const dotIndex = fileName.lastIndexOf('.');
  return dotIndex === -1 ? '' : fileName.slice(dotIndex);
}
27
+
28
/**
 * Return the lower-cased final segment of a slash-separated path.
 *
 * @param {string} path - File path using `/` as separator.
 * @returns {string} Lower-cased text after the last `/` (the whole path when there is none).
 */
function basenameLower(path) {
  const lastSlash = path.lastIndexOf('/');
  const fileName = path.slice(lastSlash + 1);
  return fileName.toLowerCase();
}
31
+
32
/**
 * Decide whether a path should be treated as text content.
 *
 * A path qualifies when its base name is in IMPORTANT_FILENAMES, when the
 * base name starts with `.env`, or when its extension is in TEXT_EXTENSIONS.
 *
 * @param {string} path - Slash-separated file path.
 * @returns {boolean} True when the path looks like a text file.
 */
function isTextLike(path) {
  const fileName = basenameLower(path);
  const recognisedByName = IMPORTANT_FILENAMES.has(fileName) || fileName.startsWith('.env');
  return recognisedByName || TEXT_EXTENSIONS.has(extnameLower(path));
}
38
+
39
// Directory names whose contents are excluded from screening (dependencies,
// build output, caches, VCS metadata, static assets, virtualenvs).
const SKIPPED_DIRECTORY_NAMES = new Set([
  'node_modules', 'vendor', 'dist', 'build', '.next', '.nuxt', '.git', 'coverage', '.cache', 'target',
  'assets', '.turbo', '.vercel', '__pycache__', '.venv', 'venv'
]);

// File names excluded regardless of location.
// package-lock.json: metadata exists in tree; content usually too noisy for screening.
const SKIPPED_FILE_NAMES = new Set(['package-lock.json']);

/**
 * Decide whether a path should be excluded from content screening.
 *
 * Matching is case-insensitive and done on whole path segments: a path is
 * skipped when any of its directory segments is in SKIPPED_DIRECTORY_NAMES or
 * its file name is in SKIPPED_FILE_NAMES. Comparing whole segments (rather
 * than searching for substrings such as `build/`) keeps directories like
 * `rebuild/` or `subtarget/` from being skipped by accident.
 *
 * @param {string} path - Slash-separated file path.
 * @returns {boolean} True when the path should be skipped.
 */
function shouldSkipPath(path) {
  const segments = path.toLowerCase().split('/');
  const fileName = segments.pop();
  if (SKIPPED_FILE_NAMES.has(fileName)) return true;
  return segments.some((directory) => SKIPPED_DIRECTORY_NAMES.has(directory));
}
47
+
48
/**
 * Compute a heuristic priority for a path; higher scores are selected first
 * when only a limited number of files can be downloaded.
 *
 * The score is the sum of the points of every rule that matches:
 * important file name (100), GitHub workflow (90), source/app/server/api
 * location (40), sensitive keyword (35), primary code extension (25),
 * test/spec keyword (20).
 *
 * @param {string} path - Slash-separated file path.
 * @returns {number} Accumulated priority score (0 when nothing matches).
 */
function scorePathPriority(path) {
  const lowered = path.toLowerCase();
  const mentionsAny = (needles) => needles.some((needle) => lowered.includes(needle));
  const codeExtensions = ['.js', '.ts', '.tsx', '.jsx', '.py', '.go', '.rs', '.sol'];

  const rules = [
    [IMPORTANT_FILENAMES.has(basenameLower(path)), 100],
    [lowered.includes('/.github/workflows/') || lowered.startsWith('.github/workflows/'), 90],
    [mentionsAny(['src/', 'app/', 'server/', 'api/']), 40],
    [mentionsAny(['test', 'spec']), 20],
    [mentionsAny(['auth', 'security', 'wallet', 'payment']), 35],
    [codeExtensions.includes(extnameLower(path)), 25]
  ];

  return rules.reduce((total, [matched, points]) => (matched ? total + points : total), 0);
}
60
+
61
/**
 * Client for the GitHub REST API and the raw-content host that assembles the
 * repository and pull-request snapshots handed to the analyzer.
 */
export class GitHubClient {
  /**
   * @param {object} [options] - Forwarded to `createConfig`. `options.token`,
   *   when not null/undefined, takes precedence over the configured `githubToken`.
   */
  constructor(options = {}) {
    this.config = createConfig(options);
    this.token = options.token ?? this.config.githubToken;
  }

  /**
   * Build the request headers, adding a bearer token when one is configured.
   *
   * @param {object} [extra] - Headers merged over the defaults.
   * @returns {object} Header map for `fetch`.
   */
  headers(extra = {}) {
    const headers = {
      'Accept': 'application/vnd.github+json',
      'X-GitHub-Api-Version': '2022-11-28',
      'User-Agent': this.config.userAgent,
      ...extra
    };
    if (this.token) headers.Authorization = `Bearer ${this.token}`;
    return headers;
  }

  /**
   * Call a GitHub API endpoint and return the parsed JSON body.
   *
   * @param {string} path - Path appended to `config.githubApiBase`.
   * @param {object} [options] - Extra `fetch` options; `options.headers` is merged via `headers()`.
   * @returns {Promise<any>} Parsed JSON response.
   * @throws {Error} When the request cannot be sent or the response status is not OK.
   */
  async api(path, options = {}) {
    const url = `${this.config.githubApiBase}${path}`;
    let response;
    try {
      response = await fetch(url, {
        ...options,
        headers: this.headers(options.headers || {})
      });
    } catch (error) {
      // Keep the original failure attached so callers can inspect the root cause.
      throw new Error(
        `Unable to reach GitHub API: ${error.cause?.code || error.code || error.message}. Check internet/DNS or set proxy/VPN if needed.`,
        { cause: error }
      );
    }

    if (!response.ok) {
      // The body is only used to enrich the message; ignore failures reading it.
      const body = await response.text().catch(() => '');
      throw new Error(`GitHub API ${response.status} ${response.statusText}: ${path}${body ? ` - ${body.slice(0, 300)}` : ''}`);
    }
    return response.json();
  }

  /** Fetch repository metadata (`/repos/{owner}/{repo}`). */
  async getRepository(owner, repo) {
    return this.api(`/repos/${owner}/${repo}`);
  }

  /**
   * Fetch the recursive git tree for a ref.
   *
   * @returns {Promise<object[]>} Tree entries, or an empty array when the response has none.
   */
  async getTree(owner, repo, ref) {
    const tree = await this.api(`/repos/${owner}/${repo}/git/trees/${encodeURIComponent(ref)}?recursive=1`);
    return tree.tree || [];
  }

  /** Fetch pull-request metadata (`/repos/{owner}/{repo}/pulls/{number}`). */
  async getPull(owner, repo, pullNumber) {
    return this.api(`/repos/${owner}/${repo}/pulls/${pullNumber}`);
  }

  /**
   * List the files changed by a pull request, following pagination.
   *
   * Requests pages of 100 entries until `maxFiles` entries are collected, a
   * short page is returned, or 10 pages have been read.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {number} pullNumber
   * @param {number} [maxFiles] - Maximum number of entries to return.
   * @returns {Promise<object[]>} At most `maxFiles` file entries.
   */
  async getPullFiles(owner, repo, pullNumber, maxFiles = DEFAULT_LIMITS.maxPrFiles) {
    const collected = [];
    for (let page = 1; page <= 10 && collected.length < maxFiles; page += 1) {
      const files = await this.api(`/repos/${owner}/${repo}/pulls/${pullNumber}/files?per_page=100&page=${page}`);
      collected.push(...files);
      if (files.length < 100) break;
    }
    return collected.slice(0, maxFiles);
  }

  /**
   * Download a file from the raw-content host.
   *
   * This is best-effort: network failures and non-OK responses yield `null`
   * so a single unreadable file does not abort a snapshot.
   *
   * @param {string} owner
   * @param {string} repo
   * @param {string} ref - Branch, tag or commit SHA; `/` inside the ref is kept unescaped.
   * @param {string} path - Repository-relative path; each segment is URL-encoded.
   * @param {number} [maxBytes] - Content beyond this many bytes is cut off.
   * @returns {Promise<{content: string, truncated: boolean, size: number}|null>}
   *   `size` is the full downloaded byte length even when `content` is truncated.
   */
  async getRawFile(owner, repo, ref, path, maxBytes = this.config.maxFileBytes) {
    const encodedRef = encodeURIComponent(ref).replace(/%2F/g, '/');
    const encodedPath = path.split('/').map(encodeURIComponent).join('/');
    const url = `${this.config.rawBase}/${owner}/${repo}/${encodedRef}/${encodedPath}`;
    let response;
    try {
      response = await fetch(url, { headers: this.headers({ Accept: 'text/plain,*/*' }) });
    } catch {
      return null;
    }
    if (!response.ok) return null;
    const buffer = await response.arrayBuffer();
    if (buffer.byteLength > maxBytes) {
      const sliced = Buffer.from(buffer).subarray(0, maxBytes).toString('utf8');
      return { content: sliced, truncated: true, size: buffer.byteLength };
    }
    return { content: Buffer.from(buffer).toString('utf8'), truncated: false, size: buffer.byteLength };
  }

  /**
   * Choose which tree entries get their content downloaded.
   *
   * Keeps blobs that are not skipped and look like text, orders them by
   * descending `scorePathPriority`, and caps the list.
   *
   * @param {object[]} treeEntries - Git tree entries (`type`, `path`).
   * @param {string} scope - `quick`, `standard` or `deep`; unknown values use the standard cap.
   * @param {number} [requestedMaxFiles] - Explicit cap; overrides the scope default when truthy.
   * @returns {object[]} Selected tree entries.
   */
  selectFilesForContent(treeEntries, scope, requestedMaxFiles) {
    const maxFiles = requestedMaxFiles || {
      quick: DEFAULT_LIMITS.quickMaxFiles,
      standard: DEFAULT_LIMITS.standardMaxFiles,
      deep: DEFAULT_LIMITS.deepMaxFiles
    }[scope] || DEFAULT_LIMITS.standardMaxFiles;

    return treeEntries
      .filter((entry) => entry.type === 'blob')
      .filter((entry) => !shouldSkipPath(entry.path))
      .filter((entry) => isTextLike(entry.path))
      .sort((a, b) => scorePathPriority(b.path) - scorePathPriority(a.path))
      .slice(0, maxFiles);
  }

  /**
   * Build a snapshot for a repository URL, or delegate to
   * `loadPullRequestSnapshot` when the URL points at a pull request.
   *
   * @param {object} params
   * @param {string} params.repoUrl - Repository or pull-request URL.
   * @param {string} [params.branch] - Ref to read; defaults to the repository's default branch.
   * @param {string} [params.scope='standard'] - Screening scope.
   * @param {number} [params.maxFiles] - Cap on downloaded files.
   * @returns {Promise<object>} Snapshot with metadata, `tree`, `files` and `limits`.
   */
  async loadRepositorySnapshot({ repoUrl, branch, scope = 'standard', maxFiles }) {
    const parsed = parseGitHubUrl(repoUrl);
    const repoMeta = await this.getRepository(parsed.owner, parsed.repo);
    const ref = branch || repoMeta.default_branch;

    if (parsed.pullNumber) {
      return this.loadPullRequestSnapshot({ ...parsed, repoMeta, ref, scope, maxFiles });
    }

    const tree = await this.getTree(parsed.owner, parsed.repo, ref);
    const clippedTree = tree.slice(0, DEFAULT_LIMITS.maxTreeEntries);
    const selected = this.selectFilesForContent(clippedTree, scope, maxFiles);
    const files = [];

    for (const entry of selected) {
      const raw = await this.getRawFile(parsed.owner, parsed.repo, ref, entry.path);
      if (!raw) continue; // unreadable files are simply left out of the snapshot
      files.push({
        path: entry.path,
        size: entry.size || raw.size || 0,
        content: raw.content,
        truncated: raw.truncated,
        status: 'unchanged'
      });
    }

    return {
      mode: 'repository',
      owner: parsed.owner,
      repo: parsed.repo,
      repoUrl: parsed.normalizedRepoUrl,
      ref,
      defaultBranch: repoMeta.default_branch,
      description: repoMeta.description || '',
      stars: repoMeta.stargazers_count || 0,
      forks: repoMeta.forks_count || 0,
      language: repoMeta.language || null,
      isPrivate: Boolean(repoMeta.private),
      tree: clippedTree.map((entry) => ({ path: entry.path, type: entry.type, size: entry.size || 0 })),
      files,
      limits: {
        treeEntriesReturned: clippedTree.length,
        treeTruncated: tree.length > clippedTree.length,
        filesDownloaded: files.length,
        scope
      }
    };
  }

  /**
   * Build a snapshot for a pull request: PR metadata, the list of changed
   * files, and the head-revision content of the highest-priority files.
   *
   * `maxFiles` caps only how many files have their content downloaded. The
   * changed-file listing (`tree`) is always fetched up to at least
   * `DEFAULT_LIMITS.maxPrFiles` entries, mirroring the repository snapshot
   * where the tree is independent of the download cap.
   *
   * @param {object} params
   * @param {string} params.owner
   * @param {string} params.repo
   * @param {number} params.pullNumber
   * @param {object} params.repoMeta - Repository metadata from `getRepository`.
   * @param {string} [params.scope='standard'] - Screening scope.
   * @param {number} [params.maxFiles] - Cap on downloaded files.
   * @returns {Promise<object>} Snapshot with metadata, `pullRequest`, `tree`, `files` and `limits`.
   */
  async loadPullRequestSnapshot({ owner, repo, pullNumber, repoMeta, scope = 'standard', maxFiles }) {
    const pull = await this.getPull(owner, repo, pullNumber);

    // A larger explicit maxFiles still widens the listing, as it did before.
    const listCap = Math.max(maxFiles || 0, DEFAULT_LIMITS.maxPrFiles);
    const prFiles = await this.getPullFiles(owner, repo, pullNumber, listCap);

    const contentCap = maxFiles || ({ quick: 60, standard: 120, deep: 240 }[scope] || 120);
    const selected = prFiles
      .filter((file) => !shouldSkipPath(file.filename))
      .filter((file) => isTextLike(file.filename))
      .sort((a, b) => scorePathPriority(b.filename) - scorePathPriority(a.filename))
      .slice(0, contentCap);

    const files = [];
    const ref = pull.head?.sha || pull.head?.ref || repoMeta.default_branch;
    for (const file of selected) {
      let content = '';
      let truncated = false;
      // Removed files have no content at the head revision.
      if (file.status !== 'removed') {
        const raw = await this.getRawFile(owner, repo, ref, file.filename);
        content = raw?.content || '';
        truncated = Boolean(raw?.truncated);
      }
      files.push({
        path: file.filename,
        // Guard each operand so a missing count cannot produce NaN.
        size: file.changes ?? ((file.additions || 0) + (file.deletions || 0)),
        content,
        truncated,
        patch: file.patch || '',
        status: file.status || 'modified',
        additions: file.additions || 0,
        deletions: file.deletions || 0
      });
    }

    // Prefer the PR's own changed-file count; fall back to "the cap was hit".
    const treeTruncated = Number.isFinite(pull.changed_files)
      ? pull.changed_files > prFiles.length
      : prFiles.length >= listCap;

    return {
      mode: 'pull_request',
      owner,
      repo,
      repoUrl: `https://github.com/${owner}/${repo}`,
      ref,
      defaultBranch: repoMeta.default_branch,
      description: repoMeta.description || '',
      stars: repoMeta.stargazers_count || 0,
      forks: repoMeta.forks_count || 0,
      language: repoMeta.language || null,
      isPrivate: Boolean(repoMeta.private),
      pullRequest: {
        number: pullNumber,
        title: pull.title,
        state: pull.state,
        author: pull.user?.login || null,
        baseRef: pull.base?.ref || null,
        headRef: pull.head?.ref || null,
        changedFiles: pull.changed_files,
        additions: pull.additions,
        deletions: pull.deletions,
        htmlUrl: pull.html_url
      },
      tree: prFiles.map((file) => ({ path: file.filename, type: 'blob', size: file.changes || 0, status: file.status })),
      files,
      limits: {
        treeEntriesReturned: prFiles.length,
        treeTruncated,
        filesDownloaded: files.length,
        scope
      }
    };
  }
}
@@ -0,0 +1,53 @@
1
+ import { GitHubClient } from './github.js';
2
+ import { analyzeSnapshot } from './analyzer.js';
3
+ import { ReportStore } from './reportStore.js';
4
+
5
/**
 * Coordinates snapshot loading, analysis and report persistence.
 */
export class GitLumenService {
  /**
   * @param {object} [deps]
   * @param {object} [deps.githubClient] - Ready-made client; otherwise one is built from `githubOptions`.
   * @param {object} [deps.store] - Ready-made report store; otherwise one is built from `storeOptions`.
   * @param {object} [deps.githubOptions] - Options for the default `GitHubClient`.
   * @param {object} [deps.storeOptions] - Options for the default `ReportStore`.
   */
  constructor({ githubClient, store, githubOptions = {}, storeOptions = {} } = {}) {
    this.github = githubClient ? githubClient : new GitHubClient(githubOptions);
    this.store = store ? store : new ReportStore(storeOptions);
  }

  /**
   * Load a snapshot for the target, analyze it, save the report and return it.
   *
   * @param {object} input - `repoUrl`, plus optional `scope`, `branch`, `maxFiles`.
   * @returns {Promise<object>} The report produced by `analyzeSnapshot`.
   */
  async screenRepository(input) {
    const scope = input.scope || 'standard';
    const snapshotRequest = {
      repoUrl: input.repoUrl,
      branch: input.branch || undefined,
      scope,
      maxFiles: input.maxFiles || undefined
    };
    const snapshot = await this.github.loadRepositorySnapshot(snapshotRequest);

    const analysisInput = { scope, maxFiles: input.maxFiles || null };
    const report = analyzeSnapshot(snapshot, analysisInput);

    await this.store.save(report);
    return report;
  }

  /** Return the stored report with the given id. */
  async getReport(reportId) {
    return this.store.get(reportId);
  }

  /** Return summaries of stored reports, limited by `limit`. */
  async listReports(limit) {
    return this.store.list(limit);
  }

  /**
   * Return the target description and file listing without running analysis.
   *
   * The snapshot is requested with the `quick` scope and `maxFiles: 1`.
   *
   * @param {object} input - `repoUrl`, plus optional `branch` and `limit` (default 300 entries).
   * @returns {Promise<object>} `{ target, structure, limits }`.
   */
  async getRepositoryStructure(input) {
    const snapshot = await this.github.loadRepositorySnapshot({
      repoUrl: input.repoUrl,
      branch: input.branch || undefined,
      scope: 'quick',
      maxFiles: 1
    });

    const { mode, owner, repo, repoUrl, ref, defaultBranch } = snapshot;
    const target = {
      mode,
      owner,
      repo,
      repoUrl,
      ref,
      defaultBranch,
      pullRequest: snapshot.pullRequest || null
    };
    const entryLimit = input.limit || 300;

    return {
      target,
      structure: snapshot.tree.slice(0, entryLimit),
      limits: snapshot.limits
    };
  }
}
@@ -0,0 +1,62 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { createConfig } from '../config.js';
4
+
5
/**
 * File-backed report storage: each report is one JSON file named after its
 * id inside `<dataDir>/reports`.
 */
export class ReportStore {
  /**
   * @param {object} [options]
   * @param {string} [options.dataDir] - Base directory; falls back to the configured `dataDir`.
   * @param {object} [options.config] - Options forwarded to `createConfig`.
   */
  constructor({ dataDir, config } = {}) {
    const resolvedConfig = createConfig(config || {});
    this.dataDir = dataDir ? dataDir : resolvedConfig.dataDir;
    this.reportsDir = path.join(this.dataDir, 'reports');
  }

  /** Create the reports directory (and parents) when it does not exist. */
  async ensure() {
    await fs.mkdir(this.reportsDir, { recursive: true });
  }

  /**
   * Map a report id to its file path.
   *
   * @param {string} reportId - Must look like `glr_` followed by 16 lowercase hex characters.
   * @returns {string} Path of the report's JSON file.
   * @throws {Error} When the id does not match the expected format.
   */
  pathFor(reportId) {
    const wellFormed = /^glr_[a-f0-9]{16}$/.test(reportId);
    if (wellFormed) {
      return path.join(this.reportsDir, `${reportId}.json`);
    }
    throw new Error(`Invalid report id: ${reportId}`);
  }

  /**
   * Write a report as pretty-printed JSON.
   *
   * @param {object} report - Report carrying a valid `reportId`.
   * @returns {Promise<string>} Path of the written file.
   */
  async save(report) {
    await this.ensure();
    const target = this.pathFor(report.reportId);
    const serialized = JSON.stringify(report, null, 2);
    await fs.writeFile(target, serialized, 'utf8');
    return target;
  }

  /**
   * Read and parse a stored report.
   *
   * @param {string} reportId
   * @returns {Promise<object>} Parsed report.
   */
  async get(reportId) {
    await this.ensure();
    const contents = await fs.readFile(this.pathFor(reportId), 'utf8');
    return JSON.parse(contents);
  }

  /**
   * List summaries of stored reports, newest `generatedAt` first.
   *
   * Files that cannot be read or parsed are skipped.
   *
   * @param {number} [limit=20] - Maximum number of summaries returned.
   * @returns {Promise<object[]>} Report summaries.
   */
  async list(limit = 20) {
    await this.ensure();
    const dirEntries = await fs.readdir(this.reportsDir, { withFileTypes: true });
    const summaries = [];

    for (const entry of dirEntries) {
      if (!entry.isFile() || !entry.name.endsWith('.json')) continue;
      const fullPath = path.join(this.reportsDir, entry.name);
      try {
        const stat = await fs.stat(fullPath);
        const { reportId, generatedAt, target, risk, summary } = JSON.parse(await fs.readFile(fullPath, 'utf8'));
        summaries.push({
          reportId,
          generatedAt,
          target,
          risk,
          summary,
          filePath: fullPath,
          modifiedAt: stat.mtime.toISOString()
        });
      } catch {
        // Skip corrupt report file.
      }
    }

    summaries.sort((a, b) => new Date(b.generatedAt) - new Date(a.generatedAt));
    return summaries.slice(0, limit);
  }
}
package/src/types.js ADDED
@@ -0,0 +1,16 @@
1
/**
 * Numeric weight for each finding severity. Report findings are ordered by
 * descending weight; unknown severities are treated as 0 by that ordering.
 */
export const SeverityWeights = {
  critical: 25, // heaviest: listed first
  high: 15,
  medium: 8,
  low: 3,
  info: 0 // informational only
};
8
+
9
/**
 * Category identifiers, in their canonical order.
 * NOTE(review): presumably the categories findings are grouped under — confirm against the analyzer.
 */
export const CategoryNames = [
  'security', 'dependencies', 'tests',
  'architecture', 'operations', 'maintainability'
];
@@ -0,0 +1,48 @@
1
/**
 * Parse a GitHub repository or pull-request reference.
 *
 * Accepted forms:
 *  - `https://github.com/owner/repo` (scheme optional, `www.` allowed, trailing `.git` allowed)
 *  - `https://github.com/owner/repo/pull/123[/...]`
 *  - `git@github.com:owner/repo[.git]`
 *
 * The host must be exactly `github.com` or `www.github.com`; look-alike hosts
 * such as `notgithub.com` or `github.com.example.org` are rejected. A pull
 * request is recognised only when `pull` is the path segment directly after
 * the repository name and is followed by a positive integer.
 *
 * @param {string} repoUrl - Repository or pull-request reference.
 * @returns {{owner: string, repo: string, pullNumber: (number|null), normalizedRepoUrl: string}}
 * @throws {Error} When the input is missing, is not a valid URL, is not hosted
 *   on github.com, or lacks an owner/repository pair.
 */
export function parseGitHubUrl(repoUrl) {
  if (!repoUrl || typeof repoUrl !== 'string') {
    throw new Error('repoUrl is required');
  }

  let normalized = repoUrl.trim();
  if (!/^https?:\/\//i.test(normalized) && !normalized.startsWith('git@')) {
    normalized = `https://${normalized}`;
  }

  const sshMatch = normalized.match(/^git@github\.com:([^/]+)\/([^/]+?)(?:\.git)?$/i);
  if (sshMatch) {
    const [, sshOwner, sshRepo] = sshMatch;
    return {
      owner: sshOwner,
      repo: sshRepo,
      pullNumber: null,
      normalizedRepoUrl: `https://github.com/${sshOwner}/${sshRepo}`
    };
  }

  let url;
  try {
    url = new URL(normalized);
  } catch {
    throw new Error(`Invalid GitHub URL: ${repoUrl}`);
  }

  // Compare the whole hostname: a substring test would accept look-alike hosts.
  const host = url.hostname.toLowerCase();
  if (host !== 'github.com' && host !== 'www.github.com') {
    throw new Error('Only github.com repositories are supported in this SDK');
  }

  const parts = url.pathname.split('/').filter(Boolean);
  if (parts.length < 2) {
    throw new Error('GitHub URL must include owner and repository name');
  }

  const owner = parts[0];
  const repo = parts[1].replace(/\.git$/i, '');
  if (!repo) {
    throw new Error('GitHub URL must include owner and repository name');
  }

  // Only `/owner/repo/pull/<digits>` denotes a PR; an owner, branch or
  // directory that happens to be called "pull" must not be misread as one.
  let pullNumber = null;
  if (parts[2] === 'pull' && /^\d+$/.test(parts[3] || '')) {
    const candidate = Number(parts[3]);
    if (Number.isSafeInteger(candidate) && candidate > 0) {
      pullNumber = candidate;
    }
  }

  return {
    owner,
    repo,
    pullNumber,
    normalizedRepoUrl: `https://github.com/${owner}/${repo}`
  };
}
@@ -0,0 +1,10 @@
1
+ import crypto from 'node:crypto';
2
+
3
/**
 * Generate a report id of the form `glr_` + 16 hex characters.
 *
 * The id is the start of a SHA-256 digest over the repository URL, the scope
 * (defaulting to `standard`), the current time and a random number, so two
 * calls with the same input produce different ids.
 *
 * @param {{repoUrl: string, scope?: string}} input
 * @returns {string} Report id.
 */
export function createReportId(input) {
  const seed = `${input.repoUrl}|${input.scope || 'standard'}|${Date.now()}|${Math.random()}`;
  const hasher = crypto.createHash('sha256');
  hasher.update(seed);
  const digest = hasher.digest('hex');
  return `glr_${digest.slice(0, 16)}`;
}
7
+
8
/**
 * Return the first `length` hex characters of the SHA-256 digest of
 * `String(input)`.
 *
 * @param {*} input - Value to hash; converted with `String()`.
 * @param {number} [length=12] - Number of hex characters to keep.
 * @returns {string} Truncated hex digest.
 */
export function stableHash(input, length = 12) {
  const hasher = crypto.createHash('sha256');
  hasher.update(String(input));
  const hexDigest = hasher.digest('hex');
  return hexDigest.slice(0, length);
}
@@ -0,0 +1,12 @@
1
/**
 * Cut text to at most `max` characters and append a marker stating how many
 * characters were dropped.
 *
 * @param {string} text - Text to shorten; falsy values yield ''.
 * @param {number} [max=4000] - Maximum number of characters kept.
 * @returns {string} The original text when it fits, otherwise the shortened text plus marker.
 */
export function truncateText(text, max = 4000) {
  if (!text) {
    return '';
  }
  if (text.length <= max) {
    return text;
  }
  const kept = text.slice(0, max);
  const dropped = text.length - max;
  return `${kept}\n... [truncated ${dropped} chars]`;
}
5
+
6
/**
 * Serialize a value as JSON indented with two spaces.
 *
 * @param {*} value - Value to serialize.
 * @returns {string|undefined} JSON text, or whatever `JSON.stringify` returns for the value.
 */
export function safeJson(value) {
  const replacer = null;
  const indentWidth = 2;
  return JSON.stringify(value, replacer, indentWidth);
}
9
+
10
/**
 * Drop falsy entries and duplicates from an array, keeping first-seen order.
 *
 * @param {Array} values - Input values.
 * @returns {Array} New array of distinct truthy values.
 */
export function unique(values) {
  const truthyValues = values.filter((value) => Boolean(value));
  const distinct = new Set(truthyValues);
  return Array.from(distinct);
}