corpus-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/check.d.ts +1 -0
- package/dist/commands/check.js +163 -0
- package/dist/commands/init-graph.d.ts +7 -0
- package/dist/commands/init-graph.js +270 -0
- package/dist/commands/init.d.ts +1 -0
- package/dist/commands/init.js +211 -0
- package/dist/commands/report.d.ts +1 -0
- package/dist/commands/report.js +93 -0
- package/dist/commands/scan.d.ts +1 -0
- package/dist/commands/scan.js +481 -0
- package/dist/commands/verify.d.ts +1 -0
- package/dist/commands/verify.js +334 -0
- package/dist/commands/watch.d.ts +1 -0
- package/dist/commands/watch.js +380 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +87 -0
- package/dist/utils/colors.d.ts +6 -0
- package/dist/utils/colors.js +6 -0
- package/dist/utils/config.d.ts +3 -0
- package/dist/utils/config.js +39 -0
- package/dist/utils/table.d.ts +2 -0
- package/dist/utils/table.js +24 -0
- package/package.json +28 -0
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
import { readFileSync, readdirSync, statSync, existsSync, openSync, readSync, closeSync } from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { green, amber, red, dim, bold } from '../utils/colors.js';
|
|
4
|
+
/**
 * Directory-entry names that the walker skips outright: dependency folders,
 * VCS metadata, build output, caches and editor/tooling state.
 */
const IGNORE = new Set([
  'node_modules',
  '.git',
  'dist',
  '.next',
  '__pycache__',
  '.venv',
  'venv',
  '.cache',
  '.turbo',
  'coverage',
  '.nyc_output',
  'build',
  'out',
  '.output',
  '.nuxt',
  '.svelte-kit',
  'vendor',
  'Pods',
  '.gradle',
  'target',
  'bin',
  '.corpus',
  '.expo',
  '.idea',
  '.vscode',
]);
|
|
10
|
+
// Scan ALL code files, not just a few extensions.
// Extensions (lower-case, with leading dot) are grouped by ecosystem for
// readability and flattened into a single lookup set.
const CODE_EXTS = new Set(
  Object.values({
    javascriptTypescript: ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.mts', '.cts'],
    python: ['.py', '.pyw'],
    ruby: ['.rb', '.erb'],
    go: ['.go'],
    rust: ['.rs'],
    javaKotlin: ['.java', '.kt', '.kts'],
    swiftObjc: ['.swift', '.m', '.mm'],
    cCpp: ['.c', '.h', '.cpp', '.hpp', '.cc', '.hh'],
    csharp: ['.cs'],
    php: ['.php'],
    shell: ['.sh', '.bash', '.zsh'],
    config: ['.json', '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.xml', '.plist'],
    web: ['.html', '.htm', '.vue', '.svelte'],
    sql: ['.sql'],
    infrastructure: ['.tf', '.hcl', '.dockerfile'],
    other: ['.r', '.jl', '.lua', '.pl', '.pm', '.ex', '.exs', '.graphql', '.gql', '.proto'],
  }).flat(),
);
|
|
47
|
+
/**
 * Decide whether a file should be scanned, based only on its name.
 *
 * A file qualifies when its lower-cased extension is in CODE_EXTS, or when its
 * lower-cased base name starts with ".env", is "dockerfile" / "makefile", or is
 * one of the credential-bearing dotfiles (.npmrc, .pypirc, .netrc).
 *
 * @param {string} filepath - Path of the candidate file.
 * @returns {boolean} true when the file should be scanned.
 */
function isScannable(filepath) {
  const lowerName = path.basename(filepath).toLowerCase();
  const lowerExt = path.extname(filepath).toLowerCase();

  const hasKnownExtension = CODE_EXTS.has(lowerExt);
  const isEnvOrBuildFile = lowerName.startsWith('.env') || ['dockerfile', 'makefile'].includes(lowerName);
  const isCredentialDotfile = ['.npmrc', '.pypirc', '.netrc'].includes(lowerName);

  return hasKnownExtension || isEnvOrBuildFile || isCredentialDotfile;
}
|
|
61
|
+
// Whole words (after splitting the path on non-letters and camelCase humps)
// that mark a path as test-related.
const TEST_PATH_WORDS = new Set([
  'test', 'tests', 'testing', 'testdata', 'testcase', 'testcases', 'testutils',
  'unittest', 'unittests', 'conftest',
  'spec', 'specs',
  'fixture', 'fixtures',
  'mock', 'mocks',
]);

/**
 * Heuristically decide whether a path belongs to test code (tests, specs,
 * fixtures or mocks).
 *
 * The path is split into words on every non-letter character and on camelCase
 * boundaries, and each word is compared as a whole. Plain substring matching
 * misclassified ordinary files such as "inspector.js" (contains "spec"),
 * "latest/config.js" or "contest.py" (contain "test") as test files.
 *
 * @param {string} f - File path to classify.
 * @returns {boolean} true when any path word is a known test marker.
 */
function isTestFile(f) {
  const words = f
    // "HTTPTest" -> "HTTP Test"
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    // "fooTest" / "mockServer" -> "foo Test" / "mock Server"
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .toLowerCase()
    .split(/[^a-z]+/);
  return words.some((word) => TEST_PATH_WORDS.has(word));
}
|
|
65
|
+
/**
 * Sniff whether a file looks binary by checking its first 512 bytes for a
 * NUL byte.
 *
 * Best effort: any I/O error (missing file, permission denied, ...) yields
 * `false`, so the caller treats the file as text.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @returns {boolean} true when a NUL byte occurs in the sampled prefix.
 */
function isBinary(filepath) {
  let fd;
  try {
    fd = openSync(filepath, 'r');
    const sample = Buffer.alloc(512);
    const bytesRead = readSync(fd, sample, 0, sample.length, 0);
    // Only look at the bytes actually read; a NUL byte means binary.
    return sample.subarray(0, bytesRead).includes(0);
  } catch {
    return false;
  } finally {
    // Always release the descriptor, even when the read failed.
    if (fd !== undefined) {
      try {
        closeSync(fd);
      } catch {
        /* nothing useful to do if close fails */
      }
    }
  }
}
|
|
81
|
+
/**
 * Recursively collect scannable files under `dir` into `files`.
 *
 * Entries named in IGNORE are skipped. A regular file is collected when it is
 * smaller than 1,000,000 bytes, passes isScannable() and is not binary.
 * Symlinked directories are not descended into: following them let a link
 * cycle be re-walked once per depth level (up to `maxDepth`), listing the same
 * files repeatedly. Symlinks to regular files are still scanned.
 *
 * Unreadable directories and entries are skipped silently (best effort).
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} files - Output array; matching paths are appended in place.
 * @param {number} [maxDepth=15] - Maximum recursion depth.
 * @param {number} [depth=0] - Current depth (internal, used by the recursion).
 * @returns {void}
 */
function walkDir(dir, files, maxDepth = 15, depth = 0) {
  if (depth > maxDepth) {
    return;
  }

  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch {
    return; // unreadable directory (e.g. permission error)
  }

  for (const entry of entries) {
    if (IGNORE.has(entry.name)) {
      continue;
    }
    const full = path.join(dir, entry.name);
    try {
      // Dirent reports the entry's own type, so a symlink to a directory is
      // NOT a directory here and is never recursed into.
      if (entry.isDirectory()) {
        walkDir(full, files, maxDepth, depth + 1);
        continue;
      }
      // statSync follows symlinks, so a link to a regular file still counts.
      const info = statSync(full);
      if (info.isFile() && info.size < 1_000_000 && isScannable(full) && !isBinary(full)) {
        files.push(full);
      }
    } catch {
      /* permission error or dangling link: skip this entry */
    }
  }
}
|
|
102
|
+
// ── Scanning ────────────────────────────────────────────────────────────────
|
|
103
|
+
/**
 * Secret detectors. Each entry is [pattern, finding name, suggested fix].
 * Patterns carry the `g` flag because every occurrence is reported. When a
 * pattern has a capture group, the captured value (not the whole match) is
 * what gets checked against PLACEHOLDER_SKIP.
 */
const SECRETS = [
  [/AKIA[0-9A-Z]{16}/g, 'AWS Access Key', 'Move to process.env.AWS_ACCESS_KEY_ID'],
  [/gh[pousr]_[A-Za-z0-9_]{36,}/g, 'GitHub Token', 'Move to process.env.GITHUB_TOKEN'],
  [/sk-[A-Za-z0-9]{20,}/g, 'OpenAI/Anthropic Key', 'Move to process.env.API_KEY'],
  [/sk-ant-[A-Za-z0-9-]{20,}/g, 'Anthropic Key', 'Move to process.env.ANTHROPIC_API_KEY'],
  [/[sr]k_live_[A-Za-z0-9]{20,}/g, 'Stripe Live Key', 'Move to process.env.STRIPE_SECRET_KEY'],
  [/-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g, 'Private Key', 'Move to .env (never commit)'],
  // Also covers the common scheme variants postgresql://, mongodb+srv:// and
  // rediss:// (TLS), which the bare scheme names alone do not match.
  [/(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|rediss?):\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/g, 'Database URL', 'Move to process.env.DATABASE_URL'],
  [/xox[baprs]-[0-9a-zA-Z-]{10,}/g, 'Slack Token', 'Move to process.env.SLACK_TOKEN'],
  [/SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}/g, 'SendGrid Key', 'Move to process.env.SENDGRID_API_KEY'],
  [/(?:api_key|apikey|api_secret|secret_key|auth_token|access_token)\s*[=:]\s*['"]([A-Za-z0-9_\-]{20,})['"]/gi, 'Hardcoded Secret', 'Move to environment variable'],
  [/(?:password|passwd|pwd)\s*[=:]\s*['"]([^'"]{8,})['"]/gi, 'Hardcoded Password', 'Move to environment variable'],
];

/**
 * Unsafe-code detectors. Each entry is
 * [pattern, finding name, suggested fix, severity].
 */
const SAFETY = [
  [/\beval\s*\(/g, 'eval() usage', 'Use JSON.parse() or Function() instead', 'CRITICAL'],
  [/\.innerHTML\s*=/g, 'innerHTML assignment', 'Use textContent or sanitize with DOMPurify', 'WARNING'],
  [/rejectUnauthorized\s*:\s*false/g, 'Disabled SSL', 'Set rejectUnauthorized: true', 'WARNING'],
  [/(?:cors|origin)\s*[=:]\s*['"]\*['"]/gi, 'Wildcard CORS', 'Set specific origin', 'WARNING'],
  [/chmod\s+777/g, 'chmod 777', 'Use 755 or 644', 'WARNING'],
  [/"(?:Action|Resource)"\s*:\s*"\*"/g, 'Wildcard IAM', 'Use least-privilege', 'CRITICAL'],
  [/--no-verify/g, 'Skip verification flag', 'Remove before production', 'WARNING'],
  [/dangerouslySetInnerHTML/g, 'dangerouslySetInnerHTML', 'Sanitize HTML content first', 'WARNING'],
];

/**
 * Values matching any of these look like placeholders rather than real
 * secrets and are not reported. None of them uses the `g` flag, so `.test()`
 * is stateless.
 */
const PLACEHOLDER_SKIP = [
  /^sk-(?:test|fake|dummy|placeholder|example|xxx|your)/i,
  /^(?:test|fake|dummy|placeholder|example|changeme|TODO|your_|xxx|aaa|123|abc)/i,
  /<[A-Z_]+>/,
];
|
|
127
|
+
/**
 * Scan one file and compute its trust score.
 *
 * Detectors run in this order: SECRETS, SAFETY (skipped when the path contains
 * "scanner", "detect" or "guard"), credential-bearing inline URLs (skipped
 * when the path contains ".env"), fetch() calls with no try/catch in the 300
 * characters before them, and SSN-shaped numbers. Every occurrence is reported.
 *
 * Score: 100 minus capped deductions (CRITICAL 15 each, max 60; WARNING 5
 * each, max 25; anything else 1 each, max 10). Deductions are halved for test
 * files. Unreadable files and files with fewer than two lines get score 100
 * and no findings.
 *
 * @param {string} filepath - Path of the file to scan.
 * @returns {{file: string, score: number, findings: Array<{severity: string,
 *   type: string, line: number, message: string, fix: string,
 *   isAiPattern: boolean}>, lines: number}} Scan result; `lines` is 0 when the
 *   file could not be read.
 */
function scanFile(filepath) {
  let content;
  try {
    content = readFileSync(filepath, 'utf-8');
  } catch {
    return { file: filepath, score: 100, findings: [], lines: 0 };
  }

  // Record every newline offset once; line numbers are then a binary search
  // instead of re-slicing and re-splitting the file for each match.
  const newlineOffsets = [];
  for (let at = content.indexOf('\n'); at !== -1; at = content.indexOf('\n', at + 1)) {
    newlineOffsets.push(at);
  }
  const lines = newlineOffsets.length + 1;

  // NOTE(review): single-line files are skipped entirely, so a secret in a
  // one-line file without a trailing newline is not reported — confirm this
  // is intended (it may exist to avoid scanning minified bundles).
  if (lines < 2) {
    return { file: filepath, score: 100, findings: [], lines };
  }

  // 1-based line of a character offset = newlines strictly before it, plus one.
  const lineAt = (index) => {
    let lo = 0;
    let hi = newlineOffsets.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      if (newlineOffsets[mid] < index) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    return lo + 1;
  };

  const findings = [];
  const addFinding = (index, severity, type, message, fix, isAiPattern) => {
    findings.push({ severity, type, line: lineAt(index), message, fix, isAiPattern });
  };

  // Secrets
  for (const [regex, name, fix] of SECRETS) {
    regex.lastIndex = 0; // matchAll starts from the pattern's current lastIndex
    for (const m of content.matchAll(regex)) {
      const val = m[1] ?? m[0];
      if (PLACEHOLDER_SKIP.some((p) => p.test(val))) {
        continue;
      }
      addFinding(m.index, 'CRITICAL', name, `${name} detected`, fix, false);
    }
  }

  // Safety (skip scanner definition files, which contain these patterns as data)
  const isScannerFile = ['scanner', 'detect', 'guard'].some((word) => filepath.includes(word));
  if (!isScannerFile) {
    for (const [regex, name, fix, sev] of SAFETY) {
      regex.lastIndex = 0;
      for (const m of content.matchAll(regex)) {
        addFinding(m.index, sev, name, name, fix, sev === 'WARNING');
      }
    }
  }

  // AI patterns: inlined URLs
  if (!filepath.includes('.env')) {
    const urlRegex = /(?:const|let|var|=)\s*['"](?:postgres|mysql|mongodb|redis|https?):\/\/[^\s'"]*:[^\s'"]*@[^'"]+['"]/g;
    for (const m of content.matchAll(urlRegex)) {
      addFinding(m.index, 'WARNING', 'Inlined URL', 'URL with credentials hardcoded', 'Use environment variable', true);
    }
  }

  // Unhandled async. The word boundary keeps identifiers that merely end in
  // "fetch" (prefetch, refetch, ...) from being reported.
  const fetchRegex = /(?:await\s+)?\bfetch\s*\(/g;
  for (const m of content.matchAll(fetchRegex)) {
    const before = content.slice(Math.max(0, m.index - 300), m.index);
    // 'catch' also covers '.catch'.
    if (!before.includes('try') && !before.includes('catch')) {
      addFinding(m.index, 'INFO', 'Unhandled fetch', 'fetch() without error handling', 'Wrap in try/catch or add .catch()', true);
    }
  }

  // PII: report every SSN-shaped number, like the other detectors do.
  for (const m of content.matchAll(/\b\d{3}-\d{2}-\d{4}\b/g)) {
    addFinding(m.index, 'CRITICAL', 'SSN', 'SSN pattern in source', 'Remove immediately', false);
  }

  // Score
  const mult = isTestFile(filepath) ? 0.5 : 1;
  let critD = 0;
  let warnD = 0;
  let infoD = 0;
  for (const f of findings) {
    if (f.severity === 'CRITICAL') {
      critD += 15 * mult;
    } else if (f.severity === 'WARNING') {
      warnD += 5 * mult;
    } else {
      infoD += 1 * mult;
    }
  }
  const score = Math.max(0, Math.round(100 - Math.min(critD, 60) - Math.min(warnD, 25) - Math.min(infoD, 10)));
  return { file: filepath, score, findings, lines };
}
|
|
203
|
+
/**
 * Pick the colour helper for a trust score: green at 80 and above, amber from
 * 50 to 79, red below 50.
 *
 * @param {number} score - Trust score.
 * @returns {Function} One of the imported green / amber / red helpers.
 */
function scoreColor(score) {
  return score >= 80 ? green : score >= 50 ? amber : red;
}
|
|
210
|
+
/**
 * Map a trust score to its status label: 'PASS' at 80 and above, 'WARNING'
 * from 50 to 79, 'CRITICAL' below 50.
 *
 * @param {number} score - Trust score.
 * @returns {string} 'PASS', 'WARNING' or 'CRITICAL'.
 */
function scoreLabel(score) {
  const tiers = [
    [80, 'PASS'],
    [50, 'WARNING'],
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  return tier === undefined ? 'CRITICAL' : tier[1];
}
|
|
217
|
+
// ── Main ────────────────────────────────────────────────────────────────────
|
|
218
|
+
/**
 * Entry point of `corpus verify`.
 *
 * Reads its arguments from process.argv (from index 3 on): every argument not
 * starting with "-" is a target path (default "."), plus the flags --json,
 * --all and --help / -h. Directories are walked with walkDir(); explicitly
 * named files are scanned as given. Results are printed as JSON or as a
 * coloured report, and the process then exits.
 *
 * Exit codes: 2 = critical, 1 = warning, 0 = clean.
 * NOTE(review): the two modes decide this differently — JSON mode uses the
 * line-weighted codebase score, the pretty report uses "any file below 50 /
 * below 80". Kept as is; confirm whether they should agree.
 *
 * Returns without exiting when --help is given or nothing scannable is found.
 *
 * @returns {Promise<void>}
 */
export async function runVerify() {
  const args = process.argv.slice(3);
  const targetPaths = args.filter((a) => !a.startsWith('-'));
  const jsonMode = args.includes('--json');
  const showAll = args.includes('--all');

  if (args.includes('--help') || args.includes('-h')) {
    process.stdout.write(`
corpus verify [paths...] [options]

Compute trust scores for every file in your codebase.
Supports: TypeScript, JavaScript, Python, Go, Rust, Java, Swift,
Ruby, PHP, C/C++, Shell, SQL, YAML, JSON, and more.

Options:
--json Output as JSON
--all Show all files (default: only files with findings)
--help Show this help

Examples:
corpus verify Verify current directory (auto-detects files)
corpus verify src/ Verify specific directory
corpus verify --json Machine-readable output for CI

`);
    return;
  }

  const paths = targetPaths.length > 0 ? targetPaths : ['.'];
  const start = Date.now();

  // Collect files
  const allFiles = [];
  for (const p of paths) {
    if (!existsSync(p)) {
      process.stderr.write(` Path not found: ${p}\n`);
      continue;
    }
    const s = statSync(p);
    if (s.isFile()) {
      allFiles.push(p);
    } else if (s.isDirectory()) {
      walkDir(p, allFiles);
    }
  }

  if (allFiles.length === 0) {
    process.stdout.write(dim('\n No scannable files found.\n'));
    process.stdout.write(dim(' Corpus scans: .ts .js .py .go .rs .java .swift .rb .php .sh .sql .json .yaml and more.\n'));
    process.stdout.write(dim(' Try: corpus verify .\n\n'));
    return;
  }

  // Scan, worst score first
  const results = allFiles.map((f) => scanFile(f));
  results.sort((a, b) => a.score - b.score);
  const timeMs = Date.now() - start;

  // Reported paths are relative to the first target. When that target is a
  // file, use its directory: relative(file, file) is '' and would print a
  // blank file name.
  let baseDir = path.resolve(paths[0]);
  try {
    if (statSync(baseDir).isFile()) {
      baseDir = path.dirname(baseDir);
    }
  } catch {
    /* first target does not exist; keep the resolved path */
  }

  // Codebase score: line-weighted mean over files with at least 5 lines
  const scored = results.filter((r) => r.lines >= 5);
  const totalLines = scored.reduce((sum, r) => sum + r.lines, 0);
  const codebaseScore = totalLines > 0
    ? Math.round(scored.reduce((sum, r) => sum + r.score * r.lines, 0) / totalLines)
    : 100;
  const critFiles = results.filter((r) => r.score < 50);
  const warnFiles = results.filter((r) => r.score >= 50 && r.score < 80);
  const cleanFiles = results.filter((r) => r.score >= 80);

  if (jsonMode) {
    const report = JSON.stringify({
      codebaseTrustScore: codebaseScore,
      totalFiles: results.length,
      criticalFiles: critFiles.length,
      warningFiles: warnFiles.length,
      cleanFiles: cleanFiles.length,
      scanTimeMs: timeMs,
      files: results.map((r) => ({
        file: path.relative(baseDir, r.file),
        score: r.score,
        lines: r.lines,
        findings: r.findings,
      })),
    }, null, 2) + '\n';
    await writeThenExit(report, codebaseScore < 50 ? 2 : codebaseScore < 80 ? 1 : 0);
    return;
  }

  // Pretty output
  process.stdout.write('\n');
  process.stdout.write(bold(` CORPUS VERIFY`) + dim(` ${allFiles.length} files ${(timeMs / 1000).toFixed(1)}s\n`));
  process.stdout.write(' ' + '\u2550'.repeat(50) + '\n\n');
  const sc = scoreColor(codebaseScore);
  process.stdout.write(` CODEBASE TRUST SCORE: ${sc(bold(`${codebaseScore}/100`))}\n\n`);

  const toShow = showAll ? results : results.filter((r) => r.findings.length > 0);
  for (const r of toShow) {
    const relPath = path.relative(baseDir, r.file);
    const fc = scoreColor(r.score);
    const label = scoreLabel(r.score);
    process.stdout.write(` ${relPath.padEnd(50)} ${fc(`${r.score}/100`)} ${fc(label)}\n`);
    for (const f of r.findings) {
      const sev = f.severity === 'CRITICAL' ? red('CRIT') : f.severity === 'WARNING' ? amber('WARN') : dim('INFO');
      const ai = f.isAiPattern ? amber(' [AI]') : '';
      process.stdout.write(` ${sev} Line ${String(f.line).padEnd(4)} ${f.message}${ai}\n`);
      process.stdout.write(` ${dim('FIX: ' + f.fix)}\n`);
    }
    if (r.findings.length > 0) {
      process.stdout.write('\n');
    }
  }

  process.stdout.write(' ' + '\u2500'.repeat(50) + '\n');
  const parts = [];
  if (critFiles.length > 0) {
    parts.push(red(`${critFiles.length} critical`));
  }
  if (warnFiles.length > 0) {
    parts.push(amber(`${warnFiles.length} warning`));
  }
  parts.push(green(`${cleanFiles.length} clean`));

  const exitCode = critFiles.length > 0 ? 2 : warnFiles.length > 0 ? 1 : 0;
  await writeThenExit(` Files: ${parts.join(', ')}\n\n`, exitCode);
}

/**
 * Write the final chunk to stdout and exit only once the stream has handled
 * it. Calling process.exit() straight after write() can drop output that is
 * still buffered when stdout is asynchronous. Earlier writes are flushed
 * first because a stream processes writes in order.
 *
 * @param {string} text - Last chunk to print.
 * @param {number} code - Process exit code.
 * @returns {Promise<never>} Never settles; the process exits in the callback.
 */
function writeThenExit(text, code) {
  // Fallback so the code is still reported if the callback never fires.
  process.exitCode = code;
  return new Promise(() => {
    process.stdout.write(text, () => process.exit(code));
  });
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Type declaration for `runWatch`: takes no arguments and returns a promise
 * that resolves with no value.
 * NOTE(review): presumably the entry point of the `watch` command implemented
 * in commands/watch.js — confirm against that file.
 */
export declare function runWatch(): Promise<void>;
|