corpus-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,334 @@
1
+ import { readFileSync, readdirSync, statSync, existsSync, openSync, readSync, closeSync } from 'fs';
2
+ import path from 'path';
3
+ import { green, amber, red, dim, bold } from '../utils/colors.js';
4
// Entry names the directory walker skips outright: dependency trees,
// version-control metadata, build output and editor/tooling state.
const IGNORE = new Set([
  'node_modules',
  '.git',
  'dist',
  '.next',
  '__pycache__',
  '.venv',
  'venv',
  '.cache',
  '.turbo',
  'coverage',
  '.nyc_output',
  'build',
  'out',
  '.output',
  '.nuxt',
  '.svelte-kit',
  'vendor',
  'Pods',
  '.gradle',
  'target',
  'bin',
  '.corpus',
  '.expo',
  '.idea',
  '.vscode',
]);
10
// Scannable extensions (lowercase, leading dot), grouped by language family.
const EXTENSIONS_BY_FAMILY = {
  javascriptTypescript: ['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', '.mts', '.cts'],
  python: ['.py', '.pyw'],
  ruby: ['.rb', '.erb'],
  go: ['.go'],
  rust: ['.rs'],
  javaKotlin: ['.java', '.kt', '.kts'],
  swiftObjc: ['.swift', '.m', '.mm'],
  cCpp: ['.c', '.h', '.cpp', '.hpp', '.cc', '.hh'],
  csharp: ['.cs'],
  php: ['.php'],
  shell: ['.sh', '.bash', '.zsh'],
  config: ['.json', '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.xml', '.plist'],
  web: ['.html', '.htm', '.vue', '.svelte'],
  sql: ['.sql'],
  infrastructure: ['.tf', '.hcl', '.dockerfile'],
  other: ['.r', '.jl', '.lua', '.pl', '.pm', '.ex', '.exs', '.graphql', '.gql', '.proto'],
};

// Scan ALL code files, not just a few extensions: every family above is
// flattened into one lookup set used for extension membership tests.
const CODE_EXTS = new Set(Object.values(EXTENSIONS_BY_FAMILY).flat());
47
/**
 * Decides whether a path should be scanned, based only on its name.
 *
 * A file qualifies when its lowercased extension is in CODE_EXTS, when its
 * lowercased basename starts with `.env`, or when the basename is one of a
 * few well-known extensionless build/credential files.
 *
 * @param {string} filepath - File path (relative or absolute).
 * @returns {boolean} True when the file should be scanned.
 */
function isScannable(filepath) {
  const extension = path.extname(filepath).toLowerCase();
  if (CODE_EXTS.has(extension)) {
    return true;
  }

  const baseName = path.basename(filepath).toLowerCase();
  // Env files (.env, .env.local, ...) are matched by prefix; the remaining
  // names are build files and dotfiles that might contain secrets.
  const knownNames = ['dockerfile', 'makefile', '.npmrc', '.pypirc', '.netrc'];
  return baseName.startsWith('.env') || knownNames.includes(baseName);
}
61
/**
 * Heuristically classifies a path as test-related.
 *
 * The check is a case-insensitive substring match anywhere in the path, so
 * directory names count as well as file names.
 *
 * @param {string} f - File path to classify.
 * @returns {boolean} True when the path contains any test-like marker.
 */
function isTestFile(f) {
  const lowered = f.toLowerCase();
  const markers = ['test', 'spec', '__tests__', 'fixture', 'mock'];
  return markers.some((marker) => lowered.includes(marker));
}
65
/**
 * Sniffs the first 512 bytes of a file and reports whether they contain a
 * NUL byte, which is treated as the marker of binary content.
 *
 * Any failure to open or read the file is treated as "not binary" so that the
 * caller's own read attempt decides what happens next.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @returns {boolean} True when a NUL byte occurs in the sniffed prefix.
 */
function isBinary(filepath) {
  const sniffLength = 512;
  let fd;
  try {
    fd = openSync(filepath, 'r');
    const buf = Buffer.alloc(sniffLength);
    const bytesRead = readSync(fd, buf, 0, sniffLength, 0);
    // Only the bytes actually read are inspected; the rest of the buffer is
    // zero-filled padding and must not count as NUL bytes.
    return buf.subarray(0, bytesRead).includes(0);
  } catch {
    return false;
  } finally {
    // Release the descriptor even when readSync throws (e.g. the path is a
    // directory); otherwise every failed read leaks one descriptor.
    if (fd !== undefined) {
      try {
        closeSync(fd);
      } catch {
        // A failed close must not turn this best-effort probe into a throw.
      }
    }
  }
}
81
/**
 * Recursively collects scannable files under `dir` into `files`.
 *
 * Ignored entry names are skipped, recursion stops once `depth` exceeds
 * `maxDepth`, and files of 1,000,000 bytes or more are left out. Directories
 * or entries that cannot be listed or stat'ed are silently skipped.
 *
 * @param {string} dir - Directory to list.
 * @param {string[]} files - Output array; matching paths are appended to it.
 * @param {number} [maxDepth=15] - Deepest nesting level that is still listed.
 * @param {number} [depth=0] - Nesting level of `dir` (used by the recursion).
 * @returns {void}
 */
function walkDir(dir, files, maxDepth = 15, depth = 0) {
  if (depth > maxDepth) {
    return;
  }

  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    // permission error
    return;
  }

  for (const entry of entries) {
    const skipped = IGNORE.has(entry) || entry === '.git' || entry === 'node_modules';
    if (skipped) {
      continue;
    }

    const full = path.join(dir, entry);
    try {
      const info = statSync(full);
      if (info.isDirectory()) {
        walkDir(full, files, maxDepth, depth + 1);
        continue;
      }
      // The cheap checks run first so the file is only opened for the binary
      // sniff when everything else already qualifies.
      const wanted = info.isFile() && info.size < 1_000_000 && isScannable(full) && !isBinary(full);
      if (wanted) {
        files.push(full);
      }
    } catch {
      // permission error
    }
  }
}
102
+ // ── Scanning ────────────────────────────────────────────────────────────────
103
// Secret-detection rules. Each rule becomes a [regex, name, fix] tuple; every
// regex is global so a scan can collect all occurrences. The last two rules
// capture the secret value itself in group 1.
const SECRETS = [
  {
    pattern: /AKIA[0-9A-Z]{16}/g,
    label: 'AWS Access Key',
    remedy: 'Move to process.env.AWS_ACCESS_KEY_ID',
  },
  {
    pattern: /gh[pousr]_[A-Za-z0-9_]{36,}/g,
    label: 'GitHub Token',
    remedy: 'Move to process.env.GITHUB_TOKEN',
  },
  {
    pattern: /sk-[A-Za-z0-9]{20,}/g,
    label: 'OpenAI/Anthropic Key',
    remedy: 'Move to process.env.API_KEY',
  },
  {
    pattern: /sk-ant-[A-Za-z0-9-]{20,}/g,
    label: 'Anthropic Key',
    remedy: 'Move to process.env.ANTHROPIC_API_KEY',
  },
  {
    pattern: /[sr]k_live_[A-Za-z0-9]{20,}/g,
    label: 'Stripe Live Key',
    remedy: 'Move to process.env.STRIPE_SECRET_KEY',
  },
  {
    pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g,
    label: 'Private Key',
    remedy: 'Move to .env (never commit)',
  },
  {
    pattern: /(?:postgres|mysql|mongodb|redis):\/\/[^\s'"]+:[^\s'"]+@[^\s'"]+/g,
    label: 'Database URL',
    remedy: 'Move to process.env.DATABASE_URL',
  },
  {
    pattern: /xox[baprs]-[0-9a-zA-Z-]{10,}/g,
    label: 'Slack Token',
    remedy: 'Move to process.env.SLACK_TOKEN',
  },
  {
    pattern: /SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}/g,
    label: 'SendGrid Key',
    remedy: 'Move to process.env.SENDGRID_API_KEY',
  },
  {
    pattern: /(?:api_key|apikey|api_secret|secret_key|auth_token|access_token)\s*[=:]\s*['"]([A-Za-z0-9_\-]{20,})['"]/gi,
    label: 'Hardcoded Secret',
    remedy: 'Move to environment variable',
  },
  {
    pattern: /(?:password|passwd|pwd)\s*[=:]\s*['"]([^'"]{8,})['"]/gi,
    label: 'Hardcoded Password',
    remedy: 'Move to environment variable',
  },
].map(({ pattern, label, remedy }) => [pattern, label, remedy]);
116
// Unsafe-construct rules as [regex, name, fix, severity] tuples. Every regex
// is global so a scan can report each occurrence.
const SAFETY = [
  // The Function constructor compiles strings as code just like eval, so it
  // is deliberately not offered as a remedy here.
  [/\beval\s*\(/g, 'eval() usage', 'Use JSON.parse() or an explicit lookup table instead', 'CRITICAL'],
  [/\.innerHTML\s*=/g, 'innerHTML assignment', 'Use textContent or sanitize with DOMPurify', 'WARNING'],
  [/rejectUnauthorized\s*:\s*false/g, 'Disabled SSL', 'Set rejectUnauthorized: true', 'WARNING'],
  [/(?:cors|origin)\s*[=:]\s*['"]\*['"]/gi, 'Wildcard CORS', 'Set specific origin', 'WARNING'],
  [/chmod\s+777/g, 'chmod 777', 'Use 755 or 644', 'WARNING'],
  [/"(?:Action|Resource)"\s*:\s*"\*"/g, 'Wildcard IAM', 'Use least-privilege', 'CRITICAL'],
  [/--no-verify/g, 'Skip verification flag', 'Remove before production', 'WARNING'],
  [/dangerouslySetInnerHTML/g, 'dangerouslySetInnerHTML', 'Sanitize HTML content first', 'WARNING'],
];
126
// Values matching any of these patterns are treated as obvious placeholders
// rather than real secrets.
const PLACEHOLDER_SKIP = [
  // "sk-" style keys whose body starts with a dummy word.
  /^sk-(?:test|fake|dummy|placeholder|example|xxx|your)/i,
  // Values that themselves start with a dummy word or filler sequence.
  /^(?:test|fake|dummy|placeholder|example|changeme|TODO|your_|xxx|aaa|123|abc)/i,
  // Template markers such as <API_KEY> anywhere in the value.
  /<[A-Z_]+>/,
];
127
/**
 * Converts a character offset into a 1-based line number.
 *
 * @param {string} content - Full file text.
 * @param {number} index - Character offset into `content`.
 * @returns {number} Line on which the offset falls.
 */
function lineNumberAt(content, index) {
  return content.slice(0, index).split('\n').length;
}

/**
 * Scans one file for secrets, unsafe constructs, inlined credential URLs,
 * fetch() calls without error handling and SSN-shaped numbers, then derives
 * a 0-100 score from the findings.
 *
 * @param {string} filepath - Path of the file to read and analyse.
 * @returns {{file: string, score: number, findings: object[], lines: number}}
 *   Findings appear in check order (secrets, safety, URLs, fetch, SSN). A file
 *   that cannot be read yields score 100, no findings and `lines: 0`.
 */
function scanFile(filepath) {
  let content;
  try {
    content = readFileSync(filepath, 'utf-8');
  } catch {
    // Unreadable files are reported as clean instead of aborting the run.
    return { file: filepath, score: 100, findings: [], lines: 0 };
  }

  const lines = content.split('\n').length;
  // NOTE(review): a file with no newline at all (empty OR a single line) is
  // returned as clean without being scanned — confirm that skipping
  // single-line files is intended.
  if (lines < 2) {
    return { file: filepath, score: 100, findings: [], lines };
  }

  const findings = [];
  const isTF = isTestFile(filepath);

  // Secrets
  for (const [regex, name, fix] of SECRETS) {
    // The rule regexes are shared and global, so rewind before each file.
    regex.lastIndex = 0;
    let m;
    while ((m = regex.exec(content)) !== null) {
      // Rules with a capture group expose the secret value as group 1.
      const val = m[1] ?? m[0];
      if (PLACEHOLDER_SKIP.some((p) => p.test(val))) {
        continue;
      }
      findings.push({
        severity: 'CRITICAL',
        type: name,
        line: lineNumberAt(content, m.index),
        message: `${name} detected`,
        fix,
        isAiPattern: false,
      });
    }
  }

  // Safety (skip scanner definition files)
  const isScannerFile = filepath.includes('scanner') || filepath.includes('detect') || filepath.includes('guard');
  if (!isScannerFile) {
    for (const [regex, name, fix, sev] of SAFETY) {
      regex.lastIndex = 0;
      let m;
      while ((m = regex.exec(content)) !== null) {
        findings.push({
          severity: sev,
          type: name,
          line: lineNumberAt(content, m.index),
          message: name,
          fix,
          isAiPattern: sev === 'WARNING',
        });
      }
    }
  }

  // AI patterns: inlined URLs
  if (!filepath.includes('.env')) {
    const urlRegex = /(?:const|let|var|=)\s*['"](?:postgres|mysql|mongodb|redis|https?):\/\/[^\s'"]*:[^\s'"]*@[^'"]+['"]/g;
    let urlM;
    while ((urlM = urlRegex.exec(content)) !== null) {
      findings.push({
        severity: 'WARNING',
        type: 'Inlined URL',
        line: lineNumberAt(content, urlM.index),
        message: 'URL with credentials hardcoded',
        fix: 'Use environment variable',
        isAiPattern: true,
      });
    }
  }

  // Unhandled async
  const contextChars = 300;
  const fetchRegex = /(?:await\s+)?fetch\s*\(/g;
  let fetchM;
  while ((fetchM = fetchRegex.exec(content)) !== null) {
    const before = content.slice(Math.max(0, fetchM.index - contextChars), fetchM.index);
    // A promise-style handler is chained AFTER the call, so the text that
    // follows must be inspected too; otherwise `fetch(url).catch(...)`, the
    // very remedy suggested below, would still be reported.
    const after = content.slice(fetchM.index, fetchM.index + contextChars);
    const handled = before.includes('try') || before.includes('catch') || after.includes('.catch');
    if (!handled) {
      findings.push({
        severity: 'INFO',
        type: 'Unhandled fetch',
        line: lineNumberAt(content, fetchM.index),
        message: 'fetch() without error handling',
        fix: 'Wrap in try/catch or add .catch()',
        isAiPattern: true,
      });
    }
  }

  // PII: only the first SSN-shaped match is reported.
  const ssnIndex = content.search(/\b\d{3}-\d{2}-\d{4}\b/);
  if (ssnIndex !== -1) {
    findings.push({
      severity: 'CRITICAL',
      type: 'SSN',
      line: lineNumberAt(content, ssnIndex),
      message: 'SSN pattern in source',
      fix: 'Remove immediately',
      isAiPattern: false,
    });
  }

  // Score: 15 / 5 / 1 points per CRITICAL / WARNING / other finding, halved
  // for test files, with each severity's total deduction capped (60/25/10).
  const mult = isTF ? 0.5 : 1;
  let critD = 0;
  let warnD = 0;
  let infoD = 0;
  for (const f of findings) {
    if (f.severity === 'CRITICAL') {
      critD += 15 * mult;
    } else if (f.severity === 'WARNING') {
      warnD += 5 * mult;
    } else {
      infoD += 1 * mult;
    }
  }
  const score = Math.max(0, Math.round(100 - Math.min(critD, 60) - Math.min(warnD, 25) - Math.min(infoD, 10)));
  return { file: filepath, score, findings, lines };
}
203
/**
 * Picks the color helper for a score: green from 80 up, amber from 50 up,
 * red for everything else.
 *
 * @param {number} score - Score to classify.
 * @returns {Function} One of the `green`, `amber` or `red` color helpers.
 */
function scoreColor(score) {
  // Tiers are checked from highest threshold to lowest; the first hit wins.
  const tiers = [
    [80, green],
    [50, amber],
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  return tier ? tier[1] : red;
}
210
/**
 * Maps a score to its textual verdict, using the same 80 / 50 thresholds as
 * the color selection.
 *
 * @param {number} score - Score to classify.
 * @returns {'PASS'|'WARNING'|'CRITICAL'} Verdict label.
 */
function scoreLabel(score) {
  const passes = score >= 80;
  const warns = score >= 50;
  return passes ? 'PASS' : warns ? 'WARNING' : 'CRITICAL';
}
217
+ // ── Main ────────────────────────────────────────────────────────────────────
218
/**
 * Entry point of the `verify` command: collects files from the target paths,
 * scans each one, prints a report (pretty or JSON) and terminates the
 * process with an exit code that reflects the result.
 *
 * Recognised flags: `--json`, `--all`, `--help` / `-h`. Every argument that
 * does not start with `-` is treated as a target path; with no targets the
 * current directory is scanned.
 *
 * Exit codes: 2 = critical, 1 = warning, 0 = clean. When help is shown or no
 * files are found the function returns without calling process.exit.
 *
 * @returns {Promise<void>}
 */
export async function runVerify() {
  // Presumably argv[2] is the subcommand name, so its own arguments start at
  // index 3 — verify against the CLI dispatcher.
  const args = process.argv.slice(3);
  const targetPaths = args.filter((a) => !a.startsWith('-'));
  const jsonMode = args.includes('--json');
  const showAll = args.includes('--all');
  if (args.includes('--help') || args.includes('-h')) {
    process.stdout.write(`
corpus verify [paths...] [options]

Compute trust scores for every file in your codebase.
Supports: TypeScript, JavaScript, Python, Go, Rust, Java, Swift,
Ruby, PHP, C/C++, Shell, SQL, YAML, JSON, and more.

Options:
--json Output as JSON
--all Show all files (default: only files with findings)
--help Show this help

Examples:
corpus verify Verify current directory (auto-detects files)
corpus verify src/ Verify specific directory
corpus verify --json Machine-readable output for CI

`);
    return;
  }
  const paths = targetPaths.length > 0 ? targetPaths : ['.'];
  const start = Date.now();

  // Collect files. Explicitly named files are scanned as given; directories
  // are walked and filtered.
  const allFiles = [];
  for (const p of paths) {
    if (!existsSync(p)) {
      process.stderr.write(` Path not found: ${p}\n`);
      continue;
    }
    const s = statSync(p);
    if (s.isFile()) {
      allFiles.push(p);
    } else if (s.isDirectory()) {
      walkDir(p, allFiles);
    }
  }
  if (allFiles.length === 0) {
    process.stdout.write(dim('\n No scannable files found.\n'));
    process.stdout.write(dim(' Corpus scans: .ts .js .py .go .rs .java .swift .rb .php .sh .sql .json .yaml and more.\n'));
    process.stdout.write(dim(' Try: corpus verify .\n\n'));
    return;
  }

  // Scan; worst-scoring files come first in every report.
  const results = allFiles.map((f) => scanFile(f));
  results.sort((a, b) => a.score - b.score);
  const timeMs = Date.now() - start;
  const baseDir = resolveBaseDir(paths[0]);

  // Codebase score: line-weighted mean over files with at least 5 lines.
  const scored = results.filter((r) => r.lines >= 5);
  const totalLines = scored.reduce((s, r) => s + r.lines, 0);
  const codebaseScore = totalLines > 0
    ? Math.round(scored.reduce((s, r) => s + r.score * r.lines, 0) / totalLines)
    : 100;
  const critFiles = results.filter((r) => r.score < 50);
  const warnFiles = results.filter((r) => r.score >= 50 && r.score < 80);
  const cleanFiles = results.filter((r) => r.score >= 80);

  if (jsonMode) {
    process.stdout.write(JSON.stringify({
      codebaseTrustScore: codebaseScore,
      totalFiles: results.length,
      criticalFiles: critFiles.length,
      warningFiles: warnFiles.length,
      cleanFiles: cleanFiles.length,
      scanTimeMs: timeMs,
      files: results.map((r) => ({
        file: path.relative(baseDir, r.file),
        score: r.score,
        lines: r.lines,
        findings: r.findings,
      })),
    }, null, 2) + '\n');
    // NOTE(review): JSON mode derives the exit code from the aggregate score,
    // while pretty mode below derives it from per-file counts, so the two
    // modes can exit differently for the same codebase — confirm intended.
    process.exit(codebaseScore < 50 ? 2 : codebaseScore < 80 ? 1 : 0);
    // Only reached if process.exit has been stubbed; keeps the pretty report
    // from being appended to the JSON output.
    return;
  }

  // Pretty output
  process.stdout.write('\n');
  process.stdout.write(bold(` CORPUS VERIFY`) + dim(` ${allFiles.length} files ${(timeMs / 1000).toFixed(1)}s\n`));
  process.stdout.write(' ' + '\u2550'.repeat(50) + '\n\n');
  const sc = scoreColor(codebaseScore);
  process.stdout.write(` CODEBASE TRUST SCORE: ${sc(bold(`${codebaseScore}/100`))}\n\n`);
  const toShow = showAll ? results : results.filter((r) => r.findings.length > 0);
  for (const r of toShow) {
    const relPath = path.relative(baseDir, r.file);
    const fc = scoreColor(r.score);
    const label = scoreLabel(r.score);
    process.stdout.write(` ${relPath.padEnd(50)} ${fc(`${r.score}/100`)} ${fc(label)}\n`);
    for (const f of r.findings) {
      const sev = f.severity === 'CRITICAL' ? red('CRIT') : f.severity === 'WARNING' ? amber('WARN') : dim('INFO');
      const ai = f.isAiPattern ? amber(' [AI]') : '';
      process.stdout.write(` ${sev} Line ${String(f.line).padEnd(4)} ${f.message}${ai}\n`);
      process.stdout.write(` ${dim('FIX: ' + f.fix)}\n`);
    }
    if (r.findings.length > 0) {
      process.stdout.write('\n');
    }
  }
  process.stdout.write(' ' + '\u2500'.repeat(50) + '\n');
  const parts = [];
  if (critFiles.length > 0) {
    parts.push(red(`${critFiles.length} critical`));
  }
  if (warnFiles.length > 0) {
    parts.push(amber(`${warnFiles.length} warning`));
  }
  parts.push(green(`${cleanFiles.length} clean`));
  process.stdout.write(` Files: ${parts.join(', ')}\n\n`);
  if (critFiles.length > 0) {
    process.exit(2);
  }
  if (warnFiles.length > 0) {
    process.exit(1);
  }
  process.exit(0);
}

/**
 * Picks the directory that reported file paths are shown relative to.
 *
 * When the first target is a file, its parent directory is used; relativising
 * a file against its own path would produce an empty string and the report
 * would show a blank file name. Paths that cannot be stat'ed are returned
 * resolved but otherwise unchanged.
 *
 * @param {string} firstPath - First target path given to the command.
 * @returns {string} Absolute base directory.
 */
function resolveBaseDir(firstPath) {
  // path.resolve('.') already yields the current working directory.
  const absolute = path.resolve(firstPath);
  try {
    return statSync(absolute).isDirectory() ? absolute : path.dirname(absolute);
  } catch {
    return absolute;
  }
}
@@ -0,0 +1 @@
1
+ export declare function runWatch(): Promise<void>; // Type declaration only (no body visible here): takes no arguments and returns a Promise that resolves with no value.