pinata-security-cli 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26):
  1. package/dist/cli/index.js +1527 -369
  2. package/dist/cli/index.js.map +1 -1
  3. package/dist/index.d.ts +40 -1
  4. package/dist/index.js +10 -3
  5. package/dist/index.js.map +1 -1
  6. package/package.json +5 -1
  7. package/src/categories/definitions/data/data-race.yml +6 -2
  8. package/src/categories/definitions/input/boundary-testing.yml +7 -0
  9. package/src/categories/definitions/input/injection-fuzzing.yml +19 -0
  10. package/src/categories/definitions/input/null-undefined.yml +14 -0
  11. package/src/categories/definitions/network/connection-failure.yml +8 -2
  12. package/src/categories/definitions/resource/memory-leak.yml +7 -2
  13. package/src/categories/definitions/security/auth-failures.yml +8 -0
  14. package/src/categories/definitions/security/command-injection.yml +17 -0
  15. package/src/categories/definitions/security/csrf.yml +19 -0
  16. package/src/categories/definitions/security/data-exposure.yml +24 -0
  17. package/src/categories/definitions/security/dependency-risks.yml +6 -6
  18. package/src/categories/definitions/security/deserialization.yml +44 -0
  19. package/src/categories/definitions/security/file-upload.yml +39 -0
  20. package/src/categories/definitions/security/ldap-injection.yml +23 -0
  21. package/src/categories/definitions/security/path-traversal.yml +13 -0
  22. package/src/categories/definitions/security/prompt-injection.yml +14 -0
  23. package/src/categories/definitions/security/sql-injection.yml +30 -0
  24. package/src/categories/definitions/security/ssrf.yml +60 -0
  25. package/src/categories/definitions/security/xss.yml +36 -0
  26. package/src/categories/definitions/security/xxe.yml +32 -0
package/dist/cli/index.js CHANGED
@@ -1,15 +1,15 @@
1
1
  #!/usr/bin/env node
2
- import fs, { mkdir, writeFile, readFile, stat, readdir, mkdtemp, rm } from 'fs/promises';
3
- import path, { dirname, resolve, relative, join, basename, extname } from 'path';
2
+ import chalk6 from 'chalk';
3
+ import fs, { mkdir, writeFile, readFile, stat, readdir, unlink, mkdtemp, rm } from 'fs/promises';
4
+ import path, { dirname, resolve, relative, basename, extname, join } from 'path';
4
5
  import { existsSync, readFileSync, writeFileSync, chmodSync, mkdirSync } from 'fs';
5
6
  import { homedir, tmpdir } from 'os';
6
7
  import { z } from 'zod';
7
- import { spawn } from 'child_process';
8
+ import { execFile, spawn } from 'child_process';
8
9
  import { useState } from 'react';
9
10
  import { render, useApp, useInput, Box, Text } from 'ink';
10
11
  import Spinner from 'ink-spinner';
11
12
  import { jsx, jsxs } from 'react/jsx-runtime';
12
- import chalk6 from 'chalk';
13
13
  import { Command } from 'commander';
14
14
  import ora3 from 'ora';
15
15
  import YAML from 'yaml';
@@ -125,6 +125,359 @@ var init_result = __esm({
125
125
  "src/lib/result.ts"() {
126
126
  }
127
127
  });
128
var LOG_LEVELS, Logger, logger;
var init_logger = __esm({
  "src/lib/logger.ts"() {
    // Numeric rank of each level. A message is written when its rank is
    // greater than or equal to the rank of the logger's current level.
    LOG_LEVELS = {
      debug: 0,
      info: 1,
      warn: 2,
      error: 3,
      silent: 4
    };
    Logger = class _Logger {
      /** Level set explicitly on this logger; undefined means "inherit from parent". */
      #ownLevel;
      /** Logger this instance was derived from via child(), or null for a root logger. */
      #parent = null;
      prefix = "";
      /**
       * Effective level: the explicitly set level if any, otherwise the
       * parent's effective level, otherwise "info".
       *
       * Resolving this lazily (instead of copying the level in child()) lets
       * child loggers created at module-initialization time observe a later
       * configure() call on the logger they were derived from.
       */
      get level() {
        return this.#ownLevel ?? this.#parent?.level ?? "info";
      }
      /**
       * Set this logger's own level. Values that are not keys of LOG_LEVELS
       * are ignored: accepting them would make every shouldLog() comparison
       * run against `undefined` and silently drop all output.
       */
      set level(value) {
        if (Object.hasOwn(LOG_LEVELS, value)) {
          this.#ownLevel = value;
        }
      }
      /**
       * Configure the logger
       *
       * @param config2 Object with optional `level` and `prefix`; fields left
       *   undefined keep their current value.
       */
      configure(config2) {
        if (config2.level !== void 0) {
          this.level = config2.level;
        }
        if (config2.prefix !== void 0) {
          this.prefix = config2.prefix;
        }
      }
      /**
       * Check if a log level should be output
       */
      shouldLog(level) {
        return LOG_LEVELS[level] >= LOG_LEVELS[this.level];
      }
      /**
       * Format a message with optional prefix
       */
      format(message) {
        return this.prefix ? `${this.prefix} ${message}` : message;
      }
      /**
       * Debug level logging (gray)
       */
      debug(message, ...args) {
        if (this.shouldLog("debug")) {
          console.debug(chalk6.gray(this.format(message)), ...args);
        }
      }
      /**
       * Info level logging (default color)
       */
      info(message, ...args) {
        if (this.shouldLog("info")) {
          console.info(this.format(message), ...args);
        }
      }
      /**
       * Warning level logging (yellow)
       */
      warn(message, ...args) {
        if (this.shouldLog("warn")) {
          console.warn(chalk6.yellow(this.format(message)), ...args);
        }
      }
      /**
       * Error level logging (red)
       */
      error(message, ...args) {
        if (this.shouldLog("error")) {
          console.error(chalk6.red(this.format(message)), ...args);
        }
      }
      /**
       * Success message (green), emitted at the "info" threshold
       */
      success(message, ...args) {
        if (this.shouldLog("info")) {
          console.info(chalk6.green(this.format(message)), ...args);
        }
      }
      /**
       * Create a child logger with a prefix
       *
       * The child's prefix is this logger's prefix followed by `prefix`
       * (captured at creation time). Its level follows this logger until a
       * level is set on the child itself.
       */
      child(prefix) {
        const child = new _Logger();
        child.#parent = this;
        child.prefix = this.prefix ? `${this.prefix} ${prefix}` : prefix;
        return child;
      }
    };
    logger = new Logger();
  }
});
217
/**
 * Scan a project's source files with regex heuristics and collect HTTP
 * endpoints, database operations, auth checks and unguarded state mutations,
 * plus findings derived from them (missing auth, missing rate limiting,
 * potential race conditions).
 *
 * @param {string} projectRoot Directory to scan.
 * @param {{ maxFiles?: number }} [config2] Optional settings; `maxFiles`
 *   defaults to 200.
 * @returns {Promise<{endpoints: object[], dbOperations: object[], authChecks: object[], stateMutations: object[], findings: object[]}>}
 */
async function discoverAttackSurface(projectRoot, config2 = {}) {
  const maxFiles = config2.maxFiles ?? 200;
  const endpoints = [];
  const dbOperations = [];
  const authChecks = [];
  const stateMutations = [];
  const findings = [];

  // Hint that a line uses bound parameters ($1, ?, :name, params).
  // Non-global, so .test() carries no lastIndex state between calls.
  const parameterizedHint = /\$\d|\?\s*,|\?\s*\)|\bparams\b|:\w+/;

  // Fresh global copy of `pattern` that keeps its other flags. matchAll
  // requires the "g" flag, and preserving e.g. "i" keeps a pattern's matching
  // rules the same here as when it is used with .test() elsewhere.
  const toGlobal = (pattern) => new RegExp(
    pattern.source,
    pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`
  );

  // Returns a function mapping a character index to its 1-based line number.
  // Line-start offsets are computed once per file and binary-searched, rather
  // than slicing and splitting the whole prefix for every match.
  const createLineLocator = (content) => {
    const lineOffsets = [0];
    for (let i = content.indexOf("\n"); i !== -1; i = content.indexOf("\n", i + 1)) {
      lineOffsets.push(i + 1);
    }
    return (index) => {
      // Count the line starts that are <= index; that count is the line number.
      let lo = 0;
      let hi = lineOffsets.length;
      while (lo < hi) {
        const mid = (lo + hi) >>> 1;
        if (lineOffsets[mid] <= index) {
          lo = mid + 1;
        } else {
          hi = mid;
        }
      }
      return lo;
    };
  };

  // Normalize a route match into { method, path }. The route patterns do not
  // share one capture layout, so the groups cannot be read positionally:
  //  - nextjs: group 1 is the optional "async " keyword, group 2 the HTTP verb;
  //    the pattern captures no path.
  //  - single-group patterns (flask, django): group 1 is the path; no verb.
  //  - everything else: group 1 is the verb, group 2 the path.
  const routeFromMatch = (framework, match) => {
    if (framework === "nextjs") {
      return { method: match[2] ?? "UNKNOWN", path: "/unknown" };
    }
    if (match[2] === void 0) {
      return { method: "UNKNOWN", path: match[1] ?? "/unknown" };
    }
    return { method: (match[1] ?? "unknown").toUpperCase(), path: match[2] };
  };

  const sourceFiles = await findSourceFiles(projectRoot, maxFiles);
  log.info(`Scanning ${sourceFiles.length} files for attack surface`);
  const hasRateLimiting = await projectHasRateLimiting(projectRoot);

  for (const filePath of sourceFiles) {
    try {
      const content = await readFile(filePath, "utf-8");
      const lines = content.split("\n");
      const relPath = relative(projectRoot, filePath);
      const lineAt = createLineLocator(content);

      // Several route patterns can match the same declaration (a pattern that
      // accepts `.get("/x"` also matches inside `@app.get("/x")`). Record each
      // method/path/line combination once per file.
      const seenRoutes = new Set();
      for (const [framework, pattern] of Object.entries(ROUTE_PATTERNS)) {
        for (const match of content.matchAll(toGlobal(pattern))) {
          const lineStart = lineAt(match.index);
          const { method, path: path2 } = routeFromMatch(framework, match);
          const routeKey = `${method} ${path2}:${lineStart}`;
          if (seenRoutes.has(routeKey)) continue;
          seenRoutes.add(routeKey);

          // Look for auth middleware in a window of lines around the route.
          const contextStart = Math.max(0, lineStart - 10);
          const contextEnd = Math.min(lines.length, lineStart + 5);
          const context = lines.slice(contextStart, contextEnd).join("\n");
          const hasAuth = AUTH_MIDDLEWARE_PATTERNS.some((p) => p.test(context));

          endpoints.push({
            method,
            path: path2,
            filePath: relPath,
            lineStart,
            hasAuth,
            middlewareChain: []
          });
          if (!hasAuth && ["POST", "PUT", "PATCH", "DELETE"].includes(method)) {
            findings.push({
              type: "missing-auth",
              description: `${method} ${path2} has no authentication middleware`,
              filePath: relPath,
              lineStart,
              severity: "critical",
              confidence: "medium"
            });
          }
        }
      }

      for (const pattern of DB_OPERATION_PATTERNS) {
        for (const match of content.matchAll(toGlobal(pattern))) {
          const lineStart = lineAt(match.index);
          const lineContent = lines[lineStart - 1] ?? "";
          dbOperations.push({
            operation: match[0].slice(0, 50),
            filePath: relPath,
            lineStart,
            isParameterized: parameterizedHint.test(lineContent)
          });
        }
      }

      // State mutations are only recorded for files that show no concurrency
      // guard anywhere in their content.
      const hasConcurrencyGuards = CONCURRENCY_GUARD_PATTERNS.some((p) => p.test(content));
      if (!hasConcurrencyGuards) {
        const writeOps = /\.(update|save|create|insert|upsert|increment|decrement)\s*\(/g;
        for (const match of content.matchAll(writeOps)) {
          stateMutations.push({
            filePath: relPath,
            lineStart: lineAt(match.index),
            hasLock: false,
            hasTransaction: false
          });
        }
      }

      for (const pattern of AUTH_MIDDLEWARE_PATTERNS) {
        for (const match of content.matchAll(toGlobal(pattern))) {
          authChecks.push({
            filePath: relPath,
            lineStart: lineAt(match.index),
            type: match[0]
          });
        }
      }
    } catch (error) {
      // Best effort: a file that cannot be read or scanned is skipped, but
      // the reason is kept available at debug level instead of being dropped.
      log.debug(`Skipping ${filePath}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  if (!hasRateLimiting && endpoints.length > 0) {
    const authEndpoints = endpoints.filter(
      (e) => e.path.includes("login") || e.path.includes("auth") || e.path.includes("signin")
    );
    for (const ep of authEndpoints) {
      findings.push({
        type: "missing-rate-limit",
        description: `Auth endpoint ${ep.method} ${ep.path} has no rate limiting`,
        filePath: ep.filePath,
        lineStart: ep.lineStart,
        severity: "high",
        confidence: "medium"
      });
    }
  }

  // Group unguarded mutations per file; more than three in one file is
  // reported as a single low-confidence race-condition finding.
  const byFile = new Map();
  for (const m of stateMutations) {
    const list = byFile.get(m.filePath) ?? [];
    list.push(m);
    byFile.set(m.filePath, list);
  }
  for (const [filePath, mutations] of byFile) {
    if (mutations.length > 3) {
      const first = mutations[0];
      findings.push({
        type: "race-condition",
        description: `${mutations.length} state mutations in this file without concurrency guards (transactions, locks, idempotency keys)`,
        filePath,
        lineStart: first?.lineStart ?? 0,
        severity: "high",
        confidence: "low"
      });
    }
  }

  log.info(
    `Discovery complete: ${endpoints.length} endpoints, ${dbOperations.length} DB ops, ${findings.length} findings`
  );
  return { endpoints, dbOperations, authChecks, stateMutations, findings };
}
346
/**
 * Convert discovery findings into gap records.
 *
 * @param {Array<{type: string, description: string, filePath: string, lineStart: number, severity: string, confidence: string}>} findings
 *   Findings whose `filePath` is relative to `projectRoot`.
 * @param {string} projectRoot Directory that finding paths are resolved against.
 * @returns {object[]} One gap per finding, in the same order.
 */
function findingsToGaps(findings, projectRoot) {
  const securityTypes = ["missing-auth", "missing-rate-limit"];
  return findings.map((finding, index) => {
    const { type, severity, lineStart } = finding;
    const isCritical = severity === "critical";

    // Score ladder: critical -> 12, high -> 9, anything else -> 4.
    let priorityScore = 4;
    if (isCritical) {
      priorityScore = 12;
    } else if (severity === "high") {
      priorityScore = 9;
    }

    return {
      categoryId: `discovery-${type}`,
      categoryName: formatFindingType(type),
      domain: securityTypes.includes(type) ? "security" : "concurrency",
      level: "integration",
      priority: isCritical ? "P0" : "P1",
      severity,
      confidence: finding.confidence,
      filePath: resolve(projectRoot, finding.filePath),
      // Findings carry a single line, so the gap starts and ends on it.
      lineStart,
      lineEnd: lineStart,
      columnStart: 0,
      columnEnd: 0,
      codeSnippet: finding.description,
      patternId: `discovery-${type}-${index}`,
      patternType: "semantic",
      priorityScore,
      status: "pending"
    };
  });
}
367
/**
 * Map a finding type identifier to its human-readable label.
 *
 * @param {string} type Finding type, e.g. "missing-auth".
 * @returns {string} The label for a known type; otherwise `type` unchanged.
 */
function formatFindingType(type) {
  const labels = {
    "missing-auth": "Missing Authentication",
    "idor": "Insecure Direct Object Reference",
    "race-condition": "Potential Race Condition",
    "unvalidated-input": "Unvalidated Input",
    "missing-rate-limit": "Missing Rate Limiting"
  };
  // Own-property check: a plain `labels[type]` lookup would also resolve
  // inherited members such as "constructor" or "toString" and return a
  // function instead of falling back to the type string.
  return Object.hasOwn(labels, type) ? labels[type] : type;
}
377
/**
 * Recursively collect source files under `projectRoot`.
 *
 * Only files with a recognized source extension are returned. Directories
 * named in the exclusion list, and any directory whose name starts with ".",
 * are not entered. Traversal stops below depth 10 and as soon as `maxFiles`
 * files have been collected.
 *
 * Directory entries are visited in name order so that, when the `maxFiles`
 * cap is hit, the same subset is selected regardless of the order in which
 * the file system happens to list entries.
 *
 * @param {string} projectRoot Directory to start from.
 * @param {number} maxFiles Maximum number of paths to return.
 * @returns {Promise<string[]>} Absolute file paths.
 */
async function findSourceFiles(projectRoot, maxFiles) {
  const files = [];
  const extensions = new Set([".ts", ".tsx", ".js", ".jsx", ".py", ".go"]);
  const excludeDirs = new Set([
    "node_modules",
    ".git",
    "dist",
    "build",
    "out",
    ".next",
    "__pycache__",
    "venv",
    ".venv",
    "coverage",
    ".nyc_output",
    "tests",
    "test",
    "__tests__",
    "spec",
    "fixtures",
    "corpus",
    "benchmarks",
    "scripts"
  ]);
  // Code-unit ordering rather than localeCompare, so the order does not
  // depend on the host locale.
  const byName = (a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0);

  async function walk(dir, depth) {
    if (depth > 10 || files.length >= maxFiles) return;
    let entries;
    try {
      entries = await readdir(dir, { withFileTypes: true });
    } catch {
      // Best effort: a directory that cannot be listed is skipped so the
      // rest of the tree can still be scanned.
      return;
    }
    entries.sort(byName);
    for (const entry of entries) {
      if (files.length >= maxFiles) return;
      if (entry.isDirectory()) {
        if (!excludeDirs.has(entry.name) && !entry.name.startsWith(".")) {
          await walk(resolve(dir, entry.name), depth + 1);
        }
      } else if (entry.isFile() && extensions.has(extname(entry.name).toLowerCase())) {
        files.push(resolve(dir, entry.name));
      }
    }
  }

  await walk(projectRoot, 0);
  return files;
}
421
/**
 * Report whether the project's package.json lists a dependency whose name
 * matches one of RATE_LIMIT_PATTERNS.
 *
 * @param {string} projectRoot Directory expected to contain package.json.
 * @returns {Promise<boolean>} true when a matching dependency or
 *   devDependency name is found; false when there is none, when package.json
 *   is absent, or when it cannot be read or parsed.
 */
async function projectHasRateLimiting(projectRoot) {
  const manifestPath = resolve(projectRoot, "package.json");
  if (!existsSync(manifestPath)) {
    return false;
  }
  try {
    const manifestText = await readFile(manifestPath, "utf-8");
    const manifest = JSON.parse(manifestText);
    const dependencyNames = Object.keys({
      ...manifest["dependencies"],
      ...manifest["devDependencies"]
    });
    for (const pattern of RATE_LIMIT_PATTERNS) {
      for (const name of dependencyNames) {
        if (pattern.test(name)) {
          return true;
        }
      }
    }
    return false;
  } catch {
    // Unreadable or malformed manifest: treated as "no rate limiting found".
    return false;
  }
}
437
var ROUTE_PATTERNS, AUTH_MIDDLEWARE_PATTERNS, DB_OPERATION_PATTERNS, RATE_LIMIT_PATTERNS, CONCURRENCY_GUARD_PATTERNS, log;
var init_attack_surface = __esm({
  "src/core/discovery/attack-surface.ts"() {
    // The logger module must be initialized before a child logger is derived.
    init_logger();
    log = logger.child("AttackSurface");

    // Dependency-name patterns, tested against package.json dependency keys.
    RATE_LIMIT_PATTERNS = [
      /rate[-_]?limit|rateLimiter|throttle|RateLimiterMemory|slowDown/i
    ];

    // Any match in a file's content counts as a concurrency guard.
    CONCURRENCY_GUARD_PATTERNS = [
      /\b(transaction|BEGIN|COMMIT|ROLLBACK|LOCK|FOR UPDATE|serializable)\b/i,
      /\b(mutex|semaphore|lock|atomic|synchronized)\b/i,
      /\bidempotency[_-]?key\b/i
    ];

    // Database access: method-call shapes first, then SQL keywords.
    DB_OPERATION_PATTERNS = [
      /\.(query|execute|run|all|get|find|findOne|findMany|create|update|delete|insert|upsert|remove)\s*\(/,
      /\b(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP)\b/
    ];

    // Identifiers and call shapes treated as authentication middleware.
    AUTH_MIDDLEWARE_PATTERNS = [
      /\b(requireAuth|isAuthenticated|authenticate|authMiddleware|protect|ensureLoggedIn|passport\.authenticate|verifyToken|requireLogin)\b/,
      /\b(auth|authentication|authorization)\s*\(/,
      /\b(jwt|bearer|token)\s*\(/i
    ];

    // Route declarations keyed by framework. Capture layout differs:
    //   express / fastify / fastapi: group 1 = method, group 2 = path
    //   nextjs: group 1 = optional "async ", group 2 = exported HTTP verb
    //   flask / django: group 1 = path
    ROUTE_PATTERNS = {
      express: /\.(get|post|put|patch|delete|all|use)\s*\(\s*["'`]([^"'`]+)["'`]/g,
      fastify: /\.(get|post|put|patch|delete)\s*\(\s*["'`]([^"'`]+)["'`]/g,
      nextjs: /export\s+(async\s+)?function\s+(GET|POST|PUT|PATCH|DELETE)\b/g,
      flask: /@\w+\.route\s*\(\s*["']([^"']+)["']/g,
      django: /path\s*\(\s*["']([^"']+)["']/g,
      fastapi: /@\w+\.(get|post|put|patch|delete)\s*\(\s*["']([^"']+)["']/g
    };
  }
});
469
+
470
// src/core/discovery/index.ts
// Export table for the discovery module: each key is registered with a
// getter function that returns the current binding of the same name.
var discovery_exports = {};
__export(discovery_exports, {
  discoverAttackSurface: function() {
    return discoverAttackSurface;
  },
  findingsToGaps: function() {
    return findingsToGaps;
  }
});
// Initializer for the discovery module; it runs the attack-surface
// module's initializer.
var init_discovery = __esm({
  "src/core/discovery/index.ts"() {
    init_attack_surface();
  }
});
128
481
  function getCachePath(projectRoot) {
129
482
  return resolve(projectRoot, CACHE_DIR, CACHE_FILE);
130
483
  }
@@ -845,15 +1198,19 @@ var init_ai_verifier = __esm({
845
1198
  // Near sanitization
846
1199
  ]
847
1200
  };
848
- BATCH_PROMPT = `You are a security code reviewer. Analyze these potential vulnerabilities and determine which are real issues vs false positives.
1201
+ BATCH_PROMPT = `You are a security code reviewer. For each item, determine one of three verdicts:
1202
+
1203
+ 1. VULNERABLE: User-controlled data reaches a dangerous sink without adequate sanitization.
1204
+ You MUST describe a concrete exploit scenario step-by-step.
849
1205
 
850
- For each item, consider:
851
- - Is user input actually reaching this code?
852
- - Is there sanitization, validation, or encoding nearby?
853
- - Is this test code, example code, or production code?
854
- - Is there context that makes this safe?
1206
+ 2. FALSE_POSITIVE: The code is safe. You MUST cite the specific line, function, or mechanism
1207
+ that prevents exploitation (e.g., "parameterized query at line 42", "DOMPurify.sanitize
1208
+ call wraps the input").
855
1209
 
856
- Be rigorous. Most pattern matches are false positives.
1210
+ 3. NEEDS_REVIEW: You cannot determine with high confidence. Explain what information is missing.
1211
+
1212
+ Do not assume code is safe without identifying specific defenses.
1213
+ Do not assume code is vulnerable without tracing the data flow from source to sink.
857
1214
 
858
1215
  ITEMS TO ANALYZE:
859
1216
  {{items}}
@@ -862,9 +1219,9 @@ Respond with a JSON array. Each object MUST have these exact fields:
862
1219
  [
863
1220
  {
864
1221
  "id": "1",
865
- "isVulnerable": true/false,
866
- "confidence": "high"/"medium"/"low",
867
- "reasoning": "brief explanation"
1222
+ "verdict": "VULNERABLE" | "FALSE_POSITIVE" | "NEEDS_REVIEW",
1223
+ "confidence": "high" | "medium" | "low",
1224
+ "reasoning": "concrete evidence for your determination"
868
1225
  },
869
1226
  ...
870
1227
  ]
@@ -891,32 +1248,33 @@ FLAGGED LINE: {{flaggedLine}}
891
1248
  this.concurrency = config2.concurrency ?? 3;
892
1249
  }
893
1250
  /**
894
- * Verify multiple gaps efficiently using filtering, batching, and parallelism.
1251
+ * Prioritize gaps using AI triage. This is a SOFT prioritizer, not a gate.
1252
+ * No findings are dropped. The three-verdict system ranks findings for the
1253
+ * test forge queue: VULNERABLE first, NEEDS_REVIEW second, FALSE_POSITIVE last.
895
1254
  *
896
- * Flow:
897
- * 1. Pre-filter obvious false positives (test files, etc.)
898
- * 2. Group remaining gaps into batches of 10
899
- * 3. Process 3 batches in parallel
900
- * 4. Return verified gaps and dismissed with reasons
1255
+ * Test execution (not AI opinion) is the deterministic proof of vulnerability.
901
1256
  */
902
1257
  async verifyAll(gaps, getFileContent) {
903
1258
  const verified = [];
904
1259
  const dismissed = [];
1260
+ const needsReview = [];
905
1261
  const { toVerify, preFiltered } = this.preFilter(gaps);
906
1262
  dismissed.push(...preFiltered);
907
1263
  if (toVerify.length === 0) {
908
1264
  return {
909
1265
  verified: [],
910
1266
  dismissed,
1267
+ needsReview: [],
911
1268
  stats: {
912
1269
  total: gaps.length,
913
1270
  preFiltered: preFiltered.length,
914
1271
  aiDismissed: 0,
915
- aiVerified: 0
1272
+ aiVerified: 0,
1273
+ aiNeedsReview: 0
916
1274
  }
917
1275
  };
918
1276
  }
919
- console.log(`Pre-filtered ${preFiltered.length} gaps. Verifying ${toVerify.length} with AI...`);
1277
+ console.log(`Pre-filtered ${preFiltered.length} gaps. Prioritizing ${toVerify.length} with AI...`);
920
1278
  const fileContents = /* @__PURE__ */ new Map();
921
1279
  const uniquePaths = [...new Set(toVerify.map((g) => g.filePath))];
922
1280
  await Promise.all(
@@ -933,28 +1291,43 @@ FLAGGED LINE: {{flaggedLine}}
933
1291
  const results = await this.processParallel(batches, toVerify);
934
1292
  let aiVerified = 0;
935
1293
  let aiDismissed = 0;
1294
+ let aiNeedsReview = 0;
936
1295
  for (const gap of toVerify) {
937
1296
  const gapId = `${gap.filePath}:${gap.lineStart}`;
938
1297
  const result = results.get(gapId);
939
- if (!result || result.isVulnerable) {
1298
+ if (!result) {
1299
+ needsReview.push(gap);
1300
+ aiNeedsReview++;
1301
+ } else if (result.verdict === "VULNERABLE") {
940
1302
  verified.push(gap);
941
1303
  aiVerified++;
942
- } else {
943
- dismissed.push({
944
- gap,
945
- reason: result.reasoning
946
- });
1304
+ } else if (result.verdict === "NEEDS_REVIEW") {
1305
+ needsReview.push(gap);
1306
+ aiNeedsReview++;
1307
+ } else if (result.verdict === "FALSE_POSITIVE" && result.confidence === "high") {
1308
+ dismissed.push({ gap, reason: result.reasoning });
947
1309
  aiDismissed++;
1310
+ } else {
1311
+ needsReview.push(gap);
1312
+ aiNeedsReview++;
948
1313
  }
949
1314
  }
1315
+ const totalAnalyzed = aiVerified + aiDismissed + aiNeedsReview;
1316
+ if (totalAnalyzed > 0 && aiDismissed / totalAnalyzed > 0.8) {
1317
+ console.warn(
1318
+ `WARNING: ${Math.round(aiDismissed / totalAnalyzed * 100)}% of findings dismissed. Scanner may be miscalibrated -- consider reviewing dismissed findings.`
1319
+ );
1320
+ }
950
1321
  return {
951
1322
  verified,
952
1323
  dismissed,
1324
+ needsReview,
953
1325
  stats: {
954
1326
  total: gaps.length,
955
1327
  preFiltered: preFiltered.length,
956
1328
  aiDismissed,
957
- aiVerified
1329
+ aiVerified,
1330
+ aiNeedsReview
958
1331
  }
959
1332
  };
960
1333
  }
@@ -1111,7 +1484,7 @@ FLAGGED LINE: {{flaggedLine}}
1111
1484
  "anthropic-version": "2023-06-01"
1112
1485
  },
1113
1486
  body: JSON.stringify({
1114
- model: this.config.model ?? "claude-sonnet-4-20250514",
1487
+ model: this.config.model ?? "claude-opus-4-8",
1115
1488
  max_tokens: 4096,
1116
1489
  // Larger for batch responses
1117
1490
  messages: [{ role: "user", content: prompt }]
@@ -1169,12 +1542,34 @@ FLAGGED LINE: {{flaggedLine}}
1169
1542
  return [];
1170
1543
  }
1171
1544
  const parsed = JSON.parse(jsonMatch[0]);
1172
- return parsed.map((item) => ({
1173
- id: String(item.id),
1174
- isVulnerable: Boolean(item.isVulnerable),
1175
- confidence: item.confidence ?? "medium",
1176
- reasoning: item.reasoning ?? "No reasoning provided"
1177
- }));
1545
+ return parsed.map((item) => {
1546
+ const rawVerdict = item["verdict"];
1547
+ let verdict;
1548
+ let isVulnerable;
1549
+ if (rawVerdict === "VULNERABLE") {
1550
+ verdict = "VULNERABLE";
1551
+ isVulnerable = true;
1552
+ } else if (rawVerdict === "NEEDS_REVIEW") {
1553
+ verdict = "NEEDS_REVIEW";
1554
+ isVulnerable = false;
1555
+ } else if (rawVerdict === "FALSE_POSITIVE") {
1556
+ verdict = "FALSE_POSITIVE";
1557
+ isVulnerable = false;
1558
+ } else if (rawVerdict) {
1559
+ verdict = "NEEDS_REVIEW";
1560
+ isVulnerable = false;
1561
+ } else {
1562
+ isVulnerable = Boolean(item["isVulnerable"]);
1563
+ verdict = isVulnerable ? "VULNERABLE" : "FALSE_POSITIVE";
1564
+ }
1565
+ return {
1566
+ id: String(item["id"]),
1567
+ isVulnerable,
1568
+ verdict,
1569
+ confidence: item["confidence"] ?? "medium",
1570
+ reasoning: item["reasoning"] ?? "No reasoning provided"
1571
+ };
1572
+ });
1178
1573
  } catch (error) {
1179
1574
  console.error(`Failed to parse batch response: ${error instanceof Error ? error.message : String(error)}`);
1180
1575
  return [];
@@ -1458,7 +1853,7 @@ export default defineConfig({
1458
1853
  * Execute a command and capture output
1459
1854
  */
1460
1855
  exec(command, args, options = {}) {
1461
- return new Promise((resolve9) => {
1856
+ return new Promise((resolve10) => {
1462
1857
  let stdout = "";
1463
1858
  let stderr = "";
1464
1859
  let timedOut = false;
@@ -1478,7 +1873,7 @@ export default defineConfig({
1478
1873
  }, timeout);
1479
1874
  proc.on("close", (code) => {
1480
1875
  clearTimeout(timer);
1481
- resolve9({
1876
+ resolve10({
1482
1877
  stdout,
1483
1878
  stderr,
1484
1879
  exitCode: code ?? 1,
@@ -1487,7 +1882,7 @@ export default defineConfig({
1487
1882
  });
1488
1883
  proc.on("error", (err2) => {
1489
1884
  clearTimeout(timer);
1490
- resolve9({
1885
+ resolve10({
1491
1886
  stdout,
1492
1887
  stderr: stderr + "\n" + err2.message,
1493
1888
  exitCode: 1,
@@ -3436,6 +3831,719 @@ var init_execution = __esm({
3436
3831
  init_chains();
3437
3832
  }
3438
3833
  });
3834
+
3835
+ // src/testgen/generator.ts
3836
/**
 * Build the user prompt asking the model for a complete, runnable test file
 * that targets a single detected gap.
 *
 * Optional sections (function name, web framework, database, existing test
 * sample) are emitted only when the matching `ctx` field is truthy. The
 * existing test sample is capped at 1500 characters.
 *
 * @param {object} ctx - Generation context; reads `gap`, `testFramework.name`,
 *   `functionBody`, `functionName`, `imports`, `language`, `webFramework`,
 *   `dbType` and `existingTestSample`.
 * @returns {string} Newline-joined prompt text.
 */
function buildGenerationPrompt(ctx) {
  const { gap, testFramework } = ctx;
  const fence = "```";
  const optional = (condition, ...lines) => (condition ? lines : []);
  const sections = [
    `Generate a complete, runnable ${testFramework.name} test file for this security vulnerability.`,
    "",
    "## Vulnerability",
    `Type: ${gap.categoryId}`,
    `Severity: ${gap.severity}`,
    `File: ${gap.filePath}:${gap.lineStart}`,
    `Pattern: ${gap.patternId}`,
    "",
    "## Vulnerable Code",
    fence,
    ctx.functionBody,
    fence,
    "",
    ...optional(ctx.functionName, `Function name: ${ctx.functionName}`),
    "## File Imports",
    fence,
    ctx.imports.join("\n"),
    fence,
    "",
    "## Context",
    `Language: ${ctx.language}`,
    `Test framework: ${testFramework.name}`,
    ...optional(ctx.webFramework, `Web framework: ${ctx.webFramework}`),
    ...optional(ctx.dbType, `Database: ${ctx.dbType}`),
    "",
    // Only the first 1500 characters of the sample are included.
    ...(ctx.existingTestSample
      ? ["## Existing Test Style (match this style)", fence, ctx.existingTestSample.slice(0, 1500), fence, ""]
      : []),
    "## Requirements",
    "1. Output ONLY the complete test file. No explanations, no markdown fences.",
    "2. Use real imports that resolve in this project.",
    "3. The test MUST FAIL when run against the current vulnerable code.",
    "4. Include at least 5 attack payloads specific to this vulnerability type.",
    "5. Include at least one boundary/edge case (empty string, null, very long input, unicode).",
    "6. If testing an HTTP endpoint, use supertest or direct function calls.",
    "7. Test the specific vulnerable code path, not a generic function.",
    "8. Each test should have a clear assertion that proves the vulnerability exists or is mitigated."
  ];
  return sections.join("\n");
}
3883
/**
 * Build the user prompt asking the model for a property-based test
 * (fast-check or hypothesis) that expresses a security invariant for one gap.
 *
 * When the gap's category has an entry in the hint table, a seventh
 * requirement carrying that invariant hint is appended.
 *
 * @param {object} ctx - Generation context; reads `gap.categoryId`,
 *   `gap.filePath`, `gap.lineStart`, `functionBody` and `testFramework.name`.
 * @returns {string} Newline-joined prompt text.
 */
function buildPropertyPrompt(ctx) {
  const parts = [];
  parts.push(`Generate a property-based test using fast-check (TypeScript) or hypothesis (Python) for this security vulnerability.`);
  parts.push("");
  parts.push("## Vulnerability");
  parts.push(`Type: ${ctx.gap.categoryId}`);
  parts.push(`File: ${ctx.gap.filePath}:${ctx.gap.lineStart}`);
  parts.push("");
  parts.push("## Vulnerable Code");
  parts.push("```");
  parts.push(ctx.functionBody);
  parts.push("```");
  parts.push("");
  parts.push("## Requirements");
  parts.push("1. Output ONLY the complete test file. No explanations.");
  parts.push("2. Express a security INVARIANT as a property.");
  parts.push("3. The property should hold for ALL inputs, not just specific payloads.");
  parts.push("4. Use fast-check for TypeScript/JavaScript or hypothesis for Python.");
  parts.push("5. Example invariant: 'for all strings s, the output of sanitize(s) never contains <script>'");
  parts.push(`6. Test framework: ${ctx.testFramework.name}`);
  const invariantHints = {
    "sql-injection": "user input should never appear unescaped in the SQL query string",
    "xss": "user input should never appear as raw HTML in the output",
    "command-injection": "user input should never be passed to a shell command unescaped",
    "path-traversal": "resolved file path should always stay within the allowed directory",
    "ssrf": "user-supplied URL should never resolve to a private/internal IP",
    "xxe": "XML parsing should never resolve external entities",
    "deserialization": "deserialized objects should only be of expected types",
    "hardcoded-secrets": "no string matching secret patterns should exist in source",
    "race-condition": "concurrent operations on the same resource must produce the same result as serial execution",
    "auth-failures": "unauthenticated requests to protected endpoints must return 401 or 403",
    "rate-limiting": "requests exceeding the rate limit from a single source must be rejected",
    "csrf": "state-changing requests without a valid CSRF token must be rejected",
    "data-validation": "for all inputs, output must conform to the declared schema",
    "injection-fuzzing": "no input string should cause uncontrolled code execution or template evaluation",
    "null-handling": "null and undefined inputs should never cause unhandled exceptions",
    "data-exposure": "API responses should never contain fields not in the response schema",
    "discovery-missing-auth": "unauthenticated requests to state-changing endpoints must be rejected",
    "discovery-race-condition": "concurrent identical requests must not create duplicate state",
    "discovery-missing-rate-limit": "rapid sequential requests to auth endpoints must be throttled",
    "discovery-idor": "requests referencing another user's resource must return 403"
  };
  // Own-property check: a plain-object lookup would otherwise return inherited
  // members (e.g. `constructor`, `toString`, `__proto__`) for category ids that
  // happen to match Object.prototype keys, and those would leak into the prompt.
  const categoryId = ctx.gap.categoryId;
  const hint = Object.hasOwn(invariantHints, categoryId) ? invariantHints[categoryId] : void 0;
  if (hint) {
    parts.push(`7. Invariant hint: "${hint}"`);
  }
  return parts.join("\n");
}
3931
/**
 * Ask the model for a unit-level security test for one gap and wrap the
 * reply in a generated-test record.
 *
 * @param {object} ctx - Generation context; reads `suggestedTestPath`,
 *   `functionName` and `gap` here, and is passed to `buildGenerationPrompt`.
 * @param {(prompt: string, systemPrompt: string) => Promise<string>} callAI
 *   Callback that sends the prompt pair to the model and resolves to its reply.
 * @returns {Promise<object>} Record with `filePath`, `content` (reply with
 *   markdown fences stripped), `categoryId`, `description`, `isPropertyBased: false`.
 */
async function generateTest(ctx, callAI) {
  const systemPrompt =
    "You are a senior security engineer writing adversarial tests." +
    " " +
    "You write tests that BREAK code, not tests that pass." +
    " " +
    "Your tests must be complete, runnable files with real imports." +
    " " +
    "Output ONLY code. No markdown fences. No explanations." +
    " " +
    "The test must FAIL against vulnerable code and PASS after a fix.";
  const reply = await callAI(buildGenerationPrompt(ctx), systemPrompt);
  const { gap } = ctx;
  const target = ctx.functionName ?? "unknown function";
  return {
    filePath: ctx.suggestedTestPath,
    content: stripMarkdownFences(reply),
    categoryId: gap.categoryId,
    description: `Security test for ${gap.categoryId} in ${target} at ${gap.filePath}:${gap.lineStart}`,
    isPropertyBased: false
  };
}
3950
/**
 * Ask the model for a property-based security test for one gap and wrap the
 * reply in a generated-test record.
 *
 * The output path is derived from `ctx.suggestedTestPath`:
 *  - `foo.test.(ts|js|py)` becomes `foo.prop.<ext>`;
 *  - any other path has its final extension replaced by `.prop.<ext>`, so the
 *    property test never shares a path with the unit test written to
 *    `suggestedTestPath` (previously such paths were returned unchanged).
 * `<ext>` is `.py` when `ctx.language === "python"`, otherwise `.ts`.
 *
 * @param {object} ctx - Generation context; reads `language`,
 *   `suggestedTestPath` and `gap.categoryId`, and is passed to `buildPropertyPrompt`.
 * @param {(prompt: string, systemPrompt: string) => Promise<string>} callAI
 *   Callback that sends the prompt pair to the model and resolves to its reply.
 * @returns {Promise<object>} Record with `filePath`, `content`, `categoryId`,
 *   `description` and `isPropertyBased: true`.
 */
async function generatePropertyTest(ctx, callAI) {
  const systemPrompt = [
    "You are a formal verification expert writing property-based tests.",
    "Express security invariants that must hold for ALL inputs.",
    "Use fast-check for TypeScript/JavaScript or hypothesis for Python.",
    "Output ONLY code. No markdown fences. No explanations."
  ].join(" ");
  const prompt = buildPropertyPrompt(ctx);
  const content = await callAI(prompt, systemPrompt);
  const cleaned = stripMarkdownFences(content);
  const ext = ctx.language === "python" ? ".py" : ".ts";
  const suggested = ctx.suggestedTestPath;
  const testSuffix = /\.test\.(ts|js|py)$/;
  let propPath;
  if (testSuffix.test(suggested)) {
    propPath = suggested.replace(testSuffix, `.prop${ext}`);
  } else {
    // Path does not follow the `.test.<ext>` convention (e.g. `test_auth.py`,
    // `auth.spec.ts`): swap only the final extension so the result is distinct.
    propPath = `${suggested.replace(/\.[^./\\]+$/, "")}.prop${ext}`;
  }
  return {
    filePath: propPath,
    content: cleaned,
    categoryId: ctx.gap.categoryId,
    description: `Property-based security invariant for ${ctx.gap.categoryId}`,
    isPropertyBased: true
  };
}
3970
/**
 * Build the user prompt asking the model for an HTTP-level integration test
 * for one gap.
 *
 * The web framework defaults to "express" when `ctx.webFramework` is nullish.
 * The HTTP test client is looked up in `HTTP_CLIENT_MAP`, falling back to the
 * express entry when the framework has none.
 *
 * @param {object} ctx - Generation context; reads `gap`, `testFramework.name`,
 *   `functionBody`, `imports`, `language`, `webFramework`, `dbType` and
 *   `existingTestSample`.
 * @returns {string} Newline-joined prompt text.
 */
function buildIntegrationPrompt(ctx) {
  const framework = ctx.webFramework ?? "express";
  const httpClient = HTTP_CLIENT_MAP[framework] ?? HTTP_CLIENT_MAP["express"];
  const { gap } = ctx;
  const fence = "```";

  const overview = [
    `Generate a complete, runnable ${ctx.testFramework.name} INTEGRATION test that hits the actual HTTP endpoint.`,
    "",
    "## Vulnerability",
    `Type: ${gap.categoryId}`,
    `Severity: ${gap.severity}`,
    `File: ${gap.filePath}:${gap.lineStart}`,
    "",
    "## Vulnerable Code (route handler)",
    fence,
    ctx.functionBody,
    fence,
    "",
    "## File Imports",
    fence,
    ctx.imports.join("\n"),
    fence,
    ""
  ];

  const context = [
    "## Context",
    `Language: ${ctx.language}`,
    `Test framework: ${ctx.testFramework.name}`,
    `Web framework: ${framework}`,
    `HTTP test client: ${httpClient.lib} (${httpClient.importStyle})`
  ];
  if (ctx.dbType) {
    context.push(`Database: ${ctx.dbType}`);
  }
  context.push("");

  // Only the first 1500 characters of an existing test are shown as a style guide.
  const styleGuide = ctx.existingTestSample
    ? ["## Existing Test Style", fence, ctx.existingTestSample.slice(0, 1500), fence, ""]
    : [];

  const requirements = [
    "## Requirements",
    "1. Output ONLY the complete test file. No explanations, no markdown fences.",
    `2. Import the app/server and use ${httpClient.lib} to make real HTTP requests.`,
    "3. The test MUST FAIL when run against the current vulnerable code.",
    "4. Send at least 5 attack payloads as HTTP request bodies/params/headers.",
    "5. Include boundary cases (empty body, malformed JSON, oversized input).",
    "6. Assert on HTTP status codes AND response body content.",
    "7. Test the specific vulnerable endpoint, not a generic route.",
    "8. If the vulnerability is a race condition, send concurrent requests and assert consistency.",
    "9. If the vulnerability is missing auth, send requests without auth tokens and assert 401/403."
  ];

  return [...overview, ...context, ...styleGuide, ...requirements].join("\n");
}
4017
/**
 * Ask the model for an HTTP integration test for one gap and wrap the reply
 * in a generated-test record.
 *
 * The output path is derived from `ctx.suggestedTestPath` so it never equals
 * the unit test's path:
 *  - `foo.test.<ext>` / `foo.spec.<ext>` becomes `foo.integ.test.<ext>` /
 *    `foo.integ.spec.<ext>` for any extension (not only ts/js/py);
 *  - otherwise `.integ` is inserted before the final extension
 *    (`test_api.py` becomes `test_api.integ.py`), or appended when there is none.
 * The previous implementation's fallback was guarded by a check for a literal
 * "$1" in the path, which never fired, so non-matching paths came back unchanged.
 *
 * @param {object} ctx - Generation context; reads `suggestedTestPath` and
 *   `gap`, and is passed to `buildIntegrationPrompt`.
 * @param {(prompt: string, systemPrompt: string) => Promise<string>} callAI
 *   Callback that sends the prompt pair to the model and resolves to its reply.
 * @returns {Promise<object>} Record with `filePath`, `content`, `categoryId`,
 *   `description` and `isPropertyBased: false`.
 */
async function generateIntegrationTest(ctx, callAI) {
  const systemPrompt = [
    "You are a senior security engineer writing integration tests.",
    "You test endpoints end-to-end by sending real HTTP requests with attack payloads.",
    "Your tests must be complete, runnable files that import the app and use supertest/httpx.",
    "Output ONLY code. No markdown fences. No explanations.",
    "The test must FAIL against vulnerable code and PASS after a fix."
  ].join(" ");
  const prompt = buildIntegrationPrompt(ctx);
  const content = await callAI(prompt, systemPrompt);
  const cleaned = stripMarkdownFences(content);
  const suggested = ctx.suggestedTestPath;
  const conventionalSuffix = /\.(test|spec)\.(\w+)$/;
  let actualPath;
  if (conventionalSuffix.test(suggested)) {
    actualPath = suggested.replace(conventionalSuffix, ".integ.$1.$2");
  } else {
    // No `.test.`/`.spec.` marker: keep the extension, tag the base name.
    const extMatch = /\.[^./\\]+$/.exec(suggested);
    actualPath = extMatch
      ? `${suggested.slice(0, extMatch.index)}.integ${extMatch[0]}`
      : `${suggested}.integ`;
  }
  return {
    filePath: actualPath,
    content: cleaned,
    categoryId: ctx.gap.categoryId,
    description: `Integration security test for ${ctx.gap.categoryId} at ${ctx.gap.filePath}:${ctx.gap.lineStart}`,
    isPropertyBased: false
  };
}
4038
/**
 * Remove a wrapping markdown code fence from a model reply.
 *
 * After trimming, if the text starts with ``` and contains a newline, the
 * whole first line (fence plus any language tag) is dropped. Independently,
 * if the remaining text ends with ```, that closing fence is removed and
 * trailing whitespace trimmed. Fences elsewhere in the text are left alone.
 *
 * @param {string} content - Raw reply text.
 * @returns {string} Reply with the outer fence lines removed.
 */
function stripMarkdownFences(content) {
  const FENCE = "```";
  const trimmed = content.trim();
  const firstBreak = trimmed.indexOf("\n");
  // An opening fence is only stripped when there is a line after it.
  const hasOpeningFence = trimmed.startsWith(FENCE) && firstBreak !== -1;
  const body = hasOpeningFence ? trimmed.slice(firstBreak + 1) : trimmed;
  return body.endsWith(FENCE) ? body.slice(0, -FENCE.length).trimEnd() : body;
}
4051
// Maps a web framework name to the HTTP test client suggested in integration
// prompts: `lib` is the library name, `importStyle` the import line shown to
// the model. Populated when init_generator2() runs.
var HTTP_CLIENT_MAP;
var init_generator2 = __esm({
  "src/testgen/generator.ts"() {
    // The Node frameworks all use supertest; each gets its own entry object.
    const supertestClient = () => ({ lib: "supertest", importStyle: 'import request from "supertest";' });
    HTTP_CLIENT_MAP = {
      express: supertestClient(),
      fastify: supertestClient(),
      koa: supertestClient(),
      nestjs: supertestClient(),
      flask: { lib: "httpx", importStyle: "import httpx" },
      django: { lib: "django.test", importStyle: "from django.test import TestCase, Client" },
      fastapi: { lib: "httpx", importStyle: "from httpx import AsyncClient" },
      gin: { lib: "net/http/httptest", importStyle: '"net/http/httptest"' }
    };
  }
});
4066
/**
 * Run a command without a shell and resolve with its captured output.
 *
 * The promise resolves with a result object for every outcome reported
 * through execFile's callback; it does not reject for a failing command.
 * `exitCode` is always a number:
 *  - 0 when execFile reports no error;
 *  - the child's numeric exit code when it exited non-zero;
 *  - 1 for every other failure (spawn errors such as ENOENT, timeout kill,
 *    maxBuffer overflow), where `error.code` is a string or null. In that
 *    case the error message is appended to `stderr` so callers can see the cause.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} [timeoutMs=30000] - Timeout passed to execFile.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 */
function runCommand(cmd, args, cwd, timeoutMs = 3e4) {
  return new Promise((resolve10) => {
    execFile(cmd, args, {
      cwd,
      timeout: timeoutMs,
      maxBuffer: 1024 * 1024
    }, (error, stdout, stderr) => {
      const out = stdout ?? "";
      const errText = stderr ?? "";
      if (!error) {
        resolve10({ stdout: out, stderr: errText, exitCode: 0 });
        return;
      }
      if (typeof error.code === "number") {
        resolve10({ stdout: out, stderr: errText, exitCode: error.code });
        return;
      }
      // Non-numeric code: the process never ran to a normal exit. Normalise
      // to 1 and surface the reason, which is otherwise lost.
      resolve10({
        stdout: out,
        stderr: errText ? `${errText}\n${error.message}` : error.message,
        exitCode: 1
      });
    });
  });
}
4081
/**
 * Type-check a single TypeScript file by running `npx tsc --noEmit` on it.
 *
 * @param {string} filePath - File handed to tsc.
 * @param {string} cwd - Directory the command runs in.
 * @returns {Promise<{ok: boolean, errors: string}>} `ok` is true when the
 *   command exits 0; `errors` is stderr, or stdout when stderr is empty.
 */
async function checkTypeScript(filePath, cwd) {
  const tscArgs = ["tsc", "--noEmit", "--esModuleInterop", "--skipLibCheck", filePath];
  const { exitCode, stderr, stdout } = await runCommand("npx", tscArgs, cwd);
  return { ok: exitCode === 0, errors: stderr || stdout };
}
4088
/**
 * Syntax-check a Python file by running `python3 -m py_compile` on it.
 *
 * @param {string} filePath - File to compile.
 * @param {string} cwd - Directory the command runs in.
 * @returns {Promise<{ok: boolean, errors: string}>} `ok` is true when the
 *   command exits 0; `errors` is the command's stderr.
 */
async function checkPython(filePath, cwd) {
  const outcome = await runCommand("python3", ["-m", "py_compile", filePath], cwd);
  const ok = outcome.exitCode === 0;
  return { ok, errors: outcome.stderr };
}
4095
/**
 * Run one generated test file with the project's test framework and report
 * whether the run failed.
 *
 * Supported framework names: "vitest", "jest", "pytest", "go-test". Any other
 * name resolves to `{ failed: false, output: "Unknown framework: ..." }`
 * without running anything. Every run uses a 60 second timeout.
 *
 * For "go-test" the package directory containing the file is passed to
 * `go test`. The previous `go test -run <filePath>` passed the file path as a
 * test-name pattern, which selects no tests and therefore reports success.
 * All tests in that package are run, not just the generated file.
 *
 * @param {string} filePath - Path of the test file to run.
 * @param {string} cwd - Directory the command runs in.
 * @param {string} framework - Test framework name.
 * @returns {Promise<{failed: boolean, output: string}>} `failed` is true when
 *   the command's exit code is non-zero; `output` is stdout and stderr joined.
 */
async function runTest(filePath, cwd, framework) {
  const RUN_TIMEOUT_MS = 6e4;
  let result;
  switch (framework) {
    case "vitest":
      result = await runCommand("npx", ["vitest", "run", filePath, "--no-coverage"], cwd, RUN_TIMEOUT_MS);
      break;
    case "jest":
      result = await runCommand("npx", ["jest", filePath, "--no-coverage"], cwd, RUN_TIMEOUT_MS);
      break;
    case "pytest":
      result = await runCommand("python3", ["-m", "pytest", filePath, "-x", "--no-header"], cwd, RUN_TIMEOUT_MS);
      break;
    case "go-test": {
      const pkgDir = dirname(filePath);
      // `go test` treats an argument as a directory only when it is rooted or
      // starts with "." / ".."; otherwise it is read as an import path.
      const isFsPath = /^(?:\.{1,2}(?:[\\/]|$)|[\\/]|[A-Za-z]:[\\/])/.test(pkgDir);
      result = await runCommand("go", ["test", isFsPath ? pkgDir : `./${pkgDir}`], cwd, RUN_TIMEOUT_MS);
      break;
    }
    default:
      return { failed: false, output: `Unknown framework: ${framework}` };
  }
  return {
    failed: result.exitCode !== 0,
    output: (result.stdout + "\n" + result.stderr).trim()
  };
}
4118
/**
 * Write a generated test to disk, compile-check it and, unless skipped, run it.
 *
 * Steps:
 *  1. Create the parent directory and write `test.content` to `test.filePath`.
 *  2. Compile-check by extension: `.ts`/`.tsx` via checkTypeScript, `.py` via
 *     checkPython; every other extension is treated as compiling.
 *  3. If `options.skipRun` is set, report `failsCorrectly: true` without running.
 *  4. Otherwise run the test; a failing run counts as `failsCorrectly`, because
 *     the test is expected to fail against the vulnerable code.
 * The written file is left on disk; this function does not remove it.
 *
 * @param {{filePath: string, content: string}} test - Generated test record.
 * @param {string} projectRoot - Working directory for compile and run commands.
 * @param {string} frameworkName - Test framework name forwarded to runTest.
 * @param {{skipRun?: boolean}} [options={}] - Validation options.
 * @returns {Promise<object>} Result with `test`, `compiles`, `failsCorrectly`
 *   and, depending on the outcome, `compileErrors`, `testOutput` (first 2000
 *   characters) and `error`.
 */
async function validateTest(test, projectRoot, frameworkName, options = {}) {
  const { filePath } = test;
  await mkdir(dirname(filePath), { recursive: true });
  await writeFile(filePath, test.content, "utf-8");

  let compileResult = { ok: true, errors: "" };
  switch (extname(filePath)) {
    case ".ts":
    case ".tsx":
      compileResult = await checkTypeScript(filePath, projectRoot);
      break;
    case ".py":
      compileResult = await checkPython(filePath, projectRoot);
      break;
    default:
      break;
  }
  if (!compileResult.ok) {
    return {
      test,
      compiles: false,
      failsCorrectly: false,
      compileErrors: compileResult.errors,
      error: "Test does not compile"
    };
  }

  if (options.skipRun) {
    // Without a run there is no evidence either way; assume the test is good.
    return { test, compiles: true, failsCorrectly: true };
  }

  const runResult = await runTest(filePath, projectRoot, frameworkName);
  const outcome = {
    test,
    compiles: true,
    failsCorrectly: runResult.failed,
    // Cap output size
    testOutput: runResult.output.slice(0, 2e3)
  };
  if (!runResult.failed) {
    outcome.error = "Test passed against vulnerable code (test is useless - should fail)";
  }
  return outcome;
}
4160
/**
 * Best-effort removal of a generated test file.
 *
 * Any failure from unlink (for example the file not existing) is
 * deliberately ignored.
 *
 * @param {string} filePath - File to delete.
 * @returns {Promise<void>}
 */
async function cleanupTest(filePath) {
  await unlink(filePath).catch(() => {
    // Ignored on purpose: cleanup must never fail the caller.
  });
}
4166
/**
 * Run Stryker against one source file and summarise the mutation results.
 *
 * The JSON report is read from `<projectRoot>/.pinata/mutation-report.json`.
 * Any report left over from an earlier run is deleted first, so a failed
 * Stryker run can no longer be scored from stale data. If the report cannot
 * be read or parsed, the score is taken from a "Mutation score: N" line in
 * the command output, or 0 when there is none, and all counts are reported as 0.
 *
 * When the report is read, `score` is killed / all listed mutants, rounded
 * to a percentage. Mutants with status "Timeout" are counted in `timedOut`
 * and not in `killed`.
 * NOTE(review): this may differ from the score Stryker itself prints, which
 * the fallback path parses — confirm which definition callers expect.
 *
 * @param {string} sourceFile - Source file to mutate.
 * @param {string} testFile - Generated test file. It is not passed to Stryker
 *   here; the test runner is fixed to "vitest".
 * @param {string} projectRoot - Project root; working directory of the run.
 * @returns {Promise<{score: number, totalMutants: number, killed: number,
 *   survived: number, timedOut: number, output: string}>}
 */
async function measureMutationScore(sourceFile, testFile, projectRoot) {
  const relSource = relative(projectRoot, sourceFile);
  const reportPath = `${projectRoot}/.pinata/mutation-report.json`;
  try {
    await unlink(reportPath);
  } catch {
    // No previous report (or not removable): nothing to invalidate.
  }
  const result = await runCommand("npx", [
    "stryker",
    "run",
    "--mutate",
    relSource,
    "--testRunner",
    "vitest",
    "--reporters",
    "json",
    "--jsonReporter.fileName",
    ".pinata/mutation-report.json",
    "--concurrency",
    "1",
    "--timeoutMS",
    "15000"
  ], projectRoot, 12e4);
  const output = (result.stdout + "\n" + result.stderr).trim();
  try {
    const report = JSON.parse(await readFile(reportPath, "utf-8"));
    let killed = 0;
    let survived = 0;
    let timedOut = 0;
    let total = 0;
    if (report.files) {
      for (const file of Object.values(report.files)) {
        for (const mutant of file.mutants ?? []) {
          total++;
          if (mutant.status === "Killed") killed++;
          else if (mutant.status === "Survived") survived++;
          else if (mutant.status === "Timeout") timedOut++;
        }
      }
    }
    const score = total > 0 ? Math.round(killed / total * 100) : 0;
    return { score, totalMutants: total, killed, survived, timedOut, output };
  } catch {
    // Report missing or unreadable: fall back to the score in the console output.
    const scoreMatch = output.match(/Mutation score:\s*(\d+(?:\.\d+)?)/i);
    const score = scoreMatch ? Math.round(parseFloat(scoreMatch[1])) : 0;
    return { score, totalMutants: 0, killed: 0, survived: 0, timedOut: 0, output };
  }
}
4211
// Bundler init hook for src/testgen/validator.ts. The module body is empty:
// nothing is initialised when it runs.
var init_validator = __esm({
  "src/testgen/validator.ts"() {}
});
4215
+
4216
+ // src/testgen/forge.ts
4217
// Export namespace for src/testgen/forge.ts. Each entry is a getter thunk,
// so the binding is read when the property is accessed rather than here.
var forge_exports = {};
__export(forge_exports, {
  TestForge: () => TestForge,
  createTestForge: () => createTestForge
});
4222
/**
 * Build the prompt asking the model to repair a test file that failed to compile.
 *
 * @param {object} ctx - Generation context; reads `testFramework.name` and `imports`.
 * @param {string} testContent - Current content of the test file.
 * @param {string} errors - Compiler output; only the first 3000 characters are included.
 * @returns {string} Newline-joined prompt text.
 */
function buildCompileFixPrompt(ctx, testContent, errors) {
  // Wraps a body in a fenced block followed by a blank separator line.
  const fenced = (body) => ["```", body, "```", ""];
  const lines = [
    `Fix the compilation errors in this ${ctx.testFramework.name} test file.`,
    "",
    "## Compile errors",
    ...fenced(errors.slice(0, 3e3)),
    "## Current test file",
    ...fenced(testContent),
    "## Source file imports (use these exact paths)",
    ...fenced(ctx.imports.join("\n")),
    "Fix the errors and output the complete corrected test file."
  ];
  return lines.join("\n");
}
4244
/**
 * Build the prompt asking the model to rewrite a test that passed against
 * the vulnerable code, so that it fails instead.
 *
 * @param {object} ctx - Generation context; reads `functionBody`.
 * @param {string} testContent - Current content of the test file.
 * @returns {string} Newline-joined prompt text.
 */
function buildHardenPrompt(ctx, testContent) {
  const fence = "```";
  const prompt = [];
  prompt.push(
    "This security test PASSED against vulnerable code. That means it is useless.",
    "The test must FAIL to prove the vulnerability exists.",
    ""
  );
  prompt.push("## Vulnerable code", fence, ctx.functionBody, fence, "");
  prompt.push("## Current test (passes = useless)", fence, testContent, fence, "");
  prompt.push(
    "Rewrite the test to be adversarial enough to actually catch the vulnerability.",
    "Use more aggressive payloads and tighter assertions."
  );
  return prompt.join("\n");
}
4263
/**
 * Build the prompt asking the model to strengthen a test after a mutation
 * run left surviving mutants.
 *
 * @param {object} ctx - Generation context; reads `functionBody`.
 * @param {string} testContent - Current content of the test file.
 * @param {{survived: number, score: number}} mutationResult - Mutation run
 *   summary; `survived` and `score` are quoted in the first line.
 * @returns {string} Newline-joined prompt text.
 */
function buildMutationEvolvePrompt(ctx, testContent, mutationResult) {
  const { survived, score } = mutationResult;
  const intro = [
    `${survived} mutations survived your test (score: ${score}%).`,
    "Strengthen the test to catch more code mutations.",
    ""
  ];
  const source = ["## Source code being mutated", "```", ctx.functionBody, "```", ""];
  const current = ["## Current test", "```", testContent, "```", ""];
  const ask = ["Add more assertions, edge cases, and boundary checks to kill surviving mutants."];
  return intro.concat(source, current, ask).join("\n");
}
4281
/**
 * Factory for TestForge.
 *
 * @param {object} [config2] - Configuration passed through to the TestForge constructor.
 * @returns {TestForge} A new forge instance.
 */
function createTestForge(config2) {
  const forge = new TestForge(config2);
  return forge;
}
4284
var DEFAULT_CONFIG2, TestForge, SYSTEM_PROMPT_FIX, SYSTEM_PROMPT_HARDEN, SYSTEM_PROMPT_EVOLVE;
var init_forge = __esm({
  "src/testgen/forge.ts"() {
    init_logger();
    init_validator();
    init_generator2();
    // Defaults merged under any caller-supplied configuration.
    DEFAULT_CONFIG2 = {
      maxCompileRetries: 3,
      maxHardenRetries: 2,
      maxMutationRetries: 2,
      mutationScoreThreshold: 50,
      skipRun: false,
      skipMutation: true,
      generateProperty: true
    };
    /**
     * Generates security tests for a detected gap and keeps only those that
     * compile and, unless runs are skipped, fail against the vulnerable code.
     * Optionally strengthens accepted tests using mutation testing.
     */
    TestForge = class {
      config;
      log = logger.child("TestForge");
      /**
       * @param {object} [config2={}] - Overrides for DEFAULT_CONFIG2.
       */
      constructor(config2 = {}) {
        this.config = { ...DEFAULT_CONFIG2, ...config2 };
      }
      /**
       * Forge every applicable test kind for one gap: always a unit test, an
       * integration test when `ctx.webFramework` is set, and a property test
       * when `config.generateProperty` is set.
       *
       * @param {object} ctx - Generation context for the gap.
       * @param {(prompt: string, systemPrompt: string) => Promise<string>} callAI
       *   Callback that sends a prompt pair to the model.
       * @returns {Promise<{accepted: object[], rejected: object[], stats: object}>}
       *   Accepted and rejected records plus aggregate counters.
       *   `avgMutationScore` averages the non-null mutation scores (0 when none).
       */
      async forge(ctx, callAI) {
        const accepted = [];
        const rejected = [];
        let totalAICalls = 0;
        let totalScriptCalls = 0;
        const plan = [
          {
            type: "unit",
            enabled: true,
            banner: `Forging unit test for ${ctx.gap.categoryId} in ${ctx.gap.filePath}:${ctx.gap.lineStart}`
          },
          {
            type: "integration",
            enabled: Boolean(ctx.webFramework),
            banner: `Forging integration test for ${ctx.gap.categoryId}`
          },
          {
            type: "property",
            enabled: Boolean(this.config.generateProperty),
            banner: `Forging property test for ${ctx.gap.categoryId}`
          }
        ];
        for (const step of plan) {
          if (!step.enabled) continue;
          this.log.info(step.banner);
          const outcome = await this.forgeOne(ctx, callAI, step.type);
          totalAICalls += outcome.aiCalls;
          totalScriptCalls += outcome.scriptCalls;
          if (outcome.accepted) {
            accepted.push(outcome.accepted);
          } else {
            rejected.push(outcome.rejected);
          }
        }
        const mutationScores = accepted.map((a) => a.mutationScore).filter((s) => s !== null);
        const avgMutationScore = mutationScores.length > 0 ? Math.round(mutationScores.reduce((a, b) => a + b, 0) / mutationScores.length) : 0;
        return {
          accepted,
          rejected,
          stats: {
            totalAttempts: accepted.length + rejected.length,
            totalAICalls,
            totalScriptCalls,
            accepted: accepted.length,
            rejected: rejected.length,
            avgMutationScore
          }
        };
      }
      /**
       * Forge a single test of the given kind, retrying generation up to
       * `maxCompileRetries + 1` times.
       *
       * Per attempt: generate, validate; on a compile failure ask the model
       * for one fix and re-validate; if the compiled test passes against the
       * vulnerable code, try to harden it; optionally run the mutation loop.
       * The `validation` stored on an accepted record is the one for the test
       * that was actually accepted (the hardened test's validation when
       * hardening replaced it). The generated file is removed from disk
       * before returning.
       *
       * @param {object} ctx - Generation context for the gap.
       * @param {Function} callAI - Model callback.
       * @param {"unit"|"integration"|"property"} type - Kind of test to forge.
       * @returns {Promise<{accepted: object|null, rejected: object|null,
       *   aiCalls: number, scriptCalls: number}>}
       */
      async forgeOne(ctx, callAI, type) {
        let aiCalls = 0;
        let scriptCalls = 0;
        let lastError = "";
        let test = null;
        const generate = type === "property" ? generatePropertyTest : type === "integration" ? generateIntegrationTest : generateTest;
        const validate = async (candidate) => {
          const outcome = await validateTest(
            candidate,
            ctx.projectRoot,
            ctx.testFramework.name,
            { skipRun: this.config.skipRun }
          );
          scriptCalls++;
          return outcome;
        };
        for (let attempt = 0; attempt <= this.config.maxCompileRetries; attempt++) {
          try {
            test = await generate(ctx, callAI);
            aiCalls++;
            let validation = await validate(test);
            if (!validation.compiles) {
              lastError = validation.compileErrors ?? "Unknown compile error";
              this.log.debug(`Compile attempt ${attempt + 1} failed: ${lastError.slice(0, 200)}`);
              // The last attempt gets no fix round; it falls through to rejection.
              if (attempt >= this.config.maxCompileRetries) continue;
              const fixPrompt = buildCompileFixPrompt(ctx, test.content, lastError);
              const fixed = await callAI(fixPrompt, SYSTEM_PROMPT_FIX);
              aiCalls++;
              test = {
                ...test,
                content: stripMarkdownFences(fixed)
              };
              validation = await validate(test);
              if (!validation.compiles) {
                lastError = validation.compileErrors ?? "Compile fix failed";
                continue;
              }
            }
            if (!this.config.skipRun && !validation.failsCorrectly) {
              const hardenResult = await this.hardenTest(ctx, test, callAI);
              aiCalls += hardenResult.aiCalls;
              scriptCalls += hardenResult.scriptCalls;
              if (!hardenResult.test) {
                lastError = "Test passes against vulnerable code -- could not harden";
                await cleanupTest(test.filePath);
                continue;
              }
              test = hardenResult.test;
              // Report the hardened test's own validation, not the stale one
              // that said the original test passed.
              validation = hardenResult.validation ?? validation;
            }
            let mutationScore = null;
            if (!this.config.skipMutation) {
              const mutResult = await this.mutationLoop(ctx, test, callAI);
              aiCalls += mutResult.aiCalls;
              scriptCalls += mutResult.scriptCalls;
              mutationScore = mutResult.score;
              if (mutResult.test) test = mutResult.test;
            }
            await cleanupTest(test.filePath);
            return {
              accepted: { test, validation, mutationScore, type },
              rejected: null,
              aiCalls,
              scriptCalls
            };
          } catch (error) {
            lastError = error instanceof Error ? error.message : String(error);
            this.log.debug(`Forge attempt ${attempt + 1} error: ${lastError}`);
          }
        }
        if (test) await cleanupTest(test.filePath);
        return {
          accepted: null,
          rejected: {
            categoryId: ctx.gap.categoryId,
            filePath: ctx.gap.filePath,
            type,
            reason: lastError || "All retries exhausted",
            attempts: this.config.maxCompileRetries + 1
          },
          aiCalls,
          scriptCalls
        };
      }
      /**
       * Ask the model to rewrite a test that passed against the vulnerable
       * code, up to `maxHardenRetries` times, always running the candidate.
       * Every retry starts from the original `test.content`.
       *
       * @param {object} ctx - Generation context for the gap.
       * @param {object} test - Test record that passed when it should fail.
       * @param {Function} callAI - Model callback.
       * @returns {Promise<{test: object|null, validation?: object,
       *   aiCalls: number, scriptCalls: number}>} The first candidate that
       *   compiles and fails, with its validation; `test: null` if none did.
       */
      async hardenTest(ctx, test, callAI) {
        let aiCalls = 0;
        let scriptCalls = 0;
        for (let i = 0; i < this.config.maxHardenRetries; i++) {
          const prompt = buildHardenPrompt(ctx, test.content);
          const hardened = await callAI(prompt, SYSTEM_PROMPT_HARDEN);
          aiCalls++;
          const hardenedTest = {
            ...test,
            content: stripMarkdownFences(hardened)
          };
          const validation = await validateTest(
            hardenedTest,
            ctx.projectRoot,
            ctx.testFramework.name,
            { skipRun: false }
          );
          scriptCalls++;
          if (validation.compiles && validation.failsCorrectly) {
            return { test: hardenedTest, validation, aiCalls, scriptCalls };
          }
        }
        return { test: null, aiCalls, scriptCalls };
      }
      /**
       * Measure the mutation score of a test and, while it is below
       * `mutationScoreThreshold`, ask the model for stronger versions (at
       * most `maxMutationRetries` times).
       *
       * Each candidate is written to disk and validated through validateTest
       * before it is measured, so the measurement sees the evolved content.
       * A candidate is adopted only when it validates and scores higher than
       * the best so far. The returned score is the best measured score, or 0
       * when no measurement succeeded. Errors end the loop and the best
       * result so far is returned. The file on disk may hold the last
       * candidate tried rather than the returned test.
       *
       * @param {object} ctx - Generation context for the gap.
       * @param {object} test - Accepted test record; its file must already be on disk.
       * @param {Function} callAI - Model callback.
       * @returns {Promise<{test: object, score: number, aiCalls: number,
       *   scriptCalls: number}>}
       */
      async mutationLoop(ctx, test, callAI) {
        let aiCalls = 0;
        let scriptCalls = 0;
        let bestTest = test;
        let bestScore = 0;
        try {
          let feedback = await measureMutationScore(
            ctx.gap.filePath,
            bestTest.filePath,
            ctx.projectRoot
          );
          scriptCalls++;
          bestScore = feedback.score;
          for (let i = 0; i < this.config.maxMutationRetries && bestScore < this.config.mutationScoreThreshold; i++) {
            const prompt = buildMutationEvolvePrompt(ctx, bestTest.content, feedback);
            const evolved = await callAI(prompt, SYSTEM_PROMPT_EVOLVE);
            aiCalls++;
            const candidate = {
              ...bestTest,
              content: stripMarkdownFences(evolved)
            };
            // validateTest writes the candidate to disk, which the next
            // measurement reads.
            const validation = await validateTest(
              candidate,
              ctx.projectRoot,
              ctx.testFramework.name,
              { skipRun: this.config.skipRun }
            );
            scriptCalls++;
            if (!validation.compiles || !validation.failsCorrectly) continue;
            const result = await measureMutationScore(
              ctx.gap.filePath,
              candidate.filePath,
              ctx.projectRoot
            );
            scriptCalls++;
            if (result.score > bestScore) {
              bestTest = candidate;
              bestScore = result.score;
              feedback = result;
            }
          }
        } catch (error) {
          // Mutation testing is best-effort: keep whatever was achieved so far.
          this.log.debug(`Mutation loop error: ${error instanceof Error ? error.message : String(error)}`);
        }
        return { test: bestTest, score: bestScore, aiCalls, scriptCalls };
      }
    };
    SYSTEM_PROMPT_FIX = [
      "You are a senior engineer fixing a test file that failed to compile.",
      "Fix ONLY the compilation errors. Do not change the test logic.",
      "Output ONLY the complete fixed test file. No markdown fences. No explanations."
    ].join(" ");
    SYSTEM_PROMPT_HARDEN = [
      "You are a senior security engineer. The previous test passed against vulnerable code,",
      "meaning it is useless. Make the test adversarial enough to actually fail.",
      "Output ONLY the complete hardened test file. No markdown fences. No explanations."
    ].join(" ");
    SYSTEM_PROMPT_EVOLVE = [
      "You are a senior security engineer. Some mutations survived your test --",
      "meaning your test does not catch certain code changes. Strengthen the test",
      "to kill the surviving mutants. Output ONLY the complete test file."
    ].join(" ");
  }
});
3439
4547
  function App({ results, loading, error }) {
3440
4548
  const { exit } = useApp();
3441
4549
  const [selectedIndex, setSelectedIndex] = useState(0);
@@ -4022,7 +5130,11 @@ var DetectionPatternSchema = z.object({
4022
5130
  /** Optional pattern that indicates code is NOT vulnerable (false positive filter) */
4023
5131
  negativePattern: z.string().optional(),
4024
5132
  /** Optional list of framework contexts where this pattern applies */
4025
- frameworks: z.array(z.string()).optional()
5133
+ frameworks: z.array(z.string()).optional(),
5134
+ /** Regex patterns identifying taint sources (user input entry points) in the same scope */
5135
+ sources: z.array(z.string()).optional(),
5136
+ /** Regex patterns identifying sanitizers that neutralize tainted data */
5137
+ sanitizers: z.array(z.string()).optional()
4026
5138
  });
4027
5139
  var DetectionResultSchema = z.object({
4028
5140
  /** ID of the pattern that matched */
@@ -4518,123 +5630,39 @@ var CategoryStore = class {
4518
5630
  const entries = await fs.readdir(dirPath, { withFileTypes: true });
4519
5631
  return entries;
4520
5632
  });
4521
- if (!loadResult.success) {
4522
- return [
4523
- err(
4524
- new ValidationError(`Failed to read directory: ${dirPath}`, {
4525
- dirPath,
4526
- cause: loadResult.error.message
4527
- })
4528
- )
4529
- ];
4530
- }
4531
- for (const entry of loadResult.data) {
4532
- const fullPath = path.join(dirPath, entry.name);
4533
- if (entry.isDirectory()) {
4534
- const subResults = await this.loadYamlFilesRecursive(fullPath);
4535
- results.push(...subResults);
4536
- } else if (entry.isFile() && (entry.name.endsWith(".yml") || entry.name.endsWith(".yaml"))) {
4537
- const result = await this.loadFromFile(fullPath);
4538
- results.push(result);
4539
- }
4540
- }
4541
- return results;
4542
- }
4543
- };
4544
- function createCategoryStore() {
4545
- return new CategoryStore();
4546
- }
4547
- init_errors();
4548
- var LOG_LEVELS = {
4549
- debug: 0,
4550
- info: 1,
4551
- warn: 2,
4552
- error: 3,
4553
- silent: 4
4554
- };
4555
- var Logger = class _Logger {
4556
- level = "info";
4557
- prefix = "";
4558
- /**
4559
- * Configure the logger
4560
- */
4561
- configure(config2) {
4562
- if (config2.level !== void 0) {
4563
- this.level = config2.level;
4564
- }
4565
- if (config2.prefix !== void 0) {
4566
- this.prefix = config2.prefix;
4567
- }
4568
- }
4569
- /**
4570
- * Check if a log level should be output
4571
- */
4572
- shouldLog(level) {
4573
- return LOG_LEVELS[level] >= LOG_LEVELS[this.level];
4574
- }
4575
- /**
4576
- * Format a message with optional prefix
4577
- */
4578
- format(message) {
4579
- return this.prefix ? `${this.prefix} ${message}` : message;
4580
- }
4581
- /**
4582
- * Debug level logging (gray)
4583
- */
4584
- debug(message, ...args) {
4585
- if (this.shouldLog("debug")) {
4586
- console.debug(chalk6.gray(this.format(message)), ...args);
4587
- }
4588
- }
4589
- /**
4590
- * Info level logging (default color)
4591
- */
4592
- info(message, ...args) {
4593
- if (this.shouldLog("info")) {
4594
- console.info(this.format(message), ...args);
4595
- }
4596
- }
4597
- /**
4598
- * Warning level logging (yellow)
4599
- */
4600
- warn(message, ...args) {
4601
- if (this.shouldLog("warn")) {
4602
- console.warn(chalk6.yellow(this.format(message)), ...args);
4603
- }
4604
- }
4605
- /**
4606
- * Error level logging (red)
4607
- */
4608
- error(message, ...args) {
4609
- if (this.shouldLog("error")) {
4610
- console.error(chalk6.red(this.format(message)), ...args);
5633
+ if (!loadResult.success) {
5634
+ return [
5635
+ err(
5636
+ new ValidationError(`Failed to read directory: ${dirPath}`, {
5637
+ dirPath,
5638
+ cause: loadResult.error.message
5639
+ })
5640
+ )
5641
+ ];
4611
5642
  }
4612
- }
4613
- /**
4614
- * Success message (green)
4615
- */
4616
- success(message, ...args) {
4617
- if (this.shouldLog("info")) {
4618
- console.info(chalk6.green(this.format(message)), ...args);
5643
+ for (const entry of loadResult.data) {
5644
+ const fullPath = path.join(dirPath, entry.name);
5645
+ if (entry.isDirectory()) {
5646
+ const subResults = await this.loadYamlFilesRecursive(fullPath);
5647
+ results.push(...subResults);
5648
+ } else if (entry.isFile() && (entry.name.endsWith(".yml") || entry.name.endsWith(".yaml"))) {
5649
+ const result = await this.loadFromFile(fullPath);
5650
+ results.push(result);
5651
+ }
4619
5652
  }
4620
- }
4621
- /**
4622
- * Create a child logger with a prefix
4623
- */
4624
- child(prefix) {
4625
- const child = new _Logger();
4626
- child.level = this.level;
4627
- child.prefix = this.prefix ? `${this.prefix} ${prefix}` : prefix;
4628
- return child;
5653
+ return results;
4629
5654
  }
4630
5655
  };
4631
- var logger = new Logger();
4632
-
4633
- // src/core/detection/pattern-matcher.ts
/**
 * Factory helper: construct and return a new CategoryStore instance.
 *
 * @returns {CategoryStore} A newly constructed store.
 */
function createCategoryStore() {
  const store = new CategoryStore();
  return store;
}
5659
+ init_errors();
5660
+ init_logger();
4634
5661
  init_result();
4635
5662
 
4636
5663
  // src/core/detection/ast-parser.ts
4637
5664
  init_errors();
5665
+ init_logger();
4638
5666
  init_result();
4639
5667
  var __filename$1 = fileURLToPath(import.meta.url);
4640
5668
  var __dirname$1 = dirname(__filename$1);
@@ -5013,10 +6041,11 @@ var PatternMatcher = class {
5013
6041
  }
5014
6042
  }
5015
6043
  const filteredMatches = this.applyNegativePatterns(matches, content, patterns);
6044
+ const contextFiltered = this.applySourceSinkFilter(filteredMatches, content);
5016
6045
  return ok({
5017
6046
  filePath: absolutePath,
5018
6047
  language,
5019
- matches: filteredMatches,
6048
+ matches: contextFiltered,
5020
6049
  scanTimeMs: performance.now() - startTime,
5021
6050
  warnings
5022
6051
  });
@@ -5229,6 +6258,97 @@ var PatternMatcher = class {
5229
6258
  }
5230
6259
  });
5231
6260
  }
6261
+ /**
6262
+ * Apply source-sink context filtering.
6263
+ *
6264
+ * For patterns that define `sources` and/or `sanitizers`, extract the
6265
+ * surrounding function body and check:
6266
+ * 1. If sources are defined but none appear in scope -> downgrade confidence
6267
+ * 2. If a sanitizer appears in scope -> suppress the match entirely
6268
+ *
6269
+ * Patterns without sources/sanitizers pass through unchanged.
6270
+ */
6271
+ applySourceSinkFilter(matches, content) {
6272
+ const lines = content.split("\n");
6273
+ return matches.filter((match) => {
6274
+ const { sources, sanitizers } = match.pattern;
6275
+ const hasSourceDefs = sources && sources.length > 0;
6276
+ const hasSanitizerDefs = sanitizers && sanitizers.length > 0;
6277
+ if (!hasSourceDefs && !hasSanitizerDefs) return true;
6278
+ const scopeCode = this.extractScope(lines, match.lineStart);
6279
+ if (hasSanitizerDefs) {
6280
+ for (const sanitizer of sanitizers) {
6281
+ try {
6282
+ if (new RegExp(sanitizer).test(scopeCode)) {
6283
+ this.log.debug(
6284
+ `Suppressed ${match.pattern.id} at line ${match.lineStart}: sanitizer found`
6285
+ );
6286
+ return false;
6287
+ }
6288
+ } catch {
6289
+ }
6290
+ }
6291
+ }
6292
+ if (hasSourceDefs) {
6293
+ let sourceFound = false;
6294
+ for (const source of sources) {
6295
+ try {
6296
+ if (new RegExp(source).test(scopeCode)) {
6297
+ sourceFound = true;
6298
+ break;
6299
+ }
6300
+ } catch {
6301
+ }
6302
+ }
6303
+ if (!sourceFound) {
6304
+ this.log.debug(
6305
+ `Downgraded ${match.pattern.id} at line ${match.lineStart}: no taint source in scope`
6306
+ );
6307
+ const downgraded = {
6308
+ ...match,
6309
+ pattern: { ...match.pattern, confidence: "low" }
6310
+ };
6311
+ Object.assign(match, downgraded);
6312
+ }
6313
+ }
6314
+ return true;
6315
+ });
6316
+ }
6317
+ /**
6318
+ * Extract the surrounding function/block scope for a given line.
6319
+ * Walks backwards to find the function start, forwards to find the end.
6320
+ * Falls back to +/- 30 lines if boundaries aren't found.
6321
+ */
6322
+ extractScope(lines, targetLine) {
6323
+ const idx = targetLine - 1;
6324
+ const FALLBACK_RADIUS = 30;
6325
+ let startIdx = Math.max(0, idx - FALLBACK_RADIUS);
6326
+ for (let i = idx; i >= Math.max(0, idx - 50); i--) {
6327
+ const line = lines[i]?.trim() ?? "";
6328
+ if (/^(export\s+)?(async\s+)?function\s/.test(line) || /^(export\s+)?(const|let|var)\s+\w+\s*=\s*(async\s+)?\(/.test(line) || /^(public|private|protected)\s/.test(line) || /^(\s*)def\s+\w+/.test(lines[i] ?? "") || /^(\s*)async\s+def\s+\w+/.test(lines[i] ?? "")) {
6329
+ startIdx = i;
6330
+ break;
6331
+ }
6332
+ }
6333
+ let endIdx = Math.min(lines.length - 1, idx + FALLBACK_RADIUS);
6334
+ let braceDepth = 0;
6335
+ let started = false;
6336
+ for (let i = startIdx; i < lines.length; i++) {
6337
+ const line = lines[i] ?? "";
6338
+ for (const ch of line) {
6339
+ if (ch === "{") {
6340
+ braceDepth++;
6341
+ started = true;
6342
+ }
6343
+ if (ch === "}") braceDepth--;
6344
+ if (started && braceDepth === 0) {
6345
+ endIdx = i;
6346
+ return lines.slice(startIdx, endIdx + 1).join("\n");
6347
+ }
6348
+ }
6349
+ }
6350
+ return lines.slice(startIdx, endIdx + 1).join("\n");
6351
+ }
5232
6352
  /**
5233
6353
  * Get code surrounding a match for negative pattern checking
5234
6354
  */
@@ -5529,10 +6649,10 @@ var SCORING_ADJUSTMENTS = [
5529
6649
  skip: ["serverless", "frontend-spa", "cli"],
5530
6650
  higherWeight: ["web-server", "api"]
5531
6651
  },
5532
- // Memory leaks are critical for long-running servers
6652
+ // Memory leaks are critical for long-running servers, irrelevant for short-lived CLI
5533
6653
  {
5534
6654
  categoryId: "memory-leak",
5535
- skip: ["serverless", "script"],
6655
+ skip: ["serverless", "script", "cli"],
5536
6656
  higherWeight: ["web-server", "desktop"]
5537
6657
  },
5538
6658
  // Rate limiting not needed for CLI
@@ -5559,10 +6679,17 @@ var SCORING_ADJUSTMENTS = [
5559
6679
  lowerWeight: ["cli", "script"],
5560
6680
  higherWeight: ["web-server", "api", "serverless"]
5561
6681
  },
5562
- // Connection failure handling less relevant for CLI
6682
+ // Connection failure handling not relevant for CLI (short-lived, user sees the error)
5563
6683
  {
5564
6684
  categoryId: "connection-failure",
5565
- lowerWeight: ["cli", "script", "library"],
6685
+ skip: ["cli", "script"],
6686
+ lowerWeight: ["library"],
6687
+ higherWeight: ["web-server", "api"]
6688
+ },
6689
+ // Thread safety not relevant for single-threaded Node.js CLI
6690
+ {
6691
+ categoryId: "thread-safety",
6692
+ skip: ["cli", "script"],
5566
6693
  higherWeight: ["web-server", "api"]
5567
6694
  },
5568
6695
  // Memory bloat less relevant for short-lived processes
@@ -5571,10 +6698,10 @@ var SCORING_ADJUSTMENTS = [
5571
6698
  skip: ["cli", "script", "serverless"],
5572
6699
  higherWeight: ["web-server", "desktop"]
5573
6700
  },
5574
- // Data race less relevant for single-threaded CLI
6701
+ // Data race not relevant for single-threaded CLI (Node.js is single-threaded)
5575
6702
  {
5576
6703
  categoryId: "data-race",
5577
- lowerWeight: ["cli", "script"],
6704
+ skip: ["cli", "script"],
5578
6705
  higherWeight: ["web-server", "api"]
5579
6706
  },
5580
6707
  // Network partition not relevant for CLI
@@ -5607,6 +6734,24 @@ var SCORING_ADJUSTMENTS = [
5607
6734
  skip: ["cli", "script", "library"],
5608
6735
  higherWeight: ["web-server", "api"]
5609
6736
  },
6737
+ // Idempotency is a server/API concern, not relevant for CLI
6738
+ {
6739
+ categoryId: "idempotency-missing",
6740
+ skip: ["cli", "script", "library", "frontend-spa"],
6741
+ higherWeight: ["web-server", "api"]
6742
+ },
6743
+ // Retry storms are a distributed systems concern
6744
+ {
6745
+ categoryId: "retry-storm",
6746
+ skip: ["cli", "script", "library", "frontend-spa"],
6747
+ higherWeight: ["web-server", "api", "serverless"]
6748
+ },
6749
+ // Schema migration issues less relevant for CLI
6750
+ {
6751
+ categoryId: "schema-migration",
6752
+ skip: ["cli", "script", "frontend-spa"],
6753
+ higherWeight: ["web-server", "api"]
6754
+ },
5610
6755
  // Encoding mismatch less relevant for CLI
5611
6756
  {
5612
6757
  categoryId: "encoding-mismatch",
@@ -5747,6 +6892,7 @@ function getProjectTypeDescription(type) {
5747
6892
  return descriptions[type];
5748
6893
  }
5749
6894
  init_errors();
6895
+ init_logger();
5750
6896
  init_result();
5751
6897
  var SEVERITY_WEIGHTS = {
5752
6898
  critical: 4,
@@ -5756,8 +6902,8 @@ var SEVERITY_WEIGHTS = {
5756
6902
  };
5757
6903
  var CONFIDENCE_WEIGHTS = {
5758
6904
  high: 1,
5759
- medium: 0.3,
5760
- low: 0.1
6905
+ medium: 0.7,
6906
+ low: 0.4
5761
6907
  };
5762
6908
  var PRIORITY_WEIGHTS = {
5763
6909
  P0: 3,
@@ -6006,28 +7152,50 @@ var Scanner = class {
6006
7152
  });
6007
7153
  }
6008
7154
  }
6009
- if (coverage.overallCoverage >= 90) {
6010
- const bonus = 5;
6011
- baseScore += bonus;
6012
- bonuses.push({ reason: "Excellent coverage (90%+)", points: bonus });
6013
- } else if (coverage.overallCoverage >= 75) {
6014
- const bonus = 3;
6015
- baseScore += bonus;
6016
- bonuses.push({ reason: "Good coverage (75%+)", points: bonus });
6017
- }
6018
- const criticalGaps = gaps.filter((g) => g.severity === "critical");
6019
- if (criticalGaps.length === 0 && categories.length > 0) {
6020
- const bonus = 5;
6021
- baseScore += bonus;
6022
- bonuses.push({ reason: "No critical gaps", points: bonus });
6023
- }
6024
- const highGaps = gaps.filter((g) => g.severity === "high");
6025
- if (highGaps.length === 0 && categories.length > 0) {
6026
- const bonus = 3;
6027
- baseScore += bonus;
6028
- bonuses.push({ reason: "No high severity gaps", points: bonus });
6029
- }
6030
- const overall = Math.max(0, Math.min(100, Math.round(baseScore)));
7155
+ const hasActiveGaps = gaps.filter((g) => g.status !== "dismissed").length > 0;
7156
+ const allDismissed = gaps.length > 0 && !hasActiveGaps;
7157
+ if (!allDismissed) {
7158
+ if (coverage.overallCoverage >= 90) {
7159
+ const bonus = 5;
7160
+ baseScore += bonus;
7161
+ bonuses.push({ reason: "Excellent coverage (90%+)", points: bonus });
7162
+ } else if (coverage.overallCoverage >= 75) {
7163
+ const bonus = 3;
7164
+ baseScore += bonus;
7165
+ bonuses.push({ reason: "Good coverage (75%+)", points: bonus });
7166
+ }
7167
+ const criticalGaps = gaps.filter((g) => g.severity === "critical" && g.status !== "dismissed");
7168
+ if (criticalGaps.length === 0 && categories.length > 0) {
7169
+ const bonus = 5;
7170
+ baseScore += bonus;
7171
+ bonuses.push({ reason: "No critical gaps", points: bonus });
7172
+ }
7173
+ const highGaps = gaps.filter((g) => g.severity === "high" && g.status !== "dismissed");
7174
+ if (highGaps.length === 0 && categories.length > 0) {
7175
+ const bonus = 3;
7176
+ baseScore += bonus;
7177
+ bonuses.push({ reason: "No high severity gaps", points: bonus });
7178
+ }
7179
+ }
7180
+ const unknownGaps = gaps.filter((g) => g.status === "unknown");
7181
+ if (unknownGaps.length > 0) {
7182
+ const unknownPenalty = Math.min(10, unknownGaps.length * 2);
7183
+ baseScore -= unknownPenalty;
7184
+ penalties.push({
7185
+ reason: `${unknownGaps.length} finding(s) could not be tested -- manual review needed`,
7186
+ points: Math.round(unknownPenalty)
7187
+ });
7188
+ }
7189
+ const scanConfidence = this.computeScanConfidence(gaps, categories, projectType);
7190
+ const maxScore = 70 + Math.round(scanConfidence * 30);
7191
+ const clampedScore = Math.max(0, Math.min(maxScore, Math.round(baseScore)));
7192
+ if (clampedScore < Math.round(baseScore)) {
7193
+ penalties.push({
7194
+ reason: `Score capped at ${maxScore} (scan confidence: ${Math.round(scanConfidence * 100)}%)`,
7195
+ points: Math.round(baseScore) - clampedScore
7196
+ });
7197
+ }
7198
+ const overall = clampedScore;
6031
7199
  const grade = this.scoreToGrade(overall);
6032
7200
  const bySeverity = {
6033
7201
  critical: this.calculateSeverityScore(gaps, "critical"),
@@ -6041,8 +7209,8 @@ var Scanner = class {
6041
7209
  byDomain: domainScores,
6042
7210
  bySeverity,
6043
7211
  penalties: penalties.slice(0, 10),
6044
- // Top 10 penalties
6045
- bonuses
7212
+ bonuses,
7213
+ scanConfidence
6046
7214
  };
6047
7215
  }
6048
7216
  /**
@@ -6051,6 +7219,32 @@ var Scanner = class {
6051
7219
  getPatternMatcher() {
6052
7220
  return this.patternMatcher;
6053
7221
  }
7222
+ /**
7223
+ * Compute scan confidence (0-1) based on how thoroughly the scanner analyzed the codebase.
7224
+ *
7225
+ * Factors:
7226
+ * - Project type detection confidence (high=1, medium=0.7, low=0.4)
7227
+ * - Category coverage (what fraction of applicable categories were scanned)
7228
+ * - Whether source-sink context filtering was used (higher precision = higher confidence)
7229
+ */
7230
+ computeScanConfidence(_gaps, categories, projectType) {
7231
+ const ptConfidence = projectType === "unknown" ? 0.3 : 0.8;
7232
+ const categoryBreadth = categories.length > 0 ? Math.min(1, categories.length / 20) : 0.2;
7233
+ let annotatedPatterns = 0;
7234
+ let totalPatterns = 0;
7235
+ for (const cat of categories) {
7236
+ for (const p of cat.detectionPatterns) {
7237
+ totalPatterns++;
7238
+ if (p.sources?.length || p.sanitizers?.length) {
7239
+ annotatedPatterns++;
7240
+ }
7241
+ }
7242
+ }
7243
+ const annotationRatio = totalPatterns > 0 ? annotatedPatterns / totalPatterns : 0;
7244
+ const taintBonus = annotationRatio * 0.2;
7245
+ const confidence = Math.min(1, ptConfidence * 0.4 + categoryBreadth * 0.4 + taintBonus + 0.1);
7246
+ return Math.round(confidence * 100) / 100;
7247
+ }
6054
7248
  // ============================================================
6055
7249
  // Private methods
6056
7250
  // ============================================================
@@ -6415,11 +7609,13 @@ function createScanner(categoryStore) {
6415
7609
  }
6416
7610
 
6417
7611
  // src/core/index.ts
7612
+ init_discovery();
6418
7613
  var VERSION = "0.4.0";
6419
7614
 
6420
7615
  // src/lib/index.ts
6421
7616
  init_errors();
6422
7617
  init_result();
7618
+ init_logger();
6423
7619
  init_errors();
6424
7620
  init_result();
6425
7621
  var SEVERITY_COLORS = {
@@ -6566,13 +7762,13 @@ function formatError(error) {
6566
7762
  var DEFAULT_CONFIG = {
6567
7763
  provider: "anthropic",
6568
7764
  apiKey: "",
6569
- model: "claude-sonnet-4-20250514",
7765
+ model: "claude-opus-4-8",
6570
7766
  maxTokens: 1024,
6571
7767
  temperature: 0.3,
6572
7768
  timeoutMs: 3e4
6573
7769
  };
6574
7770
  var PROVIDER_MODELS = {
6575
- anthropic: "claude-sonnet-4-20250514",
7771
+ anthropic: "claude-opus-4-8",
6576
7772
  openai: "gpt-4o",
6577
7773
  mock: "mock-model"
6578
7774
  };
@@ -6608,11 +7804,11 @@ var AIService = class {
6608
7804
  */
6609
7805
  getApiKeyFromConfig(provider) {
6610
7806
  try {
6611
- const { existsSync: existsSync7, readFileSync: readFileSync3 } = __require("fs");
7807
+ const { existsSync: existsSync8, readFileSync: readFileSync3 } = __require("fs");
6612
7808
  const { homedir: homedir3 } = __require("os");
6613
7809
  const { join: join5 } = __require("path");
6614
7810
  const configPath = join5(homedir3(), ".pinata", "config.json");
6615
- if (!existsSync7(configPath)) {
7811
+ if (!existsSync8(configPath)) {
6616
7812
  return "";
6617
7813
  }
6618
7814
  const content = readFileSync3(configPath, "utf-8");
@@ -7214,6 +8410,42 @@ function formatScanTerminal(result, basePath) {
7214
8410
  if (result.gaps.length > 0) {
7215
8411
  lines.push(chalk6.gray("Run `pinata generate --gaps` to create tests for these gaps."));
7216
8412
  }
8413
+ if (result.coverageTransparency) {
8414
+ const ct = result.coverageTransparency;
8415
+ lines.push("");
8416
+ lines.push(chalk6.bold("Coverage transparency:"));
8417
+ if (ct.endpointsTotal > 0) {
8418
+ lines.push(chalk6.gray(` Endpoints discovered: ${ct.endpointsTotal}`));
8419
+ }
8420
+ if (ct.unprotectedEndpoints.length > 0) {
8421
+ lines.push(chalk6.yellow(` Unprotected endpoints: ${ct.unprotectedEndpoints.length}`));
8422
+ for (const ep of ct.unprotectedEndpoints.slice(0, 5)) {
8423
+ lines.push(chalk6.yellow(` - ${ep}`));
8424
+ }
8425
+ if (ct.unprotectedEndpoints.length > 5) {
8426
+ lines.push(chalk6.gray(` ... and ${ct.unprotectedEndpoints.length - 5} more`));
8427
+ }
8428
+ }
8429
+ if (ct.concurrencyTotal > 0) {
8430
+ lines.push(chalk6.gray(` Concurrency guards: ${ct.concurrencyGuarded}/${ct.concurrencyTotal} state mutations guarded`));
8431
+ }
8432
+ if (ct.manualReviewNeeded.length > 0) {
8433
+ lines.push(chalk6.yellow(` Manual review needed: ${ct.manualReviewNeeded.length} items`));
8434
+ for (const item of ct.manualReviewNeeded.slice(0, 3)) {
8435
+ lines.push(chalk6.gray(` - ${item}`));
8436
+ }
8437
+ }
8438
+ }
8439
+ if (result.score.scanConfidence !== void 0) {
8440
+ lines.push(chalk6.gray(`
8441
+ Scan confidence: ${Math.round(result.score.scanConfidence * 100)}%`));
8442
+ }
8443
+ if (result.dismissed && result.dismissed.length > 0) {
8444
+ lines.push(chalk6.gray(`Findings deprioritized by AI: ${result.dismissed.length} (still available for test generation)`));
8445
+ }
8446
+ if (result.needsReview && result.needsReview.length > 0) {
8447
+ lines.push(chalk6.yellow(`Findings needing manual review: ${result.needsReview.length}`));
8448
+ }
7217
8449
  lines.push(chalk6.gray(`
7218
8450
  Scan completed in ${result.durationMs}ms`));
7219
8451
  return lines.join("\n");
@@ -7606,6 +8838,43 @@ function registerAnalyzeCommand(program2) {
7606
8838
  console.error(formatError(scanResult.error));
7607
8839
  process.exit(1);
7608
8840
  }
8841
+ if (spinner) {
8842
+ spinner.text = "Discovering attack surface...";
8843
+ }
8844
+ try {
8845
+ const { discoverAttackSurface: discoverAttackSurface2, findingsToGaps: findingsToGaps2 } = await Promise.resolve().then(() => (init_discovery(), discovery_exports));
8846
+ const attackSurface = await discoverAttackSurface2(targetDirectory);
8847
+ if (attackSurface.findings.length > 0) {
8848
+ const discoveryGaps = findingsToGaps2(attackSurface.findings, targetDirectory);
8849
+ scanResult.data.gaps.push(...discoveryGaps);
8850
+ for (const gap of discoveryGaps) {
8851
+ const byCat = scanResult.data.gapsByCategory.get(gap.categoryId) ?? [];
8852
+ byCat.push(gap);
8853
+ scanResult.data.gapsByCategory.set(gap.categoryId, byCat);
8854
+ const byFile = scanResult.data.gapsByFile.get(gap.filePath) ?? [];
8855
+ byFile.push(gap);
8856
+ scanResult.data.gapsByFile.set(gap.filePath, byFile);
8857
+ }
8858
+ }
8859
+ scanResult.data.coverageTransparency = {
8860
+ endpointsTested: 0,
8861
+ endpointsTotal: attackSurface.endpoints.length,
8862
+ endpointsUntested: attackSurface.endpoints.map((e) => `${e.method} ${e.path}`),
8863
+ dbOperationsTested: 0,
8864
+ dbOperationsTotal: attackSurface.dbOperations.length,
8865
+ authChecksCovered: attackSurface.authChecks.length,
8866
+ authChecksTotal: attackSurface.endpoints.filter((e) => ["POST", "PUT", "PATCH", "DELETE"].includes(e.method)).length,
8867
+ unprotectedEndpoints: attackSurface.endpoints.filter((e) => !e.hasAuth && ["POST", "PUT", "PATCH", "DELETE"].includes(e.method)).map((e) => `${e.method} ${e.path} (${e.filePath}:${e.lineStart})`),
8868
+ concurrencyGuarded: attackSurface.stateMutations.filter((s) => s.hasLock || s.hasTransaction).length,
8869
+ concurrencyTotal: attackSurface.stateMutations.length,
8870
+ manualReviewNeeded: []
8871
+ };
8872
+ if (attackSurface.findings.length > 0 && !isQuiet) {
8873
+ logger.info(`Discovery: ${attackSurface.endpoints.length} endpoints, ${attackSurface.findings.length} findings`);
8874
+ }
8875
+ } catch (error) {
8876
+ logger.debug(`Discovery skipped: ${error instanceof Error ? error.message : String(error)}`);
8877
+ }
7609
8878
  spinner?.stop();
7610
8879
  const shouldVerify = Boolean(options["verify"]);
7611
8880
  if (shouldVerify && scanResult.data.gaps.length > 0) {
@@ -7618,7 +8887,7 @@ function registerAnalyzeCommand(program2) {
7618
8887
  console.log(chalk6.gray("Get one at: https://console.anthropic.com/settings/keys"));
7619
8888
  console.log(chalk6.gray("Or: https://platform.openai.com/api-keys\n"));
7620
8889
  const rl = createInterface({ input: process.stdin, output: process.stdout });
7621
- const askQuestion = (question) => new Promise((resolve9) => rl.question(question, (answer) => resolve9(answer.trim())));
8890
+ const askQuestion = (question) => new Promise((resolve10) => rl.question(question, (answer) => resolve10(answer.trim())));
7622
8891
  const apiKey = await askQuestion(chalk6.cyan("Enter your Anthropic or OpenAI API key: "));
7623
8892
  rl.close();
7624
8893
  if (!apiKey) {
@@ -7638,31 +8907,44 @@ function registerAnalyzeCommand(program2) {
7638
8907
  provider = "openai";
7639
8908
  }
7640
8909
  if (hasApiKey2(provider)) {
7641
- const verifySpinner = showSpinner ? ora3("Verifying gaps with AI...").start() : null;
8910
+ const verifySpinner = showSpinner ? ora3("Prioritizing gaps with AI...").start() : null;
7642
8911
  try {
7643
8912
  const { AIVerifier: AIVerifier2 } = await Promise.resolve().then(() => (init_verifier(), verifier_exports));
7644
- const { readFile: readFile7 } = await import('fs/promises');
8913
+ const { readFile: readFile9 } = await import('fs/promises');
7645
8914
  const apiKey = getApiKey2(provider);
7646
8915
  const verifier = new AIVerifier2({ provider, ...apiKey ? { apiKey } : {} });
7647
- const { verified, dismissed, stats } = await verifier.verifyAll(
8916
+ const { verified, dismissed, needsReview, stats } = await verifier.verifyAll(
7648
8917
  scanResult.data.gaps,
7649
- async (path2) => readFile7(path2, "utf-8")
8918
+ async (path2) => readFile9(path2, "utf-8")
7650
8919
  );
7651
- scanResult.data.gaps = verified;
7652
- const severityWeights = { critical: 10, high: 5, medium: 2, low: 1 };
7653
- let deduction = 0;
7654
- for (const gap of verified) {
7655
- deduction += severityWeights[gap.severity] ?? 1;
8920
+ const prioritized = [
8921
+ ...verified.map((g) => ({ ...g, status: "pending" })),
8922
+ ...needsReview.map((g) => ({ ...g, status: "pending" })),
8923
+ ...dismissed.map(({ gap }) => ({ ...gap, status: "dismissed" }))
8924
+ ];
8925
+ scanResult.data.gaps = prioritized;
8926
+ scanResult.data.dismissed = dismissed;
8927
+ scanResult.data.needsReview = needsReview;
8928
+ scanResult.data.rawFindingCount = stats.total;
8929
+ const verifiedCount = verified.length;
8930
+ const totalCount = stats.total;
8931
+ const dismissalRate = totalCount > 0 ? stats.aiDismissed / totalCount : 0;
8932
+ if (verifiedCount === 0 && totalCount > 0 && dismissalRate > 0.8) {
8933
+ const MAX_SCORE_ALL_DISMISSED = 85;
8934
+ if (scanResult.data.score.overall > MAX_SCORE_ALL_DISMISSED) {
8935
+ scanResult.data.score.overall = MAX_SCORE_ALL_DISMISSED;
8936
+ scanResult.data.score.grade = "B";
8937
+ scanResult.data.score.penalties.push({
8938
+ reason: `All ${stats.aiDismissed} findings AI-dismissed -- score capped (manual review recommended)`,
8939
+ points: 100 - MAX_SCORE_ALL_DISMISSED
8940
+ });
8941
+ }
7656
8942
  }
7657
- const newOverall = Math.max(0, 100 - deduction);
7658
- const newGrade = newOverall >= 90 ? "A" : newOverall >= 80 ? "B" : newOverall >= 70 ? "C" : newOverall >= 60 ? "D" : "F";
7659
- scanResult.data.score.overall = newOverall;
7660
- scanResult.data.score.grade = newGrade;
7661
8943
  verifySpinner?.succeed(
7662
- `AI Verification: ${stats.total} total \u2192 ${stats.preFiltered} pre-filtered \u2192 ${stats.aiVerified} verified, ${stats.aiDismissed} AI-dismissed`
8944
+ `AI Triage: ${stats.total} total \u2192 ${stats.aiVerified} likely vulnerable, ${stats.aiNeedsReview} needs review, ${stats.aiDismissed} likely false positive`
7663
8945
  );
7664
8946
  if (isVerbose && dismissed.length > 0) {
7665
- console.log(chalk6.gray("\nDismissed as false positives:"));
8947
+ console.log(chalk6.gray("\nLikely false positives (still available for test generation):"));
7666
8948
  for (const { gap, reason } of dismissed.slice(0, 5)) {
7667
8949
  console.log(chalk6.gray(` - ${gap.categoryName} at ${gap.filePath}:${gap.lineStart}`));
7668
8950
  console.log(chalk6.gray(` Reason: ${reason.slice(0, 100)}...`));
@@ -7671,10 +8953,14 @@ function registerAnalyzeCommand(program2) {
7671
8953
  console.log(chalk6.gray(` ... and ${dismissed.length - 5} more`));
7672
8954
  }
7673
8955
  }
8956
+ if (needsReview.length > 0) {
8957
+ console.log(chalk6.yellow(`
8958
+ ${needsReview.length} finding(s) need manual review.`));
8959
+ }
7674
8960
  } catch (error) {
7675
- verifySpinner?.fail("AI verification failed (results unverified)");
8961
+ verifySpinner?.fail("AI prioritization failed (results unverified)");
7676
8962
  if (isVerbose) {
7677
- console.error(chalk6.yellow(`Verification error: ${error instanceof Error ? error.message : String(error)}`));
8963
+ console.error(chalk6.yellow(`Prioritization error: ${error instanceof Error ? error.message : String(error)}`));
7678
8964
  }
7679
8965
  }
7680
8966
  }
@@ -7683,7 +8969,7 @@ function registerAnalyzeCommand(program2) {
7683
8969
  const isDryRun = Boolean(options["dryRun"]);
7684
8970
  if (shouldExecute && scanResult.data.gaps.length > 0) {
7685
8971
  const { createRunner: createRunner2, isTestable: isTestable2 } = await Promise.resolve().then(() => (init_execution(), execution_exports));
7686
- const { readFile: readFile7 } = await import('fs/promises');
8972
+ const { readFile: readFile9 } = await import('fs/promises');
7687
8973
  const testableGaps = scanResult.data.gaps.filter((g) => isTestable2(g.categoryId));
7688
8974
  if (testableGaps.length === 0) {
7689
8975
  console.log(chalk6.yellow("\nNo dynamically testable gaps found."));
@@ -7698,7 +8984,7 @@ Dynamic execution unavailable: ${initResult.error}`));
7698
8984
  for (const gap of testableGaps) {
7699
8985
  if (!fileContents.has(gap.filePath)) {
7700
8986
  try {
7701
- fileContents.set(gap.filePath, await readFile7(gap.filePath, "utf-8"));
8987
+ fileContents.set(gap.filePath, await readFile9(gap.filePath, "utf-8"));
7702
8988
  } catch {
7703
8989
  }
7704
8990
  }
@@ -8067,150 +9353,14 @@ async function extractTestContexts(gaps, projectRoot) {
8067
9353
  return contexts;
8068
9354
  }
8069
9355
 
8070
- // src/testgen/generator.ts
8071
- function buildGenerationPrompt(ctx) {
8072
- const parts = [];
8073
- parts.push(`Generate a complete, runnable ${ctx.testFramework.name} test file for this security vulnerability.`);
8074
- parts.push("");
8075
- parts.push("## Vulnerability");
8076
- parts.push(`Type: ${ctx.gap.categoryId}`);
8077
- parts.push(`Severity: ${ctx.gap.severity}`);
8078
- parts.push(`File: ${ctx.gap.filePath}:${ctx.gap.lineStart}`);
8079
- parts.push(`Pattern: ${ctx.gap.patternId}`);
8080
- parts.push("");
8081
- parts.push("## Vulnerable Code");
8082
- parts.push("```");
8083
- parts.push(ctx.functionBody);
8084
- parts.push("```");
8085
- parts.push("");
8086
- if (ctx.functionName) {
8087
- parts.push(`Function name: ${ctx.functionName}`);
8088
- }
8089
- parts.push("## File Imports");
8090
- parts.push("```");
8091
- parts.push(ctx.imports.join("\n"));
8092
- parts.push("```");
8093
- parts.push("");
8094
- parts.push("## Context");
8095
- parts.push(`Language: ${ctx.language}`);
8096
- parts.push(`Test framework: ${ctx.testFramework.name}`);
8097
- if (ctx.webFramework) parts.push(`Web framework: ${ctx.webFramework}`);
8098
- if (ctx.dbType) parts.push(`Database: ${ctx.dbType}`);
8099
- parts.push("");
8100
- if (ctx.existingTestSample) {
8101
- parts.push("## Existing Test Style (match this style)");
8102
- parts.push("```");
8103
- parts.push(ctx.existingTestSample.slice(0, 1500));
8104
- parts.push("```");
8105
- parts.push("");
8106
- }
8107
- parts.push("## Requirements");
8108
- parts.push("1. Output ONLY the complete test file. No explanations, no markdown fences.");
8109
- parts.push("2. Use real imports that resolve in this project.");
8110
- parts.push("3. The test MUST FAIL when run against the current vulnerable code.");
8111
- parts.push("4. Include at least 5 attack payloads specific to this vulnerability type.");
8112
- parts.push("5. Include at least one boundary/edge case (empty string, null, very long input, unicode).");
8113
- parts.push("6. If testing an HTTP endpoint, use supertest or direct function calls.");
8114
- parts.push("7. Test the specific vulnerable code path, not a generic function.");
8115
- parts.push("8. Each test should have a clear assertion that proves the vulnerability exists or is mitigated.");
8116
- return parts.join("\n");
8117
- }
8118
- function buildPropertyPrompt(ctx) {
8119
- const parts = [];
8120
- parts.push(`Generate a property-based test using fast-check (TypeScript) or hypothesis (Python) for this security vulnerability.`);
8121
- parts.push("");
8122
- parts.push("## Vulnerability");
8123
- parts.push(`Type: ${ctx.gap.categoryId}`);
8124
- parts.push(`File: ${ctx.gap.filePath}:${ctx.gap.lineStart}`);
8125
- parts.push("");
8126
- parts.push("## Vulnerable Code");
8127
- parts.push("```");
8128
- parts.push(ctx.functionBody);
8129
- parts.push("```");
8130
- parts.push("");
8131
- parts.push("## Requirements");
8132
- parts.push("1. Output ONLY the complete test file. No explanations.");
8133
- parts.push("2. Express a security INVARIANT as a property.");
8134
- parts.push("3. The property should hold for ALL inputs, not just specific payloads.");
8135
- parts.push("4. Use fast-check for TypeScript/JavaScript or hypothesis for Python.");
8136
- parts.push("5. Example invariant: 'for all strings s, the output of sanitize(s) never contains <script>'");
8137
- parts.push(`6. Test framework: ${ctx.testFramework.name}`);
8138
- const invariantHints = {
8139
- "sql-injection": "user input should never appear unescaped in the SQL query string",
8140
- "xss": "user input should never appear as raw HTML in the output",
8141
- "command-injection": "user input should never be passed to a shell command unescaped",
8142
- "path-traversal": "resolved file path should always stay within the allowed directory",
8143
- "ssrf": "user-supplied URL should never resolve to a private/internal IP",
8144
- "xxe": "XML parsing should never resolve external entities",
8145
- "deserialization": "deserialized objects should only be of expected types",
8146
- "hardcoded-secrets": "no string matching secret patterns should exist in source"
8147
- };
8148
- const hint = invariantHints[ctx.gap.categoryId];
8149
- if (hint) {
8150
- parts.push(`7. Invariant hint: "${hint}"`);
8151
- }
8152
- return parts.join("\n");
8153
- }
8154
- async function generateTest(ctx, callAI) {
8155
- const systemPrompt = [
8156
- "You are a senior security engineer writing adversarial tests.",
8157
- "You write tests that BREAK code, not tests that pass.",
8158
- "Your tests must be complete, runnable files with real imports.",
8159
- "Output ONLY code. No markdown fences. No explanations.",
8160
- "The test must FAIL against vulnerable code and PASS after a fix."
8161
- ].join(" ");
8162
- const prompt = buildGenerationPrompt(ctx);
8163
- const content = await callAI(prompt, systemPrompt);
8164
- const cleaned = stripMarkdownFences(content);
8165
- return {
8166
- filePath: ctx.suggestedTestPath,
8167
- content: cleaned,
8168
- categoryId: ctx.gap.categoryId,
8169
- description: `Security test for ${ctx.gap.categoryId} in ${ctx.functionName ?? "unknown function"} at ${ctx.gap.filePath}:${ctx.gap.lineStart}`,
8170
- isPropertyBased: false
8171
- };
8172
- }
8173
- async function generatePropertyTest(ctx, callAI) {
8174
- const systemPrompt = [
8175
- "You are a formal verification expert writing property-based tests.",
8176
- "Express security invariants that must hold for ALL inputs.",
8177
- "Use fast-check for TypeScript/JavaScript or hypothesis for Python.",
8178
- "Output ONLY code. No markdown fences. No explanations."
8179
- ].join(" ");
8180
- const prompt = buildPropertyPrompt(ctx);
8181
- const content = await callAI(prompt, systemPrompt);
8182
- const cleaned = stripMarkdownFences(content);
8183
- const ext = ctx.language === "python" ? ".py" : ".ts";
8184
- const propPath = ctx.suggestedTestPath.replace(/\.test\.(ts|js|py)$/, `.prop${ext}`);
8185
- return {
8186
- filePath: propPath,
8187
- content: cleaned,
8188
- categoryId: ctx.gap.categoryId,
8189
- description: `Property-based security invariant for ${ctx.gap.categoryId}`,
8190
- isPropertyBased: true
8191
- };
8192
- }
8193
- function stripMarkdownFences(content) {
8194
- let result = content.trim();
8195
- if (result.startsWith("```")) {
8196
- const firstNewline = result.indexOf("\n");
8197
- if (firstNewline !== -1) {
8198
- result = result.slice(firstNewline + 1);
8199
- }
8200
- }
8201
- if (result.endsWith("```")) {
8202
- result = result.slice(0, -3).trimEnd();
8203
- }
8204
- return result;
8205
- }
8206
-
8207
9356
  // src/cli/commands/generate.ts
9357
+ init_generator2();
8208
9358
  function registerGenerateCommand(program2) {
8209
- program2.command("generate").description("Generate adversarial security tests for detected vulnerabilities").option("--gaps", "Generate tests for all detected gaps").option("-c, --category <id>", "Generate tests for specific category").option("-d, --domain <domain>", "Generate tests for all categories in domain").option("-s, --severity <level>", "Minimum severity: critical, high, medium, low", "medium").option("--write", "Write test files to disk").option("--property", "Also generate property-based tests (fast-check/hypothesis)").option("--ai-provider <provider>", "AI provider: anthropic, openai", "anthropic").option("-o, --output <format>", "Output format: terminal, json", "terminal").option("-v, --verbose", "Verbose output").option("-q, --quiet", "Quiet mode (errors only)").action(async (options) => {
9359
+ program2.command("generate").description("Generate adversarial security tests for detected vulnerabilities").option("--gaps", "Generate tests for all detected gaps").option("-c, --category <id>", "Generate tests for specific category").option("-d, --domain <domain>", "Generate tests for all categories in domain").option("-s, --severity <level>", "Minimum severity: critical, high, medium, low", "medium").option("--write", "Write test files to disk").option("--no-property", "Skip property-based test generation (enabled by default)").option("--ai-provider <provider>", "AI provider: anthropic, openai", "anthropic").option("-o, --output <format>", "Output format: terminal, json", "terminal").option("-v, --verbose", "Verbose output").option("-q, --quiet", "Quiet mode (errors only)").action(async (options) => {
8210
9360
  const isQuiet = Boolean(options["quiet"]);
8211
9361
  const isVerbose = Boolean(options["verbose"]);
8212
9362
  const shouldWrite = Boolean(options["write"]);
8213
- const withProperty = Boolean(options["property"]);
9363
+ const withProperty = options["property"] !== false;
8214
9364
  const aiProvider = String(options["aiProvider"] ?? "anthropic");
8215
9365
  const outputFormat = String(options["output"] ?? "terminal");
8216
9366
  if (isQuiet) {
@@ -8280,7 +9430,7 @@ function registerGenerateCommand(program2) {
8280
9430
  process.exit(1);
8281
9431
  }
8282
9432
  if (spinner) {
8283
- spinner.text = `Generating tests for ${contexts.length} findings with AI...`;
9433
+ spinner.text = `Forging tests for ${contexts.length} findings with AI...`;
8284
9434
  }
8285
9435
  const { hasApiKey: hasApiKey2, getApiKey: getApiKey2 } = await Promise.resolve().then(() => (init_config(), config_exports));
8286
9436
  if (!hasApiKey2(aiProvider)) {
@@ -8292,23 +9442,30 @@ AI test generation requires an API key.`));
8292
9442
  }
8293
9443
  const apiKey = getApiKey2(aiProvider) ?? "";
8294
9444
  const callAI = buildAICaller(aiProvider, apiKey);
9445
+ const { createTestForge: createTestForge2 } = await Promise.resolve().then(() => (init_forge(), forge_exports));
9446
+ const forge = createTestForge2({
9447
+ generateProperty: withProperty,
9448
+ skipRun: false,
9449
+ skipMutation: true
9450
+ });
8295
9451
  const generated = [];
8296
9452
  const errors = [];
9453
+ let totalAICalls = 0;
9454
+ let totalScriptCalls = 0;
8297
9455
  for (let i = 0; i < contexts.length; i++) {
8298
9456
  const ctx = contexts[i];
8299
9457
  if (spinner) {
8300
- spinner.text = `Generating test ${i + 1}/${contexts.length}: ${ctx.gap.categoryId} in ${relative(projectRoot, ctx.gap.filePath)}`;
9458
+ spinner.text = `Forging test ${i + 1}/${contexts.length}: ${ctx.gap.categoryId} in ${relative(projectRoot, ctx.gap.filePath)}`;
8301
9459
  }
8302
9460
  try {
8303
- const test = await generateTest(ctx, callAI);
8304
- generated.push(test);
8305
- if (withProperty) {
8306
- try {
8307
- const propTest = await generatePropertyTest(ctx, callAI);
8308
- generated.push(propTest);
8309
- } catch (err2) {
8310
- errors.push(`Property test failed for ${ctx.gap.categoryId}: ${err2 instanceof Error ? err2.message : String(err2)}`);
8311
- }
9461
+ const result = await forge.forge(ctx, callAI);
9462
+ totalAICalls += result.stats.totalAICalls;
9463
+ totalScriptCalls += result.stats.totalScriptCalls;
9464
+ for (const accepted of result.accepted) {
9465
+ generated.push(accepted.test);
9466
+ }
9467
+ for (const rejected of result.rejected) {
9468
+ errors.push(`Rejected ${rejected.categoryId} (${rejected.type}): ${rejected.reason} (${rejected.attempts} attempts)`);
8312
9469
  }
8313
9470
  } catch (err2) {
8314
9471
  errors.push(`Failed ${ctx.gap.categoryId}: ${err2 instanceof Error ? err2.message : String(err2)}`);
@@ -8331,11 +9488,12 @@ AI test generation requires an API key.`));
8331
9488
  isPropertyBased: t.isPropertyBased,
8332
9489
  lines: t.content.split("\n").length
8333
9490
  })),
8334
- errors
9491
+ errors,
9492
+ stats: { totalAICalls, totalScriptCalls }
8335
9493
  }, null, 2));
8336
9494
  } else {
8337
9495
  console.log();
8338
- console.log(chalk6.bold(`Generated ${generated.length} test file${generated.length === 1 ? "" : "s"}`));
9496
+ console.log(chalk6.bold(`Forged ${generated.length} test file${generated.length === 1 ? "" : "s"} (${totalAICalls} AI calls, ${totalScriptCalls} validations)`));
8339
9497
  console.log();
8340
9498
  for (const test of generated) {
8341
9499
  const relPath = relative(projectRoot, test.filePath);
@@ -8385,7 +9543,7 @@ function buildAICaller(provider, apiKey) {
8385
9543
  "anthropic-version": "2023-06-01"
8386
9544
  },
8387
9545
  body: JSON.stringify({
8388
- model: "claude-sonnet-4-20250514",
9546
+ model: "claude-opus-4-8",
8389
9547
  max_tokens: 4096,
8390
9548
  system: systemPrompt,
8391
9549
  messages: [{ role: "user", content: prompt }]
@@ -8550,9 +9708,9 @@ program.command("suggest-patterns").description("Use AI to suggest new detection
8550
9708
  const codeSnippets = options["code"] ?? [];
8551
9709
  const samples = [...codeSnippets];
8552
9710
  if (filePath) {
8553
- const { readFile: readFile7 } = await import('fs/promises');
9711
+ const { readFile: readFile9 } = await import('fs/promises');
8554
9712
  try {
8555
- const content = await readFile7(resolve(filePath), "utf-8");
9713
+ const content = await readFile9(resolve(filePath), "utf-8");
8556
9714
  samples.push(...content.split("\n---\n").filter((s) => s.trim()));
8557
9715
  } catch (error) {
8558
9716
  console.error(formatError(new Error(`Failed to read file: ${filePath}`)));
@@ -8809,16 +9967,16 @@ thresholds:
8809
9967
  high: 5
8810
9968
  medium: 20
8811
9969
  `;
8812
- const { writeFile: writeFileAsync, mkdir: mkdir4 } = await import('fs/promises');
9970
+ const { writeFile: writeFileAsync, mkdir: mkdir5 } = await import('fs/promises');
8813
9971
  try {
8814
9972
  await writeFileAsync(configPath, defaultConfig, "utf8");
8815
9973
  console.log(chalk6.green("Created .pinata.yml"));
8816
- await mkdir4(cacheDir, { recursive: true });
9974
+ await mkdir5(cacheDir, { recursive: true });
8817
9975
  console.log(chalk6.green("Created .pinata/ directory"));
8818
9976
  const gitignorePath = resolve(process.cwd(), ".gitignore");
8819
9977
  if (existsSync(gitignorePath)) {
8820
- const { readFile: readFile7, appendFile } = await import('fs/promises');
8821
- const gitignore = await readFile7(gitignorePath, "utf8");
9978
+ const { readFile: readFile9, appendFile } = await import('fs/promises');
9979
+ const gitignore = await readFile9(gitignorePath, "utf8");
8822
9980
  if (!gitignore.includes(".pinata/")) {
8823
9981
  await appendFile(gitignorePath, "\n# Pinata cache\n.pinata/\n");
8824
9982
  console.log(chalk6.green("Added .pinata/ to .gitignore"));
@@ -9090,9 +10248,9 @@ auth.command("login").description("Set API key for Pinata Cloud").option("-k, --
9090
10248
  }
9091
10249
  const configDir = resolve(process.cwd(), ".pinata");
9092
10250
  const authPath = resolve(configDir, "auth.json");
9093
- const { mkdir: mkdir4, writeFile: writeFileAsync } = await import('fs/promises');
10251
+ const { mkdir: mkdir5, writeFile: writeFileAsync } = await import('fs/promises');
9094
10252
  try {
9095
- await mkdir4(configDir, { recursive: true });
10253
+ await mkdir5(configDir, { recursive: true });
9096
10254
  const maskedKey = `****${apiKey.slice(-8)}`;
9097
10255
  await writeFileAsync(authPath, JSON.stringify({ configured: true, keyId: maskedKey, configuredAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), "utf8");
9098
10256
  const envPath = resolve(configDir, ".env");
@@ -9135,8 +10293,8 @@ auth.command("status").description("Check authentication status").action(async (
9135
10293
  process.exit(0);
9136
10294
  }
9137
10295
  try {
9138
- const { readFile: readFile7 } = await import('fs/promises');
9139
- const authData = JSON.parse(await readFile7(authPath, "utf8"));
10296
+ const { readFile: readFile9 } = await import('fs/promises');
10297
+ const authData = JSON.parse(await readFile9(authPath, "utf8"));
9140
10298
  console.log(chalk6.green("Authenticated"));
9141
10299
  console.log(chalk6.gray(`Key ID: ${authData.keyId ?? "unknown"}`));
9142
10300
  console.log(chalk6.gray(`Configured: ${authData.configuredAt ?? "unknown"}`));