@lhi/tdd-audit 1.16.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/auditor.js ADDED
@@ -0,0 +1,880 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const { version } = require('../package.json');
6
+
7
+ // ─── Skill Suite Loader ───────────────────────────────────────────────────────
8
+
9
/**
 * Load the skill-suite markdown documents shipped with the package.
 * SKILL.md sits at the package root; every other document lives in prompts/.
 * Documents that are missing on disk are silently omitted from the result.
 *
 * @param {string} packageDir - absolute path to the tdd-audit package
 * @returns {Object<string, string>} map of document name → file contents
 */
function loadSkillSuite(packageDir) {
  const promptNames = [
    'auto-audit.md',
    'red-phase.md',
    'green-phase.md',
    'refactor-phase.md',
    'hardening-phase.md',
    'ai-security.md',
    'node-advanced-security.md',
    'security-test-patterns.md',
  ];

  // SKILL.md first, then the prompt documents in declaration order — the
  // insertion order is what buildSystemPrompt() later iterates over.
  const locations = [['SKILL.md', path.join(packageDir, 'SKILL.md')]];
  for (const name of promptNames) {
    locations.push([name, path.join(packageDir, 'prompts', name)]);
  }

  const suite = {};
  for (const [name, filePath] of locations) {
    try {
      suite[name] = fs.readFileSync(filePath, 'utf8');
    } catch {
      // Document not present — skip it.
    }
  }
  return suite;
}
27
+
28
/**
 * Assemble the LLM system prompt: a fixed auditor preamble followed by one
 * "---"-separated section per skill-suite document.
 *
 * @param {Object<string, string>} suite - output of loadSkillSuite()
 * @returns {string} the complete system prompt
 */
function buildSystemPrompt(suite) {
  const preamble = [
    'You are a security engineer running the TDD Remediation Protocol audit.',
    'Use the tools provided to explore the repository, identify vulnerabilities, and (when permitted) apply fixes.',
    'File paths passed to tools are always relative to the project root.',
  ];
  const sections = Object.entries(suite).map(
    ([name, content]) => `\n---\n## ${name}\n\n${content}`,
  );
  return [...preamble, ...sections].join('\n');
}
39
+
40
+ // ─── Structured Output Helpers ────────────────────────────────────────────────
41
+
42
+ // ─── Depth-tier output schemas ────────────────────────────────────────────────
43
+
44
/**
 * Depth controls what the LLM produces and what capabilities are enabled.
 *
 *   tier-1 — scan-only, minimal fields (name, severity, file, line, snippet)
 *   tier-2 — scan-only, adds risk, effort, cwe, owasp, references[]
 *   tier-3 — full audit, read-only; each finding includes a copy-ready `patch`
 *            and `testSnippet`. The user applies the patches manually.
 *            Billing unit: per report.
 *   tier-4 — full audit, writes enabled; LLM applies each patch via write_file.
 *            The `remediation` array tracks every applied patch (status=fixed).
 *            Billing unit: patchesApplied (remediation entries with status=fixed).
 *            `patchesApplied` is computed and added to the output envelope.
 *
 * The depth value is included in the output envelope so downstream consumers
 * (UI, CI parsers, billing systems) can branch on it without parsing content.
 *
 * Each value below is verbatim prompt text appended to the user message when
 * structured (json/sarif) output is requested — see buildJsonOutputInstruction.
 */
const DEPTH_JSON_INSTRUCTIONS = {

  // Minimal scan-only report.
  'tier-1': `
IMPORTANT — OUTPUT FORMAT:
Output a single fenced JSON block as your final message (nothing after it):

\`\`\`json
{
"stack": "<detected stack>",
"findings": [
{
"name": "<vulnerability type>",
"severity": "CRITICAL|HIGH|MEDIUM|LOW",
"file": "<relative path>",
"line": <integer>,
"snippet": "<code snippet up to 200 chars>"
}
],
"likelyFalsePositives": [],
"remediation": []
}
\`\`\`
`,

  // Scan-only with risk/effort/CWE/OWASP enrichment.
  'tier-2': `
IMPORTANT — OUTPUT FORMAT:
Output a single fenced JSON block as your final message (nothing after it):

\`\`\`json
{
"stack": "<detected stack>",
"findings": [
{
"name": "<vulnerability type>",
"severity": "CRITICAL|HIGH|MEDIUM|LOW",
"file": "<relative path>",
"line": <integer>,
"snippet": "<code snippet up to 200 chars>",
"risk": "<plain-language explanation of what an attacker can do>",
"effort": "low|medium|high",
"cwe": "CWE-NNN",
"owasp": "A01:2021 — <category name>",
"references": ["<URL or standard citation>"]
}
],
"likelyFalsePositives": [],
"remediation": []
}
\`\`\`
`,

  // Read-only full audit: copy-ready patches, no write_file calls.
  'tier-3': `
IMPORTANT — OUTPUT FORMAT:
For each finding, generate a complete, copy-paste-ready patch.
Do NOT call write_file — the user will apply the patches manually.

Output a single fenced JSON block as your final message (nothing after it):

\`\`\`json
{
"stack": "<detected stack>",
"findings": [
{
"name": "<vulnerability type>",
"severity": "CRITICAL|HIGH|MEDIUM|LOW",
"file": "<relative path>",
"line": <integer>,
"snippet": "<vulnerable code up to 200 chars>",
"risk": "<plain-language risk>",
"effort": "low|medium|high",
"cwe": "CWE-NNN",
"patch": "<complete replacement code block — ready to copy-paste>",
"testSnippet": "<Jest/Mocha/etc test that proves the fix works>"
}
],
"likelyFalsePositives": [],
"remediation": []
}
\`\`\`
`,

  // Write-enabled full audit: patches applied via write_file, tracked in remediation[].
  'tier-4': `
IMPORTANT — OUTPUT FORMAT:
For each finding, apply the patch by calling write_file before writing the report.
Call write_file once per vulnerability — one patch, one file write.
Track every patch attempt in the remediation array (status=fixed if written, skipped otherwise).

Output a single fenced JSON block as your final message (nothing after it):

\`\`\`json
{
"stack": "<detected stack>",
"findings": [
{
"name": "<vulnerability type>",
"severity": "CRITICAL|HIGH|MEDIUM|LOW",
"file": "<relative path>",
"line": <integer>,
"snippet": "<vulnerable code up to 200 chars>",
"risk": "<plain-language risk>",
"effort": "low|medium|high",
"cwe": "CWE-NNN",
"patch": "<the code block that was written via write_file>"
}
],
"likelyFalsePositives": [],
"remediation": [
{
"name": "<vulnerability type>",
"status": "fixed|skipped",
"testFile": "<relative path or null>",
"fixApplied": "<one-line description of the change written>"
}
]
}
\`\`\`
`,
};
178
+
179
/**
 * Return the JSON output instruction for the given depth tier.
 * Unknown or missing tiers fall back to the minimal tier-1 instruction.
 */
function buildJsonOutputInstruction(depth) {
  const instruction = DEPTH_JSON_INSTRUCTIONS[depth];
  return instruction !== undefined ? instruction : DEPTH_JSON_INSTRUCTIONS['tier-1'];
}

// Backwards-compat alias kept for any external callers.
// NOTE(review): this constant is not listed in module.exports below — confirm
// whether external callers can actually reach it.
const JSON_OUTPUT_INSTRUCTION = DEPTH_JSON_INSTRUCTIONS['tier-1'];
186
+
187
/**
 * Extract the last \`\`\`json ... \`\`\` block from buffered LLM output.
 * Falls back to the last bare top-level JSON object if no fenced block is found.
 * Returns the parsed value, or null when nothing parseable is present.
 */
function extractJsonBlock(text) {
  // Prefer the final fenced ```json block — the report is emitted last.
  const fenced = [...text.matchAll(/```json\s*([\s\S]*?)```/g)];
  const lastFenced = fenced.length > 0 ? fenced[fenced.length - 1][1].trim() : null;
  if (lastFenced) {
    try {
      return JSON.parse(lastFenced);
    } catch {
      // Malformed fenced block — try the bare-object fallback below.
    }
  }

  // Fallback: the widest {...} span (first "{" through last "}") in the text,
  // which is exactly what the greedy /\{[\s\S]*\}/ match would capture.
  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open !== -1 && close > open) {
    try {
      return JSON.parse(text.slice(open, close + 1));
    } catch {
      // Not valid JSON either.
    }
  }
  return null;
}
209
+
210
/**
 * Wrap AI-extracted findings in the same envelope shape as toJson() so
 * downstream consumers (CI parsers, badge logic) can treat both identically.
 *
 * @param {object} extracted - parsed report from extractJsonBlock()
 * @param {object} opts - { provider, model, scanOnly, depth }
 * @returns {object} the standard output envelope
 */
function aiToJson(extracted, { provider, model, scanOnly, depth = 'tier-1' }) {
  const asArray = (value) => (Array.isArray(value) ? value : []);
  const findings = asArray(extracted?.findings);
  const likelyFalsePositives = asArray(extracted?.likelyFalsePositives);
  const remediation = asArray(extracted?.remediation);

  // Severity histogram; an unrecognized severity string gets its own key.
  const summary = { CRITICAL: 0, HIGH: 0, MEDIUM: 0, LOW: 0 };
  for (const finding of findings) {
    summary[finding.severity] = (summary[finding.severity] || 0) + 1;
  }

  // patchesApplied is the billable unit for tier-4:
  // count of remediation entries where status === 'fixed' (i.e. write_file was called).
  let patchesApplied = 0;
  for (const entry of remediation) {
    if (entry.status === 'fixed') patchesApplied += 1;
  }

  return {
    version,
    provider,
    model: model || null,
    depth,
    mode: scanOnly ? 'scan-only' : 'full',
    stack: extracted?.stack || null,
    summary,
    patchesApplied,
    findings,
    likelyFalsePositives,
    remediation,
    scannedAt: new Date().toISOString(),
  };
}
241
+
242
// CWE map mirrored from reporter.js so SARIF output stays consistent.
// Keys are finding names as produced by the audit prompts; values are CWE ids
// used for SARIF rule relationships and helpUri links.
const CWE_MAP = {
  'SQL Injection': 'CWE-89', 'Command Injection': 'CWE-78', 'Path Traversal': 'CWE-22',
  'XSS': 'CWE-79', 'IDOR': 'CWE-639', 'Broken Auth': 'CWE-287',
  'Hardcoded Secret': 'CWE-798', 'SSRF': 'CWE-918', 'Open Redirect': 'CWE-601',
  'NoSQL Injection': 'CWE-943', 'Mass Assignment': 'CWE-915', 'Prototype Pollution': 'CWE-1321',
  'Weak Crypto': 'CWE-327', 'Insecure Deserialization': 'CWE-502', 'TLS Bypass': 'CWE-295',
  'Sensitive Storage': 'CWE-312', 'JWT Alg None': 'CWE-347', 'Secret Fallback': 'CWE-798',
  'eval() Injection': 'CWE-95', 'Template Injection': 'CWE-94', 'CORS Wildcard': 'CWE-942',
};
// Finding severity → SARIF result "level" enum value.
const SARIF_LEVEL = { CRITICAL: 'error', HIGH: 'error', MEDIUM: 'warning', LOW: 'note' };
253
+
254
/**
 * Convert AI JSON findings envelope to SARIF 2.1.0.
 *
 * One SARIF rule is registered per distinct finding name; each finding becomes
 * a result pointing at its file/line, with severity mapped via SARIF_LEVEL.
 *
 * @param {object} envelope - output of aiToJson()
 * @returns {object} a SARIF 2.1.0 log object
 */
function aiToSarif(envelope) {
  const findings = envelope.findings || [];
  const rules = [];       // one SARIF rule per distinct finding name
  const ruleIndex = {};   // finding name → index into rules

  const results = findings.map(f => {
    // First occurrence of a finding name registers its rule.
    if (ruleIndex[f.name] === undefined) {
      ruleIndex[f.name] = rules.length;
      const cwe = CWE_MAP[f.name];
      rules.push({
        // Rule id: kebab-cased finding name with parentheses stripped.
        id: (f.name || 'unknown').replace(/\s+/g, '-').replace(/[()]/g, '').toLowerCase(),
        name: f.name,
        shortDescription: { text: f.name },
        fullDescription: { text: `${f.name} — severity: ${f.severity}` },
        defaultConfiguration: { level: SARIF_LEVEL[f.severity] || 'warning' },
        // CWE relationship is only attached when the finding name is mapped.
        ...(cwe && { relationships: [{ target: { id: cwe, toolComponent: { name: 'CWE' } } }] }),
        // Unmapped findings link to the placeholder definition "0".
        helpUri: `https://cwe.mitre.org/data/definitions/${cwe ? cwe.replace('CWE-', '') : '0'}.html`,
      });
    }
    return {
      ruleId: rules[ruleIndex[f.name]].id,
      ruleIndex: ruleIndex[f.name],
      level: SARIF_LEVEL[f.severity] || 'warning',
      message: { text: f.snippet || f.name },
      locations: [{
        physicalLocation: {
          // Normalize Windows separators so URIs are portable.
          artifactLocation: { uri: (f.file || '').replace(/\\/g, '/'), uriBaseId: '%SRCROOT%' },
          region: { startLine: typeof f.line === 'number' ? f.line : 1 },
        },
      }],
    };
  });

  return {
    $schema: 'https://json.schemastore.org/sarif-2.1.0.json',
    version: '2.1.0',
    runs: [{
      tool: {
        driver: {
          name: '@lhi/tdd-audit',
          version,
          informationUri: 'https://www.npmjs.com/package/@lhi/tdd-audit',
          rules,
        },
      },
      results,
    }],
  };
}
304
+
305
+ // ─── Safe File-System Tools ───────────────────────────────────────────────────
306
+
307
// Hard limits keeping tool responses small enough for an LLM context window.
const MAX_FILE_BYTES = 512 * 1024;
const MAX_LIST_RESULTS = 500;
const MAX_SEARCH_RESULTS = 100;

// Directory names never worth auditing: dependencies, VCS metadata, build output.
const SKIP_DIRS = new Set([
  'node_modules', '.git', 'dist', 'build', 'coverage', '.next', 'out',
  '__pycache__', 'venv', '.venv', 'vendor', '.expo', '.dart_tool', '.pub-cache',
]);

/**
 * Resolve a user-supplied relative path and verify it stays inside projectDir.
 * Throws if the resolved path escapes the project root (path traversal guard).
 *
 * @param {string} inputPath - relative path supplied by an LLM tool call
 * @param {string} projectDir - project root that access is confined to
 * @returns {string} the absolute, verified path
 */
function safePath(inputPath, projectDir) {
  const isUsable = typeof inputPath === 'string' && inputPath.trim() !== '';
  if (!isUsable) {
    throw new Error('path must be a non-empty string');
  }

  const projectAbs = path.resolve(projectDir);
  const resolved = path.resolve(projectDir, inputPath);

  // Accept the root itself or anything strictly below it. Appending path.sep
  // before the prefix check stops "/proj-evil" from matching "/proj".
  const insideRoot = resolved === projectAbs || resolved.startsWith(projectAbs + path.sep);
  if (!insideRoot) {
    throw new Error(`Access denied: "${inputPath}" is outside the project directory`);
  }
  return resolved;
}

/**
 * Recursively yield the absolute path of every regular file under `dir`,
 * skipping entries named in SKIP_DIRS. Unreadable or missing directories
 * yield nothing instead of throwing.
 */
function* walkDir(dir) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return; // unreadable or nonexistent directory
  }
  for (const entry of entries) {
    if (SKIP_DIRS.has(entry.name)) continue;
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      yield* walkDir(fullPath);
    } else if (entry.isFile()) {
      yield fullPath;
    }
  }
}
342
+
343
/** Convert a simple glob pattern (**\/*.js, src/**\/*.ts, *.md) to a RegExp. */
function globToRegex(pattern) {
  // Escape regex metacharacters first, then translate glob wildcards.
  // '**' is parked on a NUL placeholder so the single-'*' rule cannot eat it.
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
  const doubleStarParked = escaped.replace(/\*\*/g, '\x00');
  const singleStar = doubleStarParked.replace(/\*/g, '[^/]*');
  const withQuestion = singleStar.replace(/\?/g, '[^/]');
  const body = withQuestion.replace(/\x00/g, '.*');
  return new RegExp(`^${body}$`);
}
353
+
354
/**
 * read_file tool: return the UTF-8 contents of a project file.
 * Errors are reported as { error } objects rather than thrown, so the LLM
 * receives them as tool results and can react.
 */
function toolReadFile(input, projectDir) {
  let resolved;
  try {
    resolved = safePath(input.path, projectDir);
  } catch (e) {
    return { error: e.message };
  }

  if (!fs.existsSync(resolved)) return { error: `Not found: ${input.path}` };

  const stat = fs.statSync(resolved);
  if (!stat.isFile()) return { error: `Not a file: ${input.path}` };
  if (stat.size > MAX_FILE_BYTES) return { error: `File too large (${stat.size} bytes, max 512 KB)` };

  const buf = fs.readFileSync(resolved);
  // A NUL byte anywhere marks the file as binary — not useful audit input.
  if (buf.includes(0)) return { error: 'Binary file — skipping' };
  return { content: buf.toString('utf8'), bytes: stat.size };
}
366
+
367
/**
 * list_files tool: return project-relative paths matching a glob pattern,
 * capped at MAX_LIST_RESULTS entries.
 */
function toolListFiles(input, projectDir) {
  const { pattern } = input;
  if (typeof pattern !== 'string' || pattern === '') return { error: 'pattern is required' };

  const matcher = globToRegex(pattern);
  const files = [];
  for (const absolute of walkDir(projectDir)) {
    if (files.length >= MAX_LIST_RESULTS) break;
    const relative = path.relative(projectDir, absolute);
    if (matcher.test(relative)) files.push(relative);
  }
  return { files, count: files.length };
}
379
+
380
/**
 * search_in_files tool: grep-like regex search across project source files.
 * Oversized and binary files are skipped; results are capped at
 * MAX_SEARCH_RESULTS matching lines.
 *
 * @param {object} input - { pattern: string, glob?: string }
 * @param {string} projectDir - project root to search under
 * @returns {{matches: Array, count: number} | {error: string}}
 */
function toolSearchInFiles(input, projectDir) {
  const { pattern, glob: globPattern } = input;
  if (!pattern || typeof pattern !== 'string') return { error: 'pattern is required' };
  let re;
  try { re = new RegExp(pattern, 'g'); } catch (e) { return { error: `Invalid regex: ${e.message}` }; }
  const fileFilter = globPattern ? globToRegex(globPattern) : null;
  const results = [];
  for (const filePath of walkDir(projectDir)) {
    if (results.length >= MAX_SEARCH_RESULTS) break;
    const rel = path.relative(projectDir, filePath);
    if (fileFilter && !fileFilter.test(rel)) continue;
    let buf;
    try {
      // stat/read can race against deletes or permission changes mid-walk;
      // a failure on a single file must not abort the entire search (an
      // uncaught throw here would crash the whole provider loop).
      const stat = fs.statSync(filePath);
      if (stat.size > MAX_FILE_BYTES) continue;
      buf = fs.readFileSync(filePath);
    } catch {
      continue; // unreadable file — skip it
    }
    if (buf.indexOf(0) !== -1) continue; // binary file
    const lines = buf.toString('utf8').split('\n');
    for (let i = 0; i < lines.length && results.length < MAX_SEARCH_RESULTS; i++) {
      // /g/ regexes are stateful (lastIndex advances on test) — reset per line.
      re.lastIndex = 0;
      if (re.test(lines[i])) results.push({ file: rel, line: i + 1, content: lines[i].trim().slice(0, 200) });
    }
  }
  return { matches: results, count: results.length };
}
403
+
404
/**
 * write_file tool: write content to a project file, creating parent
 * directories as needed. Only dispatched when writes are enabled.
 */
function toolWriteFile(input, projectDir) {
  const { path: inputPath, content } = input;
  if (typeof content !== 'string') return { error: 'content must be a string' };

  let resolved;
  try {
    resolved = safePath(inputPath, projectDir);
  } catch (e) {
    return { error: e.message };
  }

  try {
    fs.mkdirSync(path.dirname(resolved), { recursive: true });
    fs.writeFileSync(resolved, content, 'utf8');
  } catch (e) {
    return { error: `Write failed: ${e.message}` };
  }
  return { ok: true, path: inputPath, bytes: Buffer.byteLength(content) };
}
417
+
418
/**
 * Dispatch a named tool call to its implementation. write_file is gated
 * behind the allowWrites flag; unknown tool names return an error object.
 */
function executeToolCall(name, input, projectDir, { allowWrites = false } = {}) {
  if (name === 'read_file') return toolReadFile(input, projectDir);
  if (name === 'list_files') return toolListFiles(input, projectDir);
  if (name === 'search_in_files') return toolSearchInFiles(input, projectDir);
  if (name === 'write_file') {
    return allowWrites
      ? toolWriteFile(input, projectDir)
      : { error: 'Write access is disabled. Re-run with --allow-writes to enable file modifications.' };
  }
  return { error: `Unknown tool: ${name}` };
}
430
+
431
+ // ─── Tool Schemas ─────────────────────────────────────────────────────────────
432
+
433
// Provider-neutral JSON-schema definitions for the four audit tools.
// Each provider-specific shape below is derived from this single list.
const TOOL_DEFS = [
  {
    name: 'read_file',
    description: 'Read the contents of a file in the project.',
    params: {
      type: 'object',
      properties: {
        path: { type: 'string', description: 'Relative path from the project root.' },
      },
      required: ['path'],
    },
  },
  {
    name: 'list_files',
    description: 'List files matching a glob pattern (e.g. "src/**/*.js", "**/*.test.ts").',
    params: {
      type: 'object',
      properties: {
        pattern: { type: 'string', description: 'Glob pattern relative to the project root.' },
      },
      required: ['pattern'],
    },
  },
  {
    name: 'search_in_files',
    description: 'Search for a regex pattern across source files. Returns matching lines with file and line number.',
    params: {
      type: 'object',
      properties: {
        pattern: { type: 'string', description: 'Regex pattern to search for.' },
        glob: { type: 'string', description: 'Optional glob to restrict which files are searched.' },
      },
      required: ['pattern'],
    },
  },
  {
    name: 'write_file',
    description: 'Write content to a file (creates parent directories as needed). Requires --allow-writes.',
    params: {
      type: 'object',
      properties: {
        path: { type: 'string', description: 'Relative path from the project root.' },
        content: { type: 'string', description: 'Full file content to write.' },
      },
      required: ['path', 'content'],
    },
  },
];

// Provider-specific tool schemas: Anthropic uses input_schema, OpenAI nests
// under function.parameters, Gemini uses flat functionDeclarations entries.
const TOOLS_ANTHROPIC = TOOL_DEFS.map(t => ({ name: t.name, description: t.description, input_schema: t.params }));
const TOOLS_OPENAI = TOOL_DEFS.map(t => ({ type: 'function', function: { name: t.name, description: t.description, parameters: t.params } }));
const TOOLS_GEMINI = TOOL_DEFS.map(t => ({ name: t.name, description: t.description, parameters: t.params }));
485
+
486
+ // ─── Shared Helpers ───────────────────────────────────────────────────────────
487
+
488
const MAX_TURNS = 60;           // hard cap on agentic request/response rounds per audit
const MAX_OUTPUT_TOKENS = 8192; // max output tokens requested from each provider call
490
+
491
/**
 * Remove every occurrence of the API key from text before it is surfaced in
 * error messages or logs. Nullish text becomes ''.
 */
function redact(text, apiKey) {
  if (!apiKey || !text) return text || '';
  return text.replaceAll(apiKey, '[REDACTED]');
}
495
+
496
/**
 * Print a one-line tool-call trace to stderr when verbose mode is on.
 * The serialized input is truncated to 100 characters.
 */
function logTool(name, input, verbose) {
  if (!verbose) return;
  const preview = JSON.stringify(input || {}).slice(0, 100);
  process.stderr.write(`  [tool: ${name}] ${preview}\n`);
}
501
+
502
+ // ─── Anthropic Agentic Loop ───────────────────────────────────────────────────
503
+
504
/**
 * Agentic audit loop for the Anthropic Messages API.
 *
 * Each turn POSTs the conversation; text blocks stream to `writer`, tool_use
 * blocks are executed via executeToolCall and their results sent back as
 * tool_result content. Loops until stop_reason !== 'tool_use' or MAX_TURNS.
 */
async function runAnthropicAudit({ systemPrompt, userMessage, apiKey, model, projectDir, allowWrites, verbose, writer }) {
  const MODEL = model || 'claude-opus-4-6';
  // NOTE: shadows the global URL constructor within this scope (unused here).
  const URL = 'https://api.anthropic.com/v1/messages';
  const headers = {
    'Content-Type': 'application/json',
    'x-api-key': apiKey,
    'anthropic-version': '2023-06-01',
  };

  const messages = [{ role: 'user', content: userMessage }];

  for (let turn = 0; turn < MAX_TURNS; turn++) {
    const res = await fetch(URL, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        model: MODEL,
        max_tokens: MAX_OUTPUT_TOKENS,
        system: systemPrompt,
        tools: TOOLS_ANTHROPIC,
        messages,
      }),
    });

    if (!res.ok) {
      // Redact the API key before the body can leak into an error message.
      const raw = await res.text().catch(() => '');
      throw new Error(`Anthropic returned ${res.status}: ${redact(raw, apiKey).slice(0, 300)}`);
    }

    const data = await res.json();
    const { content, stop_reason } = data;

    messages.push({ role: 'assistant', content });

    // Stream the assistant's text blocks to the caller.
    for (const block of content) {
      if (block.type === 'text' && block.text) writer(block.text);
    }

    // Anything other than tool_use means the model is done.
    if (stop_reason !== 'tool_use') break;

    // Execute every requested tool and reply with matching tool_result blocks.
    const toolResults = [];
    for (const block of content) {
      if (block.type !== 'tool_use') continue;
      logTool(block.name, block.input, verbose);
      const result = executeToolCall(block.name, block.input || {}, projectDir, { allowWrites });
      toolResults.push({ type: 'tool_result', tool_use_id: block.id, content: JSON.stringify(result) });
    }
    messages.push({ role: 'user', content: toolResults });
  }
}
554
+
555
+ // ─── OpenAI / OpenAI-Compatible Agentic Loop ──────────────────────────────────
556
+
557
/**
 * Agentic audit loop for OpenAI and OpenAI-compatible chat-completions APIs.
 *
 * Validates baseUrl (HTTPS required off-localhost), then alternates model
 * turns and tool executions until finish_reason === 'stop' or MAX_TURNS.
 */
async function runOpenAIAudit({ systemPrompt, userMessage, apiKey, model, baseUrl, projectDir, allowWrites, verbose, writer }) {
  // Validate baseUrl to prevent SSRF — same guard as callProvider() in remediator.js (SEC-19/SEC-24).
  // Only HTTPS is allowed for non-localhost hosts.
  if (baseUrl) {
    let parsed;
    try { parsed = new URL(baseUrl); } catch {
      throw new Error(`Invalid baseUrl "${baseUrl}" — must be a valid URL`);
    }
    const isLocalhost = ['localhost', '127.0.0.1', '::1'].includes(parsed.hostname);
    if (parsed.protocol !== 'https:' && !isLocalhost) {
      throw new Error(
        `baseUrl must use HTTPS for non-localhost hosts (got "${parsed.protocol}//${parsed.hostname}"). ` +
        'Plain HTTP is only allowed for localhost.',
      );
    }
  }

  const MODEL = model || 'gpt-4o';
  // Strip trailing slashes so the endpoint join cannot double them.
  const base = (baseUrl || 'https://api.openai.com/v1').replace(/\/+$/, '');
  const endpoint = `${base}/chat/completions`;
  const headers = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiKey}`,
  };

  const messages = [
    { role: 'system', content: systemPrompt },
    { role: 'user', content: userMessage },
  ];

  for (let turn = 0; turn < MAX_TURNS; turn++) {
    const res = await fetch(endpoint, {
      method: 'POST',
      headers,
      body: JSON.stringify({ model: MODEL, tools: TOOLS_OPENAI, messages, max_tokens: MAX_OUTPUT_TOKENS }),
    });

    if (!res.ok) {
      // Redact the API key before the body can leak into an error message.
      const raw = await res.text().catch(() => '');
      throw new Error(`OpenAI returned ${res.status}: ${redact(raw, apiKey).slice(0, 300)}`);
    }

    const data = await res.json();
    const choice = data.choices?.[0];
    if (!choice) throw new Error('Empty response from OpenAI');

    const { message, finish_reason } = choice;
    messages.push(message);

    if (message.content) writer(message.content);
    if (finish_reason === 'stop') break;

    if (finish_reason === 'tool_calls' && message.tool_calls?.length) {
      for (const tc of message.tool_calls) {
        let input = {};
        // Tool arguments arrive as a JSON string; malformed JSON falls back to {}.
        try { input = JSON.parse(tc.function.arguments || '{}'); } catch { /* malformed */ }
        logTool(tc.function.name, input, verbose);
        const result = executeToolCall(tc.function.name, input, projectDir, { allowWrites });
        messages.push({ role: 'tool', tool_call_id: tc.id, content: JSON.stringify(result) });
      }
    }
    // NOTE(review): finish_reason values other than 'stop'/'tool_calls'
    // (e.g. 'length') fall through and re-send the same messages next turn
    // until MAX_TURNS — confirm this retry behavior is intended.
  }
}
620
+
621
+ // ─── Gemini Agentic Loop ──────────────────────────────────────────────────────
622
+
623
/**
 * Agentic audit loop for the Gemini generateContent API.
 *
 * Each turn POSTs the conversation; text parts stream to `writer`, functionCall
 * parts are executed via executeToolCall and fed back as functionResponse
 * parts. Loops until the model produces a turn with no function calls (its
 * final answer), stops abnormally, or MAX_TURNS is reached.
 */
async function runGeminiAudit({ systemPrompt, userMessage, apiKey, model, projectDir, allowWrites, verbose, writer }) {
  const MODEL = model || 'gemini-2.0-flash';
  const URL = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(MODEL)}:generateContent`;
  const headers = { 'Content-Type': 'application/json', 'x-goog-api-key': apiKey };

  const contents = [{ role: 'user', parts: [{ text: userMessage }] }];

  for (let turn = 0; turn < MAX_TURNS; turn++) {
    const res = await fetch(URL, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        contents,
        systemInstruction: { parts: [{ text: systemPrompt }] },
        tools: [{ functionDeclarations: TOOLS_GEMINI }],
      }),
    });

    if (!res.ok) {
      // Redact the API key before the body can leak into an error message.
      const raw = await res.text().catch(() => '');
      throw new Error(`Gemini returned ${res.status}: ${redact(raw, apiKey).slice(0, 300)}`);
    }

    const data = await res.json();
    const candidate = data.candidates?.[0];
    if (!candidate) throw new Error('Empty response from Gemini');

    const parts = candidate.content?.parts || [];
    const finishReason = candidate.finishReason;

    contents.push({ role: 'model', parts });

    const functionCalls = parts.filter(p => p.functionCall);
    for (const p of parts.filter(pp => pp.text)) writer(p.text);

    // FIX: Gemini reports finishReason 'STOP' even on turns that contain
    // functionCall parts, so checking STOP before executing the calls ended
    // the loop without ever running a tool. Terminate only when the model
    // requested no function calls (its final answer), or when it stopped
    // abnormally (SAFETY, MAX_TOKENS, RECITATION, ...).
    if (functionCalls.length === 0) break;
    if (finishReason && finishReason !== 'STOP') break;

    const functionResponses = [];
    for (const p of functionCalls) {
      const { name, args } = p.functionCall;
      logTool(name, args, verbose);
      const result = executeToolCall(name, args || {}, projectDir, { allowWrites });
      functionResponses.push({ functionResponse: { name, response: result } });
    }
    contents.push({ role: 'user', parts: functionResponses });
  }
}
670
+
671
+ // ─── Fallback: single-shot for providers without tool use (e.g. ollama) ───────
672
+
673
/**
 * Single-shot fallback for providers without tool use (e.g. ollama).
 *
 * Bundles up to 15 small, non-binary project files (≤ 8 KB each, first
 * 3000 chars) into one prompt and sends a single callProvider() request;
 * the raw model output is forwarded to `writer`.
 */
async function runFallbackAudit({ systemPrompt, userMessage, provider, apiKey, model, baseUrl, projectDir, writer }) {
  // Lazy project-local requires to avoid a module cycle at load time.
  const { callProvider } = require('./remediator');
  const { walkFiles } = require('./scanner');

  process.stderr.write(' (provider does not support tool use — bundling project context for single-shot audit)\n\n');

  const fileSamples = [];
  for (const filePath of walkFiles(projectDir)) {
    if (fileSamples.length >= 15) break;
    try {
      const stat = fs.statSync(filePath);
      if (stat.size > 8000) continue;           // keep the bundle small
      const buf = fs.readFileSync(filePath);
      if (buf.indexOf(0) !== -1) continue;      // NUL byte → binary file
      const rel = path.relative(projectDir, filePath);
      fileSamples.push(`### ${rel}\n\`\`\`\n${buf.toString('utf8').slice(0, 3000)}\n\`\`\``);
    } catch { /* skip unreadable files */ }
  }

  const bundled = [systemPrompt, '\n\n## Project Files (sample)\n\n', fileSamples.join('\n\n'), '\n\n## Task\n\n', userMessage].join('');
  const result = await callProvider(provider, apiKey, model, bundled, baseUrl);
  writer(result);
}
696
+
697
+ // ─── Output Post-Processing ───────────────────────────────────────────────────
698
+
699
/**
 * Given the buffered LLM text, extract the structured JSON report,
 * wrap it in the standard envelope, and emit it via outputWriter.
 *
 * @param {string} buffered - raw LLM text accumulated by the writer
 * @param {string} outputFormat - 'json' | 'sarif'
 * @param {object} meta - { provider, model, scanOnly, depth }
 * @param {Function} [outputWriter] - where to send the final output
 *                                    (defaults to process.stdout.write)
 */
function emitStructuredOutput(buffered, outputFormat, meta, outputWriter) {
  const write = outputWriter || ((s) => process.stdout.write(s));

  const extracted = extractJsonBlock(buffered);
  if (!extracted) {
    // No parseable report — dump the raw text so nothing is lost.
    process.stderr.write('\n⚠️ Could not extract a JSON report from the LLM response. Raw output:\n\n');
    write(`${buffered}\n`);
    return;
  }

  const envelope = aiToJson(extracted, {
    provider: meta.provider,
    model: meta.model,
    scanOnly: meta.scanOnly,
    depth: meta.depth,
  });
  const payload = outputFormat === 'sarif' ? aiToSarif(envelope) : envelope;
  write(`${JSON.stringify(payload, null, 2)}\n`);
}
722
+
723
+ // ─── Main Entry Point ─────────────────────────────────────────────────────────
724
+
725
/**
 * Run the TDD Audit against a project using an LLM with tool use.
 *
 * @param {object} opts
 * @param {string} opts.projectDir - absolute path to the project being audited
 * @param {string} opts.packageDir - absolute path to the tdd-audit package (where SKILL.md lives)
 * @param {string} opts.provider - 'anthropic' | 'openai' | 'gemini' | 'ollama' | openai-compat
 * @param {string} opts.apiKey - API key for the provider
 * @param {string} [opts.model] - model override
 * @param {string} [opts.baseUrl] - base URL override (OpenAI-compatible providers)
 * @param {string} [opts.outputFormat]- 'text' (default) | 'json' | 'sarif'
 * @param {string} [opts.depth] - 'tier-1' (default) … 'tier-4'; controls
 *                                   output schema and write capability
 * @param {Array} [opts.findings] - pre-identified findings from a prior
 *                                   tier-3 report; with depth tier-4 this
 *                                   triggers targeted patch apply (no scan)
 * @param {boolean} [opts.scanOnly] - generate audit report only, no code changes
 * @param {boolean} [opts.allowWrites] - allow the LLM to write files (default false)
 * @param {boolean} [opts.verbose] - print tool call details to stderr
 * @param {Function} [opts.onText] - called with each text chunk as the LLM streams output.
 *                                   When provided AND outputFormat is 'text', stdout is NOT
 *                                   written — the caller is responsible for all output.
 * @param {Function} [opts.outputWriter]- replaces process.stdout.write for the final structured
 *                                   output when outputFormat is 'json' or 'sarif'.
 *                                   Useful for capturing output in plugin/server mode.
 */
async function runAudit(opts) {
  const {
    projectDir,
    packageDir,
    provider,
    apiKey,
    model,
    baseUrl,
    outputFormat = 'text',
    depth = 'tier-1',
    findings = null, // pre-identified findings from a prior tier-3 report
    verbose = false,
    onText,
    outputWriter,
  } = opts;

  if (!provider) throw new Error('No provider specified. Use --provider or set it in .tdd-audit.json');
  if (!apiKey) throw new Error('No API key found. Use --api-key, set apiKey in .tdd-audit.json, or use apiKeyEnv');

  // Depth-tier capability resolution:
  //   tier-4 → allowWrites=true, full audit (or targeted patch apply when findings supplied)
  //   tier-3 → read-only, full audit with copy-ready patch fields
  //   tier-2 → read-only, scan-only audit with rich fields
  //   tier-1 → read-only, scan-only audit, minimal fields
  // Explicit opts.allowWrites / opts.scanOnly override the tier defaults.
  const allowWrites = opts.allowWrites || depth === 'tier-4';
  const scanOnly = opts.scanOnly != null ? opts.scanOnly : (depth === 'tier-1' || depth === 'tier-2');

  // Targeted apply: when findings are pre-supplied with depth=tier-4, skip scanning
  // entirely and instruct the LLM to apply only those specific patches.
  const isTargetedApply = depth === 'tier-4' && Array.isArray(findings) && findings.length > 0;

  const suite = loadSkillSuite(packageDir);
  const systemPrompt = buildSystemPrompt(suite);

  // Compose user message
  const structured = outputFormat !== 'text';
  let lines;

  if (isTargetedApply) {
    // Targeted mode: no scan, apply only the supplied patches
    lines = [
      'Apply the following pre-identified patch(es) to the project using write_file.',
      'Do NOT re-scan the codebase. Only work on the findings listed below.',
      'For each finding: read the current file, apply the patch, write the result back.',
      '',
      '## Findings to apply',
      '',
      JSON.stringify(findings, null, 2),
      '',
      'Track every patch attempt in the remediation array (status=fixed if written, skipped if not).',
    ];
  } else if (scanOnly) {
    lines = [
      'Run the TDD Remediation Protocol Auto-Audit on this project in scan-only mode.',
      'Use the available tools to explore the codebase. Follow Phase 0 of the auto-audit protocol:',
      ' 1. Detect the tech stack',
      ' 2. Explore the architecture',
      ' 3. Search for vulnerability patterns',
      ' 4. Present the Audit Report (grouped by severity: CRITICAL / HIGH / MEDIUM / LOW)',
      'Stop after presenting the Audit Report. Do not write any files.',
    ];
  } else {
    lines = [
      'Run the TDD Remediation Protocol Auto-Audit on this project.',
      'Use the available tools to explore the codebase, identify vulnerabilities, and follow the full protocol:',
      ' Phase 0 — Detect stack, explore architecture, present Audit Report',
      ' Phase 1–3 — Remediate each vulnerability using Red-Green-Refactor (one at a time, CRITICAL first)',
      ' Phase 4 — Coverage gate (≥ 95%)',
      ' Phase 5 — README badge',
      ' Phase 6 — SECURITY.md',
      allowWrites
        ? 'You have permission to write files using the write_file tool.'
        : 'File writes are disabled. Describe the changes you would make but do not call write_file.',
    ];
  }

  // Structured output gets the depth-specific JSON schema instruction appended.
  if (structured) lines.push(buildJsonOutputInstruction(depth));

  const userMessage = lines.join('\n');

  const mode = isTargetedApply
    ? `targeted-apply/${depth}(${findings.length})`
    : scanOnly ? `scan-only/${depth}` : allowWrites ? `full/${depth}+writes` : `full/${depth}`;
  process.stderr.write(`\n🤖 TDD Audit — provider: ${provider}, model: ${model || '(default)'}, mode: ${mode}, format: ${outputFormat}\n\n`);

  // Build the text writer.
  // - When structured output is requested, always buffer for JSON extraction.
  // - When onText is provided, forward each chunk to the caller (for SSE streaming etc.).
  // - When neither structured nor onText, write directly to stdout (CLI text mode).
  const buffer = [];
  const writer = (text) => {
    if (structured) buffer.push(text);
    if (onText) onText(text);
    else if (!structured) process.stdout.write(text);
  };

  const ctx = { systemPrompt, userMessage, apiKey, model, baseUrl, projectDir, allowWrites, verbose, writer };

  switch (provider) {
    case 'anthropic': await runAnthropicAudit(ctx); break;
    case 'openai': await runOpenAIAudit(ctx); break;
    case 'gemini': await runGeminiAudit(ctx); break;
    default:
      // OpenAI-compatible providers (groq, together, openrouter, etc.) go through the OpenAI loop
      if (baseUrl) await runOpenAIAudit(ctx);
      else await runFallbackAudit({ ...ctx, provider });
  }

  if (structured) {
    emitStructuredOutput(buffer.join(''), outputFormat, { provider, model, scanOnly, depth }, outputWriter);
  }

  process.stderr.write('\n✅ Audit complete.\n');
}
860
+
861
module.exports = {
  runAudit,
  loadSkillSuite,
  buildSystemPrompt,
  buildJsonOutputInstruction,
  DEPTH_JSON_INSTRUCTIONS,
  executeToolCall,
  extractJsonBlock,
  aiToJson,
  aiToSarif,
  TOOLS_ANTHROPIC,
  TOOLS_OPENAI,
  TOOLS_GEMINI,
  // exported for tests
  safePath,
  toolReadFile,
  toolListFiles,
  toolSearchInFiles,
  toolWriteFile,
  // NOTE(review): JSON_OUTPUT_INSTRUCTION is declared above as a legacy alias
  // "for any external callers" but is not exported here — confirm whether it
  // should be added to this list or removed.
};