kekkai-cli 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. kekkai/cli.py +789 -19
  2. kekkai/compliance/__init__.py +68 -0
  3. kekkai/compliance/hipaa.py +235 -0
  4. kekkai/compliance/mappings.py +136 -0
  5. kekkai/compliance/owasp.py +517 -0
  6. kekkai/compliance/owasp_agentic.py +267 -0
  7. kekkai/compliance/pci_dss.py +205 -0
  8. kekkai/compliance/soc2.py +209 -0
  9. kekkai/dojo.py +91 -14
  10. kekkai/dojo_import.py +9 -1
  11. kekkai/fix/__init__.py +47 -0
  12. kekkai/fix/audit.py +278 -0
  13. kekkai/fix/differ.py +427 -0
  14. kekkai/fix/engine.py +500 -0
  15. kekkai/fix/prompts.py +251 -0
  16. kekkai/output.py +10 -12
  17. kekkai/report/__init__.py +41 -0
  18. kekkai/report/compliance_matrix.py +98 -0
  19. kekkai/report/generator.py +365 -0
  20. kekkai/report/html.py +69 -0
  21. kekkai/report/pdf.py +63 -0
  22. kekkai/report/unified.py +226 -0
  23. kekkai/scanners/container.py +33 -3
  24. kekkai/scanners/gitleaks.py +3 -1
  25. kekkai/scanners/semgrep.py +1 -1
  26. kekkai/scanners/trivy.py +1 -1
  27. kekkai/threatflow/model_adapter.py +143 -1
  28. kekkai/triage/__init__.py +54 -1
  29. kekkai/triage/loader.py +196 -0
  30. kekkai_cli-1.1.1.dist-info/METADATA +379 -0
  31. {kekkai_cli-1.0.5.dist-info → kekkai_cli-1.1.1.dist-info}/RECORD +34 -33
  32. {kekkai_cli-1.0.5.dist-info → kekkai_cli-1.1.1.dist-info}/entry_points.txt +0 -1
  33. {kekkai_cli-1.0.5.dist-info → kekkai_cli-1.1.1.dist-info}/top_level.txt +0 -1
  34. kekkai_cli-1.0.5.dist-info/METADATA +0 -135
  35. portal/__init__.py +0 -19
  36. portal/api.py +0 -155
  37. portal/auth.py +0 -103
  38. portal/enterprise/__init__.py +0 -32
  39. portal/enterprise/audit.py +0 -435
  40. portal/enterprise/licensing.py +0 -342
  41. portal/enterprise/rbac.py +0 -276
  42. portal/enterprise/saml.py +0 -595
  43. portal/ops/__init__.py +0 -53
  44. portal/ops/backup.py +0 -553
  45. portal/ops/log_shipper.py +0 -469
  46. portal/ops/monitoring.py +0 -517
  47. portal/ops/restore.py +0 -469
  48. portal/ops/secrets.py +0 -408
  49. portal/ops/upgrade.py +0 -591
  50. portal/tenants.py +0 -340
  51. portal/uploads.py +0 -259
  52. portal/web.py +0 -384
  53. {kekkai_cli-1.0.5.dist-info → kekkai_cli-1.1.1.dist-info}/WHEEL +0 -0
kekkai/fix/engine.py ADDED
@@ -0,0 +1,500 @@
1
+ """Core fix engine for AI-powered code remediation.
2
+
3
+ Orchestrates the finding → prompt → LLM → diff → apply workflow.
4
+
5
+ Security considerations:
6
+ - All inputs sanitized before LLM processing
7
+ - Preview mode default (no auto-apply)
8
+ - Audit logging for all operations
9
+ - Rate limiting on API calls
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import TYPE_CHECKING
19
+
20
+ from ..scanners.base import Finding, Severity
21
+ from ..threatflow.model_adapter import (
22
+ ModelAdapter,
23
+ ModelConfig,
24
+ create_adapter,
25
+ )
26
+ from ..threatflow.sanitizer import SanitizeConfig, TieredSanitizer
27
+ from .audit import FixAuditLog, create_session_id
28
+ from .differ import ApplyResult, DiffApplier, DiffParser, ParsedDiff
29
+ from .prompts import FixPromptBuilder
30
+
31
+ if TYPE_CHECKING:
32
+ pass
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
@dataclass
class FixConfig:
    """Settings that control how the fix engine runs.

    Defaults are conservative: preview-only (``dry_run=True``), backups on,
    and input sanitization enabled.
    """

    # Which LLM backend to use: local, openai, anthropic, or mock.
    model_mode: str = field(default="local")
    # Filesystem path to a local model, when model_mode == "local".
    model_path: str | None = field(default=None)
    # API credential for hosted backends.
    api_key: str | None = field(default=None)
    # Backend-specific model identifier.
    model_name: str | None = field(default=None)
    # Per-request timeout for LLM calls, in seconds.
    timeout_seconds: int = field(default=120)
    # Cap on how many findings are processed per run.
    max_fixes: int = field(default=10)
    # Lines of surrounding code sent to the LLM around each finding.
    context_lines: int = field(default=10)
    # When True (default), generate previews only; never write files.
    dry_run: bool = field(default=True)
    # When applying, keep a backup copy of each modified file.
    create_backups: bool = field(default=True)
    # Run code context through the sanitizer before prompting.
    sanitize_input: bool = field(default=True)
    # Minimum delay between API calls, in seconds.
    rate_limit_seconds: float = field(default=1.0)
52
+
53
+
54
@dataclass
class FixSuggestion:
    """One proposed remediation for a single finding.

    Carries the originating finding, the parsed diff, the raw LLM output,
    and a human-readable preview. ``success`` is False when generation
    failed at any stage; ``error`` then explains why.
    """

    finding: Finding          # the scan finding this fix targets
    diff: ParsedDiff          # diff parsed from the LLM response
    raw_response: str         # unmodified LLM output, kept for auditing
    preview: str              # rendered preview of the change
    success: bool             # whether a usable fix was produced
    error: str | None = field(default=None)  # failure reason, if any
64
+
65
+
66
@dataclass
class FixResult:
    """Aggregate outcome of a fix run.

    ``success`` reflects whether the run itself completed (individual
    fixes may still have failed — see ``warnings``). Counters summarize
    the per-finding lists ``suggestions`` and ``apply_results``.
    """

    success: bool                      # run completed without a fatal error
    findings_processed: int = 0        # findings actually attempted
    fixes_generated: int = 0           # suggestions that parsed into valid diffs
    fixes_applied: int = 0             # diffs written to disk (non-dry-run only)
    # Per-finding suggestion objects, in processing order.
    suggestions: list[FixSuggestion] = field(default_factory=list)
    # Per-application results; empty in dry-run mode.
    apply_results: list[ApplyResult] = field(default_factory=list)
    # Where the audit log was written, when an output dir was given.
    audit_log_path: Path | None = None
    # Fatal error message when success is False.
    error: str | None = None
    # Non-fatal per-finding problems.
    warnings: list[str] = field(default_factory=list)
79
+
80
+
81
class FixEngine:
    """AI-powered code remediation engine.

    Workflow:
    1. Load findings from scan results
    2. For each finding:
       a. Extract code context from source file
       b. Build fix prompt with sanitized content
       c. Query LLM for fix suggestion
       d. Parse diff from LLM response
       e. Validate and optionally apply diff
    3. Generate audit log

    NOTE(review): FixConfig declares rate_limit_seconds but nothing in this
    class consults it — confirm whether rate limiting is enforced in the
    model adapter or is currently unimplemented.
    """

    def __init__(self, config: FixConfig | None = None) -> None:
        # All collaborators are constructed eagerly except the model
        # adapter, which is created lazily in _get_model().
        self.config = config or FixConfig()
        self._prompt_builder = FixPromptBuilder(context_lines=self.config.context_lines)
        self._diff_parser = DiffParser()
        self._diff_applier = DiffApplier()
        # strict_mode=False: sanitize but do not aggressively block content.
        self._sanitizer = TieredSanitizer(SanitizeConfig(strict_mode=False))
        self._model: ModelAdapter | None = None

    def _get_model(self) -> ModelAdapter:
        """Get or create the model adapter (lazy, cached for the engine's lifetime)."""
        if self._model is None:
            self._model = create_adapter(
                mode=self.config.model_mode,
                config=ModelConfig(
                    timeout_seconds=self.config.timeout_seconds,
                    model_path=self.config.model_path,
                    api_key=self.config.api_key,
                    model_name=self.config.model_name,
                ),
            )
        return self._model

    def fix(
        self,
        findings: list[Finding],
        repo_path: Path,
        output_dir: Path | None = None,
    ) -> FixResult:
        """Generate and optionally apply fixes for findings.

        Per-finding failures are recorded in the audit log and returned as
        warnings; they do not abort the run, so the returned FixResult has
        success=True even when individual fixes failed.

        Args:
            findings: List of findings from scan results
            repo_path: Path to the repository root
            output_dir: Optional output directory for diffs and audit log

        Returns:
            FixResult with details of all operations
        """
        if not findings:
            return FixResult(success=True, warnings=["No findings to fix"])

        # Filter to fixable findings (Semgrep only for now) — findings from
        # other scanners or without a file path are silently excluded.
        fixable = [f for f in findings if f.scanner == "semgrep" and f.file_path]
        if not fixable:
            return FixResult(
                success=True,
                warnings=["No Semgrep findings with file paths found"],
            )

        # Limit to max_fixes; excess findings are dropped (logged, not warned).
        to_fix = fixable[: self.config.max_fixes]
        if len(fixable) > self.config.max_fixes:
            logger.warning(
                "fix_limit_reached",
                extra={"total": len(fixable), "limit": self.config.max_fixes},
            )

        # Initialize audit log — one session per fix() call.
        audit_log = FixAuditLog(
            session_id=create_session_id(),
            repo_path=str(repo_path),
            model_mode=self.config.model_mode,
        )

        if output_dir:
            output_dir.mkdir(parents=True, exist_ok=True)
            audit_log.set_output_path(output_dir / "fix-audit.json")

        # Process findings
        suggestions: list[FixSuggestion] = []
        apply_results: list[ApplyResult] = []
        warnings: list[str] = []

        model = self._get_model()

        for finding in to_fix:
            # Record attempt before generation so failed attempts are audited too.
            attempt = audit_log.record_attempt(
                finding_id=finding.dedupe_hash(),
                rule_id=finding.rule_id or "unknown",
                file_path=finding.file_path or "",
                line_number=finding.line or 0,
                severity=finding.severity.value,
                model_used=model.name,
            )

            # Generate fix
            suggestion = self._generate_fix(finding, repo_path, model)
            suggestions.append(suggestion)

            if not suggestion.success:
                audit_log.mark_failed(attempt, suggestion.error or "Unknown error")
                err_msg = f"Failed to fix {finding.file_path}:{finding.line}: {suggestion.error}"
                warnings.append(err_msg)
                continue

            # Update attempt with diff preview. In dry-run mode attempts stay
            # "pending"; otherwise they are "approved" ahead of application.
            audit_log.update_attempt(
                attempt,
                status="approved" if not self.config.dry_run else "pending",
                diff_preview=suggestion.preview,
            )

            # Apply if not dry run
            if not self.config.dry_run:
                result = self._diff_applier.apply(
                    suggestion.diff,
                    repo_path,
                    dry_run=False,
                    create_backup=self.config.create_backups,
                )
                apply_results.append(result)

                if result.success:
                    audit_log.mark_applied(
                        attempt,
                        lines_added=result.lines_added,
                        lines_removed=result.lines_removed,
                        backup_path=result.backup_path,
                    )
                else:
                    audit_log.mark_failed(attempt, result.error or "Apply failed")
                    warnings.append(f"Failed to apply fix: {result.error}")

        # Save audit log (same path as set_output_path above).
        audit_log_path = None
        if output_dir:
            audit_log_path = output_dir / "fix-audit.json"
            audit_log.save(audit_log_path)

        return FixResult(
            success=True,
            findings_processed=len(to_fix),
            fixes_generated=len([s for s in suggestions if s.success]),
            fixes_applied=len([r for r in apply_results if r.success]),
            suggestions=suggestions,
            apply_results=apply_results,
            audit_log_path=audit_log_path,
            warnings=warnings,
        )

    def _generate_fix(
        self,
        finding: Finding,
        repo_path: Path,
        model: ModelAdapter,
    ) -> FixSuggestion:
        """Generate a fix suggestion for a single finding.

        Never raises: every failure path returns a FixSuggestion with
        success=False and a descriptive error string.
        """
        file_path = finding.file_path
        if not file_path:
            return FixSuggestion(
                finding=finding,
                diff=ParsedDiff("", ""),
                raw_response="",
                preview="",
                success=False,
                error="No file path in finding",
            )

        # Resolve full path.
        # NOTE(review): resolve() follows symlinks and nothing checks that
        # full_path remains inside repo_path — confirm upstream guarantees
        # that finding.file_path cannot traverse outside the repository.
        full_path = (repo_path / file_path).resolve()
        if not full_path.exists():
            return FixSuggestion(
                finding=finding,
                diff=ParsedDiff("", ""),
                raw_response="",
                preview="",
                success=False,
                error=f"File not found: {file_path}",
            )

        # Read file content (platform-default encoding; binary files are
        # rejected via UnicodeDecodeError).
        try:
            file_content = full_path.read_text()
        except (OSError, UnicodeDecodeError) as e:
            return FixSuggestion(
                finding=finding,
                diff=ParsedDiff("", ""),
                raw_response="",
                preview="",
                success=False,
                error=f"Cannot read file: {e}",
            )

        # Extract code context around the reported line (defaults to line 1
        # when the finding carries no line number).
        line_num = finding.line or 1
        code_context, vulnerable_line = self._prompt_builder.extract_code_context(
            file_content, line_num
        )

        # Sanitize content if enabled; the sanitizer may outright block the
        # snippet, in which case no prompt is sent.
        if self.config.sanitize_input:
            sanitize_result = self._sanitizer.sanitize_input(code_context, file_path)
            if sanitize_result.blocked:
                return FixSuggestion(
                    finding=finding,
                    diff=ParsedDiff("", ""),
                    raw_response="",
                    preview="",
                    success=False,
                    error=f"Content blocked by sanitizer: {sanitize_result.block_reason}",
                )
            code_context = sanitize_result.sanitized

        # Build CWE context if available
        additional_context = ""
        if finding.cwe:
            additional_context = f"CWE: {finding.cwe}"

        # Build prompt
        system_prompt = self._prompt_builder.build_system_prompt()
        user_prompt = self._prompt_builder.build_fix_prompt(
            rule_id=finding.rule_id or "",
            severity=finding.severity.value,
            title=finding.title,
            description=finding.description,
            file_path=file_path,
            line_number=line_num,
            code_context=code_context,
            vulnerable_line=vulnerable_line,
            additional_context=additional_context,
        )

        # Query LLM. Broad except is deliberate: adapter backends can raise
        # arbitrary exceptions and a single finding must not abort the run.
        try:
            response = model.generate(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                config=ModelConfig(
                    timeout_seconds=self.config.timeout_seconds,
                    max_tokens=2048,
                ),
            )
        except Exception as e:
            return FixSuggestion(
                finding=finding,
                diff=ParsedDiff("", ""),
                raw_response="",
                preview="",
                success=False,
                error=f"LLM error: {e}",
            )

        if not response.success:
            return FixSuggestion(
                finding=finding,
                diff=ParsedDiff("", ""),
                raw_response=response.content,
                preview="",
                success=False,
                error="LLM returned empty response",
            )

        # Parse diff from response
        diff = self._diff_parser.parse(response.content)

        # If no diff parsed, surface the first 500 chars of raw output as the
        # preview so a human can inspect what the model actually said.
        if not diff.is_valid:
            return FixSuggestion(
                finding=finding,
                diff=diff,
                raw_response=response.content,
                preview=response.content[:500],
                success=False,
                error="Could not parse valid diff from LLM response",
            )

        # Generate preview
        preview = self._diff_applier.preview(diff, repo_path)

        return FixSuggestion(
            finding=finding,
            diff=diff,
            raw_response=response.content,
            preview=preview,
            success=True,
        )

    def fix_from_scan_results(
        self,
        scan_results_path: Path,
        repo_path: Path,
        output_dir: Path | None = None,
    ) -> FixResult:
        """Generate fixes from a scan results JSON file.

        Args:
            scan_results_path: Path to scan results JSON
            repo_path: Path to the repository root
            output_dir: Optional output directory for diffs and audit log

        Returns:
            FixResult with details of all operations; success=False with an
            error message when the results file cannot be read or parsed.
        """
        # Parse scan results
        try:
            findings = self._load_findings(scan_results_path)
        except (OSError, json.JSONDecodeError, KeyError) as e:
            return FixResult(
                success=False,
                error=f"Failed to load scan results: {e}",
            )

        return self.fix(findings, repo_path, output_dir)

    def _load_findings(self, path: Path) -> list[Finding]:
        """Load findings from a scan results JSON file.

        Supports two shapes: raw Semgrep output (top-level "results") and
        the Kekkai unified format (top-level "findings"). Any other shape
        yields an empty list.
        """
        data = json.loads(path.read_text())

        # Handle different JSON formats
        findings: list[Finding] = []

        # Try Semgrep format
        if "results" in data:
            for result in data["results"]:
                findings.append(self._parse_semgrep_result(result))
        # Try Kekkai unified format
        elif "findings" in data:
            for f in data["findings"]:
                findings.append(self._parse_unified_finding(f))

        return findings

    def _parse_semgrep_result(self, result: dict[str, object]) -> Finding:
        """Parse a Semgrep result into a Finding.

        Defensive against malformed input: non-dict "extra"/"metadata"/"start"
        and non-list "cwe" values are replaced with empty equivalents.
        """
        extra = result.get("extra", {})
        if not isinstance(extra, dict):
            extra = {}
        metadata = extra.get("metadata", {})
        if not isinstance(metadata, dict):
            metadata = {}

        # Map Semgrep's ERROR/WARNING severities; anything else (including the
        # lowercase "warning" default) falls through to Severity.from_string.
        severity_str = extra.get("severity", "warning")
        if severity_str == "ERROR":
            severity = Severity.HIGH
        elif severity_str == "WARNING":
            severity = Severity.MEDIUM
        else:
            severity = Severity.from_string(str(severity_str))

        # Semgrep reports CWEs as a list; only the first is kept.
        cwe_list = metadata.get("cwe", [])
        if not isinstance(cwe_list, list):
            cwe_list = []
        cwe = str(cwe_list[0]) if cwe_list else None

        start = result.get("start", {})
        if not isinstance(start, dict):
            start = {}

        return Finding(
            scanner="semgrep",
            title=str(metadata.get("message") or result.get("check_id", "Semgrep finding")),
            severity=severity,
            description=str(extra.get("message", "")),
            file_path=str(result.get("path", "")),
            line=int(start.get("line", 0)) if start.get("line") else None,
            rule_id=str(result.get("check_id", "")),
            cwe=cwe,
        )

    def _parse_unified_finding(self, data: dict[str, object]) -> Finding:
        """Parse a unified format finding.

        Falsy optional fields (empty string, 0, None) are normalized to None.
        """
        line_val = data.get("line")
        return Finding(
            scanner=str(data.get("scanner", "unknown")),
            title=str(data.get("title", "")),
            severity=Severity.from_string(str(data.get("severity", "unknown"))),
            description=str(data.get("description", "")),
            file_path=str(data.get("file_path")) if data.get("file_path") else None,
            line=int(str(line_val)) if line_val else None,
            rule_id=str(data.get("rule_id")) if data.get("rule_id") else None,
            cwe=str(data.get("cwe")) if data.get("cwe") else None,
        )
468
+
469
+
470
def create_fix_engine(
    model_mode: str = "local",
    dry_run: bool = True,
    **kwargs: object,
) -> FixEngine:
    """Create a configured fix engine.

    Args:
        model_mode: LLM backend (local, openai, anthropic, mock)
        dry_run: If True, don't apply fixes
        **kwargs: Additional config options

    Returns:
        Configured FixEngine instance
    """

    def optional_str(key: str) -> str | None:
        # Truthiness check mirrors the config contract: empty strings
        # collapse to None just like a missing key.
        raw = kwargs.get(key)
        return str(raw) if raw else None

    def coerce_int(key: str, fallback: int) -> int:
        raw = kwargs.get(key, fallback)
        return int(str(raw)) if raw is not None else fallback

    settings = FixConfig(
        model_mode=model_mode,
        dry_run=dry_run,
        model_path=optional_str("model_path"),
        api_key=optional_str("api_key"),
        model_name=optional_str("model_name"),
        timeout_seconds=coerce_int("timeout_seconds", 120),
        max_fixes=coerce_int("max_fixes", 10),
        context_lines=coerce_int("context_lines", 10),
        create_backups=bool(kwargs.get("create_backups", True)),
        sanitize_input=bool(kwargs.get("sanitize_input", True)),
    )
    return FixEngine(settings)