kekkai-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. kekkai/__init__.py +7 -0
  2. kekkai/cli.py +1038 -0
  3. kekkai/config.py +403 -0
  4. kekkai/dojo.py +419 -0
  5. kekkai/dojo_import.py +213 -0
  6. kekkai/github/__init__.py +16 -0
  7. kekkai/github/commenter.py +198 -0
  8. kekkai/github/models.py +56 -0
  9. kekkai/github/sanitizer.py +112 -0
  10. kekkai/installer/__init__.py +39 -0
  11. kekkai/installer/errors.py +23 -0
  12. kekkai/installer/extract.py +161 -0
  13. kekkai/installer/manager.py +252 -0
  14. kekkai/installer/manifest.py +189 -0
  15. kekkai/installer/verify.py +86 -0
  16. kekkai/manifest.py +77 -0
  17. kekkai/output.py +218 -0
  18. kekkai/paths.py +46 -0
  19. kekkai/policy.py +326 -0
  20. kekkai/runner.py +70 -0
  21. kekkai/scanners/__init__.py +67 -0
  22. kekkai/scanners/backends/__init__.py +14 -0
  23. kekkai/scanners/backends/base.py +73 -0
  24. kekkai/scanners/backends/docker.py +178 -0
  25. kekkai/scanners/backends/native.py +240 -0
  26. kekkai/scanners/base.py +110 -0
  27. kekkai/scanners/container.py +144 -0
  28. kekkai/scanners/falco.py +237 -0
  29. kekkai/scanners/gitleaks.py +237 -0
  30. kekkai/scanners/semgrep.py +227 -0
  31. kekkai/scanners/trivy.py +246 -0
  32. kekkai/scanners/url_policy.py +163 -0
  33. kekkai/scanners/zap.py +340 -0
  34. kekkai/threatflow/__init__.py +94 -0
  35. kekkai/threatflow/artifacts.py +476 -0
  36. kekkai/threatflow/chunking.py +361 -0
  37. kekkai/threatflow/core.py +438 -0
  38. kekkai/threatflow/mermaid.py +374 -0
  39. kekkai/threatflow/model_adapter.py +491 -0
  40. kekkai/threatflow/prompts.py +277 -0
  41. kekkai/threatflow/redaction.py +228 -0
  42. kekkai/threatflow/sanitizer.py +643 -0
  43. kekkai/triage/__init__.py +33 -0
  44. kekkai/triage/app.py +168 -0
  45. kekkai/triage/audit.py +203 -0
  46. kekkai/triage/ignore.py +269 -0
  47. kekkai/triage/models.py +185 -0
  48. kekkai/triage/screens.py +341 -0
  49. kekkai/triage/widgets.py +169 -0
  50. kekkai_cli-1.0.0.dist-info/METADATA +135 -0
  51. kekkai_cli-1.0.0.dist-info/RECORD +90 -0
  52. kekkai_cli-1.0.0.dist-info/WHEEL +5 -0
  53. kekkai_cli-1.0.0.dist-info/entry_points.txt +3 -0
  54. kekkai_cli-1.0.0.dist-info/top_level.txt +3 -0
  55. kekkai_core/__init__.py +3 -0
  56. kekkai_core/ci/__init__.py +11 -0
  57. kekkai_core/ci/benchmarks.py +354 -0
  58. kekkai_core/ci/metadata.py +104 -0
  59. kekkai_core/ci/validators.py +92 -0
  60. kekkai_core/docker/__init__.py +17 -0
  61. kekkai_core/docker/metadata.py +153 -0
  62. kekkai_core/docker/sbom.py +173 -0
  63. kekkai_core/docker/security.py +158 -0
  64. kekkai_core/docker/signing.py +135 -0
  65. kekkai_core/redaction.py +84 -0
  66. kekkai_core/slsa/__init__.py +13 -0
  67. kekkai_core/slsa/verify.py +121 -0
  68. kekkai_core/windows/__init__.py +29 -0
  69. kekkai_core/windows/chocolatey.py +335 -0
  70. kekkai_core/windows/installer.py +256 -0
  71. kekkai_core/windows/scoop.py +165 -0
  72. kekkai_core/windows/validators.py +220 -0
  73. portal/__init__.py +19 -0
  74. portal/api.py +155 -0
  75. portal/auth.py +103 -0
  76. portal/enterprise/__init__.py +32 -0
  77. portal/enterprise/audit.py +435 -0
  78. portal/enterprise/licensing.py +342 -0
  79. portal/enterprise/rbac.py +276 -0
  80. portal/enterprise/saml.py +595 -0
  81. portal/ops/__init__.py +53 -0
  82. portal/ops/backup.py +553 -0
  83. portal/ops/log_shipper.py +469 -0
  84. portal/ops/monitoring.py +517 -0
  85. portal/ops/restore.py +469 -0
  86. portal/ops/secrets.py +408 -0
  87. portal/ops/upgrade.py +591 -0
  88. portal/tenants.py +340 -0
  89. portal/uploads.py +259 -0
  90. portal/web.py +384 -0
@@ -0,0 +1,438 @@
1
+ """ThreatFlow core orchestrator.
2
+
3
+ Main entry point for threat model generation that:
4
+ - Coordinates chunking, redaction, sanitization
5
+ - Manages LLM interactions
6
+ - Produces structured artifacts
7
+ - Enforces security controls
8
+
9
+ ASVS V16.5.1: Generic errors without exposing internals.
10
+ ASVS V13.1.3: Timeouts and resource limits.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import os
17
+ import time
18
+ from dataclasses import dataclass, field
19
+ from datetime import UTC, datetime
20
+ from pathlib import Path
21
+ from typing import TYPE_CHECKING
22
+
23
+ from .artifacts import ArtifactGenerator, ThreatModelArtifacts
24
+ from .chunking import ChunkingConfig, ChunkingResult, chunk_files
25
+ from .model_adapter import (
26
+ ModelAdapter,
27
+ ModelConfig,
28
+ ModelResponse,
29
+ create_adapter,
30
+ )
31
+ from .prompts import PromptBuilder
32
+ from .redaction import ThreatFlowRedactor
33
+ from .sanitizer import Sanitizer
34
+
35
+ if TYPE_CHECKING:
36
+ pass
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
+ @dataclass
42
+ class ThreatFlowConfig:
43
+ """Configuration for ThreatFlow analysis."""
44
+
45
+ # Model settings
46
+ model_mode: str = "local" # local, openai, anthropic, mock
47
+ model_path: str | None = None
48
+ api_key: str | None = None
49
+ api_base: str | None = None
50
+ model_name: str | None = None
51
+
52
+ # Processing settings
53
+ max_tokens_per_chunk: int = 2000
54
+ max_files: int = 500
55
+ timeout_seconds: int = 300
56
+
57
+ # Output settings
58
+ output_dir: Path | None = None
59
+
60
+ # Security settings
61
+ redact_secrets: bool = True
62
+ sanitize_content: bool = True
63
+ warn_on_injection: bool = True
64
+
65
+ @classmethod
66
+ def from_env(cls) -> ThreatFlowConfig:
67
+ """Create config from environment variables."""
68
+ return cls(
69
+ model_mode=os.environ.get("KEKKAI_THREATFLOW_MODE", "local"),
70
+ model_path=os.environ.get("KEKKAI_THREATFLOW_MODEL_PATH"),
71
+ api_key=os.environ.get("KEKKAI_THREATFLOW_API_KEY"),
72
+ api_base=os.environ.get("KEKKAI_THREATFLOW_API_BASE"),
73
+ model_name=os.environ.get("KEKKAI_THREATFLOW_MODEL_NAME"),
74
+ )
75
+
76
+
77
+ @dataclass
78
+ class ThreatFlowResult:
79
+ """Result of ThreatFlow analysis."""
80
+
81
+ success: bool
82
+ artifacts: ThreatModelArtifacts | None = None
83
+ output_files: list[Path] = field(default_factory=list)
84
+ model_mode: str = "unknown"
85
+ duration_ms: int = 0
86
+ error: str | None = None
87
+ warnings: list[str] = field(default_factory=list)
88
+ injection_warnings: list[str] = field(default_factory=list)
89
+ files_processed: int = 0
90
+ files_skipped: int = 0
91
+
92
+ def to_dict(self) -> dict[str, object]:
93
+ """Convert to dictionary for JSON output."""
94
+ return {
95
+ "success": self.success,
96
+ "model_mode": self.model_mode,
97
+ "duration_ms": self.duration_ms,
98
+ "error": self.error,
99
+ "warnings": self.warnings,
100
+ "injection_warnings": self.injection_warnings,
101
+ "files_processed": self.files_processed,
102
+ "files_skipped": self.files_skipped,
103
+ "output_files": [str(p) for p in self.output_files],
104
+ }
105
+
106
+
107
+ class ThreatFlow:
108
+ """ThreatFlow threat model generator.
109
+
110
+ Security-first design:
111
+ - Never executes repository code
112
+ - Redacts secrets before LLM processing
113
+ - Defends against prompt injection
114
+ - Local model by default
115
+ """
116
+
117
+ def __init__(
118
+ self,
119
+ config: ThreatFlowConfig | None = None,
120
+ adapter: ModelAdapter | None = None,
121
+ ) -> None:
122
+ self.config = config or ThreatFlowConfig.from_env()
123
+
124
+ # Initialize adapter
125
+ if adapter:
126
+ self._adapter = adapter
127
+ else:
128
+ model_config = ModelConfig(
129
+ model_path=self.config.model_path,
130
+ api_key=self.config.api_key,
131
+ api_base=self.config.api_base,
132
+ model_name=self.config.model_name,
133
+ timeout_seconds=self.config.timeout_seconds,
134
+ )
135
+ self._adapter = create_adapter(self.config.model_mode, model_config)
136
+
137
+ # Initialize security components
138
+ self._redactor = ThreatFlowRedactor()
139
+ self._sanitizer = Sanitizer()
140
+ self._prompt_builder = PromptBuilder()
141
+
142
+ @property
143
+ def model_mode(self) -> str:
144
+ """Get the current model mode."""
145
+ return self.config.model_mode
146
+
147
+ @property
148
+ def is_local(self) -> bool:
149
+ """Check if using local model."""
150
+ return self._adapter.is_local
151
+
152
+ def analyze(
153
+ self,
154
+ repo_path: Path,
155
+ output_dir: Path | None = None,
156
+ ) -> ThreatFlowResult:
157
+ """Analyze a repository and generate threat model.
158
+
159
+ Args:
160
+ repo_path: Path to the repository to analyze
161
+ output_dir: Directory for output artifacts
162
+
163
+ Returns:
164
+ ThreatFlowResult with artifacts and metadata
165
+ """
166
+ start_time = time.time()
167
+ warnings: list[str] = []
168
+ injection_warnings: list[str] = []
169
+
170
+ # Validate input
171
+ repo_path = Path(repo_path).resolve()
172
+ if not repo_path.exists():
173
+ return ThreatFlowResult(
174
+ success=False,
175
+ error="Repository path does not exist",
176
+ model_mode=self.config.model_mode,
177
+ )
178
+
179
+ if not repo_path.is_dir():
180
+ return ThreatFlowResult(
181
+ success=False,
182
+ error="Repository path is not a directory",
183
+ model_mode=self.config.model_mode,
184
+ )
185
+
186
+ # Determine output directory
187
+ out_dir = output_dir or self.config.output_dir
188
+ if out_dir is None:
189
+ out_dir = repo_path / ".threatflow"
190
+ out_dir = Path(out_dir)
191
+
192
+ # Warn if using remote API
193
+ if not self._adapter.is_local:
194
+ msg = (
195
+ f"WARNING: Using remote API ({self._adapter.name}). "
196
+ "Code content will be sent to external service."
197
+ )
198
+ logger.warning(msg)
199
+ warnings.append(msg)
200
+
201
+ try:
202
+ # Step 1: Chunk repository files
203
+ logger.info("Chunking repository files...")
204
+ chunking_config = ChunkingConfig(
205
+ max_tokens_per_chunk=self.config.max_tokens_per_chunk,
206
+ max_files=self.config.max_files,
207
+ )
208
+ chunk_result = chunk_files(repo_path, chunking_config)
209
+ warnings.extend(chunk_result.warnings)
210
+
211
+ if not chunk_result.chunks:
212
+ return ThreatFlowResult(
213
+ success=False,
214
+ error="No files to analyze in repository",
215
+ model_mode=self.config.model_mode,
216
+ files_skipped=len(chunk_result.skipped_files),
217
+ )
218
+
219
+ logger.info(
220
+ "Processed %d files into %d chunks",
221
+ chunk_result.total_files_processed,
222
+ len(chunk_result.chunks),
223
+ )
224
+
225
+ # Step 2: Prepare content with security processing
226
+ logger.info("Processing content with security controls...")
227
+ processed_content, proc_warnings = self._process_content(chunk_result)
228
+ injection_warnings.extend(proc_warnings)
229
+
230
+ # Step 3: Generate data flow analysis
231
+ logger.info("Generating data flow analysis...")
232
+ dataflow_response = self._generate_dataflow(processed_content)
233
+ if not dataflow_response.success:
234
+ return ThreatFlowResult(
235
+ success=False,
236
+ error="Failed to generate data flow analysis",
237
+ model_mode=self.config.model_mode,
238
+ duration_ms=int((time.time() - start_time) * 1000),
239
+ )
240
+
241
+ # Step 4: Generate threat analysis
242
+ logger.info("Generating threat analysis...")
243
+ threats_response = self._generate_threats(dataflow_response.content, processed_content)
244
+ if not threats_response.success:
245
+ return ThreatFlowResult(
246
+ success=False,
247
+ error="Failed to generate threat analysis",
248
+ model_mode=self.config.model_mode,
249
+ duration_ms=int((time.time() - start_time) * 1000),
250
+ )
251
+
252
+ # Step 5: Build artifacts
253
+ logger.info("Building artifacts...")
254
+ artifacts = self._build_artifacts(
255
+ repo_path=repo_path,
256
+ chunk_result=chunk_result,
257
+ dataflow_output=dataflow_response.content,
258
+ threats_output=threats_response.content,
259
+ )
260
+
261
+ # Step 6: Write output files
262
+ logger.info("Writing output files...")
263
+ generator = ArtifactGenerator(
264
+ output_dir=out_dir,
265
+ repo_name=repo_path.name,
266
+ )
267
+ output_files = generator.write_artifacts(artifacts)
268
+
269
+ duration_ms = int((time.time() - start_time) * 1000)
270
+
271
+ return ThreatFlowResult(
272
+ success=True,
273
+ artifacts=artifacts,
274
+ output_files=output_files,
275
+ model_mode=self.config.model_mode,
276
+ duration_ms=duration_ms,
277
+ warnings=warnings,
278
+ injection_warnings=injection_warnings,
279
+ files_processed=chunk_result.total_files_processed,
280
+ files_skipped=len(chunk_result.skipped_files),
281
+ )
282
+
283
+ except Exception:
284
+ # ASVS V16.5.1: Generic error messages
285
+ logger.exception("ThreatFlow analysis failed")
286
+ return ThreatFlowResult(
287
+ success=False,
288
+ error="Analysis failed. Check logs for details.",
289
+ model_mode=self.config.model_mode,
290
+ duration_ms=int((time.time() - start_time) * 1000),
291
+ warnings=warnings,
292
+ )
293
+
294
+ def _process_content(self, chunk_result: ChunkingResult) -> tuple[str, list[str]]:
295
+ """Process chunked content with redaction and sanitization.
296
+
297
+ Returns:
298
+ Tuple of (processed_content, injection_warnings)
299
+ """
300
+ warnings: list[str] = []
301
+ chunks_data: list[tuple[str, str, int, int]] = []
302
+
303
+ for chunk in chunk_result.chunks:
304
+ content = chunk.content
305
+
306
+ # Step 1: Redact secrets
307
+ if self.config.redact_secrets:
308
+ secrets_found = self._redactor.detect_secrets(content)
309
+ if secrets_found:
310
+ logger.info(
311
+ "Redacting secrets in %s: %s",
312
+ chunk.file_path,
313
+ [s[0] for s in secrets_found],
314
+ )
315
+ content = self._redactor.redact(content)
316
+
317
+ # Step 2: Sanitize for prompt injection
318
+ if self.config.sanitize_content:
319
+ result = self._sanitizer.sanitize(content)
320
+ if result.injections_found and self.config.warn_on_injection:
321
+ for name, risk, desc in result.injections_found:
322
+ msg = (
323
+ f"Injection pattern in {chunk.file_path}: "
324
+ f"{name} ({risk.value}) - {desc}"
325
+ )
326
+ warnings.append(msg)
327
+ logger.warning(msg)
328
+ content = result.sanitized
329
+
330
+ chunks_data.append((chunk.file_path, content, chunk.start_line, chunk.end_line))
331
+
332
+ # Format all chunks
333
+ formatted = self._prompt_builder.format_code_chunks(chunks_data)
334
+ return formatted, warnings
335
+
336
+ def _generate_dataflow(self, content: str) -> ModelResponse:
337
+ """Generate data flow analysis from content."""
338
+ system_prompt = self._prompt_builder.build_system_prompt()
339
+ user_prompt = self._prompt_builder.build_dataflow_prompt(content)
340
+
341
+ wrapped_content = self._sanitizer.wrap_content(user_prompt, "dataflow_analysis")
342
+
343
+ result: ModelResponse = self._adapter.generate(
344
+ system_prompt=system_prompt,
345
+ user_prompt=wrapped_content,
346
+ config=ModelConfig(timeout_seconds=self.config.timeout_seconds),
347
+ )
348
+ return result
349
+
350
+ def _generate_threats(self, dataflow_content: str, code_context: str) -> ModelResponse:
351
+ """Generate threat analysis from dataflow and code."""
352
+ system_prompt = self._prompt_builder.build_system_prompt()
353
+ user_prompt = self._prompt_builder.build_threats_prompt(
354
+ dataflow_content=dataflow_content,
355
+ code_context=code_context,
356
+ )
357
+
358
+ wrapped_content = self._sanitizer.wrap_content(user_prompt, "threat_analysis")
359
+
360
+ result: ModelResponse = self._adapter.generate(
361
+ system_prompt=system_prompt,
362
+ user_prompt=wrapped_content,
363
+ config=ModelConfig(timeout_seconds=self.config.timeout_seconds),
364
+ )
365
+ return result
366
+
367
+ def _build_artifacts(
368
+ self,
369
+ repo_path: Path,
370
+ chunk_result: ChunkingResult,
371
+ dataflow_output: str,
372
+ threats_output: str,
373
+ ) -> ThreatModelArtifacts:
374
+ """Build structured artifacts from LLM output."""
375
+ generator = ArtifactGenerator(output_dir=Path("."), repo_name=repo_path.name)
376
+
377
+ # Parse LLM outputs
378
+ threats = generator.parse_llm_threats(threats_output)
379
+ (
380
+ external_entities,
381
+ processes,
382
+ data_stores,
383
+ dataflows,
384
+ trust_boundaries,
385
+ ) = generator.parse_llm_dataflows(dataflow_output)
386
+
387
+ # Detect languages from chunks
388
+ languages = list({c.language for c in chunk_result.chunks if c.language})
389
+
390
+ return ThreatModelArtifacts(
391
+ threats=threats,
392
+ dataflows=dataflows,
393
+ external_entities=external_entities,
394
+ processes=processes,
395
+ data_stores=data_stores,
396
+ trust_boundaries=trust_boundaries,
397
+ assumptions=[
398
+ "All code was analyzed statically without runtime execution",
399
+ "Third-party dependencies are assumed to be from trusted sources",
400
+ "Environment configuration may differ from analysis assumptions",
401
+ ],
402
+ scope_notes=[
403
+ f"Repository: {repo_path.name}",
404
+ f"Files analyzed: {chunk_result.total_files_processed}",
405
+ ],
406
+ limitations=[
407
+ "This is an automated first-pass analysis",
408
+ "Dynamic behavior and runtime configuration not analyzed",
409
+ "Human review is required for production use",
410
+ ],
411
+ repo_name=repo_path.name,
412
+ analysis_timestamp=datetime.now(UTC).isoformat(),
413
+ model_used=self._adapter.name,
414
+ files_analyzed=chunk_result.total_files_processed,
415
+ languages_detected=languages,
416
+ )
417
+
418
+
419
+ def run_threatflow(
420
+ repo_path: Path | str,
421
+ output_dir: Path | str | None = None,
422
+ config: ThreatFlowConfig | None = None,
423
+ ) -> ThreatFlowResult:
424
+ """Convenience function to run ThreatFlow analysis.
425
+
426
+ Args:
427
+ repo_path: Path to the repository
428
+ output_dir: Optional output directory
429
+ config: Optional configuration
430
+
431
+ Returns:
432
+ ThreatFlowResult with analysis output
433
+ """
434
+ tf = ThreatFlow(config=config)
435
+ return tf.analyze(
436
+ repo_path=Path(repo_path),
437
+ output_dir=Path(output_dir) if output_dir else None,
438
+ )