kekkai_cli-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. kekkai/__init__.py +7 -0
  2. kekkai/cli.py +1038 -0
  3. kekkai/config.py +403 -0
  4. kekkai/dojo.py +419 -0
  5. kekkai/dojo_import.py +213 -0
  6. kekkai/github/__init__.py +16 -0
  7. kekkai/github/commenter.py +198 -0
  8. kekkai/github/models.py +56 -0
  9. kekkai/github/sanitizer.py +112 -0
  10. kekkai/installer/__init__.py +39 -0
  11. kekkai/installer/errors.py +23 -0
  12. kekkai/installer/extract.py +161 -0
  13. kekkai/installer/manager.py +252 -0
  14. kekkai/installer/manifest.py +189 -0
  15. kekkai/installer/verify.py +86 -0
  16. kekkai/manifest.py +77 -0
  17. kekkai/output.py +218 -0
  18. kekkai/paths.py +46 -0
  19. kekkai/policy.py +326 -0
  20. kekkai/runner.py +70 -0
  21. kekkai/scanners/__init__.py +67 -0
  22. kekkai/scanners/backends/__init__.py +14 -0
  23. kekkai/scanners/backends/base.py +73 -0
  24. kekkai/scanners/backends/docker.py +178 -0
  25. kekkai/scanners/backends/native.py +240 -0
  26. kekkai/scanners/base.py +110 -0
  27. kekkai/scanners/container.py +144 -0
  28. kekkai/scanners/falco.py +237 -0
  29. kekkai/scanners/gitleaks.py +237 -0
  30. kekkai/scanners/semgrep.py +227 -0
  31. kekkai/scanners/trivy.py +246 -0
  32. kekkai/scanners/url_policy.py +163 -0
  33. kekkai/scanners/zap.py +340 -0
  34. kekkai/threatflow/__init__.py +94 -0
  35. kekkai/threatflow/artifacts.py +476 -0
  36. kekkai/threatflow/chunking.py +361 -0
  37. kekkai/threatflow/core.py +438 -0
  38. kekkai/threatflow/mermaid.py +374 -0
  39. kekkai/threatflow/model_adapter.py +491 -0
  40. kekkai/threatflow/prompts.py +277 -0
  41. kekkai/threatflow/redaction.py +228 -0
  42. kekkai/threatflow/sanitizer.py +643 -0
  43. kekkai/triage/__init__.py +33 -0
  44. kekkai/triage/app.py +168 -0
  45. kekkai/triage/audit.py +203 -0
  46. kekkai/triage/ignore.py +269 -0
  47. kekkai/triage/models.py +185 -0
  48. kekkai/triage/screens.py +341 -0
  49. kekkai/triage/widgets.py +169 -0
  50. kekkai_cli-1.0.0.dist-info/METADATA +135 -0
  51. kekkai_cli-1.0.0.dist-info/RECORD +90 -0
  52. kekkai_cli-1.0.0.dist-info/WHEEL +5 -0
  53. kekkai_cli-1.0.0.dist-info/entry_points.txt +3 -0
  54. kekkai_cli-1.0.0.dist-info/top_level.txt +3 -0
  55. kekkai_core/__init__.py +3 -0
  56. kekkai_core/ci/__init__.py +11 -0
  57. kekkai_core/ci/benchmarks.py +354 -0
  58. kekkai_core/ci/metadata.py +104 -0
  59. kekkai_core/ci/validators.py +92 -0
  60. kekkai_core/docker/__init__.py +17 -0
  61. kekkai_core/docker/metadata.py +153 -0
  62. kekkai_core/docker/sbom.py +173 -0
  63. kekkai_core/docker/security.py +158 -0
  64. kekkai_core/docker/signing.py +135 -0
  65. kekkai_core/redaction.py +84 -0
  66. kekkai_core/slsa/__init__.py +13 -0
  67. kekkai_core/slsa/verify.py +121 -0
  68. kekkai_core/windows/__init__.py +29 -0
  69. kekkai_core/windows/chocolatey.py +335 -0
  70. kekkai_core/windows/installer.py +256 -0
  71. kekkai_core/windows/scoop.py +165 -0
  72. kekkai_core/windows/validators.py +220 -0
  73. portal/__init__.py +19 -0
  74. portal/api.py +155 -0
  75. portal/auth.py +103 -0
  76. portal/enterprise/__init__.py +32 -0
  77. portal/enterprise/audit.py +435 -0
  78. portal/enterprise/licensing.py +342 -0
  79. portal/enterprise/rbac.py +276 -0
  80. portal/enterprise/saml.py +595 -0
  81. portal/ops/__init__.py +53 -0
  82. portal/ops/backup.py +553 -0
  83. portal/ops/log_shipper.py +469 -0
  84. portal/ops/monitoring.py +517 -0
  85. portal/ops/restore.py +469 -0
  86. portal/ops/secrets.py +408 -0
  87. portal/ops/upgrade.py +591 -0
  88. portal/tenants.py +340 -0
  89. portal/uploads.py +259 -0
  90. portal/web.py +384 -0
kekkai/threatflow/artifacts.py
@@ -0,0 +1,476 @@
+ """Artifact generation for ThreatFlow threat models.
+
+ Generates structured Markdown artifacts:
+ - THREATS.md: Identified threats with STRIDE categorization
+ - DATAFLOWS.md: Data flow diagram description
+ - DATAFLOW.mmd: Mermaid.js DFD syntax (Milestone 3)
+ - ASSUMPTIONS.md: Analysis assumptions and limitations
+
+ ASVS V15.3.1: Output only the required subset of data.
+ ASVS V5.3.3: Output encoding for Mermaid format.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import re
+ from dataclasses import dataclass, field
+ from datetime import UTC, datetime
+ from pathlib import Path
+ from typing import Any
+
+
+ @dataclass
+ class ThreatEntry:
+     """A single threat entry."""
+
+     id: str
+     title: str
+     category: str
+     affected_component: str
+     description: str
+     risk_level: str
+     mitigation: str
+     owasp_category: str | None = None
+
+     def to_markdown(self) -> str:
+         """Convert to Markdown format."""
+         owasp = f"\n- **OWASP**: {self.owasp_category}" if self.owasp_category else ""
+         return f"""### {self.id}: {self.title}
+ - **Category**: {self.category}
+ - **Affected Component**: {self.affected_component}
+ - **Risk Level**: {self.risk_level}{owasp}
+
+ **Description**: {self.description}
+
+ **Mitigation**: {self.mitigation}
+ """
+
+     def to_dict(self) -> dict[str, str | None]:
+         """Convert to dictionary."""
+         return {
+             "id": self.id,
+             "title": self.title,
+             "category": self.category,
+             "affected_component": self.affected_component,
+             "description": self.description,
+             "risk_level": self.risk_level,
+             "mitigation": self.mitigation,
+             "owasp_category": self.owasp_category,
+         }
+
+
+ @dataclass
+ class DataFlowEntry:
+     """A data flow entry."""
+
+     source: str
+     destination: str
+     data_type: str
+     trust_boundary_crossed: bool = False
+     notes: str | None = None
+
+     def to_markdown(self) -> str:
+         """Convert to Markdown format."""
+         boundary = " [CROSSES TRUST BOUNDARY]" if self.trust_boundary_crossed else ""
+         notes = f" - {self.notes}" if self.notes else ""
+         return f"- {self.source} -> {self.destination}: {self.data_type}{boundary}{notes}"
+
+
+ @dataclass
+ class ThreatModelArtifacts:
+     """Container for all threat model artifacts."""
+
+     threats: list[ThreatEntry] = field(default_factory=list)
+     dataflows: list[DataFlowEntry] = field(default_factory=list)
+     external_entities: list[str] = field(default_factory=list)
+     processes: list[str] = field(default_factory=list)
+     data_stores: list[str] = field(default_factory=list)
+     trust_boundaries: list[str] = field(default_factory=list)
+     assumptions: list[str] = field(default_factory=list)
+     scope_notes: list[str] = field(default_factory=list)
+     environment_notes: list[str] = field(default_factory=list)
+     limitations: list[str] = field(default_factory=list)
+     repo_name: str = ""
+     analysis_timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
+     model_used: str = "unknown"
+     files_analyzed: int = 0
+     languages_detected: list[str] = field(default_factory=list)
+
+     def threat_count_by_risk(self) -> dict[str, int]:
+         """Count threats by risk level."""
+         counts: dict[str, int] = {}
+         for threat in self.threats:
+             level = threat.risk_level.lower()
+             counts[level] = counts.get(level, 0) + 1
+         return counts
+
+     def threat_count_by_stride(self) -> dict[str, int]:
+         """Count threats by STRIDE category."""
+         counts: dict[str, int] = {}
+         for threat in self.threats:
+             cat = threat.category
+             counts[cat] = counts.get(cat, 0) + 1
+         return counts
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary for JSON serialization."""
+         return {
+             "threats": [t.to_dict() for t in self.threats],
+             "dataflows": [
+                 {
+                     "source": df.source,
+                     "destination": df.destination,
+                     "data_type": df.data_type,
+                     "trust_boundary_crossed": df.trust_boundary_crossed,
+                 }
+                 for df in self.dataflows
+             ],
+             "external_entities": self.external_entities,
+             "processes": self.processes,
+             "data_stores": self.data_stores,
+             "trust_boundaries": self.trust_boundaries,
+             "assumptions": self.assumptions,
+             "limitations": self.limitations,
+             "metadata": {
+                 "repo_name": self.repo_name,
+                 "analysis_timestamp": self.analysis_timestamp,
+                 "model_used": self.model_used,
+                 "files_analyzed": self.files_analyzed,
+                 "languages_detected": self.languages_detected,
+             },
+         }
+
+
+ @dataclass
+ class ArtifactGenerator:
+     """Generates threat model artifact files."""
+
+     output_dir: Path
+     repo_name: str = ""
+
+     def __post_init__(self) -> None:
+         self.output_dir = Path(self.output_dir)
+
+     def generate_threats_md(self, artifacts: ThreatModelArtifacts) -> str:
+         """Generate THREATS.md content."""
+         lines = [
+             "# Threat Model: Identified Threats",
+             "",
+             f"> Generated: {artifacts.analysis_timestamp}",
+             f"> Repository: {artifacts.repo_name or 'Unknown'}",
+             f"> Model: {artifacts.model_used}",
+             "",
+             "## Summary",
+             "",
+         ]
+
+         # Add risk summary
+         risk_counts = artifacts.threat_count_by_risk()
+         lines.append("| Risk Level | Count |")
+         lines.append("|------------|-------|")
+         for level in ["critical", "high", "medium", "low"]:
+             count = risk_counts.get(level, 0)
+             lines.append(f"| {level.capitalize()} | {count} |")
+         lines.append(f"| **Total** | **{len(artifacts.threats)}** |")
+         lines.append("")
+
+         # Add STRIDE summary
+         lines.append("### By STRIDE Category")
+         lines.append("")
+         stride_counts = artifacts.threat_count_by_stride()
+         for cat, count in sorted(stride_counts.items()):
+             lines.append(f"- {cat}: {count}")
+         lines.append("")
+
+         # Add detailed threats
+         lines.append("## Detailed Threats")
+         lines.append("")
+
+         for threat in artifacts.threats:
+             lines.append(threat.to_markdown())
+             lines.append("")
+
+         return "\n".join(lines)
+
+     def generate_dataflows_md(self, artifacts: ThreatModelArtifacts) -> str:
+         """Generate DATAFLOWS.md content."""
+         lines = [
+             "# Threat Model: Data Flow Diagram",
+             "",
+             f"> Generated: {artifacts.analysis_timestamp}",
+             f"> Repository: {artifacts.repo_name or 'Unknown'}",
+             "",
+             "## External Entities",
+             "",
+         ]
+
+         for entity in artifacts.external_entities:
+             lines.append(f"- {entity}")
+         lines.append("")
+
+         lines.append("## Processes")
+         lines.append("")
+         for process in artifacts.processes:
+             lines.append(f"- {process}")
+         lines.append("")
+
+         lines.append("## Data Stores")
+         lines.append("")
+         for store in artifacts.data_stores:
+             lines.append(f"- {store}")
+         lines.append("")
+
+         lines.append("## Data Flows")
+         lines.append("")
+         for flow in artifacts.dataflows:
+             lines.append(flow.to_markdown())
+         lines.append("")
+
+         lines.append("## Trust Boundaries")
+         lines.append("")
+         for boundary in artifacts.trust_boundaries:
+             lines.append(f"- {boundary}")
+         lines.append("")
+
+         return "\n".join(lines)
+
+     def generate_assumptions_md(self, artifacts: ThreatModelArtifacts) -> str:
+         """Generate ASSUMPTIONS.md content."""
+         lines = [
+             "# Threat Model: Assumptions and Limitations",
+             "",
+             f"> Generated: {artifacts.analysis_timestamp}",
+             f"> Repository: {artifacts.repo_name or 'Unknown'}",
+             "",
+             "## Scope",
+             "",
+         ]
+
+         for note in artifacts.scope_notes:
+             lines.append(f"- {note}")
+         if not artifacts.scope_notes:
+             lines.append("- This analysis covers the provided repository code")
+         lines.append("")
+
+         lines.append("## Environment Assumptions")
+         lines.append("")
+         for note in artifacts.environment_notes:
+             lines.append(f"- {note}")
+         if not artifacts.environment_notes:
+             lines.append("- Standard deployment environment assumed")
+         lines.append("")
+
+         lines.append("## Analysis Assumptions")
+         lines.append("")
+         for assumption in artifacts.assumptions:
+             lines.append(f"- {assumption}")
+         if not artifacts.assumptions:
+             lines.append("- All third-party dependencies are from trusted sources")
+         lines.append("")
+
+         lines.append("## Limitations")
+         lines.append("")
+         for limitation in artifacts.limitations:
+             lines.append(f"- {limitation}")
+
+         # Always add standard limitations
+         lines.extend(
+             [
+                 "- This is an automated first-pass analysis",
+                 "- Human review and validation is required",
+                 "- Runtime behavior was not analyzed",
+                 "- Configuration and deployment specifics may vary",
+             ]
+         )
+         lines.append("")
+
+         lines.append("## Metadata")
+         lines.append("")
+         lines.append(f"- Files analyzed: {artifacts.files_analyzed}")
+         lines.append(f"- Languages: {', '.join(artifacts.languages_detected) or 'Unknown'}")
+         lines.append(f"- Model: {artifacts.model_used}")
+         lines.append("")
+
+         return "\n".join(lines)
+
+     def write_artifacts(self, artifacts: ThreatModelArtifacts) -> list[Path]:
+         """Write all artifact files and return paths."""
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+         written: list[Path] = []
+
+         # Write THREATS.md
+         threats_path = self.output_dir / "THREATS.md"
+         threats_path.write_text(self.generate_threats_md(artifacts), encoding="utf-8")
+         written.append(threats_path)
+
+         # Write DATAFLOWS.md
+         dataflows_path = self.output_dir / "DATAFLOWS.md"
+         dataflows_path.write_text(self.generate_dataflows_md(artifacts), encoding="utf-8")
+         written.append(dataflows_path)
+
+         # Write ASSUMPTIONS.md
+         assumptions_path = self.output_dir / "ASSUMPTIONS.md"
+         assumptions_path.write_text(self.generate_assumptions_md(artifacts), encoding="utf-8")
+         written.append(assumptions_path)
+
+         # Write JSON summary
+         json_path = self.output_dir / "threat-model.json"
+         json_path.write_text(
+             json.dumps(artifacts.to_dict(), indent=2, default=str),
+             encoding="utf-8",
+         )
+         written.append(json_path)
+
+         # Write Mermaid DFD (Milestone 3)
+         mermaid_path = self.output_dir / "DATAFLOW.mmd"
+         mermaid_path.write_text(self.generate_dataflow_mmd(artifacts), encoding="utf-8")
+         written.append(mermaid_path)
+
+         return written
+
+     def generate_dataflow_mmd(self, artifacts: ThreatModelArtifacts) -> str:
+         """Generate Mermaid.js DFD syntax from artifacts.
+
+         Security: All labels are HTML-encoded and special characters sanitized
+         to prevent XSS when rendered in browsers.
+
+         Args:
+             artifacts: ThreatModelArtifacts containing DFD components
+
+         Returns:
+             Mermaid flowchart syntax string
+         """
+         from .mermaid import MermaidDFDGenerator
+
+         generator = MermaidDFDGenerator.from_artifacts(artifacts)
+         return generator.generate()
+
+     def parse_llm_threats(self, llm_output: str) -> list[ThreatEntry]:
+         """Parse LLM output into structured ThreatEntry objects.
+
+         Attempts to extract threats from various Markdown formats.
+         """
+         threats: list[ThreatEntry] = []
+
+         # Split by threat headers first
+         threat_blocks = re.split(r"(?=###?\s*T\d{3})", llm_output)
+
+         # Pattern for individual threat fields
+         for block in threat_blocks:
+             if not block.strip():
+                 continue
+
+             # Extract threat ID and title
+             header_match = re.search(r"###?\s*(?P<id>T\d{3}):?\s*(?P<title>[^\n]+)", block)
+             if not header_match:
+                 continue
+
+             # Extract fields
+             category_match = re.search(
+                 r"(?:\*\*Category\*\*|Category)[:\s]*(?P<value>[^\n*]+)", block, re.IGNORECASE
+             )
+             component_match = re.search(
+                 r"(?:\*\*Affected[^*]*\*\*|Affected[^:]*)[:\s]*(?P<value>[^\n*]+)",
+                 block,
+                 re.IGNORECASE,
+             )
+             desc_match = re.search(
+                 r"(?:\*\*Description\*\*|Description)[:\s]*(?P<value>[^\n]+)", block, re.IGNORECASE
+             )
+             risk_match = re.search(
+                 r"(?:\*\*Risk[^*]*\*\*|Risk[^:]*)[:\s]*(?P<value>[^\n*]+)", block, re.IGNORECASE
+             )
+             mitigation_match = re.search(
+                 r"(?:\*\*Mitigation\*\*|Mitigation)[:\s]*(?P<value>[^\n]+)", block, re.IGNORECASE
+             )
+
+             threats.append(
+                 ThreatEntry(
+                     id=header_match.group("id").strip(),
+                     title=header_match.group("title").strip(),
+                     category=category_match.group("value").strip() if category_match else "Unknown",
+                     affected_component=(
+                         component_match.group("value").strip() if component_match else "Unknown"
+                     ),
+                     description=desc_match.group("value").strip() if desc_match else "",
+                     risk_level=risk_match.group("value").strip() if risk_match else "Unknown",
+                     mitigation=mitigation_match.group("value").strip() if mitigation_match else "",
+                 )
+             )
+
+         return threats
+
+     def parse_llm_dataflows(
+         self, llm_output: str
+     ) -> tuple[
+         list[str],  # external_entities
+         list[str],  # processes
+         list[str],  # data_stores
+         list[DataFlowEntry],  # dataflows
+         list[str],  # trust_boundaries
+     ]:
+         """Parse LLM output into structured dataflow components."""
+         external_entities: list[str] = []
+         processes: list[str] = []
+         data_stores: list[str] = []
+         dataflows: list[DataFlowEntry] = []
+         trust_boundaries: list[str] = []
+
+         # Current section being parsed
+         current_section = ""
+
+         for line in llm_output.split("\n"):
+             line = line.strip()
+             if not line:
+                 continue
+
+             # Detect section headers
+             lower_line = line.lower()
+             if "external" in lower_line and ("entities" in lower_line or "##" in line):
+                 current_section = "external"
+                 continue
+             elif "process" in lower_line and "##" in line:
+                 current_section = "processes"
+                 continue
+             elif "data stor" in lower_line and "##" in line:
+                 current_section = "stores"
+                 continue
+             elif "data flow" in lower_line and "##" in line:
+                 current_section = "flows"
+                 continue
+             elif "trust" in lower_line and "boundar" in lower_line:
+                 current_section = "boundaries"
+                 continue
+
+             # Parse list items
+             if line.startswith("-"):
+                 item = line[1:].strip()
+                 item = re.sub(r"^\*\*([^*]+)\*\*:?", r"\1:", item)  # Remove bold
+
+                 if current_section == "external":
+                     external_entities.append(item)
+                 elif current_section == "processes":
+                     processes.append(item)
+                 elif current_section == "stores":
+                     data_stores.append(item)
+                 elif current_section == "boundaries":
+                     trust_boundaries.append(item)
+                 elif current_section == "flows":
+                     # Parse flow format: Source -> Destination: Data Type
+                     flow_match = re.match(
+                         r"([^->]+)\s*->\s*([^:]+):\s*(.+)",
+                         item,
+                     )
+                     if flow_match:
+                         dataflows.append(
+                             DataFlowEntry(
+                                 source=flow_match.group(1).strip(),
+                                 destination=flow_match.group(2).strip(),
+                                 data_type=flow_match.group(3).strip(),
+                                 trust_boundary_crossed="boundary" in item.lower()
+                                 or "trust" in item.lower(),
+                             )
+                         )
+
+         return external_entities, processes, data_stores, dataflows, trust_boundaries
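
ArtifactGenerator.write_artifacts is the module's entry point: it renders the three Markdown files, the JSON summary, and the Mermaid DFD from a populated ThreatModelArtifacts. A minimal usage sketch, assuming the wheel above is installed; the repository name, threat values, and output path below are illustrative, not part of the package:

    from pathlib import Path

    from kekkai.threatflow.artifacts import (
        ArtifactGenerator,
        DataFlowEntry,
        ThreatEntry,
        ThreatModelArtifacts,
    )

    # Illustrative values only; nothing here is prescribed by the package.
    artifacts = ThreatModelArtifacts(
        repo_name="example/service",
        files_analyzed=42,
        languages_detected=["Python"],
        threats=[
            ThreatEntry(
                id="T001",
                title="Unvalidated upload path",
                category="Tampering",
                affected_component="uploads",
                description="Uploaded file names reach the filesystem unchecked.",
                risk_level="High",
                mitigation="Normalize and validate paths before writing.",
            )
        ],
        dataflows=[
            DataFlowEntry(
                source="Browser",
                destination="Portal API",
                data_type="Upload payload",
                trust_boundary_crossed=True,
            )
        ],
    )

    # write_artifacts() creates THREATS.md, DATAFLOWS.md, ASSUMPTIONS.md,
    # threat-model.json, and DATAFLOW.mmd, returning the written paths.
    generator = ArtifactGenerator(output_dir=Path("out/threat-model"))
    for path in generator.write_artifacts(artifacts):
        print(path)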
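
The parse_llm_threats and parse_llm_dataflows helpers are lenient regex scanners over Markdown rather than a strict parser. A short round trip, assuming model output shaped like the headings this module itself emits (the sample text is invented for illustration):

    from kekkai.threatflow.artifacts import ArtifactGenerator

    gen = ArtifactGenerator(output_dir="out")  # __post_init__ coerces str to Path

    sample = """
    ### T001: SQL injection in search
    - **Category**: Tampering
    - **Risk Level**: High
    **Description**: User input is interpolated into a query string.
    **Mitigation**: Use parameterized queries.

    ## Data Flows
    - Browser -> API: search terms
    """

    threats = gen.parse_llm_threats(sample)
    assert threats[0].id == "T001" and threats[0].risk_level == "High"

    entities, processes, stores, flows, boundaries = gen.parse_llm_dataflows(sample)
    assert flows[0].source == "Browser" and flows[0].data_type == "search terms"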