kekkai_cli-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kekkai/__init__.py +7 -0
- kekkai/cli.py +1038 -0
- kekkai/config.py +403 -0
- kekkai/dojo.py +419 -0
- kekkai/dojo_import.py +213 -0
- kekkai/github/__init__.py +16 -0
- kekkai/github/commenter.py +198 -0
- kekkai/github/models.py +56 -0
- kekkai/github/sanitizer.py +112 -0
- kekkai/installer/__init__.py +39 -0
- kekkai/installer/errors.py +23 -0
- kekkai/installer/extract.py +161 -0
- kekkai/installer/manager.py +252 -0
- kekkai/installer/manifest.py +189 -0
- kekkai/installer/verify.py +86 -0
- kekkai/manifest.py +77 -0
- kekkai/output.py +218 -0
- kekkai/paths.py +46 -0
- kekkai/policy.py +326 -0
- kekkai/runner.py +70 -0
- kekkai/scanners/__init__.py +67 -0
- kekkai/scanners/backends/__init__.py +14 -0
- kekkai/scanners/backends/base.py +73 -0
- kekkai/scanners/backends/docker.py +178 -0
- kekkai/scanners/backends/native.py +240 -0
- kekkai/scanners/base.py +110 -0
- kekkai/scanners/container.py +144 -0
- kekkai/scanners/falco.py +237 -0
- kekkai/scanners/gitleaks.py +237 -0
- kekkai/scanners/semgrep.py +227 -0
- kekkai/scanners/trivy.py +246 -0
- kekkai/scanners/url_policy.py +163 -0
- kekkai/scanners/zap.py +340 -0
- kekkai/threatflow/__init__.py +94 -0
- kekkai/threatflow/artifacts.py +476 -0
- kekkai/threatflow/chunking.py +361 -0
- kekkai/threatflow/core.py +438 -0
- kekkai/threatflow/mermaid.py +374 -0
- kekkai/threatflow/model_adapter.py +491 -0
- kekkai/threatflow/prompts.py +277 -0
- kekkai/threatflow/redaction.py +228 -0
- kekkai/threatflow/sanitizer.py +643 -0
- kekkai/triage/__init__.py +33 -0
- kekkai/triage/app.py +168 -0
- kekkai/triage/audit.py +203 -0
- kekkai/triage/ignore.py +269 -0
- kekkai/triage/models.py +185 -0
- kekkai/triage/screens.py +341 -0
- kekkai/triage/widgets.py +169 -0
- kekkai_cli-1.0.0.dist-info/METADATA +135 -0
- kekkai_cli-1.0.0.dist-info/RECORD +90 -0
- kekkai_cli-1.0.0.dist-info/WHEEL +5 -0
- kekkai_cli-1.0.0.dist-info/entry_points.txt +3 -0
- kekkai_cli-1.0.0.dist-info/top_level.txt +3 -0
- kekkai_core/__init__.py +3 -0
- kekkai_core/ci/__init__.py +11 -0
- kekkai_core/ci/benchmarks.py +354 -0
- kekkai_core/ci/metadata.py +104 -0
- kekkai_core/ci/validators.py +92 -0
- kekkai_core/docker/__init__.py +17 -0
- kekkai_core/docker/metadata.py +153 -0
- kekkai_core/docker/sbom.py +173 -0
- kekkai_core/docker/security.py +158 -0
- kekkai_core/docker/signing.py +135 -0
- kekkai_core/redaction.py +84 -0
- kekkai_core/slsa/__init__.py +13 -0
- kekkai_core/slsa/verify.py +121 -0
- kekkai_core/windows/__init__.py +29 -0
- kekkai_core/windows/chocolatey.py +335 -0
- kekkai_core/windows/installer.py +256 -0
- kekkai_core/windows/scoop.py +165 -0
- kekkai_core/windows/validators.py +220 -0
- portal/__init__.py +19 -0
- portal/api.py +155 -0
- portal/auth.py +103 -0
- portal/enterprise/__init__.py +32 -0
- portal/enterprise/audit.py +435 -0
- portal/enterprise/licensing.py +342 -0
- portal/enterprise/rbac.py +276 -0
- portal/enterprise/saml.py +595 -0
- portal/ops/__init__.py +53 -0
- portal/ops/backup.py +553 -0
- portal/ops/log_shipper.py +469 -0
- portal/ops/monitoring.py +517 -0
- portal/ops/restore.py +469 -0
- portal/ops/secrets.py +408 -0
- portal/ops/upgrade.py +591 -0
- portal/tenants.py +340 -0
- portal/uploads.py +259 -0
- portal/web.py +384 -0
@@ -0,0 +1,476 @@
"""Artifact generation for ThreatFlow threat models.

Generates structured Markdown artifacts:
- THREATS.md: Identified threats with STRIDE categorization
- DATAFLOWS.md: Data flow diagram description
- DATAFLOW.mmd: Mermaid.js DFD syntax (Milestone 3)
- ASSUMPTIONS.md: Analysis assumptions and limitations

ASVS V15.3.1: Output only the required subset of data.
ASVS V5.3.3: Output encoding for Mermaid format.
"""

from __future__ import annotations

import json
import re
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any


@dataclass
class ThreatEntry:
    """A single threat entry."""

    id: str
    title: str
    category: str
    affected_component: str
    description: str
    risk_level: str
    mitigation: str
    owasp_category: str | None = None

    def to_markdown(self) -> str:
        """Convert to Markdown format."""
        owasp = f"\n- **OWASP**: {self.owasp_category}" if self.owasp_category else ""
        return f"""### {self.id}: {self.title}
- **Category**: {self.category}
- **Affected Component**: {self.affected_component}
- **Risk Level**: {self.risk_level}{owasp}

**Description**: {self.description}

**Mitigation**: {self.mitigation}
"""

    def to_dict(self) -> dict[str, str | None]:
        """Convert to dictionary."""
        return {
            "id": self.id,
            "title": self.title,
            "category": self.category,
            "affected_component": self.affected_component,
            "description": self.description,
            "risk_level": self.risk_level,
            "mitigation": self.mitigation,
            "owasp_category": self.owasp_category,
        }


@dataclass
class DataFlowEntry:
    """A data flow entry."""

    source: str
    destination: str
    data_type: str
    trust_boundary_crossed: bool = False
    notes: str | None = None

    def to_markdown(self) -> str:
        """Convert to Markdown format."""
        boundary = " [CROSSES TRUST BOUNDARY]" if self.trust_boundary_crossed else ""
        notes = f" - {self.notes}" if self.notes else ""
        return f"- {self.source} -> {self.destination}: {self.data_type}{boundary}{notes}"


@dataclass
class ThreatModelArtifacts:
    """Container for all threat model artifacts."""

    threats: list[ThreatEntry] = field(default_factory=list)
    dataflows: list[DataFlowEntry] = field(default_factory=list)
    external_entities: list[str] = field(default_factory=list)
    processes: list[str] = field(default_factory=list)
    data_stores: list[str] = field(default_factory=list)
    trust_boundaries: list[str] = field(default_factory=list)
    assumptions: list[str] = field(default_factory=list)
    scope_notes: list[str] = field(default_factory=list)
    environment_notes: list[str] = field(default_factory=list)
    limitations: list[str] = field(default_factory=list)
    repo_name: str = ""
    analysis_timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
    model_used: str = "unknown"
    files_analyzed: int = 0
    languages_detected: list[str] = field(default_factory=list)

    def threat_count_by_risk(self) -> dict[str, int]:
        """Count threats by risk level."""
        counts: dict[str, int] = {}
        for threat in self.threats:
            level = threat.risk_level.lower()
            counts[level] = counts.get(level, 0) + 1
        return counts

    def threat_count_by_stride(self) -> dict[str, int]:
        """Count threats by STRIDE category."""
        counts: dict[str, int] = {}
        for threat in self.threats:
            cat = threat.category
            counts[cat] = counts.get(cat, 0) + 1
        return counts

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "threats": [t.to_dict() for t in self.threats],
            "dataflows": [
                {
                    "source": df.source,
                    "destination": df.destination,
                    "data_type": df.data_type,
                    "trust_boundary_crossed": df.trust_boundary_crossed,
                }
                for df in self.dataflows
            ],
            "external_entities": self.external_entities,
            "processes": self.processes,
            "data_stores": self.data_stores,
            "trust_boundaries": self.trust_boundaries,
            "assumptions": self.assumptions,
            "limitations": self.limitations,
            "metadata": {
                "repo_name": self.repo_name,
                "analysis_timestamp": self.analysis_timestamp,
                "model_used": self.model_used,
                "files_analyzed": self.files_analyzed,
                "languages_detected": self.languages_detected,
            },
        }


@dataclass
class ArtifactGenerator:
    """Generates threat model artifact files."""

    output_dir: Path
    repo_name: str = ""

    def __post_init__(self) -> None:
        self.output_dir = Path(self.output_dir)

    def generate_threats_md(self, artifacts: ThreatModelArtifacts) -> str:
        """Generate THREATS.md content."""
        lines = [
            "# Threat Model: Identified Threats",
            "",
            f"> Generated: {artifacts.analysis_timestamp}",
            f"> Repository: {artifacts.repo_name or 'Unknown'}",
            f"> Model: {artifacts.model_used}",
            "",
            "## Summary",
            "",
        ]

        # Add risk summary
        risk_counts = artifacts.threat_count_by_risk()
        lines.append("| Risk Level | Count |")
        lines.append("|------------|-------|")
        for level in ["critical", "high", "medium", "low"]:
            count = risk_counts.get(level, 0)
            lines.append(f"| {level.capitalize()} | {count} |")
        lines.append(f"| **Total** | **{len(artifacts.threats)}** |")
        lines.append("")

        # Add STRIDE summary
        lines.append("### By STRIDE Category")
        lines.append("")
        stride_counts = artifacts.threat_count_by_stride()
        for cat, count in sorted(stride_counts.items()):
            lines.append(f"- {cat}: {count}")
        lines.append("")

        # Add detailed threats
        lines.append("## Detailed Threats")
        lines.append("")

        for threat in artifacts.threats:
            lines.append(threat.to_markdown())
            lines.append("")

        return "\n".join(lines)

    def generate_dataflows_md(self, artifacts: ThreatModelArtifacts) -> str:
        """Generate DATAFLOWS.md content."""
        lines = [
            "# Threat Model: Data Flow Diagram",
            "",
            f"> Generated: {artifacts.analysis_timestamp}",
            f"> Repository: {artifacts.repo_name or 'Unknown'}",
            "",
            "## External Entities",
            "",
        ]

        for entity in artifacts.external_entities:
            lines.append(f"- {entity}")
        lines.append("")

        lines.append("## Processes")
        lines.append("")
        for process in artifacts.processes:
            lines.append(f"- {process}")
        lines.append("")

        lines.append("## Data Stores")
        lines.append("")
        for store in artifacts.data_stores:
            lines.append(f"- {store}")
        lines.append("")

        lines.append("## Data Flows")
        lines.append("")
        for flow in artifacts.dataflows:
            lines.append(flow.to_markdown())
        lines.append("")

        lines.append("## Trust Boundaries")
        lines.append("")
        for boundary in artifacts.trust_boundaries:
            lines.append(f"- {boundary}")
        lines.append("")

        return "\n".join(lines)

    def generate_assumptions_md(self, artifacts: ThreatModelArtifacts) -> str:
        """Generate ASSUMPTIONS.md content."""
        lines = [
            "# Threat Model: Assumptions and Limitations",
            "",
            f"> Generated: {artifacts.analysis_timestamp}",
            f"> Repository: {artifacts.repo_name or 'Unknown'}",
            "",
            "## Scope",
            "",
        ]

        for note in artifacts.scope_notes:
            lines.append(f"- {note}")
        if not artifacts.scope_notes:
            lines.append("- This analysis covers the provided repository code")
        lines.append("")

        lines.append("## Environment Assumptions")
        lines.append("")
        for note in artifacts.environment_notes:
            lines.append(f"- {note}")
        if not artifacts.environment_notes:
            lines.append("- Standard deployment environment assumed")
        lines.append("")

        lines.append("## Analysis Assumptions")
        lines.append("")
        for assumption in artifacts.assumptions:
            lines.append(f"- {assumption}")
        if not artifacts.assumptions:
            lines.append("- All third-party dependencies are from trusted sources")
        lines.append("")

        lines.append("## Limitations")
        lines.append("")
        for limitation in artifacts.limitations:
            lines.append(f"- {limitation}")

        # Always add standard limitations
        lines.extend(
            [
                "- This is an automated first-pass analysis",
                "- Human review and validation is required",
                "- Runtime behavior was not analyzed",
                "- Configuration and deployment specifics may vary",
            ]
        )
        lines.append("")

        lines.append("## Metadata")
        lines.append("")
        lines.append(f"- Files analyzed: {artifacts.files_analyzed}")
        lines.append(f"- Languages: {', '.join(artifacts.languages_detected) or 'Unknown'}")
        lines.append(f"- Model: {artifacts.model_used}")
        lines.append("")

        return "\n".join(lines)

    def write_artifacts(self, artifacts: ThreatModelArtifacts) -> list[Path]:
        """Write all artifact files and return paths."""
        self.output_dir.mkdir(parents=True, exist_ok=True)
        written: list[Path] = []

        # Write THREATS.md
        threats_path = self.output_dir / "THREATS.md"
        threats_path.write_text(self.generate_threats_md(artifacts), encoding="utf-8")
        written.append(threats_path)

        # Write DATAFLOWS.md
        dataflows_path = self.output_dir / "DATAFLOWS.md"
        dataflows_path.write_text(self.generate_dataflows_md(artifacts), encoding="utf-8")
        written.append(dataflows_path)

        # Write ASSUMPTIONS.md
        assumptions_path = self.output_dir / "ASSUMPTIONS.md"
        assumptions_path.write_text(self.generate_assumptions_md(artifacts), encoding="utf-8")
        written.append(assumptions_path)

        # Write JSON summary
        json_path = self.output_dir / "threat-model.json"
        json_path.write_text(
            json.dumps(artifacts.to_dict(), indent=2, default=str),
            encoding="utf-8",
        )
        written.append(json_path)

        # Write Mermaid DFD (Milestone 3)
        mermaid_path = self.output_dir / "DATAFLOW.mmd"
        mermaid_path.write_text(self.generate_dataflow_mmd(artifacts), encoding="utf-8")
        written.append(mermaid_path)

        return written

    def generate_dataflow_mmd(self, artifacts: ThreatModelArtifacts) -> str:
        """Generate Mermaid.js DFD syntax from artifacts.

        Security: All labels are HTML-encoded and special characters sanitized
        to prevent XSS when rendered in browsers.

        Args:
            artifacts: ThreatModelArtifacts containing DFD components

        Returns:
            Mermaid flowchart syntax string
        """
        from .mermaid import MermaidDFDGenerator

        generator = MermaidDFDGenerator.from_artifacts(artifacts)
        return generator.generate()

    def parse_llm_threats(self, llm_output: str) -> list[ThreatEntry]:
        """Parse LLM output into structured ThreatEntry objects.

        Attempts to extract threats from various Markdown formats.
        """
        threats: list[ThreatEntry] = []

        # Split by threat headers first
        threat_blocks = re.split(r"(?=###?\s*T\d{3})", llm_output)

        # Pattern for individual threat fields
        for block in threat_blocks:
            if not block.strip():
                continue

            # Extract threat ID and title
            header_match = re.search(r"###?\s*(?P<id>T\d{3}):?\s*(?P<title>[^\n]+)", block)
            if not header_match:
                continue

            # Extract fields
            category_match = re.search(
                r"(?:\*\*Category\*\*|Category)[:\s]*(?P<value>[^\n*]+)", block, re.IGNORECASE
            )
            component_match = re.search(
                r"(?:\*\*Affected[^*]*\*\*|Affected[^:]*)[:\s]*(?P<value>[^\n*]+)",
                block,
                re.IGNORECASE,
            )
            desc_match = re.search(
                r"(?:\*\*Description\*\*|Description)[:\s]*(?P<value>[^\n]+)", block, re.IGNORECASE
            )
            risk_match = re.search(
                r"(?:\*\*Risk[^*]*\*\*|Risk[^:]*)[:\s]*(?P<value>[^\n*]+)", block, re.IGNORECASE
            )
            mitigation_match = re.search(
                r"(?:\*\*Mitigation\*\*|Mitigation)[:\s]*(?P<value>[^\n]+)", block, re.IGNORECASE
            )

            threats.append(
                ThreatEntry(
                    id=header_match.group("id").strip(),
                    title=header_match.group("title").strip(),
                    category=category_match.group("value").strip() if category_match else "Unknown",
                    affected_component=(
                        component_match.group("value").strip() if component_match else "Unknown"
                    ),
                    description=desc_match.group("value").strip() if desc_match else "",
                    risk_level=risk_match.group("value").strip() if risk_match else "Unknown",
                    mitigation=mitigation_match.group("value").strip() if mitigation_match else "",
                )
            )

        return threats

    def parse_llm_dataflows(
        self, llm_output: str
    ) -> tuple[
        list[str],  # external_entities
        list[str],  # processes
        list[str],  # data_stores
        list[DataFlowEntry],  # dataflows
        list[str],  # trust_boundaries
    ]:
        """Parse LLM output into structured dataflow components."""
        external_entities: list[str] = []
        processes: list[str] = []
        data_stores: list[str] = []
        dataflows: list[DataFlowEntry] = []
        trust_boundaries: list[str] = []

        # Current section being parsed
        current_section = ""

        for line in llm_output.split("\n"):
            line = line.strip()
            if not line:
                continue

            # Detect section headers
            lower_line = line.lower()
            if "external" in lower_line and ("entities" in lower_line or "##" in line):
                current_section = "external"
                continue
            elif "process" in lower_line and "##" in line:
                current_section = "processes"
                continue
            elif "data stor" in lower_line and "##" in line:
                current_section = "stores"
                continue
            elif "data flow" in lower_line and "##" in line:
                current_section = "flows"
                continue
            elif "trust" in lower_line and "boundar" in lower_line:
                current_section = "boundaries"
                continue

            # Parse list items
            if line.startswith("-"):
                item = line[1:].strip()
                item = re.sub(r"^\*\*([^*]+)\*\*:?", r"\1:", item)  # Remove bold

                if current_section == "external":
                    external_entities.append(item)
                elif current_section == "processes":
                    processes.append(item)
                elif current_section == "stores":
                    data_stores.append(item)
                elif current_section == "boundaries":
                    trust_boundaries.append(item)
                elif current_section == "flows":
                    # Parse flow format: Source -> Destination: Data Type
                    flow_match = re.match(
                        r"([^->]+)\s*->\s*([^:]+):\s*(.+)",
                        item,
                    )
                    if flow_match:
                        dataflows.append(
                            DataFlowEntry(
                                source=flow_match.group(1).strip(),
                                destination=flow_match.group(2).strip(),
                                data_type=flow_match.group(3).strip(),
                                trust_boundary_crossed="boundary" in item.lower()
                                or "trust" in item.lower(),
                            )
                        )

        return external_entities, processes, data_stores, dataflows, trust_boundaries
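
For orientation, a minimal usage sketch of the module above (not part of the package): the sample model output, repository name, model name, and output directory below are illustrative assumptions, chosen to match the Markdown shape parse_llm_threats() expects.

from pathlib import Path

from kekkai.threatflow.artifacts import ArtifactGenerator, ThreatModelArtifacts

# Hypothetical LLM output in the header/field format the parser looks for.
sample_output = """### T001: Hardcoded credentials
- **Category**: Information Disclosure
- **Affected Component**: Config loader
- **Risk Level**: High

**Description**: Secrets are committed to the repository.

**Mitigation**: Load secrets from the environment at runtime.
"""

generator = ArtifactGenerator(output_dir=Path("out/threat-model"))  # path is illustrative
threats = generator.parse_llm_threats(sample_output)

artifacts = ThreatModelArtifacts(
    threats=threats,
    repo_name="example/repo",    # illustrative
    model_used="example-model",  # illustrative
    files_analyzed=1,
)

# Emits THREATS.md, DATAFLOWS.md, ASSUMPTIONS.md, threat-model.json, and DATAFLOW.mmd.
written = generator.write_artifacts(artifacts)
for path in written:
    print(path)

Since write_artifacts() always emits all five files, a caller that only needs the Markdown can invoke the individual generate_*_md() methods directly.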