cyntrisec 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cyntrisec/__init__.py +3 -0
- cyntrisec/__main__.py +6 -0
- cyntrisec/aws/__init__.py +6 -0
- cyntrisec/aws/collectors/__init__.py +17 -0
- cyntrisec/aws/collectors/ec2.py +30 -0
- cyntrisec/aws/collectors/iam.py +116 -0
- cyntrisec/aws/collectors/lambda_.py +45 -0
- cyntrisec/aws/collectors/network.py +70 -0
- cyntrisec/aws/collectors/rds.py +38 -0
- cyntrisec/aws/collectors/s3.py +68 -0
- cyntrisec/aws/collectors/usage.py +188 -0
- cyntrisec/aws/credentials.py +153 -0
- cyntrisec/aws/normalizers/__init__.py +17 -0
- cyntrisec/aws/normalizers/ec2.py +115 -0
- cyntrisec/aws/normalizers/iam.py +182 -0
- cyntrisec/aws/normalizers/lambda_.py +83 -0
- cyntrisec/aws/normalizers/network.py +225 -0
- cyntrisec/aws/normalizers/rds.py +130 -0
- cyntrisec/aws/normalizers/s3.py +184 -0
- cyntrisec/aws/relationship_builder.py +1359 -0
- cyntrisec/aws/scanner.py +303 -0
- cyntrisec/cli/__init__.py +5 -0
- cyntrisec/cli/analyze.py +747 -0
- cyntrisec/cli/ask.py +412 -0
- cyntrisec/cli/can.py +307 -0
- cyntrisec/cli/comply.py +226 -0
- cyntrisec/cli/cuts.py +231 -0
- cyntrisec/cli/diff.py +332 -0
- cyntrisec/cli/errors.py +105 -0
- cyntrisec/cli/explain.py +348 -0
- cyntrisec/cli/main.py +114 -0
- cyntrisec/cli/manifest.py +893 -0
- cyntrisec/cli/output.py +117 -0
- cyntrisec/cli/remediate.py +643 -0
- cyntrisec/cli/report.py +462 -0
- cyntrisec/cli/scan.py +207 -0
- cyntrisec/cli/schemas.py +391 -0
- cyntrisec/cli/serve.py +164 -0
- cyntrisec/cli/setup.py +260 -0
- cyntrisec/cli/validate.py +101 -0
- cyntrisec/cli/waste.py +323 -0
- cyntrisec/core/__init__.py +31 -0
- cyntrisec/core/business_config.py +110 -0
- cyntrisec/core/business_logic.py +131 -0
- cyntrisec/core/compliance.py +437 -0
- cyntrisec/core/cost_estimator.py +301 -0
- cyntrisec/core/cuts.py +360 -0
- cyntrisec/core/diff.py +361 -0
- cyntrisec/core/graph.py +202 -0
- cyntrisec/core/paths.py +830 -0
- cyntrisec/core/schema.py +317 -0
- cyntrisec/core/simulator.py +371 -0
- cyntrisec/core/waste.py +309 -0
- cyntrisec/mcp/__init__.py +5 -0
- cyntrisec/mcp/server.py +862 -0
- cyntrisec/storage/__init__.py +7 -0
- cyntrisec/storage/filesystem.py +344 -0
- cyntrisec/storage/memory.py +113 -0
- cyntrisec/storage/protocol.py +92 -0
- cyntrisec-0.1.7.dist-info/METADATA +672 -0
- cyntrisec-0.1.7.dist-info/RECORD +65 -0
- cyntrisec-0.1.7.dist-info/WHEEL +4 -0
- cyntrisec-0.1.7.dist-info/entry_points.txt +2 -0
- cyntrisec-0.1.7.dist-info/licenses/LICENSE +190 -0
- cyntrisec-0.1.7.dist-info/licenses/NOTICE +5 -0
cyntrisec/core/paths.py
ADDED
|
@@ -0,0 +1,830 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Attack Path Finder - Heuristic-based attack path discovery.
|
|
3
|
+
|
|
4
|
+
Finds paths from internet-facing entry points to sensitive targets
|
|
5
|
+
through the capability graph. Uses a priority queue (best-first search)
|
|
6
|
+
to prioritize highest-risk paths.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import heapq
|
|
13
|
+
import uuid
|
|
14
|
+
from collections import deque
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from decimal import Decimal
|
|
17
|
+
|
|
18
|
+
from cyntrisec.core.graph import AwsGraph
|
|
19
|
+
from cyntrisec.core.schema import (
|
|
20
|
+
INTERNET_ASSET_ID,
|
|
21
|
+
Asset,
|
|
22
|
+
AttackPath,
|
|
23
|
+
ConfidenceLevel,
|
|
24
|
+
EdgeKind,
|
|
25
|
+
Relationship,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class PathFinderConfig:
    """Tunable limits for attack path discovery.

    The values are read by ``PathFinder``; the defaults apply when no
    configuration object is supplied.
    """

    # A partial path is pruned once its asset count reaches this value.
    max_depth: int = 8
    # Discovery stops after this many candidate paths have been collected.
    max_paths: int = 200
    # Final paths scoring below this risk value are dropped.
    min_risk_score: float = 0.0
    # Task 11.2: when True, UNKNOWN-kind edges are traversed as well.
    include_unknown: bool = False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class NetworkIdentity:
    """Immutable description of the attacker's network vantage point."""

    # Security group IDs of the current foothold (empty tuple when none).
    security_group_ids: tuple[str, ...] = field(default_factory=tuple)
    # VPC and subnet of the foothold, when known.
    vpc_id: str | None = None
    subnet_id: str | None = None

    def __hash__(self):
        """Hash over all three fields so instances can key sets and dicts."""
        identity = (self.security_group_ids, self.vpc_id, self.subnet_id)
        return hash(identity)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class AttackerState:
    """
    Immutable snapshot of what the attacker holds at one point of the traversal.

    Fields:
    - origin: where the attacker started ("internet" or an asset ID)
    - compromised_assets: IDs of the assets reached so far
    - active_principals: IDs of the roles assumed so far
    - network_identity: security groups / VPC / subnet of the current foothold
    """

    origin: str  # "internet" or asset ID
    compromised_assets: frozenset[str] = field(default_factory=frozenset)
    active_principals: frozenset[str] = field(default_factory=frozenset)
    network_identity: NetworkIdentity = field(default_factory=NetworkIdentity)

    def state_key(self) -> int:
        """Return a hashable key for visited set tracking.

        Only the assumed principals and the network identity contribute;
        ``origin`` and ``compromised_assets`` are deliberately left out, so
        two states that differ only in those fields share a key.
        """
        identity_and_capabilities = (self.active_principals, self.network_identity)
        return hash(identity_and_capabilities)
|
|
71
|
+
|
|
72
|
+
@dataclass
class CandidatePath:
    """
    A raw path produced by Phase A (Discovery), before validation and scoring.
    """

    snapshot_id: uuid.UUID
    # Assets in traversal order; relationship k links asset k to asset k + 1.
    path_asset_ids: list[uuid.UUID]
    path_relationship_ids: list[uuid.UUID]
    # Attacker state held when the last asset of the path was reached.
    attacker_state: AttackerState
    # Search priority the path had when it was recorded.
    heuristic_score: float
    # Relationship IDs kept as context for the path; not part of the chain itself.
    context_relationship_ids: list[uuid.UUID] = field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
class PathValidator:
    """
    Phase B: grades candidate paths with a confidence level and a reason.
    """

    def validate_path_metadata(self, graph: AwsGraph, candidate: CandidatePath) -> tuple[ConfidenceLevel, str]:
        """Run every check on ``candidate`` and merge the outcomes.

        Returns:
            ``(level, reason)`` where ``level`` is the weakest level reported
            by any check (LOW beats MED beats HIGH) and ``reason`` is the
            non-empty check reasons joined with ``"; "``.
        """
        outcomes = (
            self._check_network_preconditions(graph, candidate),  # 1. Network
            self._check_passrole_motif(graph, candidate),  # 2. PassRole
        )

        # Merge: the least confident check decides the overall level.
        if any(level == ConfidenceLevel.LOW for level, _ in outcomes):
            merged = ConfidenceLevel.LOW
        elif any(level == ConfidenceLevel.MED for level, _ in outcomes):
            merged = ConfidenceLevel.MED
        else:
            merged = ConfidenceLevel.HIGH

        reason_str = "; ".join(text for _, text in outcomes if text)
        return merged, reason_str

    def _check_network_preconditions(self, graph: AwsGraph, candidate: CandidatePath) -> tuple[ConfidenceLevel, str]:
        """
        Verify CAN_REACH edges in the path.

        A CAN_REACH hop whose target is a database-type asset while the edge's
        ``port_range`` is exactly 80 or 443 lowers the confidence to LOW.
        Relationships or target assets that cannot be found in ``graph`` are
        skipped.
        """
        db_types = ["rds:db-instance", "dynamodb:table", "redshift:cluster"]
        web_ranges = ["80-80", "443-443"]

        notes = []
        level = ConfidenceLevel.HIGH

        for hop, rel_id in enumerate(candidate.path_relationship_ids):
            # Relationship ``hop`` leads from asset ``hop`` to asset ``hop + 1``.
            src_id = candidate.path_asset_ids[hop]
            tgt_id = candidate.path_asset_ids[hop + 1]

            rel = next((e for e in graph.edges_from(src_id) if e.id == rel_id), None)
            if not rel or rel.relationship_type != "CAN_REACH":
                continue

            target_asset = graph.asset(tgt_id)
            if not target_asset:
                continue

            port_range = rel.properties.get("port_range", "")

            # Check 1: DB exposure on web ports. A database reached strictly
            # via 80/443 is unlikely to be directly usable.
            if target_asset.asset_type in db_types and port_range in web_ranges:
                level = ConfidenceLevel.LOW
                notes.append(f"Unlikely database access via web ports ({port_range}) to {target_asset.name}")

        return level, "; ".join(notes)

    def _check_passrole_motif(self, graph: AwsGraph, candidate: CandidatePath) -> tuple[ConfidenceLevel, str]:
        """
        Verify iam:PassRole usage.

        Every CAN_PASS_TO hop whose source asset name contains neither
        "admin" nor "root" lowers the confidence from HIGH to MED, because the
        execution permission needed to use the passed role is not checked.
        """
        notes = []
        level = ConfidenceLevel.HIGH  # Start high, downgrade if PassRole found without trigger

        for hop, rel_id in enumerate(candidate.path_relationship_ids):
            src_id = candidate.path_asset_ids[hop]

            rel = next((e for e in graph.edges_from(src_id) if e.id == rel_id), None)
            if not rel or rel.relationship_type != "CAN_PASS_TO":
                continue

            # Motif found: Source -> (PassRole) -> TargetRole.
            source_asset = graph.asset(src_id)
            if not source_asset:
                continue

            # Name heuristic: admin/root sources are assumed to hold a trigger.
            lowered = source_asset.name.lower()
            if "admin" in lowered or "root" in lowered:
                continue

            # The trigger (e.g. lambda:CreateFunction) cannot be verified:
            # there are no edges for it, so downgrade to MED.
            if level == ConfidenceLevel.HIGH:
                level = ConfidenceLevel.MED
            notes.append(f"PassRole found at {source_asset.name}, but execution permission (e.g. CreateFunction) unverified")

        return level, "; ".join(notes)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class PathScorer:
    """
    Scores attack paths from per-edge weights and a confidence multiplier.

    Risk Score = Entry Confidence * Exploitability * Impact * Confidence Multiplier

    Where:
    - Entry Confidence: likelihood of the attacker reaching the start (0-1)
    - Exploitability: difficulty of traversing the path (0-1), derived from
      the path weight (sum of edge weights); heavier paths score lower
    - Impact: value of the target (0-1)
    - Confidence Multiplier: penalty for uncertain validation results
    """

    # Base weights: lower is easier to traverse.
    EDGE_WEIGHTS = {
        # IAM privilege escalation (very easy)
        "CAN_ASSUME": 0.1,
        "CAN_PASS_TO": 0.2,  # Requires trigger
        # IAM data access (easy)
        "MAY_READ": 0.3,
        "MAY_WRITE": 0.3,
        "MAY_READ_S3_OBJECT": 0.3,
        # Network reachability (medium - requires exploit/creds)
        "CAN_REACH": 0.5,
        # Any other relationship type
        "default": 1.0,
    }

    CONFIDENCE_MULTIPLIERS = {
        ConfidenceLevel.HIGH: 1.0,
        ConfidenceLevel.MED: 0.6,
        ConfidenceLevel.LOW: 0.2,
    }

    def score_path(
        self,
        graph: AwsGraph,
        path_assets: list[uuid.UUID],
        path_rels: list[uuid.UUID],
        entry_confidence: float,
        target_impact: float,
        confidence_level: ConfidenceLevel = ConfidenceLevel.HIGH,
    ) -> tuple[float, float]:
        """
        Calculate (risk_score, exploitability_score).

        Args:
            graph: Graph used to resolve each relationship ID to its edge.
            path_assets: Asset IDs in traversal order.
            path_rels: Relationship IDs; entry k leaves ``path_assets[k]``.
            entry_confidence: Entry point accessibility factor.
            target_impact: Target value factor.
            confidence_level: Validation confidence; levels missing from
                ``CONFIDENCE_MULTIPLIERS`` use a multiplier of 0.2.

        Returns:
            ``(risk_score, exploitability)`` as plain floats.
        """
        total_weight = 0.0
        for hop, rel_id in enumerate(path_rels):
            origin_id = path_assets[hop]
            edge = next((e for e in graph.edges_from(origin_id) if e.id == rel_id), None)
            # An edge that cannot be located is charged a weight of 1.0.
            total_weight += self.score_edge(edge.relationship_type) if edge else 1.0

        # Linear decay with a floor of 0.01.
        exploitability = max(0.01, 1.0 - (total_weight * 0.15))

        # Confidence penalizes the final risk score, not the exploitability.
        conf_mult = self.CONFIDENCE_MULTIPLIERS.get(confidence_level, 0.2)
        risk_score = entry_confidence * exploitability * target_impact * conf_mult

        return float(risk_score), float(exploitability)

    def score_edge(self, relationship_type: str) -> float:
        """Get weight for a single edge type (the "default" weight if unlisted)."""
        fallback = self.EDGE_WEIGHTS["default"]
        return self.EDGE_WEIGHTS.get(relationship_type, fallback)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class PathFinder:
    """
    Discovers attack paths through the capability graph.

    Uses Best-First Search (Priority Queue) to find highest-risk paths first.

    Risk Heuristic:
    - Prioritizes paths starting from high-confidence entry points.
    - Penalizes length (shorter paths = higher exploitability).
    """

    # Soft cap on how often a single asset may be expanded during discovery.
    _MAX_VISITS_PER_NODE = 10

    def __init__(self, config: PathFinderConfig | None = None):
        """Store the search configuration (defaults when omitted) and a scorer."""
        self._config = config or PathFinderConfig()
        self._scorer = PathScorer()

    def find_paths(
        self,
        graph: AwsGraph,
        snapshot_id: uuid.UUID,
    ) -> list[AttackPath]:
        """
        Find all attack paths in the graph using Two-Phase Discovery.

        Phase A collects candidate paths; Phase B validates each candidate,
        scores it and keeps those whose risk score is at least
        ``config.min_risk_score``.

        Returns:
            Attack paths sorted by risk score, highest first.
        """
        # Phase A: Discovery
        candidates = self._discover_candidate_paths(graph, snapshot_id)

        # Phase B: Validation
        validator = PathValidator()
        results = []

        for candidate in candidates:
            confidence, reason = validator.validate_path_metadata(graph, candidate)

            attack_path = self._create_path(
                graph=graph,
                snapshot_id=snapshot_id,
                path_assets=candidate.path_asset_ids,
                path_rels=candidate.path_relationship_ids,
                context_rels=candidate.context_relationship_ids,
                confidence_level=confidence,
                confidence_reason=reason,
            )

            # The risk score is only known after scoring, so filter here.
            if float(attack_path.risk_score) >= self._config.min_risk_score:
                results.append(attack_path)

        # Sort by Risk Score Descending (Task 10.3)
        results.sort(key=lambda p: p.risk_score, reverse=True)

        return results

    def _discover_candidate_paths(
        self,
        graph: AwsGraph,
        snapshot_id: uuid.UUID,
    ) -> list[CandidatePath]:
        """
        Phase A: Discover potential attack paths using best-first search.

        Starts from every ``graph.entry_points()`` asset and records a
        candidate each time a ``graph.sensitive_targets()`` asset is popped.
        Stops when the queue is empty or ``config.max_paths`` candidates exist.
        """
        entry_points = graph.entry_points()
        target_ids = {t.id for t in graph.sensitive_targets()}

        if not entry_points or not target_ids:
            return []

        # Heap entries:
        #   (-heuristic_score, path_len, current_id, path_assets, path_rels, seq, attacker_state)
        # ``seq`` is a unique insertion counter placed right before the state:
        # AttackerState defines no ordering, so without it two entries that tie
        # on every earlier field would make heapq raise TypeError.
        queue: list[tuple] = []
        seq = 0
        for entry in entry_points:
            initial_state = self._initialize_state_for_entry_point(graph, entry)

            # Initial score is based on entry confidence alone (length=1).
            score = self._calculate_heuristic(graph, entry, 1)

            # Negated score gives max-heap behaviour on top of heapq's min-heap.
            heapq.heappush(queue, (-score, 1, entry.id, [entry.id], [], seq, initial_state))
            seq += 1

        found_candidates: list[CandidatePath] = []
        visited_states: set[tuple[uuid.UUID, int]] = set()
        # asset_id -> number of times the asset has been expanded
        node_visits: dict[uuid.UUID, int] = {}

        max_depth = self._config.max_depth
        allow_unknown = self._config.include_unknown

        while queue and len(found_candidates) < self._config.max_paths:
            neg_score, length, current_id, path_assets, path_rels, _, state = heapq.heappop(queue)

            # Pruning. NOTE: ``length`` counts assets, and this check runs
            # before the target check, so a path with exactly ``max_depth``
            # assets is never recorded.
            if length >= max_depth:
                continue

            # State-aware visited check
            state_key = state.state_key()
            if (current_id, state_key) in visited_states:
                continue
            visited_states.add((current_id, state_key))

            # Count visits (soft limit to prevent infinite variations)
            node_visits[current_id] = node_visits.get(current_id, 0) + 1
            if node_visits[current_id] > self._MAX_VISITS_PER_NODE:
                continue

            # Reaching a target records the path; the search still continues
            # through the target afterwards.
            if current_id in target_ids:
                found_candidates.append(
                    CandidatePath(
                        snapshot_id=snapshot_id,
                        path_asset_ids=path_assets,
                        path_relationship_ids=path_rels,
                        attacker_state=state,
                        heuristic_score=-neg_score,
                        context_relationship_ids=self._collect_context_edges(graph, path_assets),
                    )
                )

            new_len = length + 1
            # Children of this length would be discarded by the pruning check
            # as soon as they are popped, so do not generate them at all.
            if new_len >= max_depth:
                continue

            # The heuristic depends only on the entry asset and the new length,
            # so it is identical for every neighbour; compute it at most once.
            new_score: float | None = None

            for rel in graph.edges_from(current_id):
                # 7.2 / Task 11.2: follow CAPABILITY edges, plus UNKNOWN edges
                # when the configuration allows them.
                is_capability = rel.edge_kind == EdgeKind.CAPABILITY
                is_unknown = rel.edge_kind == EdgeKind.UNKNOWN
                if not (is_capability or (is_unknown and allow_unknown)):
                    continue

                next_id = rel.target_asset_id

                # Cycle prevention
                if next_id in path_assets:
                    continue

                # 7.5 Precondition checking
                if not self._check_preconditions(graph, rel, state):
                    continue

                # 7.3 Update attacker state
                next_state = self._update_attacker_state(graph, rel, next_id, state)

                if new_score is None:
                    entry_asset = graph.asset(path_assets[0])
                    new_score = self._calculate_heuristic(graph, entry_asset, new_len)

                heapq.heappush(
                    queue,
                    (-new_score, new_len, next_id, path_assets + [next_id], path_rels + [rel.id], seq, next_state),
                )
                seq += 1

        return found_candidates

    def _initialize_state_for_entry_point(self, graph: AwsGraph, entry: Asset) -> AttackerState:
        """Build the starting state for a search that begins at ``entry``.

        The origin is always "internet", the entry asset counts as compromised
        and the network identity is taken from the entry asset's properties.
        """
        return AttackerState(
            origin="internet",
            compromised_assets=frozenset([str(entry.id)]),
            network_identity=self._get_network_identity(entry),
        )

    def _get_network_identity(self, asset: Asset) -> NetworkIdentity:
        """Extract network identity from an asset.

        Reads ``security_groups``, ``vpc_id`` and ``subnet_id`` from
        ``asset.properties``. A missing or ``None`` ``security_groups`` value
        yields an empty tuple; the IDs are sorted so equal sets compare equal.
        """
        props = asset.properties
        return NetworkIdentity(
            security_group_ids=tuple(sorted(props.get("security_groups") or [])),
            vpc_id=props.get("vpc_id"),
            subnet_id=props.get("subnet_id"),
        )

    def _update_attacker_state(
        self,
        graph: AwsGraph,
        rel: Relationship,
        next_id: uuid.UUID,
        current_state: AttackerState,
    ) -> AttackerState:
        """
        Update attacker state after traversing an edge.

        Updates:
        - Compromised assets (adds new asset)
        - Active principals (if assuming role)
        - Network identity (if moving to compute resource)

        Returns ``current_state`` unchanged when ``next_id`` is not in the graph.
        """
        next_asset = graph.asset(next_id)
        if not next_asset:
            return current_state

        compromised = current_state.compromised_assets | {str(next_id)}

        # An assumed role becomes one of the attacker's active principals.
        principals = current_state.active_principals
        if rel.relationship_type == "CAN_ASSUME":
            principals = principals | {str(next_id)}

        # Pivoting onto a compute resource (EC2 instance or Lambda) replaces
        # the network identity with that resource's own.
        identity = current_state.network_identity
        if next_asset.asset_type in ["ec2:instance", "lambda:function"]:
            identity = self._get_network_identity(next_asset)

        return AttackerState(
            origin=current_state.origin,
            compromised_assets=frozenset(compromised),
            active_principals=frozenset(principals),
            network_identity=identity,
        )

    def _check_preconditions(
        self,
        graph: AwsGraph,
        rel: Relationship,
        state: AttackerState,
    ) -> bool:
        """
        Check if edge can be traversed given current state.

        Enforces:
        - Network reachability: a CAN_REACH edge whose source is a security
          group requires that group in the attacker's network identity; one
          whose source is a subnet requires the attacker to be in that subnet.

        Every other edge is allowed. A CAN_REACH edge whose source asset is
        missing from the graph is rejected.
        """
        if rel.relationship_type != "CAN_REACH":
            return True

        source_asset = graph.asset(rel.source_asset_id)
        if not source_asset:
            return False

        identity = state.network_identity
        if source_asset.asset_type == "ec2:security-group":
            # Traversable only when the attacker originates from this SG.
            return source_asset.aws_resource_id in identity.security_group_ids
        if source_asset.asset_type == "ec2:subnet":
            # Traversable only when the attacker sits in this subnet.
            return identity.subnet_id == source_asset.aws_resource_id

        return True

    def _collect_context_edges(self, graph: AwsGraph, path_assets: list[uuid.UUID]) -> list[uuid.UUID]:
        """Collect structural edges relevant to the path.

        Returns the IDs of all STRUCTURAL edges entering or leaving any asset
        on the path, without duplicates, in first-seen order so the result is
        reproducible between runs.
        """
        # A dict is used as an insertion-ordered set.
        seen: dict[uuid.UUID, None] = {}
        for asset_id in path_assets:
            for rel in graph.edges_to(asset_id):
                if rel.edge_kind == EdgeKind.STRUCTURAL:
                    seen[rel.id] = None
            for rel in graph.edges_from(asset_id):
                if rel.edge_kind == EdgeKind.STRUCTURAL:
                    seen[rel.id] = None
        return list(seen)

    def find_paths_between(
        self,
        graph: AwsGraph,
        source_id: uuid.UUID,
        target_id: uuid.UUID,
        max_depth: int = 5,
        max_paths: int = 10,
    ) -> list[list[uuid.UUID]]:
        """
        Find paths between two specific assets (for Business Logic).

        Breadth-first search over all outgoing edges, regardless of edge kind.

        Args:
            graph: Graph to search.
            source_id: Asset the paths start from.
            target_id: Asset the paths must end at.
            max_depth: Maximum number of assets in a returned path.
            max_paths: Stop after this many paths have been found.

        Returns:
            List of asset_id lists, shortest paths first.
        """
        paths = []
        queue = deque([(source_id, [source_id])])
        # Asset sequences already queued; parallel edges between the same two
        # assets would otherwise enqueue the same sequence more than once.
        queued: set[tuple] = set()

        while queue and len(paths) < max_paths:
            curr, path = queue.popleft()
            if curr == target_id:
                paths.append(path)
                continue

            if len(path) >= max_depth:
                continue

            for rel in graph.edges_from(curr):
                nxt = rel.target_asset_id
                if nxt in path:
                    continue  # cycle prevention
                new_path = path + [nxt]
                key = tuple(new_path)
                if key not in queued:
                    queued.add(key)
                    queue.append((nxt, new_path))
        return paths

    def _calculate_heuristic(self, graph: AwsGraph, entry_asset: Asset | None, length: int) -> float:
        """
        Calculate heuristic score for best-first search.

        Higher is better (higher risk). The score is the entry confidence
        multiplied by the length-based exploitability; impact is treated as
        1.0 because the target is unknown during traversal.
        """
        entry_conf = self._entry_confidence(graph, entry_asset)
        return entry_conf * self._exploitability(length)

    def _hash_path(self, path_assets: list[uuid.UUID]) -> str:
        """Return the SHA-256 hex digest of the "|"-joined asset IDs."""
        path_str = "|".join(str(a) for a in path_assets)
        return hashlib.sha256(path_str.encode()).hexdigest()

    def _create_path(
        self,
        *,
        graph: AwsGraph,
        snapshot_id: uuid.UUID,
        path_assets: list[uuid.UUID],
        path_rels: list[uuid.UUID],
        context_rels: list[uuid.UUID] | None = None,
        confidence_level: ConfidenceLevel = ConfidenceLevel.HIGH,
        confidence_reason: str = "",
    ) -> AttackPath:
        """Create an AttackPath from discovered path.

        Scores the path with ``PathScorer`` (Task 10), classifies its attack
        vector, builds the proof chain and stores every score as a ``Decimal``
        rounded to four places.
        """
        entry = graph.asset(path_assets[0])
        target = graph.asset(path_assets[-1])

        entry_confidence = self._entry_confidence(graph, entry)
        impact = self._impact_score(target)

        risk, exploitability = self._scorer.score_path(
            graph=graph,
            path_assets=path_assets,
            path_rels=path_rels,
            entry_confidence=entry_confidence,
            target_impact=impact,
            confidence_level=confidence_level,
        )

        def as_decimal(value: float) -> Decimal:
            # Going through str() avoids binary float artefacts in the Decimal.
            return Decimal(str(round(value, 4)))

        return AttackPath(
            snapshot_id=snapshot_id,
            source_asset_id=path_assets[0],
            target_asset_id=path_assets[-1],
            path_asset_ids=path_assets,
            path_relationship_ids=path_rels,
            # Every relationship followed during discovery is part of the chain.
            attack_chain_relationship_ids=path_rels,
            context_relationship_ids=context_rels or [],
            attack_vector=self._attack_vector(graph, path_assets),
            path_length=len(path_rels),
            entry_confidence=as_decimal(entry_confidence),
            exploitability_score=as_decimal(exploitability),
            impact_score=as_decimal(impact),
            risk_score=as_decimal(risk),
            confidence_level=confidence_level,
            confidence_reason=confidence_reason,
            proof=self._build_proof(graph, path_assets, path_rels),
        )

    def _entry_confidence(self, graph: AwsGraph, asset: Asset | None) -> float:
        """Calculate entry point accessibility (0-1).

        When the graph has CAN_REACH edges from the internet asset to
        ``asset``, the best port-category score among them is used (Task 6.4),
        with a small bonus for load balancers. Otherwise a fixed value per
        asset type applies. A missing asset scores 0.5.
        """
        if not asset:
            return 0.5

        # CAN_REACH edges from the Internet that end at this asset.
        internet_edges = []
        if INTERNET_ASSET_ID in graph.outgoing:
            internet_edges = [
                rel
                for rel in graph.outgoing[INTERNET_ASSET_ID]
                if rel.target_asset_id == asset.id and rel.relationship_type == "CAN_REACH"
            ]

        is_load_balancer = asset.asset_type in ["elbv2:load-balancer", "elb:load-balancer"]

        if internet_edges:
            best_edge_score = 0.0

            for edge in internet_edges:
                port_range = edge.properties.get("port_range", "")

                # Port category score
                if self._is_web_port(port_range):
                    score = 0.9  # web
                elif self._is_admin_port(port_range):
                    score = 0.7  # admin
                elif self._is_db_port(port_range):
                    score = 0.6  # db
                else:
                    score = 0.8  # other/high ports often risky

                # Adjust by asset type
                if is_load_balancer:
                    score += 0.05

                # No adjustment for rule specificity: the comments in the
                # original implementation state that internet CAN_REACH edges
                # are only created for 0.0.0.0/0 rules.
                if score > best_edge_score:
                    best_edge_score = score

            return min(1.0, best_edge_score)

        # Fallback for legacy assets without CAN_REACH edges:
        # higher confidence for clearly public resources.
        if asset.asset_type == "ec2:instance":
            if asset.properties.get("public_ip"):
                return 0.9
        elif is_load_balancer:
            if asset.properties.get("scheme") == "internet-facing":
                return 0.85
        elif asset.asset_type == "cloudfront:distribution":
            return 0.8
        elif asset.asset_type == "apigateway:rest-api":
            return 0.75

        return 0.5

    def _is_web_port(self, port_range: str) -> bool:
        """Return True for the single-port ranges 80, 443, 8080 and 8443."""
        return port_range in ["80-80", "443-443", "8080-8080", "8443-8443"]

    def _is_admin_port(self, port_range: str) -> bool:
        """Return True for the single-port ranges 22, 3389, 5985 and 5986."""
        return port_range in ["22-22", "3389-3389", "5985-5985", "5986-5986"]

    def _is_db_port(self, port_range: str) -> bool:
        """Return True for the single-port ranges 3306, 5432, 1433 and 27017."""
        return port_range in ["3306-3306", "5432-5432", "1433-1433", "27017-27017"]

    def _exploitability(self, path_length: int) -> float:
        """Calculate exploitability based on path length (Legacy/Fallback).

        Drops by 0.1 per unit of length and never goes below 0.1.
        """
        return max(0.1, 1.0 - (path_length * 0.1))

    def _impact_score(self, asset: Asset | None) -> float:
        """Calculate impact score of reaching the target (0-1).

        The score depends on the asset type and, for secrets, roles and
        buckets, on keywords in the lower-cased asset name. A missing asset or
        an unlisted type scores 0.5.
        """
        if not asset:
            return 0.5

        asset_type = asset.asset_type

        if asset_type in ["rds:db-instance", "dynamodb:table"]:
            return 0.9

        if asset_type in ["secretsmanager:secret", "ssm:parameter"]:
            name_lower = asset.name.lower()
            if any(kw in name_lower for kw in ["prod", "secret", "key", "password"]):
                return 1.0
            return 0.85

        if asset_type == "iam:role":
            # Admin-like roles are high-impact targets in their own right.
            name_lower = asset.name.lower()
            if any(kw in name_lower for kw in ["admin", "root"]):
                return 0.95
            return 0.6

        if asset_type == "s3:bucket":
            name_lower = asset.name.lower()
            if any(kw in name_lower for kw in ["backup", "secret", "credential"]):
                return 0.9
            return 0.5

        return 0.5

    def _attack_vector(
        self,
        graph: AwsGraph,
        path_assets: list[uuid.UUID],
    ) -> str:
        """Determine attack vector classification.

        The label is chosen from the entry asset's type. An empty path gives
        "unknown"; a path whose entry or target asset is missing from the
        graph gives "network".
        """
        if not path_assets:
            return "unknown"

        entry = graph.asset(path_assets[0])
        target = graph.asset(path_assets[-1])

        if not entry or not target:
            return "network"

        entry_type = entry.asset_type
        if entry_type in ["elbv2:load-balancer", "elb:load-balancer"]:
            return "web-to-infrastructure"
        if entry_type == "cloudfront:distribution":
            return "cdn-pivot"
        if entry_type == "apigateway:rest-api":
            return "api-exploitation"
        if entry_type == "ec2:instance":
            return "instance-compromise"
        if "iam" in entry_type:
            return "privilege-escalation"

        return "lateral-movement"

    def _build_proof(
        self,
        graph: AwsGraph,
        path_assets: list[uuid.UUID],
        path_rels: list[uuid.UUID],
    ) -> dict:
        """Build proof chain showing why path exists.

        Returns:
            ``{"path_length": <number of relationships>, "steps": [...]}``
            with one step per path asset found in the graph. Each step after
            the first also carries ``via_relationship`` describing the edge
            that was traversed to reach it, when that edge can be found.
        """
        steps = []

        for i, asset_id in enumerate(path_assets):
            asset = graph.asset(asset_id)
            if not asset:
                continue

            step = {
                "index": i,
                "asset_id": str(asset_id),
                "asset_type": asset.asset_type,
                "name": asset.name,
            }

            # Add relationship info for non-first steps
            if i > 0 and i - 1 < len(path_rels):
                traversed_id = path_rels[i - 1]
                chosen = None
                fallback = None
                for rel in graph.edges_from(path_assets[i - 1]):
                    # Prefer the exact relationship recorded for this hop: the
                    # two assets may be linked by several edges, and only one
                    # of them was actually traversed.
                    if rel.id == traversed_id:
                        chosen = rel
                        break
                    # Otherwise remember the first edge that reaches the asset.
                    if fallback is None and rel.target_asset_id == asset_id:
                        fallback = rel
                if chosen is None:
                    chosen = fallback

                if chosen is not None:
                    step["via_relationship"] = {
                        "type": chosen.relationship_type,
                        "properties": chosen.properties,
                    }

            steps.append(step)

        return {
            "path_length": len(path_rels),
            "steps": steps,
        }
|