cyntrisec 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. cyntrisec/__init__.py +3 -0
  2. cyntrisec/__main__.py +6 -0
  3. cyntrisec/aws/__init__.py +6 -0
  4. cyntrisec/aws/collectors/__init__.py +17 -0
  5. cyntrisec/aws/collectors/ec2.py +30 -0
  6. cyntrisec/aws/collectors/iam.py +116 -0
  7. cyntrisec/aws/collectors/lambda_.py +45 -0
  8. cyntrisec/aws/collectors/network.py +70 -0
  9. cyntrisec/aws/collectors/rds.py +38 -0
  10. cyntrisec/aws/collectors/s3.py +68 -0
  11. cyntrisec/aws/collectors/usage.py +188 -0
  12. cyntrisec/aws/credentials.py +153 -0
  13. cyntrisec/aws/normalizers/__init__.py +17 -0
  14. cyntrisec/aws/normalizers/ec2.py +115 -0
  15. cyntrisec/aws/normalizers/iam.py +182 -0
  16. cyntrisec/aws/normalizers/lambda_.py +83 -0
  17. cyntrisec/aws/normalizers/network.py +225 -0
  18. cyntrisec/aws/normalizers/rds.py +130 -0
  19. cyntrisec/aws/normalizers/s3.py +184 -0
  20. cyntrisec/aws/relationship_builder.py +1359 -0
  21. cyntrisec/aws/scanner.py +303 -0
  22. cyntrisec/cli/__init__.py +5 -0
  23. cyntrisec/cli/analyze.py +747 -0
  24. cyntrisec/cli/ask.py +412 -0
  25. cyntrisec/cli/can.py +307 -0
  26. cyntrisec/cli/comply.py +226 -0
  27. cyntrisec/cli/cuts.py +231 -0
  28. cyntrisec/cli/diff.py +332 -0
  29. cyntrisec/cli/errors.py +105 -0
  30. cyntrisec/cli/explain.py +348 -0
  31. cyntrisec/cli/main.py +114 -0
  32. cyntrisec/cli/manifest.py +893 -0
  33. cyntrisec/cli/output.py +117 -0
  34. cyntrisec/cli/remediate.py +643 -0
  35. cyntrisec/cli/report.py +462 -0
  36. cyntrisec/cli/scan.py +207 -0
  37. cyntrisec/cli/schemas.py +391 -0
  38. cyntrisec/cli/serve.py +164 -0
  39. cyntrisec/cli/setup.py +260 -0
  40. cyntrisec/cli/validate.py +101 -0
  41. cyntrisec/cli/waste.py +323 -0
  42. cyntrisec/core/__init__.py +31 -0
  43. cyntrisec/core/business_config.py +110 -0
  44. cyntrisec/core/business_logic.py +131 -0
  45. cyntrisec/core/compliance.py +437 -0
  46. cyntrisec/core/cost_estimator.py +301 -0
  47. cyntrisec/core/cuts.py +360 -0
  48. cyntrisec/core/diff.py +361 -0
  49. cyntrisec/core/graph.py +202 -0
  50. cyntrisec/core/paths.py +830 -0
  51. cyntrisec/core/schema.py +317 -0
  52. cyntrisec/core/simulator.py +371 -0
  53. cyntrisec/core/waste.py +309 -0
  54. cyntrisec/mcp/__init__.py +5 -0
  55. cyntrisec/mcp/server.py +862 -0
  56. cyntrisec/storage/__init__.py +7 -0
  57. cyntrisec/storage/filesystem.py +344 -0
  58. cyntrisec/storage/memory.py +113 -0
  59. cyntrisec/storage/protocol.py +92 -0
  60. cyntrisec-0.1.7.dist-info/METADATA +672 -0
  61. cyntrisec-0.1.7.dist-info/RECORD +65 -0
  62. cyntrisec-0.1.7.dist-info/WHEEL +4 -0
  63. cyntrisec-0.1.7.dist-info/entry_points.txt +2 -0
  64. cyntrisec-0.1.7.dist-info/licenses/LICENSE +190 -0
  65. cyntrisec-0.1.7.dist-info/licenses/NOTICE +5 -0
@@ -0,0 +1,830 @@
1
+ """
2
+ Attack Path Finder - Heuristic-based attack path discovery.
3
+
4
+ Finds paths from internet-facing entry points to sensitive targets
5
+ through the capability graph. Uses a priority queue (best-first search)
6
+ to prioritize highest-risk paths.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import heapq
13
+ import uuid
14
+ from collections import deque
15
+ from dataclasses import dataclass, field
16
+ from decimal import Decimal
17
+
18
+ from cyntrisec.core.graph import AwsGraph
19
+ from cyntrisec.core.schema import (
20
+ INTERNET_ASSET_ID,
21
+ Asset,
22
+ AttackPath,
23
+ ConfidenceLevel,
24
+ EdgeKind,
25
+ Relationship,
26
+ )
27
+
28
+
29
@dataclass
class PathFinderConfig:
    """Tunable limits for attack path discovery."""

    # Paths whose node count reaches this value are no longer expanded.
    max_depth: int = 8
    # Discovery stops once this many candidate paths have been collected.
    max_paths: int = 200
    # Final paths scoring below this risk value are dropped.
    min_risk_score: float = 0.0
    # Task 11.2: when True, UNKNOWN edges are traversed alongside CAPABILITY edges.
    include_unknown: bool = False
37
+
38
+
39
@dataclass(frozen=True)
class NetworkIdentity:
    """Immutable description of the network position the attacker operates from."""

    # Security group identifiers attached to the current vantage point.
    security_group_ids: tuple[str, ...] = field(default_factory=tuple)
    # VPC / subnet identifiers; None when not known.
    vpc_id: str | None = None
    subnet_id: str | None = None

    def __hash__(self):
        # Hash over all three fields so equal identities land in the same bucket.
        key = (self.security_group_ids, self.vpc_id, self.subnet_id)
        return hash(key)
48
+
49
+
50
@dataclass(frozen=True)
class AttackerState:
    """
    Immutable snapshot of what the attacker holds at one point of the traversal.

    Tracks:
    - the origin of the attack ("internet" or an asset ID)
    - the assets compromised so far
    - the principals (roles) currently usable
    - the network identity (security groups, VPC context)
    """

    origin: str  # "internet" or asset ID
    compromised_assets: frozenset[str] = field(default_factory=frozenset)
    active_principals: frozenset[str] = field(default_factory=frozenset)
    network_identity: NetworkIdentity = field(default_factory=NetworkIdentity)

    def state_key(self) -> int:
        """Return a hashable key used for visited-set tracking."""
        # Only identity and capabilities take part in the key; the origin and
        # the set of compromised assets are deliberately left out.
        capabilities = (self.active_principals, self.network_identity)
        return hash(capabilities)
71
+
72
@dataclass
class CandidatePath:
    """A raw path produced by Phase A (Discovery), before validation and scoring."""

    snapshot_id: uuid.UUID
    # Ordered asset IDs along the path.
    path_asset_ids: list[uuid.UUID]
    # Relationship IDs traversed between the assets.
    path_relationship_ids: list[uuid.UUID]
    # Attacker state at the moment the path was recorded.
    attacker_state: AttackerState
    # Heuristic score the search assigned to this path.
    heuristic_score: float
    # IDs of structural (context) relationships touching the path assets.
    context_relationship_ids: list[uuid.UUID] = field(default_factory=list)
83
+
84
class PathValidator:
    """
    Phase B: inspects candidate paths and assigns each a confidence level.
    """

    # Asset types treated as databases by the network check.
    _DB_ASSET_TYPES = ("rds:db-instance", "dynamodb:table", "redshift:cluster")
    # Port ranges treated as strictly "web" by the network check.
    _WEB_ONLY_PORT_RANGES = ("80-80", "443-443")

    def validate_path_metadata(
        self, graph: AwsGraph, candidate: CandidatePath
    ) -> tuple[ConfidenceLevel, str]:
        """Return the merged confidence level and a "; "-joined reason string."""
        # Run both checks (network first, then PassRole).
        network_level, network_reason = self._check_network_preconditions(graph, candidate)
        passrole_level, passrole_reason = self._check_passrole_motif(graph, candidate)

        # The weakest verdict wins: LOW beats MED, MED beats HIGH.
        verdicts = (network_level, passrole_level)
        if ConfidenceLevel.LOW in verdicts:
            merged = ConfidenceLevel.LOW
        elif ConfidenceLevel.MED in verdicts:
            merged = ConfidenceLevel.MED
        else:
            merged = ConfidenceLevel.HIGH

        explanation = "; ".join(
            text for text in (network_reason, passrole_reason) if text
        )
        return merged, explanation

    def _find_relationship(self, graph: AwsGraph, src_id, rel_id):
        """Return the outgoing edge of ``src_id`` whose id is ``rel_id``, or None."""
        return next(
            (edge for edge in graph.edges_from(src_id) if edge.id == rel_id),
            None,
        )

    def _check_network_preconditions(
        self, graph: AwsGraph, candidate: CandidatePath
    ) -> tuple[ConfidenceLevel, str]:
        """
        Inspect the CAN_REACH edges of the path.

        Confidence drops to LOW when a database-type target is reached over a
        web-only port range (80 or 443).
        """
        findings = []
        level = ConfidenceLevel.HIGH
        asset_ids = candidate.path_asset_ids

        for position, rel_id in enumerate(candidate.path_relationship_ids):
            # Edge `position` connects asset `position` to asset `position + 1`.
            src_id = asset_ids[position]
            tgt_id = asset_ids[position + 1]

            edge = self._find_relationship(graph, src_id, rel_id)
            if not edge:
                continue
            if edge.relationship_type != "CAN_REACH":
                continue

            target_asset = graph.asset(tgt_id)
            if not target_asset:
                continue

            port_range = edge.properties.get("port_range", "")

            # A database exposed only on 80/443 is unlikely to be directly usable.
            targets_database = target_asset.asset_type in self._DB_ASSET_TYPES
            web_only = port_range in self._WEB_ONLY_PORT_RANGES
            if targets_database and web_only:
                level = ConfidenceLevel.LOW
                findings.append(f"Unlikely database access via web ports ({port_range}) to {target_asset.name}")

        return level, "; ".join(findings)

    def _check_passrole_motif(
        self, graph: AwsGraph, candidate: CandidatePath
    ) -> tuple[ConfidenceLevel, str]:
        """
        Inspect iam:PassRole (CAN_PASS_TO) edges of the path.

        Confidence drops from HIGH to MED when the passing asset's name does
        not contain "admin" or "root", because the execution permission that
        would trigger the pass cannot be verified here.
        """
        findings = []
        level = ConfidenceLevel.HIGH  # downgraded below if an unverified PassRole shows up

        for position, rel_id in enumerate(candidate.path_relationship_ids):
            src_id = candidate.path_asset_ids[position]

            edge = self._find_relationship(graph, src_id, rel_id)
            if not edge:
                continue
            if edge.relationship_type != "CAN_PASS_TO":
                continue

            # Motif: Source -> (PassRole) -> TargetRole; look at the source.
            source_asset = graph.asset(src_id)
            if not source_asset:
                continue

            # Name heuristic: admin/root principals are assumed to hold a trigger.
            lowered = source_asset.name.lower()
            looks_privileged = "admin" in lowered or "root" in lowered
            if looks_privileged:
                continue

            # Trigger (e.g. lambda:CreateFunction) cannot be verified: cap at MED.
            if level == ConfidenceLevel.HIGH:
                level = ConfidenceLevel.MED
            findings.append(f"PassRole found at {source_asset.name}, but execution permission (e.g. CreateFunction) unverified")

        return level, "; ".join(findings)
198
+
199
+
200
class PathScorer:
    """
    Scores attack paths from edge weights and a confidence multiplier.

    Risk Score = Entry Confidence * Exploitability * Impact * Confidence multiplier

    Where:
    - Entry Confidence: likelihood of the attacker reaching the start (0-1)
    - Exploitability: difficulty of traversing the path (0-1), derived from
      the path weight (sum of edge weights); heavier paths score lower
    - Impact: value of the target (0-1)
    """

    # Base weights: lower means easier to traverse.
    EDGE_WEIGHTS = {
        # IAM privilege escalation (very easy)
        "CAN_ASSUME": 0.1,
        "CAN_PASS_TO": 0.2,  # Requires trigger

        # IAM data access (easy)
        "MAY_READ": 0.3,
        "MAY_WRITE": 0.3,
        "MAY_READ_S3_OBJECT": 0.3,

        # Network reachability (medium - requires exploit/creds)
        "CAN_REACH": 0.5,

        # Fallback for any other relationship type
        "default": 1.0,
    }

    CONFIDENCE_MULTIPLIERS = {
        ConfidenceLevel.HIGH: 1.0,
        ConfidenceLevel.MED: 0.6,
        ConfidenceLevel.LOW: 0.2,
    }

    def score_path(
        self,
        graph: AwsGraph,
        path_assets: list[uuid.UUID],
        path_rels: list[uuid.UUID],
        entry_confidence: float,
        target_impact: float,
        confidence_level: ConfidenceLevel = ConfidenceLevel.HIGH,
    ) -> tuple[float, float]:
        """
        Return ``(risk_score, exploitability_score)`` for a path.
        """
        weights = self.EDGE_WEIGHTS
        total_weight = 0.0

        # Sum the weight of each traversed edge; edges that cannot be found
        # on their source asset count as 1.0.
        for position, rel_id in enumerate(path_rels):
            origin_id = path_assets[position]
            matched = next(
                (edge for edge in graph.edges_from(origin_id) if edge.id == rel_id),
                None,
            )
            if matched:
                total_weight += weights.get(matched.relationship_type, weights["default"])
            else:
                total_weight += 1.0

        # Linear decay with a floor of 0.01.
        exploitability = max(0.01, 1.0 - (total_weight * 0.15))

        # Confidence scales the final risk score, not the exploitability.
        multiplier = self.CONFIDENCE_MULTIPLIERS.get(confidence_level, 0.2)
        risk_score = entry_confidence * exploitability * target_impact * multiplier

        return float(risk_score), float(exploitability)

    def score_edge(self, relationship_type: str) -> float:
        """Return the weight of one edge type (the "default" weight if unlisted)."""
        weights = self.EDGE_WEIGHTS
        return weights.get(relationship_type, weights["default"])
285
+
286
+
287
+
288
class PathFinder:
    """
    Discovers attack paths through the capability graph.

    Uses Best-First Search (priority queue) so the candidates with the
    highest heuristic score are expanded first.

    Risk heuristic:
    - Prioritizes paths starting from high-confidence entry points.
    - Penalizes length (shorter paths = higher exploitability).
    """

    def __init__(self, config: PathFinderConfig | None = None):
        """Store the search configuration (a default one if omitted) and create the scorer."""
        self._config = config or PathFinderConfig()
        self._scorer = PathScorer()

    def find_paths(
        self,
        graph: AwsGraph,
        snapshot_id: uuid.UUID,
    ) -> list[AttackPath]:
        """
        Find attack paths in the graph using two-phase discovery.

        Phase A collects candidate paths; Phase B validates each candidate,
        scores it, and keeps those at or above ``min_risk_score``.

        Returns:
            Attack paths sorted by risk score, highest first.
        """
        # Phase A: Discovery
        candidates = self._discover_candidate_paths(graph, snapshot_id)

        # Phase B: Validation
        validator = PathValidator()
        results = []

        for candidate in candidates:
            confidence, reason = validator.validate_path_metadata(graph, candidate)

            attack_path = self._create_path(
                graph=graph,
                snapshot_id=snapshot_id,
                path_assets=candidate.path_asset_ids,
                path_rels=candidate.path_relationship_ids,
                context_rels=candidate.context_relationship_ids,
                confidence_level=confidence,
                confidence_reason=reason,
            )

            # Re-check the threshold here because validation can lower the score.
            if float(attack_path.risk_score) >= self._config.min_risk_score:
                results.append(attack_path)

        # Sort by risk score, descending (Task 10.3).
        results.sort(key=lambda p: p.risk_score, reverse=True)

        return results

    def _discover_candidate_paths(
        self,
        graph: AwsGraph,
        snapshot_id: uuid.UUID,
    ) -> list[CandidatePath]:
        """
        Phase A: discover potential attack paths with a best-first search.

        Starts from every entry point of the graph and records a candidate
        each time a sensitive target is popped from the queue. The search
        stops when the queue is empty or ``max_paths`` candidates were found.
        """
        entry_points = graph.entry_points()
        targets = {t.id: t for t in graph.sensitive_targets()}

        if not entry_points or not targets:
            return []

        # Queue items:
        # (-heuristic_score, path_len, current_id, path_assets, path_rels, attacker_state)
        queue = []
        for entry in entry_points:
            initial_state = self._initialize_state_for_entry_point(graph, entry)

            # Initial score is based on entry confidence alone (length=1).
            score = self._calculate_heuristic(graph, entry, 1)

            # heapq is a min-heap, so push the negated score to pop the best first.
            heapq.heappush(queue, (-score, 1, entry.id, [entry.id], [], initial_state))

        found_candidates: list[CandidatePath] = []
        visited_states: set[tuple[uuid.UUID, int]] = set()

        # Soft cap on how often a single node may be expanded, so alternative
        # paths are still found without the search exploding.
        node_visits: dict[uuid.UUID, int] = {}
        MAX_VISITS_PER_NODE = 10

        allow_unknown = self._config.include_unknown

        while queue and len(found_candidates) < self._config.max_paths:
            neg_score, length, current_id, path_assets, path_rels, state = heapq.heappop(queue)

            # Pruning: paths that reached the depth limit are dropped.
            if length >= self._config.max_depth:
                continue

            # State-aware visited check: same node with the same capabilities.
            state_key = state.state_key()
            if (current_id, state_key) in visited_states:
                continue
            visited_states.add((current_id, state_key))

            node_visits[current_id] = node_visits.get(current_id, 0) + 1
            if node_visits[current_id] > MAX_VISITS_PER_NODE:
                continue

            if current_id in targets:
                # Reached a sensitive target: record the path with its
                # structural context edges. Expansion continues below.
                context_rels = self._collect_context_edges(graph, path_assets)

                candidate = CandidatePath(
                    snapshot_id=snapshot_id,
                    path_asset_ids=path_assets,
                    path_relationship_ids=path_rels,
                    attacker_state=state,
                    heuristic_score=-neg_score,
                    context_relationship_ids=context_rels,
                )
                found_candidates.append(candidate)

            # The heuristic depends only on the entry asset and the new path
            # length, so it is identical for every neighbour of this node.
            new_len = length + 1
            entry_asset = graph.asset(path_assets[0])
            new_score = self._calculate_heuristic(graph, entry_asset, new_len)

            # Expand neighbours.
            for rel in graph.edges_from(current_id):
                # 7.2 / Task 11.2: follow CAPABILITY edges, plus UNKNOWN edges
                # when the configuration allows it.
                is_capability = rel.edge_kind == EdgeKind.CAPABILITY
                is_unknown = rel.edge_kind == EdgeKind.UNKNOWN
                if not (is_capability or (is_unknown and allow_unknown)):
                    continue

                next_id = rel.target_asset_id

                # Cycle prevention
                if next_id in path_assets:
                    continue

                # 7.5 Precondition checking
                if not self._check_preconditions(graph, rel, state):
                    continue

                # 7.3 Update attacker state
                next_state = self._update_attacker_state(graph, rel, next_id, state)

                new_assets = path_assets + [next_id]
                new_rels = path_rels + [rel.id]

                heapq.heappush(queue, (-new_score, new_len, next_id, new_assets, new_rels, next_state))

        return found_candidates

    def _initialize_state_for_entry_point(self, graph: AwsGraph, entry: Asset) -> AttackerState:
        """Build the starting attacker state for an entry point (origin "internet")."""
        identity = self._get_network_identity(entry)

        return AttackerState(
            origin="internet",
            compromised_assets=frozenset([str(entry.id)]),
            network_identity=identity,
        )

    def _get_network_identity(self, asset: Asset) -> NetworkIdentity:
        """
        Extract the network identity from an asset's properties.

        Reads ``security_groups``, ``vpc_id`` and ``subnet_id``. A missing or
        ``None`` ``security_groups`` value yields an empty tuple.
        """
        # `or []` also covers a key that is present but set to None, which
        # would otherwise make sorted() raise TypeError.
        sg_ids = tuple(sorted(asset.properties.get("security_groups") or []))
        vpc_id = asset.properties.get("vpc_id")
        subnet_id = asset.properties.get("subnet_id")
        return NetworkIdentity(
            security_group_ids=sg_ids,
            vpc_id=vpc_id,
            subnet_id=subnet_id,
        )

    def _update_attacker_state(
        self,
        graph: AwsGraph,
        rel: Relationship,
        next_id: uuid.UUID,
        current_state: AttackerState,
    ) -> AttackerState:
        """
        Return the attacker state after traversing ``rel`` to ``next_id``.

        Updates:
        - Compromised assets (adds the new asset)
        - Active principals (adds the target of a CAN_ASSUME edge)
        - Network identity (replaced when landing on an EC2 instance or Lambda)

        The current state is returned unchanged when the next asset is not
        found in the graph.
        """
        next_asset = graph.asset(next_id)
        if not next_asset:
            return current_state

        new_compromised = set(current_state.compromised_assets)
        new_compromised.add(str(next_id))

        new_principals = set(current_state.active_principals)
        if rel.relationship_type == "CAN_ASSUME":
            new_principals.add(str(next_id))

        # Pivoting onto a compute resource changes the network vantage point.
        new_identity = current_state.network_identity
        if next_asset.asset_type in ["ec2:instance", "lambda:function"]:
            new_identity = self._get_network_identity(next_asset)

        return AttackerState(
            origin=current_state.origin,
            compromised_assets=frozenset(new_compromised),
            active_principals=frozenset(new_principals),
            network_identity=new_identity,
        )

    def _check_preconditions(
        self,
        graph: AwsGraph,
        rel: Relationship,
        state: AttackerState,
    ) -> bool:
        """
        Check whether an edge can be traversed given the current state.

        Only CAN_REACH edges are restricted:
        - the source asset must exist in the graph;
        - a security-group source must be part of the attacker's identity;
        - a subnet source must be the attacker's current subnet.

        Every other edge is allowed.
        """
        if rel.relationship_type == "CAN_REACH":
            source_asset = graph.asset(rel.source_asset_id)
            if not source_asset:
                return False

            if source_asset.asset_type == "ec2:security-group":
                # Traversable only when we originate from this security group.
                if source_asset.aws_resource_id not in state.network_identity.security_group_ids:
                    return False

            elif source_asset.asset_type == "ec2:subnet":
                # Traversable only when we are inside this subnet.
                if state.network_identity.subnet_id != source_asset.aws_resource_id:
                    return False

        return True

    def _collect_context_edges(self, graph: AwsGraph, path_assets: list[uuid.UUID]) -> list[uuid.UUID]:
        """Collect the de-duplicated IDs of STRUCTURAL edges touching the path assets."""
        context_ids = []
        for asset_id in path_assets:
            # Structural edges pointing to, then leaving from, this asset.
            for rel in graph.edges_to(asset_id):
                if rel.edge_kind == EdgeKind.STRUCTURAL:
                    context_ids.append(rel.id)
            for rel in graph.edges_from(asset_id):
                if rel.edge_kind == EdgeKind.STRUCTURAL:
                    context_ids.append(rel.id)
        return list(set(context_ids))

    def find_paths_between(
        self,
        graph: AwsGraph,
        source_id: uuid.UUID,
        target_id: uuid.UUID,
        max_depth: int = 5,
    ) -> list[list[uuid.UUID]]:
        """
        Find paths between two specific assets (for Business Logic).

        Breadth-first search over all outgoing edges. At most 10 paths are
        returned, and paths whose node count reaches ``max_depth`` are not
        extended further.

        Returns:
            A list of paths, each a list of asset IDs from source to target.
        """
        paths = []
        queue = deque([(source_id, [source_id])])
        # Hashes of paths already enqueued; parallel edges between the same
        # assets would otherwise enqueue the same asset sequence twice.
        visited_hashes = set()

        while queue and len(paths) < 10:
            curr, path = queue.popleft()
            if curr == target_id:
                paths.append(path)
                continue

            if len(path) >= max_depth:
                continue

            for rel in graph.edges_from(curr):
                nxt = rel.target_asset_id
                if nxt not in path:
                    new_path = path + [nxt]
                    ph = self._hash_path(new_path)
                    if ph not in visited_hashes:
                        visited_hashes.add(ph)
                        queue.append((nxt, new_path))
        return paths

    def _calculate_heuristic(self, graph: AwsGraph, entry_asset: Asset | None, length: int) -> float:
        """
        Calculate the heuristic score for best-first search.

        Higher is better (higher risk). Impact is unknown during traversal,
        so the score is entry confidence times length-based exploitability.
        """
        entry_conf = self._entry_confidence(graph, entry_asset)
        exploitability = self._exploitability(length)
        return entry_conf * exploitability

    def _hash_path(self, path_assets: list[uuid.UUID]) -> str:
        """Return the SHA-256 hex digest of the "|"-joined asset IDs."""
        path_str = "|".join(str(a) for a in path_assets)
        return hashlib.sha256(path_str.encode()).hexdigest()

    def _create_path(
        self,
        *,
        graph: AwsGraph,
        snapshot_id: uuid.UUID,
        path_assets: list[uuid.UUID],
        path_rels: list[uuid.UUID],
        context_rels: list[uuid.UUID] | None = None,
        confidence_level: ConfidenceLevel = ConfidenceLevel.HIGH,
        confidence_reason: str = "",
    ) -> AttackPath:
        """
        Build an AttackPath from a discovered path.

        Computes entry confidence, impact, exploitability and risk, then
        classifies the attack vector and attaches the proof chain. Scores are
        stored as ``Decimal`` values rounded to four places.
        """
        entry = graph.asset(path_assets[0])
        target = graph.asset(path_assets[-1])

        # Calculate scores using PathScorer (Task 10).
        entry_confidence = self._entry_confidence(graph, entry)
        impact = self._impact_score(target)

        risk, exploitability = self._scorer.score_path(
            graph=graph,
            path_assets=path_assets,
            path_rels=path_rels,
            entry_confidence=entry_confidence,
            target_impact=impact,
            confidence_level=confidence_level,
        )

        vector = self._attack_vector(graph, path_assets)
        proof = self._build_proof(graph, path_assets, path_rels)

        return AttackPath(
            snapshot_id=snapshot_id,
            source_asset_id=path_assets[0],
            target_asset_id=path_assets[-1],
            path_asset_ids=path_assets,
            path_relationship_ids=path_rels,
            attack_chain_relationship_ids=path_rels,  # During discovery, all are capability
            context_relationship_ids=context_rels or [],
            attack_vector=vector,
            path_length=len(path_rels),
            entry_confidence=Decimal(str(round(entry_confidence, 4))),
            exploitability_score=Decimal(str(round(exploitability, 4))),
            impact_score=Decimal(str(round(impact, 4))),
            risk_score=Decimal(str(round(risk, 4))),
            confidence_level=confidence_level,
            confidence_reason=confidence_reason,
            proof=proof,
        )

    def _entry_confidence(self, graph: AwsGraph, asset: Asset | None) -> float:
        """
        Calculate entry point accessibility (0-1).

        When the Internet asset has CAN_REACH edges to ``asset``, the best
        port-category score among them is used (plus 0.05 for load balancers,
        capped at 1.0). Otherwise a fallback by asset type applies. Returns
        0.5 when the asset is missing or no rule matches.
        """
        if not asset:
            return 0.5

        # CAN_REACH edges from the Internet asset to this asset.
        internet_edges = []
        if INTERNET_ASSET_ID in graph.outgoing:
            for rel in graph.outgoing[INTERNET_ASSET_ID]:
                if rel.target_asset_id == asset.id and rel.relationship_type == "CAN_REACH":
                    internet_edges.append(rel)

        if internet_edges:
            # Task 6.4: entry confidence is driven by the port category.
            best_edge_score = 0.0

            for edge in internet_edges:
                port_range = edge.properties.get("port_range", "")

                if self._is_web_port(port_range):
                    score = 0.9  # web
                elif self._is_admin_port(port_range):
                    score = 0.7  # admin
                elif self._is_db_port(port_range):
                    score = 0.6  # db
                else:
                    score = 0.8  # other/high ports often risky

                # Load balancers get a small bonus.
                if asset.asset_type in ["elbv2:load-balancer", "elb:load-balancer"]:
                    score += 0.05

                # No rule-specificity adjustment is applied to these edges.
                if score > best_edge_score:
                    best_edge_score = score

            return min(1.0, best_edge_score)

        # Fallback for legacy assets without CAN_REACH edges:
        # higher confidence for clearly public resources.
        if asset.asset_type == "ec2:instance":
            if asset.properties.get("public_ip"):
                return 0.9
        elif asset.asset_type in ["elbv2:load-balancer", "elb:load-balancer"]:
            if asset.properties.get("scheme") == "internet-facing":
                return 0.85
        elif asset.asset_type == "cloudfront:distribution":
            return 0.8
        elif asset.asset_type == "apigateway:rest-api":
            return 0.75

        return 0.5

    def _is_web_port(self, port_range: str) -> bool:
        """Return True for the single-port ranges 80, 443, 8080 and 8443."""
        return port_range in ["80-80", "443-443", "8080-8080", "8443-8443"]

    def _is_admin_port(self, port_range: str) -> bool:
        """Return True for the single-port ranges 22, 3389, 5985 and 5986."""
        return port_range in ["22-22", "3389-3389", "5985-5985", "5986-5986"]

    def _is_db_port(self, port_range: str) -> bool:
        """Return True for the single-port ranges 3306, 5432, 1433 and 27017."""
        return port_range in ["3306-3306", "5432-5432", "1433-1433", "27017-27017"]

    def _exploitability(self, path_length: int) -> float:
        """Calculate exploitability based on path length (Legacy/Fallback)."""
        # Longer paths are harder to exploit; never drops below 0.1.
        return max(0.1, 1.0 - (path_length * 0.1))

    def _impact_score(self, asset: Asset | None) -> float:
        """
        Calculate the impact score of reaching the target (0-1).

        Based on the asset type, raised for some types when the lower-cased
        name contains certain keywords. Returns 0.5 for a missing asset or an
        unlisted type.
        """
        if not asset:
            return 0.5

        if asset.asset_type in ["rds:db-instance", "dynamodb:table"]:
            return 0.9
        elif asset.asset_type in ["secretsmanager:secret", "ssm:parameter"]:
            name_lower = asset.name.lower()
            if any(kw in name_lower for kw in ["prod", "secret", "key", "password"]):
                return 1.0
            return 0.85
        elif asset.asset_type == "iam:role":
            # Roles count as high impact when their name suggests admin rights.
            name_lower = asset.name.lower()
            if any(kw in name_lower for kw in ["admin", "root"]):
                return 0.95
            return 0.6
        elif asset.asset_type == "s3:bucket":
            name_lower = asset.name.lower()
            if any(kw in name_lower for kw in ["backup", "secret", "credential"]):
                return 0.9
            return 0.5

        return 0.5

    def _attack_vector(
        self,
        graph: AwsGraph,
        path_assets: list[uuid.UUID],
    ) -> str:
        """
        Determine the attack vector classification from the entry asset type.

        Returns "unknown" for an empty path and "network" when the entry or
        target asset is not found in the graph.
        """
        if not path_assets:
            return "unknown"

        entry = graph.asset(path_assets[0])
        target = graph.asset(path_assets[-1])

        if not entry or not target:
            return "network"

        if entry.asset_type in ["elbv2:load-balancer", "elb:load-balancer"]:
            return "web-to-infrastructure"
        elif entry.asset_type == "cloudfront:distribution":
            return "cdn-pivot"
        elif entry.asset_type == "apigateway:rest-api":
            return "api-exploitation"
        elif entry.asset_type == "ec2:instance":
            return "instance-compromise"
        elif "iam" in entry.asset_type:
            return "privilege-escalation"

        return "lateral-movement"

    def _build_proof(
        self,
        graph: AwsGraph,
        path_assets: list[uuid.UUID],
        path_rels: list[uuid.UUID],
    ) -> dict:
        """
        Build the proof chain showing why the path exists.

        Each step lists the asset and, for non-first steps, the relationship
        used to reach it. The relationship is the edge whose ID equals the
        traversed ``path_rels`` entry; if no edge with that ID reaches the
        asset, the first edge from the previous asset to this asset is used.
        Assets missing from the graph are skipped.

        Returns:
            ``{"path_length": <number of relationships>, "steps": [...]}``.
        """
        steps = []

        for i, asset_id in enumerate(path_assets):
            asset = graph.asset(asset_id)
            if not asset:
                continue

            step = {
                "index": i,
                "asset_id": str(asset_id),
                "asset_type": asset.asset_type,
                "name": asset.name,
            }

            # Relationship info exists only for non-first steps that have a
            # matching entry in path_rels.
            if 0 < i <= len(path_rels):
                traversed_id = path_rels[i - 1]
                chosen = None
                first_to_target = None

                for rel in graph.edges_from(path_assets[i - 1]):
                    if rel.target_asset_id != asset_id:
                        continue
                    if rel.id == traversed_id:
                        # Exact edge the search walked; parallel edges between
                        # the same two assets must not be confused with it.
                        chosen = rel
                        break
                    if first_to_target is None:
                        first_to_target = rel

                if chosen is None:
                    chosen = first_to_target

                if chosen is not None:
                    step["via_relationship"] = {
                        "type": chosen.relationship_type,
                        "properties": chosen.properties,
                    }

            steps.append(step)

        return {
            "path_length": len(path_rels),
            "steps": steps,
        }