cartography 0.107.0rc3__py3-none-any.whl → 0.108.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (39)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -0
  3. cartography/config.py +5 -0
  4. cartography/data/indexes.cypher +0 -8
  5. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  6. cartography/intel/aws/__init__.py +1 -0
  7. cartography/intel/aws/cloudtrail.py +17 -4
  8. cartography/intel/aws/cloudtrail_management_events.py +560 -16
  9. cartography/intel/aws/cloudwatch.py +73 -4
  10. cartography/intel/aws/ec2/security_groups.py +140 -122
  11. cartography/intel/aws/ec2/snapshots.py +47 -84
  12. cartography/intel/aws/ec2/subnets.py +37 -63
  13. cartography/intel/aws/elasticache.py +102 -79
  14. cartography/intel/aws/guardduty.py +275 -0
  15. cartography/intel/aws/resources.py +2 -0
  16. cartography/intel/github/repos.py +370 -28
  17. cartography/models/aws/cloudtrail/management_events.py +95 -6
  18. cartography/models/aws/cloudtrail/trail.py +21 -0
  19. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  20. cartography/models/aws/ec2/security_group_rules.py +109 -0
  21. cartography/models/aws/ec2/security_groups.py +90 -0
  22. cartography/models/aws/ec2/snapshots.py +58 -0
  23. cartography/models/aws/ec2/subnets.py +65 -0
  24. cartography/models/aws/ec2/volumes.py +20 -0
  25. cartography/models/aws/elasticache/__init__.py +0 -0
  26. cartography/models/aws/elasticache/cluster.py +65 -0
  27. cartography/models/aws/elasticache/topic.py +67 -0
  28. cartography/models/aws/guardduty/__init__.py +1 -0
  29. cartography/models/aws/guardduty/findings.py +102 -0
  30. cartography/models/github/dependencies.py +74 -0
  31. cartography/models/github/manifests.py +49 -0
  32. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/METADATA +3 -3
  33. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/RECORD +37 -26
  34. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  35. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  36. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/WHEEL +0 -0
  37. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/entry_points.txt +0 -0
  38. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/licenses/LICENSE +0 -0
  39. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,12 @@ from cartography.client.core.tx import load_matchlinks
13
13
  from cartography.graph.job import GraphJob
14
14
  from cartography.intel.aws.ec2.util import get_botocore_config
15
15
  from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
16
+ from cartography.models.aws.cloudtrail.management_events import (
17
+ AssumedRoleWithSAMLMatchLink,
18
+ )
19
+ from cartography.models.aws.cloudtrail.management_events import (
20
+ GitHubRepoAssumeRoleWithWebIdentityMatchLink,
21
+ )
16
22
  from cartography.util import aws_handle_regions
17
23
  from cartography.util import timeit
18
24
 
@@ -74,11 +80,123 @@ def get_assume_role_events(
74
80
  return all_events
75
81
 
76
82
 
83
@timeit
@aws_handle_regions
def get_saml_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Collect the CloudTrail AssumeRoleWithSAML events recorded in one region.

    The query window ends at the current UTC time and starts ``lookback_hours``
    earlier. Results are gathered through the ``lookup_events`` paginator, filtered
    on the ``EventName`` attribute.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session used to create the CloudTrail client
    :type region: str
    :param region: The AWS region to query
    :type lookback_hours: int
    :param lookback_hours: Size of the query window, in hours, counted back from now
    :rtype: List[Dict[str, Any]]
    :return: Every entry found under the "Events" key of the returned pages
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Query window: [now - lookback_hours, now], expressed as naive UTC datetimes.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithSAML events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    event_filter = {
        "AttributeKey": "EventName",
        "AttributeValue": "AssumeRoleWithSAML",
    }
    pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[event_filter],
        StartTime=window_start,
        EndTime=window_end,
        PaginationConfig={
            "MaxItems": 10000,  # Cap the total so a busy account cannot trigger unbounded API calls
            "PageSize": 50,  # Per-page size requested from CloudTrail
        },
    )

    # Flatten the pages into one list; a page without "Events" contributes nothing.
    saml_events = [record for page in pages for record in page.get("Events", [])]

    logger.info(
        f"Retrieved {len(saml_events)} AssumeRoleWithSAML events from region '{region}'"
    )

    return saml_events
138
+
139
+
140
@timeit
@aws_handle_regions
def get_web_identity_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Collect the CloudTrail AssumeRoleWithWebIdentity events recorded in one region.

    The query window ends at the current UTC time and starts ``lookback_hours``
    earlier. Results are gathered through the ``lookup_events`` paginator, filtered
    on the ``EventName`` attribute.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session used to create the CloudTrail client
    :type region: str
    :param region: The AWS region to query
    :type lookback_hours: int
    :param lookback_hours: Size of the query window, in hours, counted back from now
    :rtype: List[Dict[str, Any]]
    :return: Every entry found under the "Events" key of the returned pages
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Query window: [now - lookback_hours, now], expressed as naive UTC datetimes.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithWebIdentity events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    event_filter = {
        "AttributeKey": "EventName",
        "AttributeValue": "AssumeRoleWithWebIdentity",
    }
    pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[event_filter],
        StartTime=window_start,
        EndTime=window_end,
        PaginationConfig={
            "MaxItems": 10000,  # Cap the total so a busy account cannot trigger unbounded API calls
            "PageSize": 50,  # Per-page size requested from CloudTrail
        },
    )

    # Flatten the pages into one list; a page without "Events" contributes nothing.
    web_identity_events = [
        record for page in pages for record in page.get("Events", [])
    ]

    logger.info(
        f"Retrieved {len(web_identity_events)} AssumeRoleWithWebIdentity events from region '{region}'"
    )

    return web_identity_events
195
+
196
+
77
197
  @timeit
78
198
  def transform_assume_role_events_to_role_assumptions(
79
199
  events: List[Dict[str, Any]],
80
- region: str,
81
- current_aws_account_id: str,
82
200
  ) -> List[Dict[str, Any]]:
83
201
  """
84
202
  Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.
@@ -93,16 +211,12 @@ def transform_assume_role_events_to_role_assumptions(
93
211
 
94
212
  :type events: List[Dict[str, Any]]
95
213
  :param events: List of raw CloudTrail AssumeRole events from lookup_events API
96
- :type region: str
97
- :param region: The AWS region where events were retrieved from
98
- :type current_aws_account_id: str
99
- :param current_aws_account_id: The AWS account ID being synced
100
214
  :rtype: List[Dict[str, Any]]
101
215
  :return: List of aggregated role assumption relationships ready for loading
102
216
  """
103
217
  aggregated: Dict[tuple, Dict[str, Any]] = {}
104
218
  logger.info(
105
- f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions for region '{region}'"
219
+ f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions"
106
220
  )
107
221
 
108
222
  for event in events:
@@ -132,7 +246,6 @@ def transform_assume_role_events_to_role_assumptions(
132
246
 
133
247
  if key in aggregated:
134
248
  aggregated[key]["times_used"] += 1
135
- aggregated[key]["assume_role_count"] += 1 # All events are AssumeRole
136
249
  # Handle None values safely for time comparisons
137
250
  if event_time:
138
251
  existing_first = aggregated[key]["first_seen_in_time_window"]
@@ -149,15 +262,166 @@ def transform_assume_role_events_to_role_assumptions(
149
262
  "times_used": 1,
150
263
  "first_seen_in_time_window": event_time,
151
264
  "last_used": event_time,
152
- "event_types": ["AssumeRole"],
153
- "assume_role_count": 1,
154
- "saml_count": 0,
155
- "web_identity_count": 0,
156
265
  }
157
266
 
158
267
  return list(aggregated.values())
159
268
 
160
269
 
270
@timeit
def transform_saml_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithSAML events into aggregated role assumption relationships.

    Processing steps:
    1. Parse the JSON payload stored under each event's "CloudTrailEvent" key
    2. Skip events that lack ``responseElements.assumedRoleUser.arn`` or
       ``requestParameters.roleArn`` (for example failed calls, where CloudTrail
       records these sections as JSON null)
    3. Aggregate the remaining events by (source_principal, destination_principal)

    The source principal is the last "/"-separated segment of the assumed-role ARN
    (the session name); the destination principal is the requested role ARN.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithSAML events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated SAML role assumption relationships ready for loading.
             Each dict contains keys: source_principal_arn, destination_principal_arn,
             times_used, first_seen_in_time_window, last_used
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithSAML events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # A section that is present but null decodes to None, in which case the
        # default passed to dict.get() is NOT used. Coerce to {} so one failed
        # call in the window cannot abort the whole sync with AttributeError.
        response_elements = cloudtrail_event.get("responseElements") or {}
        assumed_role_user = response_elements.get("assumedRoleUser") or {}
        assumed_role_arn = assumed_role_user.get("arn")
        if not assumed_role_arn:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing assumedRoleUser.arn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        request_parameters = cloudtrail_event.get("requestParameters") or {}
        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing requestParameters.roleArn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        # Extract username from assumed role ARN: arn:aws:sts::account:assumed-role/RoleName/username
        source_principal = assumed_role_arn.split("/")[-1]
        event_time = event.get("EventTime")

        key = (source_principal, destination_principal)
        existing = aggregated.get(key)

        if existing is None:
            aggregated[key] = {
                "source_principal_arn": source_principal,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        existing["times_used"] += 1
        # Events without a timestamp still count toward times_used but cannot
        # move the first/last markers; stored markers may themselves be None.
        if event_time:
            first_seen = existing["first_seen_in_time_window"]
            last_used = existing["last_used"]
            if first_seen is None or event_time < first_seen:
                existing["first_seen_in_time_window"] = event_time
            if last_used is None or event_time > last_used:
                existing["last_used"] = event_time

    return list(aggregated.values())
340
+
341
+
342
@timeit
def transform_web_identity_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithWebIdentity events into aggregated role assumption relationships.

    Only events issued through GitHub Actions OIDC are kept: the user identity
    must have type "WebIdentityUser", a non-empty ``userName`` (used as the GitHub
    repository full name), and an ``identityProvider`` containing
    "token.actions.githubusercontent.com". All other events are skipped.

    Processing steps:
    1. Parse the JSON payload stored under each event's "CloudTrailEvent" key
    2. Filter to GitHub Actions web identity events that carry a requested role ARN
    3. Aggregate by (source_repo_fullname, destination_principal_arn)

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithWebIdentity events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated WebIdentity role assumption relationships ready for loading.
             Each dict contains keys: source_repo_fullname, destination_principal_arn,
             times_used, first_seen_in_time_window, last_used
    """
    github_aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithWebIdentity events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # Sections recorded as JSON null decode to None (dict.get's default is
        # not applied then), so coerce every section before reading from it.
        user_identity = cloudtrail_event.get("userIdentity") or {}
        if user_identity.get("type") != "WebIdentityUser":
            continue

        # GitHub repo fullname is directly in userName (e.g., "sublimagesec/sublimage")
        github_repo = user_identity.get("userName")
        if not github_repo:
            continue

        # Only process GitHub Actions events; other identity providers are skipped for now.
        identity_provider = user_identity.get("identityProvider") or ""
        if "token.actions.githubusercontent.com" not in identity_provider:
            continue

        # Read the role ARN only after the filters above, so a malformed event
        # from an unrelated provider can never break the sync.
        request_parameters = cloudtrail_event.get("requestParameters") or {}
        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping GitHub WebIdentity event due to missing requestParameters.roleArn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        event_time = event.get("EventTime")
        key = (github_repo, destination_principal)
        existing = github_aggregated.get(key)

        if existing is None:
            github_aggregated[key] = {
                "source_repo_fullname": github_repo,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        existing["times_used"] += 1
        # Events without a timestamp still count toward times_used but cannot
        # move the first/last markers; stored markers may themselves be None.
        if event_time:
            first_seen = existing["first_seen_in_time_window"]
            last_used = existing["last_used"]
            if first_seen is None or event_time < first_seen:
                existing["first_seen_in_time_window"] = event_time
            if last_used is None or event_time > last_used:
                existing["last_used"] = event_time

    return list(github_aggregated.values())
423
+
424
+
161
425
  @timeit
162
426
  def load_role_assumptions(
163
427
  neo4j_session: neo4j.Session,
@@ -169,7 +433,7 @@ def load_role_assumptions(
169
433
  Load aggregated role assumption relationships into Neo4j using MatchLink pattern.
170
434
 
171
435
  Creates direct ASSUMED_ROLE relationships with aggregated properties:
172
- (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastused, times_used, first_seen_in_time_window, last_seen}]->(AWSRole)
436
+ (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)
173
437
 
174
438
  Assumes that both source principals and destination roles already exist in the graph.
175
439
 
@@ -200,6 +464,90 @@ def load_role_assumptions(
200
464
  )
201
465
 
202
466
 
467
@timeit
def load_saml_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated SAML role assumptions to Neo4j through the MatchLink loader.

    The relationships are created according to ``AssumedRoleWithSAMLMatchLink``
    (ASSUMED_ROLE_WITH_SAML edges carrying lastupdated, times_used,
    first_seen_in_time_window and last_used). Nodes on both ends are expected
    to exist in the graph already; this function only links them.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated SAML role assumption records from the transform step
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced; passed as the sub-resource id
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag for tracking data freshness; stored as ``lastupdated``
    :rtype: None
    """
    # The links are scoped to the AWSAccount sub-resource identified by the account id.
    load_matchlinks(
        neo4j_session,
        AssumedRoleWithSAMLMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} SAML role assumption relationships"
    )
507
+
508
+
509
@timeit
def load_web_identity_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated WebIdentity role assumptions to Neo4j through the MatchLink loader.

    The relationships are created according to
    ``GitHubRepoAssumeRoleWithWebIdentityMatchLink``
    (ASSUMED_ROLE_WITH_WEB_IDENTITY edges carrying lastupdated, times_used,
    first_seen_in_time_window and last_used). Nodes on both ends are expected
    to exist in the graph already; this function only links them.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated WebIdentity role assumption records from the transform step
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced; passed as the sub-resource id
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag for tracking data freshness; stored as ``lastupdated``
    :rtype: None
    """
    # The links are scoped to the AWSAccount sub-resource identified by the account id.
    load_matchlinks(
        neo4j_session,
        GitHubRepoAssumeRoleWithWebIdentityMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} WebIdentity role assumption relationships"
    )
549
+
550
+
203
551
  def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
204
552
  """
205
553
  Convert an assumed role ARN to the original role ARN.
@@ -270,7 +618,7 @@ def sync_assume_role_events(
270
618
  4. Run cleanup after processing all regions
271
619
 
272
620
  The resulting graph contains direct relationships like:
273
- (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastused}]->(AWSRole)
621
+ (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)
274
622
 
275
623
  :type neo4j_session: neo4j.Session
276
624
  :param neo4j_session: The Neo4j session
@@ -316,8 +664,6 @@ def sync_assume_role_events(
316
664
  # Transform AssumeRole events to role assumptions
317
665
  assume_role_assumptions = transform_assume_role_events_to_role_assumptions(
318
666
  events=assume_role_events,
319
- region=region,
320
- current_aws_account_id=current_aws_account_id,
321
667
  )
322
668
 
323
669
  # Load AssumeRole assumptions for this region
@@ -341,6 +687,180 @@ def sync_assume_role_events(
341
687
  )
342
688
 
343
689
 
690
@timeit
def sync_saml_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithSAML events into ASSUMED_ROLE_WITH_SAML relationships.

    For every region, in order: fetch the events, aggregate them into role
    assumption records, and load those records into Neo4j. The sync is skipped
    entirely when no lookback period is configured.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; the lookback period is read from
        the "aws_cloudtrail_management_events_lookback_hours" key
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # A missing or falsy lookback (None, 0) disables this sync.
    if not lookback_hours:
        logger.info(
            "CloudTrail SAML management events sync skipped - no lookback period specified"
        )
        return

    region_count = len(regions)
    logger.info(
        f"Syncing SAML events for {region_count} regions with {lookback_hours} hour lookback period"
    )

    loaded_total = 0

    for region in regions:
        logger.info(f"Processing CloudTrail SAML events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithSAML events specifically for region {region}"
        )

        # Fetch -> transform -> load, one region at a time.
        region_events = get_saml_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_saml_role_events_to_role_assumptions(
            events=region_events,
        )
        load_saml_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_here = len(region_assumptions)
        loaded_total += loaded_here
        logger.info(
            f"Loaded {loaded_here} SAML role assumptions for region {region}"
        )

    logger.info(
        f"CloudTrail SAML management events sync completed successfully. "
        f"Processed {loaded_total} total SAML role assumption events across {region_count} regions."
    )
774
+
775
+
776
@timeit
def sync_web_identity_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithWebIdentity events into ASSUMED_ROLE_WITH_WEB_IDENTITY relationships.

    For every region, in order: fetch the events, aggregate them into role
    assumption records, and load those records into Neo4j. The sync is skipped
    entirely when no lookback period is configured.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; the lookback period is read from
        the "aws_cloudtrail_management_events_lookback_hours" key
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # A missing or falsy lookback (None, 0) disables this sync.
    if not lookback_hours:
        logger.info(
            "CloudTrail WebIdentity management events sync skipped - no lookback period specified"
        )
        return

    region_count = len(regions)
    logger.info(
        f"Syncing WebIdentity events for {region_count} regions with {lookback_hours} hour lookback period"
    )

    loaded_total = 0

    for region in regions:
        logger.info(f"Processing CloudTrail WebIdentity events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithWebIdentity events specifically for region {region}"
        )

        # Fetch -> transform -> load, one region at a time.
        region_events = get_web_identity_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_web_identity_role_events_to_role_assumptions(
            events=region_events,
        )
        load_web_identity_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_here = len(region_assumptions)
        loaded_total += loaded_here
        logger.info(
            f"Loaded {loaded_here} WebIdentity role assumptions for region {region}"
        )

    logger.info(
        f"CloudTrail WebIdentity management events sync completed successfully. "
        f"Processed {loaded_total} total WebIdentity role assumption events across {region_count} regions."
    )
862
+
863
+
344
864
  # Main sync function for when we decide to add more event types
345
865
  @timeit
346
866
  def sync(
@@ -353,7 +873,11 @@ def sync(
353
873
  ) -> None:
354
874
  """
355
875
  Main sync function for CloudTrail management events.
876
+
877
+ Syncs AssumeRole, AssumeRoleWithSAML, and AssumeRoleWithWebIdentity events to create separate
878
+ relationship types in the graph for security analysis.
356
879
  """
880
+ # Sync regular AssumeRole events
357
881
  sync_assume_role_events(
358
882
  neo4j_session=neo4j_session,
359
883
  boto3_session=boto3_session,
@@ -362,3 +886,23 @@ def sync(
362
886
  update_tag=update_tag,
363
887
  common_job_parameters=common_job_parameters,
364
888
  )
889
+
890
+ # Sync SAML AssumeRoleWithSAML events
891
+ sync_saml_role_events(
892
+ neo4j_session=neo4j_session,
893
+ boto3_session=boto3_session,
894
+ regions=regions,
895
+ current_aws_account_id=current_aws_account_id,
896
+ update_tag=update_tag,
897
+ common_job_parameters=common_job_parameters,
898
+ )
899
+
900
+ # Sync WebIdentity AssumeRoleWithWebIdentity events
901
+ sync_web_identity_role_events(
902
+ neo4j_session=neo4j_session,
903
+ boto3_session=boto3_session,
904
+ regions=regions,
905
+ current_aws_account_id=current_aws_account_id,
906
+ update_tag=update_tag,
907
+ common_job_parameters=common_job_parameters,
908
+ )