cartography 0.108.0rc1__py3-none-any.whl → 0.109.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (37) hide show
  1. cartography/_version.py +2 -2
  2. cartography/data/indexes.cypher +0 -2
  3. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  4. cartography/intel/aws/cloudtrail.py +17 -4
  5. cartography/intel/aws/cloudtrail_management_events.py +593 -16
  6. cartography/intel/aws/cloudwatch.py +73 -4
  7. cartography/intel/aws/ec2/subnets.py +37 -63
  8. cartography/intel/aws/ecr.py +55 -80
  9. cartography/intel/aws/elasticache.py +102 -79
  10. cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
  11. cartography/intel/aws/secretsmanager.py +62 -44
  12. cartography/intel/entra/groups.py +29 -1
  13. cartography/intel/gcp/__init__.py +10 -0
  14. cartography/intel/gcp/compute.py +19 -42
  15. cartography/models/aws/cloudtrail/management_events.py +95 -6
  16. cartography/models/aws/cloudtrail/trail.py +21 -0
  17. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  18. cartography/models/aws/ec2/subnets.py +65 -0
  19. cartography/models/aws/ecr/__init__.py +0 -0
  20. cartography/models/aws/ecr/image.py +41 -0
  21. cartography/models/aws/ecr/repository.py +72 -0
  22. cartography/models/aws/ecr/repository_image.py +95 -0
  23. cartography/models/aws/elasticache/__init__.py +0 -0
  24. cartography/models/aws/elasticache/cluster.py +65 -0
  25. cartography/models/aws/elasticache/topic.py +67 -0
  26. cartography/models/aws/secretsmanager/secret.py +106 -0
  27. cartography/models/entra/group.py +26 -0
  28. cartography/models/entra/user.py +6 -0
  29. cartography/models/gcp/compute/__init__.py +0 -0
  30. cartography/models/gcp/compute/vpc.py +50 -0
  31. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/METADATA +1 -1
  32. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/RECORD +36 -25
  33. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  34. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/WHEEL +0 -0
  35. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/entry_points.txt +0 -0
  36. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/licenses/LICENSE +0 -0
  37. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,12 @@ from cartography.client.core.tx import load_matchlinks
13
13
  from cartography.graph.job import GraphJob
14
14
  from cartography.intel.aws.ec2.util import get_botocore_config
15
15
  from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
16
+ from cartography.models.aws.cloudtrail.management_events import (
17
+ AssumedRoleWithSAMLMatchLink,
18
+ )
19
+ from cartography.models.aws.cloudtrail.management_events import (
20
+ GitHubRepoAssumeRoleWithWebIdentityMatchLink,
21
+ )
16
22
  from cartography.util import aws_handle_regions
17
23
  from cartography.util import timeit
18
24
 
@@ -74,11 +80,123 @@ def get_assume_role_events(
74
80
  return all_events
75
81
 
76
82
 
83
@timeit
@aws_handle_regions
def get_saml_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Retrieve the AssumeRoleWithSAML events CloudTrail recorded in one region.

    The lookup window ends at the current UTC time and starts
    ``lookback_hours`` hours earlier.

    :type boto3_session: boto3.Session
    :param boto3_session: Session used to build the CloudTrail client
    :type region: str
    :param region: AWS region whose events are looked up
    :type lookback_hours: int
    :param lookback_hours: Width of the lookup window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The entries of the "Events" list of every result page, in page order
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Lookup window: [now - lookback_hours, now], expressed as naive UTC datetimes.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithSAML events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    event_name_filter = {
        "AttributeKey": "EventName",
        "AttributeValue": "AssumeRoleWithSAML",
    }
    pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[event_name_filter],
        StartTime=window_start,
        EndTime=window_end,
        # At most 10000 events in total, requested 50 per page.
        PaginationConfig={"MaxItems": 10000, "PageSize": 50},
    )

    # Flatten the per-page "Events" lists; a page without the key adds nothing.
    saml_events = [event for page in pages for event in page.get("Events", [])]

    logger.info(
        f"Retrieved {len(saml_events)} AssumeRoleWithSAML events from region '{region}'"
    )

    return saml_events
138
+
139
+
140
@timeit
@aws_handle_regions
def get_web_identity_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Retrieve the AssumeRoleWithWebIdentity events CloudTrail recorded in one region.

    The lookup window ends at the current UTC time and starts
    ``lookback_hours`` hours earlier.

    :type boto3_session: boto3.Session
    :param boto3_session: Session used to build the CloudTrail client
    :type region: str
    :param region: AWS region whose events are looked up
    :type lookback_hours: int
    :param lookback_hours: Width of the lookup window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The entries of the "Events" list of every result page, in page order
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Lookup window: [now - lookback_hours, now], expressed as naive UTC datetimes.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithWebIdentity events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    event_name_filter = {
        "AttributeKey": "EventName",
        "AttributeValue": "AssumeRoleWithWebIdentity",
    }
    pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[event_name_filter],
        StartTime=window_start,
        EndTime=window_end,
        # At most 10000 events in total, requested 50 per page.
        PaginationConfig={"MaxItems": 10000, "PageSize": 50},
    )

    # Flatten the per-page "Events" lists; a page without the key adds nothing.
    web_identity_events = [
        event for page in pages for event in page.get("Events", [])
    ]

    logger.info(
        f"Retrieved {len(web_identity_events)} AssumeRoleWithWebIdentity events from region '{region}'"
    )

    return web_identity_events
195
+
196
+
77
197
  @timeit
78
198
  def transform_assume_role_events_to_role_assumptions(
79
199
  events: List[Dict[str, Any]],
80
- region: str,
81
- current_aws_account_id: str,
82
200
  ) -> List[Dict[str, Any]]:
83
201
  """
84
202
  Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.
@@ -93,16 +211,12 @@ def transform_assume_role_events_to_role_assumptions(
93
211
 
94
212
  :type events: List[Dict[str, Any]]
95
213
  :param events: List of raw CloudTrail AssumeRole events from lookup_events API
96
- :type region: str
97
- :param region: The AWS region where events were retrieved from
98
- :type current_aws_account_id: str
99
- :param current_aws_account_id: The AWS account ID being synced
100
214
  :rtype: List[Dict[str, Any]]
101
215
  :return: List of aggregated role assumption relationships ready for loading
102
216
  """
103
217
  aggregated: Dict[tuple, Dict[str, Any]] = {}
104
218
  logger.info(
105
- f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions for region '{region}'"
219
+ f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions"
106
220
  )
107
221
 
108
222
  for event in events:
@@ -132,7 +246,6 @@ def transform_assume_role_events_to_role_assumptions(
132
246
 
133
247
  if key in aggregated:
134
248
  aggregated[key]["times_used"] += 1
135
- aggregated[key]["assume_role_count"] += 1 # All events are AssumeRole
136
249
  # Handle None values safely for time comparisons
137
250
  if event_time:
138
251
  existing_first = aggregated[key]["first_seen_in_time_window"]
@@ -149,15 +262,168 @@ def transform_assume_role_events_to_role_assumptions(
149
262
  "times_used": 1,
150
263
  "first_seen_in_time_window": event_time,
151
264
  "last_used": event_time,
152
- "event_types": ["AssumeRole"],
153
- "assume_role_count": 1,
154
- "saml_count": 0,
155
- "web_identity_count": 0,
156
265
  }
157
266
 
158
267
  return list(aggregated.values())
159
268
 
160
269
 
270
@timeit
def transform_saml_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithSAML events into aggregated role assumption relationships.

    Processing steps:
    1. Parse the JSON document stored under each event's "CloudTrailEvent" key
    2. Skip events that carry no responseElements.assumedRoleUser.arn
    3. Aggregate the remaining events by (source_principal, destination_principal)

    The source principal is the last "/"-separated segment of the assumed-role ARN
    (arn:aws:sts::account:assumed-role/RoleName/username -> "username"); the
    destination principal is requestParameters.roleArn.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithSAML events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated SAML role assumption relationships ready for loading.
        Each dict contains keys: source_principal_arn, destination_principal_arn,
        times_used, first_seen_in_time_window, last_used
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithSAML events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # CloudTrail records an explicit JSON null for responseElements when the
        # call returned nothing (for example a failed call). dict.get's default
        # only applies to a missing key, so coerce None to {} to reach the skip
        # path below instead of raising AttributeError.
        response_elements = cloudtrail_event.get("responseElements") or {}
        assumed_role_user = response_elements.get("assumedRoleUser") or {}
        assumed_role_arn = assumed_role_user.get("arn")

        if not assumed_role_arn:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing assumedRoleUser.arn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        # Extract username from assumed role ARN: arn:aws:sts::account:assumed-role/RoleName/username
        source_principal = assumed_role_arn.split("/")[-1]
        destination_principal = cloudtrail_event["requestParameters"]["roleArn"]
        event_time = event.get("EventTime")

        key = (source_principal, destination_principal)
        record = aggregated.get(key)

        if record is None:
            # First event seen for this pair.
            aggregated[key] = {
                "source_principal_arn": source_principal,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1

        # An event without a timestamp still counts but cannot move the window.
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(aggregated.values())
340
+
341
+
342
@timeit
def transform_web_identity_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithWebIdentity events into aggregated role assumption relationships.

    Only GitHub Actions events are processed: the event's userIdentity must have
    type "WebIdentityUser", a non-empty userName, and an identityProvider containing
    "token.actions.githubusercontent.com". Every other event is skipped.

    Processing steps:
    1. Parse the JSON document stored under each event's "CloudTrailEvent" key
    2. Derive the GitHub repository fullname from userIdentity.userName
    3. Aggregate events by (source_repo_fullname, destination_principal) pairs

    Events whose userName does not yield a repository fullname are skipped, so no
    record with an empty source_repo_fullname is ever returned.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithWebIdentity events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated WebIdentity role assumption relationships ready for loading.
        Each dict contains keys: source_repo_fullname, destination_principal_arn,
        times_used, first_seen_in_time_window, last_used
    """
    github_aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithWebIdentity events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # `or {}` also covers an explicit JSON null, which dict.get's default does not.
        user_identity = cloudtrail_event.get("userIdentity") or {}
        user_name = user_identity.get("userName")

        if user_identity.get("type") != "WebIdentityUser" or not user_name:
            continue

        # Only process GitHub Actions events; other identity providers are skipped for now.
        identity_provider = user_identity.get("identityProvider") or "unknown"
        if "token.actions.githubusercontent.com" not in identity_provider:
            continue

        # userName format: "repo:{organization}/{repository}:{context}"
        github_repo = _extract_github_repo_from_username(user_name)
        if not github_repo:
            # The helper returns "" for names it cannot parse. Aggregating under
            # an empty repository name would emit a meaningless record.
            logger.debug(
                f"Skipping GitHub WebIdentity event with unparseable userName. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        # Read the role ARN only once the event is known to be relevant.
        destination_principal = cloudtrail_event["requestParameters"]["roleArn"]
        event_time = event.get("EventTime")

        key = (github_repo, destination_principal)
        record = github_aggregated.get(key)

        if record is None:
            # First event seen for this pair.
            github_aggregated[key] = {
                "source_repo_fullname": github_repo,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1

        # An event without a timestamp still counts but cannot move the window.
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(github_aggregated.values())
425
+
426
+
161
427
  @timeit
162
428
  def load_role_assumptions(
163
429
  neo4j_session: neo4j.Session,
@@ -169,7 +435,7 @@ def load_role_assumptions(
169
435
  Load aggregated role assumption relationships into Neo4j using MatchLink pattern.
170
436
 
171
437
  Creates direct ASSUMED_ROLE relationships with aggregated properties:
172
- (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastused, times_used, first_seen_in_time_window, last_seen}]->(AWSRole)
438
+ (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)
173
439
 
174
440
  Assumes that both source principals and destination roles already exist in the graph.
175
441
 
@@ -200,6 +466,90 @@ def load_role_assumptions(
200
466
  )
201
467
 
202
468
 
469
@timeit
def load_saml_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated SAML role assumptions to Neo4j through the MatchLink loader.

    The relationships are created by ``load_matchlinks`` according to the
    ``AssumedRoleWithSAMLMatchLink`` schema, scoped to the AWSAccount identified by
    ``current_aws_account_id`` and stamped with ``aws_update_tag`` as ``lastupdated``.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated SAML role assumption records from the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag for tracking data freshness
    :rtype: None
    """
    # Sub-resource scoping passed through to the loader.
    account_scope = {
        "_sub_resource_label": "AWSAccount",
        "_sub_resource_id": current_aws_account_id,
    }

    load_matchlinks(
        neo4j_session,
        AssumedRoleWithSAMLMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        **account_scope,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} SAML role assumption relationships"
    )
509
+
510
+
511
@timeit
def load_web_identity_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated WebIdentity role assumptions to Neo4j through the MatchLink loader.

    The relationships are created by ``load_matchlinks`` according to the
    ``GitHubRepoAssumeRoleWithWebIdentityMatchLink`` schema, scoped to the AWSAccount
    identified by ``current_aws_account_id`` and stamped with ``aws_update_tag`` as
    ``lastupdated``.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated WebIdentity role assumption records from the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag for tracking data freshness
    :rtype: None
    """
    # Sub-resource scoping passed through to the loader.
    account_scope = {
        "_sub_resource_label": "AWSAccount",
        "_sub_resource_id": current_aws_account_id,
    }

    load_matchlinks(
        neo4j_session,
        GitHubRepoAssumeRoleWithWebIdentityMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        **account_scope,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} WebIdentity role assumption relationships"
    )
551
+
552
+
203
553
  def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
204
554
  """
205
555
  Convert an assumed role ARN to the original role ARN.
@@ -224,6 +574,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
224
574
  return assumed_role_arn
225
575
 
226
576
 
577
+ def _extract_github_repo_from_username(user_name: str) -> str:
578
+ """
579
+ Extract GitHub repository fullname from CloudTrail userName field.
580
+
581
+ GitHub Actions CloudTrail events have userName in the format:
582
+ "repo:{organization}/{repository}:{context}"
583
+ """
584
+ if not user_name:
585
+ return ""
586
+
587
+ parts = user_name.split(":")
588
+
589
+ # Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
590
+ if len(parts) < 3 or parts[0] != "repo":
591
+ return ""
592
+
593
+ # Extract "{organization}/{repository}"
594
+ repo_fullname = parts[1]
595
+
596
+ # Validate it looks like "{organization}/{repository}" format
597
+ if repo_fullname.count("/") != 1:
598
+ return ""
599
+
600
+ # Ensure both organization and repo exist
601
+ owner, repo = repo_fullname.split("/")
602
+ if not owner or not repo:
603
+ return ""
604
+
605
+ return repo_fullname
606
+
607
+
227
608
  @timeit
228
609
  def cleanup(
229
610
  neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
@@ -270,7 +651,7 @@ def sync_assume_role_events(
270
651
  4. Run cleanup after processing all regions
271
652
 
272
653
  The resulting graph contains direct relationships like:
273
- (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastused}]->(AWSRole)
654
+ (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)
274
655
 
275
656
  :type neo4j_session: neo4j.Session
276
657
  :param neo4j_session: The Neo4j session
@@ -316,8 +697,6 @@ def sync_assume_role_events(
316
697
  # Transform AssumeRole events to role assumptions
317
698
  assume_role_assumptions = transform_assume_role_events_to_role_assumptions(
318
699
  events=assume_role_events,
319
- region=region,
320
- current_aws_account_id=current_aws_account_id,
321
700
  )
322
701
 
323
702
  # Load AssumeRole assumptions for this region
@@ -341,6 +720,180 @@ def sync_assume_role_events(
341
720
  )
342
721
 
343
722
 
723
@timeit
def sync_saml_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithSAML events into the graph, one region at a time.

    For every region the function fetches the events, aggregates them into role
    assumption records, and loads those records into Neo4j. Nothing is done when
    ``common_job_parameters`` has no truthy
    "aws_cloudtrail_management_events_lookback_hours" entry.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; supplies the lookback period in hours
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # A missing, None, or zero lookback disables this sync.
    if not lookback_hours:
        logger.info(
            "CloudTrail SAML management events sync skipped - no lookback period specified"
        )
        return

    logger.info(
        f"Syncing SAML events for {len(regions)} regions with {lookback_hours} hour lookback period"
    )

    # NOTE(review): no cleanup step is run here for this relationship type -
    # confirm whether stale relationships are removed elsewhere.
    loaded_per_region: List[int] = []

    for region in regions:
        logger.info(f"Processing CloudTrail SAML events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithSAML events specifically for region {region}"
        )

        # Fetch -> transform -> load for this region.
        raw_events = get_saml_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_saml_role_events_to_role_assumptions(
            events=raw_events,
        )
        load_saml_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_per_region.append(len(region_assumptions))
        logger.info(
            f"Loaded {len(region_assumptions)} SAML role assumptions for region {region}"
        )

    grand_total = sum(loaded_per_region)
    logger.info(
        f"CloudTrail SAML management events sync completed successfully. "
        f"Processed {grand_total} total SAML role assumption events across {len(regions)} regions."
    )
807
+
808
+
809
@timeit
def sync_web_identity_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithWebIdentity events into the graph, one region at a time.

    For every region the function fetches the events, aggregates them into role
    assumption records, and loads those records into Neo4j. Nothing is done when
    ``common_job_parameters`` has no truthy
    "aws_cloudtrail_management_events_lookback_hours" entry.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; supplies the lookback period in hours
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # A missing, None, or zero lookback disables this sync.
    if not lookback_hours:
        logger.info(
            "CloudTrail WebIdentity management events sync skipped - no lookback period specified"
        )
        return

    logger.info(
        f"Syncing WebIdentity events for {len(regions)} regions with {lookback_hours} hour lookback period"
    )

    # NOTE(review): no cleanup step is run here for this relationship type -
    # confirm whether stale relationships are removed elsewhere.
    loaded_per_region: List[int] = []

    for region in regions:
        logger.info(f"Processing CloudTrail WebIdentity events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithWebIdentity events specifically for region {region}"
        )

        # Fetch -> transform -> load for this region.
        raw_events = get_web_identity_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_web_identity_role_events_to_role_assumptions(
            events=raw_events,
        )
        load_web_identity_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_per_region.append(len(region_assumptions))
        logger.info(
            f"Loaded {len(region_assumptions)} WebIdentity role assumptions for region {region}"
        )

    grand_total = sum(loaded_per_region)
    logger.info(
        f"CloudTrail WebIdentity management events sync completed successfully. "
        f"Processed {grand_total} total WebIdentity role assumption events across {len(regions)} regions."
    )
895
+
896
+
344
897
  # Main sync function for when we decide to add more event types
345
898
  @timeit
346
899
  def sync(
@@ -353,7 +906,11 @@ def sync(
353
906
  ) -> None:
354
907
  """
355
908
  Main sync function for CloudTrail management events.
909
+
910
+ Syncs AssumeRole, AssumeRoleWithSAML, and AssumeRoleWithWebIdentity events to create separate
911
+ relationship types in the graph for security analysis.
356
912
  """
913
+ # Sync regular AssumeRole events
357
914
  sync_assume_role_events(
358
915
  neo4j_session=neo4j_session,
359
916
  boto3_session=boto3_session,
@@ -362,3 +919,23 @@ def sync(
362
919
  update_tag=update_tag,
363
920
  common_job_parameters=common_job_parameters,
364
921
  )
922
+
923
+ # Sync SAML AssumeRoleWithSAML events
924
+ sync_saml_role_events(
925
+ neo4j_session=neo4j_session,
926
+ boto3_session=boto3_session,
927
+ regions=regions,
928
+ current_aws_account_id=current_aws_account_id,
929
+ update_tag=update_tag,
930
+ common_job_parameters=common_job_parameters,
931
+ )
932
+
933
+ # Sync WebIdentity AssumeRoleWithWebIdentity events
934
+ sync_web_identity_role_events(
935
+ neo4j_session=neo4j_session,
936
+ boto3_session=boto3_session,
937
+ regions=regions,
938
+ current_aws_account_id=current_aws_account_id,
939
+ update_tag=update_tag,
940
+ common_job_parameters=common_job_parameters,
941
+ )