cartography 0.108.0rc1__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (81) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +14 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -17
  5. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  6. cartography/intel/aws/cloudtrail.py +17 -4
  7. cartography/intel/aws/cloudtrail_management_events.py +614 -16
  8. cartography/intel/aws/cloudwatch.py +73 -4
  9. cartography/intel/aws/ec2/subnets.py +37 -63
  10. cartography/intel/aws/ecr.py +55 -80
  11. cartography/intel/aws/elasticache.py +102 -79
  12. cartography/intel/aws/eventbridge.py +91 -0
  13. cartography/intel/aws/glue.py +117 -0
  14. cartography/intel/aws/identitycenter.py +71 -23
  15. cartography/intel/aws/kms.py +160 -200
  16. cartography/intel/aws/lambda_function.py +206 -190
  17. cartography/intel/aws/rds.py +243 -458
  18. cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
  19. cartography/intel/aws/resources.py +4 -0
  20. cartography/intel/aws/route53.py +334 -332
  21. cartography/intel/aws/secretsmanager.py +62 -44
  22. cartography/intel/entra/groups.py +29 -1
  23. cartography/intel/gcp/__init__.py +10 -0
  24. cartography/intel/gcp/compute.py +19 -42
  25. cartography/intel/trivy/__init__.py +73 -13
  26. cartography/intel/trivy/scanner.py +115 -92
  27. cartography/models/aws/cloudtrail/management_events.py +95 -6
  28. cartography/models/aws/cloudtrail/trail.py +21 -0
  29. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  30. cartography/models/aws/ec2/subnets.py +65 -0
  31. cartography/models/aws/ecr/__init__.py +0 -0
  32. cartography/models/aws/ecr/image.py +41 -0
  33. cartography/models/aws/ecr/repository.py +72 -0
  34. cartography/models/aws/ecr/repository_image.py +95 -0
  35. cartography/models/aws/elasticache/__init__.py +0 -0
  36. cartography/models/aws/elasticache/cluster.py +65 -0
  37. cartography/models/aws/elasticache/topic.py +67 -0
  38. cartography/models/aws/eventbridge/__init__.py +0 -0
  39. cartography/models/aws/eventbridge/rule.py +77 -0
  40. cartography/models/aws/glue/__init__.py +0 -0
  41. cartography/models/aws/glue/connection.py +51 -0
  42. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  43. cartography/models/aws/kms/__init__.py +0 -0
  44. cartography/models/aws/kms/aliases.py +86 -0
  45. cartography/models/aws/kms/grants.py +65 -0
  46. cartography/models/aws/kms/keys.py +88 -0
  47. cartography/models/aws/lambda_function/__init__.py +0 -0
  48. cartography/models/aws/lambda_function/alias.py +74 -0
  49. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  50. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  51. cartography/models/aws/lambda_function/layer.py +72 -0
  52. cartography/models/aws/rds/__init__.py +0 -0
  53. cartography/models/aws/rds/cluster.py +89 -0
  54. cartography/models/aws/rds/instance.py +154 -0
  55. cartography/models/aws/rds/snapshot.py +108 -0
  56. cartography/models/aws/rds/subnet_group.py +101 -0
  57. cartography/models/aws/route53/__init__.py +0 -0
  58. cartography/models/aws/route53/dnsrecord.py +214 -0
  59. cartography/models/aws/route53/nameserver.py +63 -0
  60. cartography/models/aws/route53/subzone.py +40 -0
  61. cartography/models/aws/route53/zone.py +47 -0
  62. cartography/models/aws/secretsmanager/secret.py +106 -0
  63. cartography/models/entra/group.py +26 -0
  64. cartography/models/entra/user.py +6 -0
  65. cartography/models/gcp/compute/__init__.py +0 -0
  66. cartography/models/gcp/compute/vpc.py +50 -0
  67. cartography/util.py +8 -1
  68. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/METADATA +2 -2
  69. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/RECORD +73 -44
  70. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  71. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  72. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  73. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  74. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  75. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  76. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  77. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  78. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/WHEEL +0 -0
  79. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/entry_points.txt +0 -0
  80. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/licenses/LICENSE +0 -0
  81. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,12 @@ from cartography.client.core.tx import load_matchlinks
13
13
  from cartography.graph.job import GraphJob
14
14
  from cartography.intel.aws.ec2.util import get_botocore_config
15
15
  from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
16
+ from cartography.models.aws.cloudtrail.management_events import (
17
+ AssumedRoleWithSAMLMatchLink,
18
+ )
19
+ from cartography.models.aws.cloudtrail.management_events import (
20
+ GitHubRepoAssumeRoleWithWebIdentityMatchLink,
21
+ )
16
22
  from cartography.util import aws_handle_regions
17
23
  from cartography.util import timeit
18
24
 
@@ -74,11 +80,123 @@ def get_assume_role_events(
74
80
  return all_events
75
81
 
76
82
 
83
@timeit
@aws_handle_regions
def get_saml_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Look up AssumeRoleWithSAML management events in CloudTrail for one region.

    The query window ends at the current UTC time and starts ``lookback_hours``
    earlier. Results are read through the ``lookup_events`` paginator and
    flattened into a single list.

    :type boto3_session: boto3.Session
    :param boto3_session: Session used to build the regional CloudTrail client
    :type region: str
    :param region: AWS region whose event history is queried
    :type lookback_hours: int
    :param lookback_hours: Width of the query window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The ``Events`` entries of every page returned by the paginator
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Window is [now - lookback_hours, now], expressed as naive UTC datetimes.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithSAML events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    # Only events whose EventName is exactly AssumeRoleWithSAML are requested.
    event_name_filter = {
        "AttributeKey": "EventName",
        "AttributeValue": "AssumeRoleWithSAML",
    }
    # MaxItems caps the total number of events pulled; PageSize is the
    # per-request page size passed to the paginator.
    pagination = {"MaxItems": 10000, "PageSize": 50}

    pages = client_pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[event_name_filter],
        StartTime=window_start,
        EndTime=window_end,
        PaginationConfig=pagination,
    )

    # Flatten every page's "Events" list (absent key -> nothing added).
    saml_events = [
        record for page in client_pages for record in page.get("Events", [])
    ]

    logger.info(
        f"Retrieved {len(saml_events)} AssumeRoleWithSAML events from region '{region}'"
    )

    return saml_events
138
+
139
+
140
@timeit
@aws_handle_regions
def get_web_identity_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Look up AssumeRoleWithWebIdentity management events in CloudTrail for one region.

    The query window ends at the current UTC time and starts ``lookback_hours``
    earlier. Results are read through the ``lookup_events`` paginator and
    flattened into a single list.

    :type boto3_session: boto3.Session
    :param boto3_session: Session used to build the regional CloudTrail client
    :type region: str
    :param region: AWS region whose event history is queried
    :type lookback_hours: int
    :param lookback_hours: Width of the query window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The ``Events`` entries of every page returned by the paginator
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Window is [now - lookback_hours, now], expressed as naive UTC datetimes.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithWebIdentity events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    # Only events whose EventName is exactly AssumeRoleWithWebIdentity are requested.
    event_name_filter = {
        "AttributeKey": "EventName",
        "AttributeValue": "AssumeRoleWithWebIdentity",
    }
    # MaxItems caps the total number of events pulled; PageSize is the
    # per-request page size passed to the paginator.
    pagination = {"MaxItems": 10000, "PageSize": 50}

    event_pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[event_name_filter],
        StartTime=window_start,
        EndTime=window_end,
        PaginationConfig=pagination,
    )

    # Flatten every page's "Events" list (absent key -> nothing added).
    web_identity_events = [
        record for page in event_pages for record in page.get("Events", [])
    ]

    logger.info(
        f"Retrieved {len(web_identity_events)} AssumeRoleWithWebIdentity events from region '{region}'"
    )

    return web_identity_events
195
+
196
+
77
197
  @timeit
78
198
  def transform_assume_role_events_to_role_assumptions(
79
199
  events: List[Dict[str, Any]],
80
- region: str,
81
- current_aws_account_id: str,
82
200
  ) -> List[Dict[str, Any]]:
83
201
  """
84
202
  Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.
@@ -93,22 +211,25 @@ def transform_assume_role_events_to_role_assumptions(
93
211
 
94
212
  :type events: List[Dict[str, Any]]
95
213
  :param events: List of raw CloudTrail AssumeRole events from lookup_events API
96
- :type region: str
97
- :param region: The AWS region where events were retrieved from
98
- :type current_aws_account_id: str
99
- :param current_aws_account_id: The AWS account ID being synced
100
214
  :rtype: List[Dict[str, Any]]
101
215
  :return: List of aggregated role assumption relationships ready for loading
102
216
  """
103
217
  aggregated: Dict[tuple, Dict[str, Any]] = {}
104
218
  logger.info(
105
- f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions for region '{region}'"
219
+ f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions"
106
220
  )
107
221
 
108
222
  for event in events:
109
223
 
110
224
  cloudtrail_event = json.loads(event["CloudTrailEvent"])
111
225
 
226
+ # Skip events with null requestParameters since we can't extract roleArn
227
+ if not cloudtrail_event.get("requestParameters"):
228
+ logger.debug(
229
+ f"Skipping CloudTrail AssumeRole event due to missing requestParameters. Event: {event.get('EventId', 'unknown')}"
230
+ )
231
+ continue
232
+
112
233
  if cloudtrail_event.get("userIdentity", {}).get("arn"):
113
234
  source_principal = cloudtrail_event["userIdentity"]["arn"]
114
235
  destination_principal = cloudtrail_event["requestParameters"]["roleArn"]
@@ -132,7 +253,6 @@ def transform_assume_role_events_to_role_assumptions(
132
253
 
133
254
  if key in aggregated:
134
255
  aggregated[key]["times_used"] += 1
135
- aggregated[key]["assume_role_count"] += 1 # All events are AssumeRole
136
256
  # Handle None values safely for time comparisons
137
257
  if event_time:
138
258
  existing_first = aggregated[key]["first_seen_in_time_window"]
@@ -149,15 +269,182 @@ def transform_assume_role_events_to_role_assumptions(
149
269
  "times_used": 1,
150
270
  "first_seen_in_time_window": event_time,
151
271
  "last_used": event_time,
152
- "event_types": ["AssumeRole"],
153
- "assume_role_count": 1,
154
- "saml_count": 0,
155
- "web_identity_count": 0,
156
272
  }
157
273
 
158
274
  return list(aggregated.values())
159
275
 
160
276
 
277
@timeit
def transform_saml_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithSAML events into aggregated role assumption relationships.

    For every event the embedded ``CloudTrailEvent`` JSON document is parsed, the
    source is taken from the last ``/``-separated segment of
    ``responseElements.assumedRoleUser.arn`` and the destination from
    ``requestParameters.roleArn``. Events are then aggregated per
    (source, destination) pair.

    Events are skipped (with a debug log) when ``requestParameters``,
    ``responseElements.assumedRoleUser.arn`` or ``requestParameters.roleArn`` is
    missing or null. A JSON ``null`` for ``responseElements`` or
    ``assumedRoleUser`` is treated the same as an absent key.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithSAML events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated SAML role assumption relationships ready for loading.
             Each dict contains keys: source_principal_arn, destination_principal_arn,
             times_used, first_seen_in_time_window, last_used
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithSAML events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])
        event_id = event.get("EventId", "unknown")

        # Skip events with null requestParameters since we can't extract roleArn
        request_parameters = cloudtrail_event.get("requestParameters")
        if not request_parameters:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing requestParameters. Event: {event_id}"
            )
            continue

        # `or {}` rather than a .get() default: the key can be present with an
        # explicit JSON null, in which case .get(key, {}) would return None and
        # the chained lookup would raise AttributeError.
        response_elements = cloudtrail_event.get("responseElements") or {}
        assumed_role_user = response_elements.get("assumedRoleUser") or {}
        assumed_role_arn = assumed_role_user.get("arn")
        if not assumed_role_arn:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing assumedRoleUser.arn. Event: {event_id}"
            )
            continue

        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing roleArn. Event: {event_id}"
            )
            continue

        # Extract username from assumed role ARN: arn:aws:sts::account:assumed-role/RoleName/username
        source_principal = assumed_role_arn.split("/")[-1]

        event_time = event.get("EventTime")
        key = (source_principal, destination_principal)

        record = aggregated.get(key)
        if record is None:
            aggregated[key] = {
                "source_principal_arn": source_principal,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1
        # Handle None values safely for time comparisons
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(aggregated.values())
354
+
355
+
356
@timeit
def transform_web_identity_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithWebIdentity events into aggregated role assumption relationships.

    Only events that satisfy all of the following are aggregated; every other
    event is skipped:

    1. ``requestParameters`` is present and carries a ``roleArn``
    2. ``userIdentity.type`` is ``WebIdentityUser`` and ``userIdentity.userName`` is set
    3. ``userIdentity.identityProvider`` contains ``token.actions.githubusercontent.com``
    4. ``userName`` parses to an ``{organization}/{repository}`` name

    A JSON ``null`` for ``userIdentity`` or ``identityProvider`` is treated the
    same as an absent key. Matching events are aggregated per
    (repository fullname, destination role ARN) pair.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithWebIdentity events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated WebIdentity role assumption relationships ready for loading.
             Each dict contains keys: source_repo_fullname, destination_principal_arn,
             times_used, first_seen_in_time_window, last_used
    """
    github_aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithWebIdentity events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])
        event_id = event.get("EventId", "unknown")

        # Skip events with null requestParameters since we can't extract roleArn
        request_parameters = cloudtrail_event.get("requestParameters")
        if not request_parameters:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithWebIdentity event due to missing requestParameters. Event: {event_id}"
            )
            continue

        # `or {}` / `or "unknown"`: a key present with JSON null must behave
        # like a missing key instead of raising on the next lookup.
        user_identity = cloudtrail_event.get("userIdentity") or {}
        user_name = user_identity.get("userName")
        if user_identity.get("type") != "WebIdentityUser" or not user_name:
            continue

        # Only process GitHub Actions events; other providers are skipped for now
        identity_provider = user_identity.get("identityProvider") or "unknown"
        if "token.actions.githubusercontent.com" not in identity_provider:
            continue

        # userName format: "repo:{organization}/{repository}:{context}".
        # The helper returns "" when the value does not match; such events
        # cannot be tied to a repository, so they are not aggregated.
        github_repo = _extract_github_repo_from_username(user_name)
        if not github_repo:
            logger.debug(
                f"Skipping GitHub WebIdentity event with unparseable userName. Event: {event_id}"
            )
            continue

        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithWebIdentity event due to missing roleArn. Event: {event_id}"
            )
            continue

        event_time = event.get("EventTime")
        key = (github_repo, destination_principal)

        record = github_aggregated.get(key)
        if record is None:
            github_aggregated[key] = {
                "source_repo_fullname": github_repo,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1
        # Handle None values safely for time comparisons
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    # Return aggregated relationships directly
    return list(github_aggregated.values())
446
+
447
+
161
448
  @timeit
162
449
  def load_role_assumptions(
163
450
  neo4j_session: neo4j.Session,
@@ -169,7 +456,7 @@ def load_role_assumptions(
169
456
  Load aggregated role assumption relationships into Neo4j using MatchLink pattern.
170
457
 
171
458
  Creates direct ASSUMED_ROLE relationships with aggregated properties:
172
- (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastused, times_used, first_seen_in_time_window, last_seen}]->(AWSRole)
459
+ (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)
173
460
 
174
461
  Assumes that both source principals and destination roles already exist in the graph.
175
462
 
@@ -200,6 +487,90 @@ def load_role_assumptions(
200
487
  )
201
488
 
202
489
 
490
@timeit
def load_saml_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated SAML role assumptions to Neo4j as MatchLink relationships.

    The records are handed to ``load_matchlinks`` together with an
    ``AssumedRoleWithSAMLMatchLink`` schema, scoped to the ``AWSAccount``
    sub-resource identified by ``current_aws_account_id``. Intended result:
    (AWSRole)-[:ASSUMED_ROLE_WITH_SAML {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)

    Both endpoints of each relationship are expected to exist in the graph already.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: Session used for the write
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Output of the SAML transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Value passed as ``lastupdated`` for the loaded relationships
    :rtype: None
    """
    # MatchLink connects nodes that already exist; it does not create nodes here.
    load_matchlinks(
        neo4j_session,
        AssumedRoleWithSAMLMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} SAML role assumption relationships"
    )
530
+
531
+
532
@timeit
def load_web_identity_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated WebIdentity role assumptions to Neo4j as MatchLink relationships.

    The records are handed to ``load_matchlinks`` together with a
    ``GitHubRepoAssumeRoleWithWebIdentityMatchLink`` schema, scoped to the
    ``AWSAccount`` sub-resource identified by ``current_aws_account_id``. Intended result:
    (GitHubRepository)-[:ASSUMED_ROLE_WITH_WEB_IDENTITY {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)

    Both endpoints of each relationship are expected to exist in the graph already.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: Session used for the write
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Output of the WebIdentity transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Value passed as ``lastupdated`` for the loaded relationships
    :rtype: None
    """
    # MatchLink connects nodes that already exist; it does not create nodes here.
    load_matchlinks(
        neo4j_session,
        GitHubRepoAssumeRoleWithWebIdentityMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} WebIdentity role assumption relationships"
    )
572
+
573
+
203
574
  def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
204
575
  """
205
576
  Convert an assumed role ARN to the original role ARN.
@@ -224,6 +595,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
224
595
  return assumed_role_arn
225
596
 
226
597
 
598
+ def _extract_github_repo_from_username(user_name: str) -> str:
599
+ """
600
+ Extract GitHub repository fullname from CloudTrail userName field.
601
+
602
+ GitHub Actions CloudTrail events have userName in the format:
603
+ "repo:{organization}/{repository}:{context}"
604
+ """
605
+ if not user_name:
606
+ return ""
607
+
608
+ parts = user_name.split(":")
609
+
610
+ # Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
611
+ if len(parts) < 3 or parts[0] != "repo":
612
+ return ""
613
+
614
+ # Extract "{organization}/{repository}"
615
+ repo_fullname = parts[1]
616
+
617
+ # Validate it looks like "{organization}/{repository}" format
618
+ if repo_fullname.count("/") != 1:
619
+ return ""
620
+
621
+ # Ensure both organization and repo exist
622
+ owner, repo = repo_fullname.split("/")
623
+ if not owner or not repo:
624
+ return ""
625
+
626
+ return repo_fullname
627
+
628
+
227
629
  @timeit
228
630
  def cleanup(
229
631
  neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
@@ -270,7 +672,7 @@ def sync_assume_role_events(
270
672
  4. Run cleanup after processing all regions
271
673
 
272
674
  The resulting graph contains direct relationships like:
273
- (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastused}]->(AWSRole)
675
+ (AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)
274
676
 
275
677
  :type neo4j_session: neo4j.Session
276
678
  :param neo4j_session: The Neo4j session
@@ -316,8 +718,6 @@ def sync_assume_role_events(
316
718
  # Transform AssumeRole events to role assumptions
317
719
  assume_role_assumptions = transform_assume_role_events_to_role_assumptions(
318
720
  events=assume_role_events,
319
- region=region,
320
- current_aws_account_id=current_aws_account_id,
321
721
  )
322
722
 
323
723
  # Load AssumeRole assumptions for this region
@@ -341,6 +741,180 @@ def sync_assume_role_events(
341
741
  )
342
742
 
343
743
 
744
@timeit
def sync_saml_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithSAML events into ASSUMED_ROLE_WITH_SAML relationships.

    For each region, in order: fetch the events, transform them into aggregated
    role assumption records, and load those records into Neo4j. The whole sync
    is skipped when ``common_job_parameters`` carries no (or a falsy)
    ``aws_cloudtrail_management_events_lookback_hours`` value.

    The resulting graph contains direct relationships like:
    (AWSRole)-[:ASSUMED_ROLE_WITH_SAML {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Shared job parameters; the lookback window is read from here
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )
    if not lookback_hours:
        # No lookback window configured: nothing to query.
        logger.info(
            "CloudTrail SAML management events sync skipped - no lookback period specified"
        )
        return

    region_count = len(regions)
    logger.info(
        f"Syncing SAML events for {region_count} regions with {lookback_hours} hour lookback period"
    )

    loaded_total = 0

    # Each region is fetched, transformed and loaded before moving to the next.
    for region in regions:
        logger.info(f"Processing CloudTrail SAML events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithSAML events specifically for region {region}"
        )

        raw_events = get_saml_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_saml_role_events_to_role_assumptions(
            events=raw_events,
        )
        load_saml_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        region_loaded = len(region_assumptions)
        loaded_total += region_loaded
        logger.info(
            f"Loaded {region_loaded} SAML role assumptions for region {region}"
        )

    logger.info(
        "CloudTrail SAML management events sync completed successfully. "
        f"Processed {loaded_total} total SAML role assumption events across {region_count} regions."
    )
828
+
829
+
830
@timeit
def sync_web_identity_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithWebIdentity events into ASSUMED_ROLE_WITH_WEB_IDENTITY relationships.

    For each region, in order: fetch the events, transform them into aggregated
    role assumption records, and load those records into Neo4j. The whole sync
    is skipped when ``common_job_parameters`` carries no (or a falsy)
    ``aws_cloudtrail_management_events_lookback_hours`` value.

    The resulting graph contains direct relationships like:
    (GitHubRepository)-[:ASSUMED_ROLE_WITH_WEB_IDENTITY {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Shared job parameters; the lookback window is read from here
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )
    if not lookback_hours:
        # No lookback window configured: nothing to query.
        logger.info(
            "CloudTrail WebIdentity management events sync skipped - no lookback period specified"
        )
        return

    region_count = len(regions)
    logger.info(
        f"Syncing WebIdentity events for {region_count} regions with {lookback_hours} hour lookback period"
    )

    loaded_total = 0

    # Each region is fetched, transformed and loaded before moving to the next.
    for region in regions:
        logger.info(f"Processing CloudTrail WebIdentity events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithWebIdentity events specifically for region {region}"
        )

        raw_events = get_web_identity_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_web_identity_role_events_to_role_assumptions(
            events=raw_events,
        )
        load_web_identity_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        region_loaded = len(region_assumptions)
        loaded_total += region_loaded
        logger.info(
            f"Loaded {region_loaded} WebIdentity role assumptions for region {region}"
        )

    logger.info(
        "CloudTrail WebIdentity management events sync completed successfully. "
        f"Processed {loaded_total} total WebIdentity role assumption events across {region_count} regions."
    )
916
+
917
+
344
918
  # Main sync function for when we decide to add more event types
345
919
  @timeit
346
920
  def sync(
@@ -353,7 +927,11 @@ def sync(
353
927
  ) -> None:
354
928
  """
355
929
  Main sync function for CloudTrail management events.
930
+
931
+ Syncs AssumeRole, AssumeRoleWithSAML, and AssumeRoleWithWebIdentity events to create separate
932
+ relationship types in the graph for security analysis.
356
933
  """
934
+ # Sync regular AssumeRole events
357
935
  sync_assume_role_events(
358
936
  neo4j_session=neo4j_session,
359
937
  boto3_session=boto3_session,
@@ -362,3 +940,23 @@ def sync(
362
940
  update_tag=update_tag,
363
941
  common_job_parameters=common_job_parameters,
364
942
  )
943
+
944
+ # Sync SAML AssumeRoleWithSAML events
945
+ sync_saml_role_events(
946
+ neo4j_session=neo4j_session,
947
+ boto3_session=boto3_session,
948
+ regions=regions,
949
+ current_aws_account_id=current_aws_account_id,
950
+ update_tag=update_tag,
951
+ common_job_parameters=common_job_parameters,
952
+ )
953
+
954
+ # Sync WebIdentity AssumeRoleWithWebIdentity events
955
+ sync_web_identity_role_events(
956
+ neo4j_session=neo4j_session,
957
+ boto3_session=boto3_session,
958
+ regions=regions,
959
+ current_aws_account_id=current_aws_account_id,
960
+ update_tag=update_tag,
961
+ common_job_parameters=common_job_parameters,
962
+ )