cartography 0.108.0rc1__py3-none-any.whl → 0.109.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +14 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -17
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/intel/aws/cloudtrail.py +17 -4
- cartography/intel/aws/cloudtrail_management_events.py +614 -16
- cartography/intel/aws/cloudwatch.py +73 -4
- cartography/intel/aws/ec2/subnets.py +37 -63
- cartography/intel/aws/ecr.py +55 -80
- cartography/intel/aws/elasticache.py +102 -79
- cartography/intel/aws/eventbridge.py +91 -0
- cartography/intel/aws/glue.py +117 -0
- cartography/intel/aws/identitycenter.py +71 -23
- cartography/intel/aws/kms.py +160 -200
- cartography/intel/aws/lambda_function.py +206 -190
- cartography/intel/aws/rds.py +243 -458
- cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/route53.py +334 -332
- cartography/intel/aws/secretsmanager.py +62 -44
- cartography/intel/entra/groups.py +29 -1
- cartography/intel/gcp/__init__.py +10 -0
- cartography/intel/gcp/compute.py +19 -42
- cartography/intel/trivy/__init__.py +73 -13
- cartography/intel/trivy/scanner.py +115 -92
- cartography/models/aws/cloudtrail/management_events.py +95 -6
- cartography/models/aws/cloudtrail/trail.py +21 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +41 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/eventbridge/__init__.py +0 -0
- cartography/models/aws/eventbridge/rule.py +77 -0
- cartography/models/aws/glue/__init__.py +0 -0
- cartography/models/aws/glue/connection.py +51 -0
- cartography/models/aws/identitycenter/awspermissionset.py +44 -0
- cartography/models/aws/kms/__init__.py +0 -0
- cartography/models/aws/kms/aliases.py +86 -0
- cartography/models/aws/kms/grants.py +65 -0
- cartography/models/aws/kms/keys.py +88 -0
- cartography/models/aws/lambda_function/__init__.py +0 -0
- cartography/models/aws/lambda_function/alias.py +74 -0
- cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
- cartography/models/aws/lambda_function/lambda_function.py +89 -0
- cartography/models/aws/lambda_function/layer.py +72 -0
- cartography/models/aws/rds/__init__.py +0 -0
- cartography/models/aws/rds/cluster.py +89 -0
- cartography/models/aws/rds/instance.py +154 -0
- cartography/models/aws/rds/snapshot.py +108 -0
- cartography/models/aws/rds/subnet_group.py +101 -0
- cartography/models/aws/route53/__init__.py +0 -0
- cartography/models/aws/route53/dnsrecord.py +214 -0
- cartography/models/aws/route53/nameserver.py +63 -0
- cartography/models/aws/route53/subzone.py +40 -0
- cartography/models/aws/route53/zone.py +47 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/entra/group.py +26 -0
- cartography/models/entra/user.py +6 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- cartography/util.py +8 -1
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/METADATA +2 -2
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/RECORD +73 -44
- cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
- cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
- cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
- cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/WHEEL +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/top_level.txt +0 -0
|
@@ -13,6 +13,12 @@ from cartography.client.core.tx import load_matchlinks
|
|
|
13
13
|
from cartography.graph.job import GraphJob
|
|
14
14
|
from cartography.intel.aws.ec2.util import get_botocore_config
|
|
15
15
|
from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
|
|
16
|
+
from cartography.models.aws.cloudtrail.management_events import (
|
|
17
|
+
AssumedRoleWithSAMLMatchLink,
|
|
18
|
+
)
|
|
19
|
+
from cartography.models.aws.cloudtrail.management_events import (
|
|
20
|
+
GitHubRepoAssumeRoleWithWebIdentityMatchLink,
|
|
21
|
+
)
|
|
16
22
|
from cartography.util import aws_handle_regions
|
|
17
23
|
from cartography.util import timeit
|
|
18
24
|
|
|
@@ -74,11 +80,123 @@ def get_assume_role_events(
|
|
|
74
80
|
return all_events
|
|
75
81
|
|
|
76
82
|
|
|
83
|
+
@timeit
@aws_handle_regions
def get_saml_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Collect the CloudTrail AssumeRoleWithSAML events recorded in one region.

    The lookup window ends at the current UTC time and begins ``lookback_hours``
    earlier. Events are read through the ``lookup_events`` paginator, filtered on
    the ``EventName`` lookup attribute.

    :type boto3_session: boto3.Session
    :param boto3_session: Session used to create the regional CloudTrail client
    :type region: str
    :param region: AWS region whose events are looked up
    :type lookback_hours: int
    :param lookback_hours: Width of the lookup window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The ``Events`` entries of every returned page, in page order
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Lookup window: [now - lookback_hours, now], computed from naive UTC time.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithSAML events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[
            {"AttributeKey": "EventName", "AttributeValue": "AssumeRoleWithSAML"}
        ],
        StartTime=window_start,
        EndTime=window_end,
        PaginationConfig={
            "MaxItems": 10000,  # upper bound on events fetched, to cap API calls
            "PageSize": 50,  # events requested per page
        },
    )

    # Flatten every page's "Events" list; pages without that key contribute nothing.
    collected: List[Dict[str, Any]] = [
        record for page in pages for record in page.get("Events", [])
    ]

    logger.info(
        f"Retrieved {len(collected)} AssumeRoleWithSAML events from region '{region}'"
    )

    return collected
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@timeit
@aws_handle_regions
def get_web_identity_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Collect the CloudTrail AssumeRoleWithWebIdentity events recorded in one region.

    The lookup window ends at the current UTC time and begins ``lookback_hours``
    earlier. Events are read through the ``lookup_events`` paginator, filtered on
    the ``EventName`` lookup attribute.

    :type boto3_session: boto3.Session
    :param boto3_session: Session used to create the regional CloudTrail client
    :type region: str
    :param region: AWS region whose events are looked up
    :type lookback_hours: int
    :param lookback_hours: Width of the lookup window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The ``Events`` entries of every returned page, in page order
    """
    cloudtrail = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Lookup window: [now - lookback_hours, now], computed from naive UTC time.
    window_end = datetime.utcnow()
    window_start = window_end - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithWebIdentity events for region '{region}' "
        f"from {window_start} to {window_end} ({lookback_hours} hours)"
    )

    pages = cloudtrail.get_paginator("lookup_events").paginate(
        LookupAttributes=[
            {"AttributeKey": "EventName", "AttributeValue": "AssumeRoleWithWebIdentity"}
        ],
        StartTime=window_start,
        EndTime=window_end,
        PaginationConfig={
            "MaxItems": 10000,  # upper bound on events fetched, to cap API calls
            "PageSize": 50,  # events requested per page
        },
    )

    # Flatten every page's "Events" list; pages without that key contribute nothing.
    collected: List[Dict[str, Any]] = [
        record for page in pages for record in page.get("Events", [])
    ]

    logger.info(
        f"Retrieved {len(collected)} AssumeRoleWithWebIdentity events from region '{region}'"
    )

    return collected
|
|
195
|
+
|
|
196
|
+
|
|
77
197
|
@timeit
|
|
78
198
|
def transform_assume_role_events_to_role_assumptions(
|
|
79
199
|
events: List[Dict[str, Any]],
|
|
80
|
-
region: str,
|
|
81
|
-
current_aws_account_id: str,
|
|
82
200
|
) -> List[Dict[str, Any]]:
|
|
83
201
|
"""
|
|
84
202
|
Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.
|
|
@@ -93,22 +211,25 @@ def transform_assume_role_events_to_role_assumptions(
|
|
|
93
211
|
|
|
94
212
|
:type events: List[Dict[str, Any]]
|
|
95
213
|
:param events: List of raw CloudTrail AssumeRole events from lookup_events API
|
|
96
|
-
:type region: str
|
|
97
|
-
:param region: The AWS region where events were retrieved from
|
|
98
|
-
:type current_aws_account_id: str
|
|
99
|
-
:param current_aws_account_id: The AWS account ID being synced
|
|
100
214
|
:rtype: List[Dict[str, Any]]
|
|
101
215
|
:return: List of aggregated role assumption relationships ready for loading
|
|
102
216
|
"""
|
|
103
217
|
aggregated: Dict[tuple, Dict[str, Any]] = {}
|
|
104
218
|
logger.info(
|
|
105
|
-
f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions
|
|
219
|
+
f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions"
|
|
106
220
|
)
|
|
107
221
|
|
|
108
222
|
for event in events:
|
|
109
223
|
|
|
110
224
|
cloudtrail_event = json.loads(event["CloudTrailEvent"])
|
|
111
225
|
|
|
226
|
+
# Skip events with null requestParameters since we can't extract roleArn
|
|
227
|
+
if not cloudtrail_event.get("requestParameters"):
|
|
228
|
+
logger.debug(
|
|
229
|
+
f"Skipping CloudTrail AssumeRole event due to missing requestParameters. Event: {event.get('EventId', 'unknown')}"
|
|
230
|
+
)
|
|
231
|
+
continue
|
|
232
|
+
|
|
112
233
|
if cloudtrail_event.get("userIdentity", {}).get("arn"):
|
|
113
234
|
source_principal = cloudtrail_event["userIdentity"]["arn"]
|
|
114
235
|
destination_principal = cloudtrail_event["requestParameters"]["roleArn"]
|
|
@@ -132,7 +253,6 @@ def transform_assume_role_events_to_role_assumptions(
|
|
|
132
253
|
|
|
133
254
|
if key in aggregated:
|
|
134
255
|
aggregated[key]["times_used"] += 1
|
|
135
|
-
aggregated[key]["assume_role_count"] += 1 # All events are AssumeRole
|
|
136
256
|
# Handle None values safely for time comparisons
|
|
137
257
|
if event_time:
|
|
138
258
|
existing_first = aggregated[key]["first_seen_in_time_window"]
|
|
@@ -149,15 +269,182 @@ def transform_assume_role_events_to_role_assumptions(
|
|
|
149
269
|
"times_used": 1,
|
|
150
270
|
"first_seen_in_time_window": event_time,
|
|
151
271
|
"last_used": event_time,
|
|
152
|
-
"event_types": ["AssumeRole"],
|
|
153
|
-
"assume_role_count": 1,
|
|
154
|
-
"saml_count": 0,
|
|
155
|
-
"web_identity_count": 0,
|
|
156
272
|
}
|
|
157
273
|
|
|
158
274
|
return list(aggregated.values())
|
|
159
275
|
|
|
160
276
|
|
|
277
|
+
@timeit
def transform_saml_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithSAML events into aggregated role assumption relationships.

    For every event the embedded ``CloudTrailEvent`` JSON document is parsed, the
    source is taken from ``responseElements.assumedRoleUser.arn`` and the
    destination from ``requestParameters.roleArn``. Events are then aggregated by
    (source, destination) pair.

    Events are skipped (with a debug log) when ``requestParameters``,
    ``responseElements.assumedRoleUser.arn`` or ``requestParameters.roleArn`` is
    missing or null, so one incomplete record cannot abort the whole transform.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithSAML events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated SAML role assumption relationships ready for loading.
             Each dict contains keys: source_principal_arn, destination_principal_arn,
             times_used, first_seen_in_time_window, last_used.
             Note that source_principal_arn holds only the last "/"-separated
             segment of the assumed-role ARN, not a full ARN.
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithSAML events to role assumptions"
    )

    for event in events:
        event_id = event.get("EventId", "unknown")
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # Skip events with null requestParameters since we can't extract roleArn
        request_parameters = cloudtrail_event.get("requestParameters")
        if not request_parameters:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing requestParameters. Event: {event_id}"
            )
            continue

        # dict.get() only falls back to its default when the key is absent; a key
        # that is present with a JSON null yields None, so normalise with `or {}`
        # before chaining further lookups.
        response_elements = cloudtrail_event.get("responseElements") or {}
        assumed_role_user = response_elements.get("assumedRoleUser") or {}
        assumed_role_arn = assumed_role_user.get("arn")
        if not assumed_role_arn:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing assumedRoleUser.arn. Event: {event_id}"
            )
            continue

        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing requestParameters.roleArn. Event: {event_id}"
            )
            continue

        # Extract username from assumed role ARN: arn:aws:sts::account:assumed-role/RoleName/username
        source_principal = assumed_role_arn.split("/")[-1]

        event_time = event.get("EventTime")
        key = (source_principal, destination_principal)

        record = aggregated.get(key)
        if record is None:
            aggregated[key] = {
                "source_principal_arn": source_principal,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1
        # Only widen the time window when this event actually carries a timestamp;
        # a stored None (first event had no timestamp) is replaced by any real one.
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(aggregated.values())
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@timeit
def transform_web_identity_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithWebIdentity events into aggregated role assumption relationships.

    Only events issued by GitHub Actions are kept: ``userIdentity.type`` must be
    "WebIdentityUser", ``userIdentity.userName`` must be set and
    ``userIdentity.identityProvider`` must contain
    "token.actions.githubusercontent.com". The repository fullname is parsed from
    ``userName`` and events are aggregated by (repository, destination role) pair.

    Events are skipped when ``requestParameters`` or its ``roleArn`` is missing,
    when they do not come from GitHub Actions, or when ``userName`` cannot be
    parsed into an "{organization}/{repository}" fullname (previously such events
    produced a record with an empty source_repo_fullname).

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithWebIdentity events from lookup_events API
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated WebIdentity role assumption relationships ready for loading.
             Each dict contains keys: source_repo_fullname, destination_principal_arn,
             times_used, first_seen_in_time_window, last_used
    """
    github_aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithWebIdentity events to role assumptions"
    )

    for event in events:
        event_id = event.get("EventId", "unknown")
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # Skip events with null requestParameters since we can't extract roleArn
        request_parameters = cloudtrail_event.get("requestParameters")
        if not request_parameters:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithWebIdentity event due to missing requestParameters. Event: {event_id}"
            )
            continue

        # `or {}` / `or "unknown"` also cover keys that are present with a JSON
        # null, which dict.get()'s default argument does not.
        user_identity = cloudtrail_event.get("userIdentity") or {}
        user_name = user_identity.get("userName")
        if user_identity.get("type") != "WebIdentityUser" or not user_name:
            continue

        # Only process GitHub Actions events; other identity providers are
        # skipped for now.
        identity_provider = user_identity.get("identityProvider") or "unknown"
        if "token.actions.githubusercontent.com" not in identity_provider:
            continue

        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithWebIdentity event due to missing requestParameters.roleArn. Event: {event_id}"
            )
            continue

        # Extract GitHub repo fullname from userName format: "repo:{organization}/{repository}:{context}"
        github_repo = _extract_github_repo_from_username(user_name)
        if not github_repo:
            # An empty fullname identifies no repository, so a record built from
            # it would presumably never link to a repository node; drop it here.
            logger.debug(
                f"Skipping GitHub WebIdentity event with unparseable userName. Event: {event_id}"
            )
            continue

        event_time = event.get("EventTime")
        key = (github_repo, destination_principal)

        record = github_aggregated.get(key)
        if record is None:
            github_aggregated[key] = {
                "source_repo_fullname": github_repo,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1
        # Only widen the time window when this event actually carries a timestamp;
        # a stored None (first event had no timestamp) is replaced by any real one.
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    # Return aggregated relationships directly
    return list(github_aggregated.values())
|
|
446
|
+
|
|
447
|
+
|
|
161
448
|
@timeit
|
|
162
449
|
def load_role_assumptions(
|
|
163
450
|
neo4j_session: neo4j.Session,
|
|
@@ -169,7 +456,7 @@ def load_role_assumptions(
|
|
|
169
456
|
Load aggregated role assumption relationships into Neo4j using MatchLink pattern.
|
|
170
457
|
|
|
171
458
|
Creates direct ASSUMED_ROLE relationships with aggregated properties:
|
|
172
|
-
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {
|
|
459
|
+
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)
|
|
173
460
|
|
|
174
461
|
Assumes that both source principals and destination roles already exist in the graph.
|
|
175
462
|
|
|
@@ -200,6 +487,90 @@ def load_role_assumptions(
|
|
|
200
487
|
)
|
|
201
488
|
|
|
202
489
|
|
|
490
|
+
@timeit
def load_saml_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated SAML role assumptions to Neo4j via the MatchLink pattern.

    The records are handed to ``load_matchlinks`` together with an
    ``AssumedRoleWithSAMLMatchLink`` schema instance, scoped to the AWSAccount
    identified by ``current_aws_account_id``. The relationship's endpoint labels
    and match keys are defined by that schema, not by this function.

    MatchLinks connect nodes that are expected to already exist in the graph.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated SAML role assumption records from the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced (used as the sub-resource id)
    :type aws_update_tag: int
    :param aws_update_tag: Update tag passed to the loader as ``lastupdated``
    :rtype: None
    """
    load_matchlinks(
        neo4j_session,
        AssumedRoleWithSAMLMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} SAML role assumption relationships"
    )
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
@timeit
def load_web_identity_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated WebIdentity role assumptions to Neo4j via the MatchLink pattern.

    The records are handed to ``load_matchlinks`` together with a
    ``GitHubRepoAssumeRoleWithWebIdentityMatchLink`` schema instance, scoped to
    the AWSAccount identified by ``current_aws_account_id``. The relationship's
    endpoint labels and match keys are defined by that schema, not by this
    function.

    MatchLinks connect nodes that are expected to already exist in the graph.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated WebIdentity role assumption records from the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced (used as the sub-resource id)
    :type aws_update_tag: int
    :param aws_update_tag: Update tag passed to the loader as ``lastupdated``
    :rtype: None
    """
    load_matchlinks(
        neo4j_session,
        GitHubRepoAssumeRoleWithWebIdentityMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} WebIdentity role assumption relationships"
    )
|
|
572
|
+
|
|
573
|
+
|
|
203
574
|
def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
|
|
204
575
|
"""
|
|
205
576
|
Convert an assumed role ARN to the original role ARN.
|
|
@@ -224,6 +595,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
|
|
|
224
595
|
return assumed_role_arn
|
|
225
596
|
|
|
226
597
|
|
|
598
|
+
def _extract_github_repo_from_username(user_name: str) -> str:
|
|
599
|
+
"""
|
|
600
|
+
Extract GitHub repository fullname from CloudTrail userName field.
|
|
601
|
+
|
|
602
|
+
GitHub Actions CloudTrail events have userName in the format:
|
|
603
|
+
"repo:{organization}/{repository}:{context}"
|
|
604
|
+
"""
|
|
605
|
+
if not user_name:
|
|
606
|
+
return ""
|
|
607
|
+
|
|
608
|
+
parts = user_name.split(":")
|
|
609
|
+
|
|
610
|
+
# Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
|
|
611
|
+
if len(parts) < 3 or parts[0] != "repo":
|
|
612
|
+
return ""
|
|
613
|
+
|
|
614
|
+
# Extract "{organization}/{repository}"
|
|
615
|
+
repo_fullname = parts[1]
|
|
616
|
+
|
|
617
|
+
# Validate it looks like "{organization}/{repository}" format
|
|
618
|
+
if repo_fullname.count("/") != 1:
|
|
619
|
+
return ""
|
|
620
|
+
|
|
621
|
+
# Ensure both organization and repo exist
|
|
622
|
+
owner, repo = repo_fullname.split("/")
|
|
623
|
+
if not owner or not repo:
|
|
624
|
+
return ""
|
|
625
|
+
|
|
626
|
+
return repo_fullname
|
|
627
|
+
|
|
628
|
+
|
|
227
629
|
@timeit
|
|
228
630
|
def cleanup(
|
|
229
631
|
neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
|
|
@@ -270,7 +672,7 @@ def sync_assume_role_events(
|
|
|
270
672
|
4. Run cleanup after processing all regions
|
|
271
673
|
|
|
272
674
|
The resulting graph contains direct relationships like:
|
|
273
|
-
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used,
|
|
675
|
+
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)
|
|
274
676
|
|
|
275
677
|
:type neo4j_session: neo4j.Session
|
|
276
678
|
:param neo4j_session: The Neo4j session
|
|
@@ -316,8 +718,6 @@ def sync_assume_role_events(
|
|
|
316
718
|
# Transform AssumeRole events to role assumptions
|
|
317
719
|
assume_role_assumptions = transform_assume_role_events_to_role_assumptions(
|
|
318
720
|
events=assume_role_events,
|
|
319
|
-
region=region,
|
|
320
|
-
current_aws_account_id=current_aws_account_id,
|
|
321
721
|
)
|
|
322
722
|
|
|
323
723
|
# Load AssumeRole assumptions for this region
|
|
@@ -341,6 +741,180 @@ def sync_assume_role_events(
|
|
|
341
741
|
)
|
|
342
742
|
|
|
343
743
|
|
|
744
|
+
@timeit
def sync_saml_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithSAML events into role assumption relationships.

    For each region, in order:
    1. Fetch the AssumeRoleWithSAML events (``get_saml_role_events``)
    2. Aggregate them into role assumption records
       (``transform_saml_role_events_to_role_assumptions``)
    3. Load those records into Neo4j (``load_saml_role_assumptions``)

    The whole sync is skipped when no lookback period is configured.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; the lookback period is read from
        the "aws_cloudtrail_management_events_lookback_hours" key
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # A missing (or zero) lookback period disables this sync entirely.
    if not lookback_hours:
        logger.info(
            "CloudTrail SAML management events sync skipped - no lookback period specified"
        )
        return

    region_count = len(regions)
    logger.info(
        f"Syncing SAML events for {region_count} regions with {lookback_hours} hour lookback period"
    )

    loaded_total = 0

    # Each region is fetched, transformed and loaded before moving to the next.
    for region in regions:
        logger.info(f"Processing CloudTrail SAML events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithSAML events specifically for region {region}"
        )

        raw_events = get_saml_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        assumptions = transform_saml_role_events_to_role_assumptions(
            events=raw_events,
        )
        load_saml_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_total += len(assumptions)
        logger.info(
            f"Loaded {len(assumptions)} SAML role assumptions for region {region}"
        )

    logger.info(
        f"CloudTrail SAML management events sync completed successfully. "
        f"Processed {loaded_total} total SAML role assumption events across {region_count} regions."
    )
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
@timeit
def sync_web_identity_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithWebIdentity management events, one region at a time.

    For every region in ``regions`` this function:
    1. Fetches the events via ``get_web_identity_role_events``
    2. Converts them with ``transform_web_identity_role_events_to_role_assumptions``
    3. Hands the result to ``load_web_identity_role_assumptions``

    The resulting graph is intended to contain direct relationships like:
    (GitHubRepository)-[:ASSUMED_ROLE_WITH_WEB_IDENTITY {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)

    The whole sync is skipped (with an info log) when no lookback period is configured.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session passed to the loader
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session passed to the event fetcher
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Update tag forwarded to the loader as ``aws_update_tag``
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; only the key
        ``aws_cloudtrail_management_events_lookback_hours`` is read here
    :rtype: None
    """
    # The lookback window comes from common_job_parameters (set by CLI parameter).
    hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours",
    )

    # Guard clause: a missing or falsy lookback disables this sync entirely.
    if not hours:
        logger.info(
            "CloudTrail WebIdentity management events sync skipped - no lookback period specified"
        )
        return

    logger.info(
        f"Syncing WebIdentity events for {len(regions)} regions with {hours} hour lookback period"
    )

    loaded_total = 0

    # Fetch -> transform -> load, independently for each region.
    for region in regions:
        logger.info(f"Processing CloudTrail WebIdentity events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithWebIdentity events specifically for region {region}"
        )

        raw_events = get_web_identity_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=hours,
        )
        assumptions = transform_web_identity_role_events_to_role_assumptions(
            events=raw_events,
        )
        load_web_identity_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        # Count after loading so the running total reflects what was handed to the loader.
        loaded_here = len(assumptions)
        loaded_total += loaded_here
        logger.info(
            f"Loaded {loaded_here} WebIdentity role assumptions for region {region}"
        )

    logger.info(
        "CloudTrail WebIdentity management events sync completed successfully. "
        f"Processed {loaded_total} total WebIdentity role assumption events across {len(regions)} regions."
    )
|
|
916
|
+
|
|
917
|
+
|
|
344
918
|
# Main sync function for when we decide to add more event types
|
|
345
919
|
@timeit
|
|
346
920
|
def sync(
|
|
@@ -353,7 +927,11 @@ def sync(
|
|
|
353
927
|
) -> None:
|
|
354
928
|
"""
|
|
355
929
|
Main sync function for CloudTrail management events.
|
|
930
|
+
|
|
931
|
+
Syncs AssumeRole, AssumeRoleWithSAML, and AssumeRoleWithWebIdentity events to create separate
|
|
932
|
+
relationship types in the graph for security analysis.
|
|
356
933
|
"""
|
|
934
|
+
# Sync regular AssumeRole events
|
|
357
935
|
sync_assume_role_events(
|
|
358
936
|
neo4j_session=neo4j_session,
|
|
359
937
|
boto3_session=boto3_session,
|
|
@@ -362,3 +940,23 @@ def sync(
|
|
|
362
940
|
update_tag=update_tag,
|
|
363
941
|
common_job_parameters=common_job_parameters,
|
|
364
942
|
)
|
|
943
|
+
|
|
944
|
+
# Sync SAML AssumeRoleWithSAML events
|
|
945
|
+
sync_saml_role_events(
|
|
946
|
+
neo4j_session=neo4j_session,
|
|
947
|
+
boto3_session=boto3_session,
|
|
948
|
+
regions=regions,
|
|
949
|
+
current_aws_account_id=current_aws_account_id,
|
|
950
|
+
update_tag=update_tag,
|
|
951
|
+
common_job_parameters=common_job_parameters,
|
|
952
|
+
)
|
|
953
|
+
|
|
954
|
+
# Sync WebIdentity AssumeRoleWithWebIdentity events
|
|
955
|
+
sync_web_identity_role_events(
|
|
956
|
+
neo4j_session=neo4j_session,
|
|
957
|
+
boto3_session=boto3_session,
|
|
958
|
+
regions=regions,
|
|
959
|
+
current_aws_account_id=current_aws_account_id,
|
|
960
|
+
update_tag=update_tag,
|
|
961
|
+
common_job_parameters=common_job_parameters,
|
|
962
|
+
)
|