cartography 0.108.0rc1__py3-none-any.whl → 0.109.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/intel/aws/cloudtrail.py +17 -4
- cartography/intel/aws/cloudtrail_management_events.py +593 -16
- cartography/intel/aws/cloudwatch.py +73 -4
- cartography/intel/aws/ec2/subnets.py +37 -63
- cartography/intel/aws/ecr.py +55 -80
- cartography/intel/aws/elasticache.py +102 -79
- cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
- cartography/intel/aws/secretsmanager.py +62 -44
- cartography/intel/entra/groups.py +29 -1
- cartography/intel/gcp/__init__.py +10 -0
- cartography/intel/gcp/compute.py +19 -42
- cartography/models/aws/cloudtrail/management_events.py +95 -6
- cartography/models/aws/cloudtrail/trail.py +21 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +41 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/entra/group.py +26 -0
- cartography/models/entra/user.py +6 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/METADATA +1 -1
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/RECORD +36 -25
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -13,6 +13,12 @@ from cartography.client.core.tx import load_matchlinks
|
|
|
13
13
|
from cartography.graph.job import GraphJob
|
|
14
14
|
from cartography.intel.aws.ec2.util import get_botocore_config
|
|
15
15
|
from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
|
|
16
|
+
from cartography.models.aws.cloudtrail.management_events import (
|
|
17
|
+
AssumedRoleWithSAMLMatchLink,
|
|
18
|
+
)
|
|
19
|
+
from cartography.models.aws.cloudtrail.management_events import (
|
|
20
|
+
GitHubRepoAssumeRoleWithWebIdentityMatchLink,
|
|
21
|
+
)
|
|
16
22
|
from cartography.util import aws_handle_regions
|
|
17
23
|
from cartography.util import timeit
|
|
18
24
|
|
|
@@ -74,11 +80,123 @@ def get_assume_role_events(
|
|
|
74
80
|
return all_events
|
|
75
81
|
|
|
76
82
|
|
|
83
|
+
@timeit
@aws_handle_regions
def get_saml_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Collect the CloudTrail AssumeRoleWithSAML events recorded in one region during the lookback window.

    The window ends at the current UTC time and starts ``lookback_hours`` earlier. Events are read
    through the ``lookup_events`` paginator, filtered on the ``EventName`` lookup attribute.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session used to create the CloudTrail client
    :type region: str
    :param region: The AWS region to query
    :type lookback_hours: int
    :param lookback_hours: Size of the lookback window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The ``Events`` entries of every page returned by the paginator, in page order
    """
    client = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Lookback window: [now - lookback_hours, now], expressed as naive UTC datetimes
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithSAML events for region '{region}' "
        f"from {start_time} to {end_time} ({lookback_hours} hours)"
    )

    event_name_filter = [
        {"AttributeKey": "EventName", "AttributeValue": "AssumeRoleWithSAML"}
    ]
    pagination_config = {
        "MaxItems": 10000,  # Reasonable limit to prevent excessive API calls
        "PageSize": 50,  # CloudTrail API limit per page
    }
    pages = client.get_paginator("lookup_events").paginate(
        LookupAttributes=event_name_filter,
        StartTime=start_time,
        EndTime=end_time,
        PaginationConfig=pagination_config,
    )

    # Flatten the pages into a single list; a page without "Events" contributes nothing
    all_events = [record for page in pages for record in page.get("Events", [])]

    logger.info(
        f"Retrieved {len(all_events)} AssumeRoleWithSAML events from region '{region}'"
    )

    return all_events
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@timeit
@aws_handle_regions
def get_web_identity_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Collect the CloudTrail AssumeRoleWithWebIdentity events recorded in one region during the lookback window.

    The window ends at the current UTC time and starts ``lookback_hours`` earlier. Events are read
    through the ``lookup_events`` paginator, filtered on the ``EventName`` lookup attribute.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session used to create the CloudTrail client
    :type region: str
    :param region: The AWS region to query
    :type lookback_hours: int
    :param lookback_hours: Size of the lookback window, in hours
    :rtype: List[Dict[str, Any]]
    :return: The ``Events`` entries of every page returned by the paginator, in page order
    """
    client = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Lookback window: [now - lookback_hours, now], expressed as naive UTC datetimes
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRoleWithWebIdentity events for region '{region}' "
        f"from {start_time} to {end_time} ({lookback_hours} hours)"
    )

    event_name_filter = [
        {"AttributeKey": "EventName", "AttributeValue": "AssumeRoleWithWebIdentity"}
    ]
    pagination_config = {
        "MaxItems": 10000,  # Reasonable limit to prevent excessive API calls
        "PageSize": 50,  # CloudTrail API limit per page
    }
    pages = client.get_paginator("lookup_events").paginate(
        LookupAttributes=event_name_filter,
        StartTime=start_time,
        EndTime=end_time,
        PaginationConfig=pagination_config,
    )

    # Flatten the pages into a single list; a page without "Events" contributes nothing
    all_events = [record for page in pages for record in page.get("Events", [])]

    logger.info(
        f"Retrieved {len(all_events)} AssumeRoleWithWebIdentity events from region '{region}'"
    )

    return all_events
|
|
195
|
+
|
|
196
|
+
|
|
77
197
|
@timeit
|
|
78
198
|
def transform_assume_role_events_to_role_assumptions(
|
|
79
199
|
events: List[Dict[str, Any]],
|
|
80
|
-
region: str,
|
|
81
|
-
current_aws_account_id: str,
|
|
82
200
|
) -> List[Dict[str, Any]]:
|
|
83
201
|
"""
|
|
84
202
|
Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.
|
|
@@ -93,16 +211,12 @@ def transform_assume_role_events_to_role_assumptions(
|
|
|
93
211
|
|
|
94
212
|
:type events: List[Dict[str, Any]]
|
|
95
213
|
:param events: List of raw CloudTrail AssumeRole events from lookup_events API
|
|
96
|
-
:type region: str
|
|
97
|
-
:param region: The AWS region where events were retrieved from
|
|
98
|
-
:type current_aws_account_id: str
|
|
99
|
-
:param current_aws_account_id: The AWS account ID being synced
|
|
100
214
|
:rtype: List[Dict[str, Any]]
|
|
101
215
|
:return: List of aggregated role assumption relationships ready for loading
|
|
102
216
|
"""
|
|
103
217
|
aggregated: Dict[tuple, Dict[str, Any]] = {}
|
|
104
218
|
logger.info(
|
|
105
|
-
f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions
|
|
219
|
+
f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions"
|
|
106
220
|
)
|
|
107
221
|
|
|
108
222
|
for event in events:
|
|
@@ -132,7 +246,6 @@ def transform_assume_role_events_to_role_assumptions(
|
|
|
132
246
|
|
|
133
247
|
if key in aggregated:
|
|
134
248
|
aggregated[key]["times_used"] += 1
|
|
135
|
-
aggregated[key]["assume_role_count"] += 1 # All events are AssumeRole
|
|
136
249
|
# Handle None values safely for time comparisons
|
|
137
250
|
if event_time:
|
|
138
251
|
existing_first = aggregated[key]["first_seen_in_time_window"]
|
|
@@ -149,15 +262,168 @@ def transform_assume_role_events_to_role_assumptions(
|
|
|
149
262
|
"times_used": 1,
|
|
150
263
|
"first_seen_in_time_window": event_time,
|
|
151
264
|
"last_used": event_time,
|
|
152
|
-
"event_types": ["AssumeRole"],
|
|
153
|
-
"assume_role_count": 1,
|
|
154
|
-
"saml_count": 0,
|
|
155
|
-
"web_identity_count": 0,
|
|
156
265
|
}
|
|
157
266
|
|
|
158
267
|
return list(aggregated.values())
|
|
159
268
|
|
|
160
269
|
|
|
270
|
+
@timeit
def transform_saml_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithSAML events into aggregated role assumption relationships.

    For every event the embedded ``CloudTrailEvent`` JSON document is parsed, then:
    1. The source principal is taken from the last ``/``-separated segment of
       ``responseElements.assumedRoleUser.arn``
       (``arn:aws:sts::account:assumed-role/RoleName/username`` -> ``username``).
    2. The destination principal is taken from ``requestParameters.roleArn``.
    3. Events are aggregated by (source_principal, destination_principal) pairs.

    Events that lack either value are skipped with a debug log instead of raising. This includes
    events whose ``responseElements`` or ``requestParameters`` are present but null.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithSAML events from lookup_events API.
        Each event must carry a ``CloudTrailEvent`` JSON string; ``EventTime`` and ``EventId``
        are optional.
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated SAML role assumption relationships ready for loading.
        Each dict contains keys: source_principal_arn, destination_principal_arn,
        times_used, first_seen_in_time_window, last_used
    :raises KeyError: if an event has no ``CloudTrailEvent`` entry
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithSAML events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # `or {}` instead of a .get() default: a key that is present with a JSON null value
        # comes back as None, and the default of .get() would not apply.
        response_elements = cloudtrail_event.get("responseElements") or {}
        assumed_role_user = response_elements.get("assumedRoleUser") or {}
        assumed_role_arn = assumed_role_user.get("arn")

        if not assumed_role_arn:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing assumedRoleUser.arn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        request_parameters = cloudtrail_event.get("requestParameters") or {}
        destination_principal = request_parameters.get("roleArn")

        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRoleWithSAML event due to missing requestParameters.roleArn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        # Extract username from assumed role ARN: arn:aws:sts::account:assumed-role/RoleName/username
        source_principal = assumed_role_arn.split("/")[-1]
        event_time = event.get("EventTime")

        key = (source_principal, destination_principal)
        record = aggregated.get(key)

        if record is None:
            # First event seen for this pair
            aggregated[key] = {
                "source_principal_arn": source_principal,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1
        # Handle None values safely for time comparisons
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(aggregated.values())
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
@timeit
def transform_web_identity_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRoleWithWebIdentity events into aggregated role assumption relationships.

    Only GitHub Actions events are processed: the event's ``userIdentity`` must have type
    ``WebIdentityUser``, a non-empty ``userName``, and an ``identityProvider`` containing
    ``token.actions.githubusercontent.com``. Every other event is skipped.

    For the remaining events:
    1. The GitHub repository fullname is extracted from ``userIdentity.userName``
       (format ``repo:{organization}/{repository}:{context}``).
    2. The destination principal is taken from ``requestParameters.roleArn``.
    3. Events are aggregated by (source_repo_fullname, destination_principal) pairs.

    Events whose repository cannot be parsed, or that have no role ARN, are skipped with a debug
    log. They are never aggregated under an empty repository name, and null-valued
    ``userIdentity`` / ``identityProvider`` / ``requestParameters`` fields do not raise.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRoleWithWebIdentity events from lookup_events API.
        Each event must carry a ``CloudTrailEvent`` JSON string; ``EventTime`` and ``EventId``
        are optional.
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated WebIdentity role assumption relationships ready for loading.
        Each dict contains keys: source_repo_fullname, destination_principal_arn,
        times_used, first_seen_in_time_window, last_used
    :raises KeyError: if an event has no ``CloudTrailEvent`` entry
    """
    github_aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRoleWithWebIdentity events to role assumptions"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # `or {}` / `or ""` instead of .get() defaults: a key present with a JSON null value
        # comes back as None, and the default of .get() would not apply.
        user_identity = cloudtrail_event.get("userIdentity") or {}
        user_name = user_identity.get("userName")

        if user_identity.get("type") != "WebIdentityUser" or not user_name:
            continue

        # Only process GitHub Actions events; other identity providers are skipped for now
        identity_provider = user_identity.get("identityProvider") or ""
        if "token.actions.githubusercontent.com" not in identity_provider:
            continue

        request_parameters = cloudtrail_event.get("requestParameters") or {}
        destination_principal = request_parameters.get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping GitHub WebIdentity event due to missing requestParameters.roleArn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        # Extract GitHub repo fullname from userName format: "repo:{organization}/{repository}:{context}"
        github_repo = _extract_github_repo_from_username(user_name)
        if not github_repo:
            # The helper returns "" for anything it cannot parse. Keeping such events would
            # merge unrelated events under one empty repository name.
            logger.debug(
                f"Skipping GitHub WebIdentity event with unparseable userName. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        event_time = event.get("EventTime")
        key = (github_repo, destination_principal)
        record = github_aggregated.get(key)

        if record is None:
            # First event seen for this pair
            github_aggregated[key] = {
                "source_repo_fullname": github_repo,
                "destination_principal_arn": destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
            }
            continue

        record["times_used"] += 1
        # Handle None values safely for time comparisons
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    # Return aggregated relationships directly
    return list(github_aggregated.values())
|
|
425
|
+
|
|
426
|
+
|
|
161
427
|
@timeit
|
|
162
428
|
def load_role_assumptions(
|
|
163
429
|
neo4j_session: neo4j.Session,
|
|
@@ -169,7 +435,7 @@ def load_role_assumptions(
|
|
|
169
435
|
Load aggregated role assumption relationships into Neo4j using MatchLink pattern.
|
|
170
436
|
|
|
171
437
|
Creates direct ASSUMED_ROLE relationships with aggregated properties:
|
|
172
|
-
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {
|
|
438
|
+
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {lastupdated, times_used, first_seen_in_time_window, last_used}]->(AWSRole)
|
|
173
439
|
|
|
174
440
|
Assumes that both source principals and destination roles already exist in the graph.
|
|
175
441
|
|
|
@@ -200,6 +466,90 @@ def load_role_assumptions(
|
|
|
200
466
|
)
|
|
201
467
|
|
|
202
468
|
|
|
469
|
+
@timeit
def load_saml_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated SAML role assumption relationships to Neo4j through the MatchLink pattern.

    The records are handed to ``load_matchlinks`` together with an ``AssumedRoleWithSAMLMatchLink``
    schema, which determines the relationship that gets created. The links are scoped to the
    ``AWSAccount`` sub-resource identified by ``current_aws_account_id`` and stamped with
    ``aws_update_tag`` as ``lastupdated``.

    NOTE(review): MatchLinks connect nodes that presumably already exist in the graph - confirm
    against the schema definition.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated SAML role assumption records from the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag for tracking data freshness
    :rtype: None
    """
    load_matchlinks(
        neo4j_session,
        AssumedRoleWithSAMLMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} SAML role assumption relationships"
    )
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
@timeit
def load_web_identity_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated WebIdentity role assumption relationships to Neo4j through the MatchLink pattern.

    The records are handed to ``load_matchlinks`` together with a
    ``GitHubRepoAssumeRoleWithWebIdentityMatchLink`` schema, which determines the relationship
    that gets created. The links are scoped to the ``AWSAccount`` sub-resource identified by
    ``current_aws_account_id`` and stamped with ``aws_update_tag`` as ``lastupdated``.

    NOTE(review): MatchLinks connect nodes that presumably already exist in the graph - confirm
    against the schema definition.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated WebIdentity role assumption records from the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag for tracking data freshness
    :rtype: None
    """
    load_matchlinks(
        neo4j_session,
        GitHubRepoAssumeRoleWithWebIdentityMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} WebIdentity role assumption relationships"
    )
|
|
551
|
+
|
|
552
|
+
|
|
203
553
|
def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
|
|
204
554
|
"""
|
|
205
555
|
Convert an assumed role ARN to the original role ARN.
|
|
@@ -224,6 +574,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
|
|
|
224
574
|
return assumed_role_arn
|
|
225
575
|
|
|
226
576
|
|
|
577
|
+
def _extract_github_repo_from_username(user_name: str) -> str:
|
|
578
|
+
"""
|
|
579
|
+
Extract GitHub repository fullname from CloudTrail userName field.
|
|
580
|
+
|
|
581
|
+
GitHub Actions CloudTrail events have userName in the format:
|
|
582
|
+
"repo:{organization}/{repository}:{context}"
|
|
583
|
+
"""
|
|
584
|
+
if not user_name:
|
|
585
|
+
return ""
|
|
586
|
+
|
|
587
|
+
parts = user_name.split(":")
|
|
588
|
+
|
|
589
|
+
# Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
|
|
590
|
+
if len(parts) < 3 or parts[0] != "repo":
|
|
591
|
+
return ""
|
|
592
|
+
|
|
593
|
+
# Extract "{organization}/{repository}"
|
|
594
|
+
repo_fullname = parts[1]
|
|
595
|
+
|
|
596
|
+
# Validate it looks like "{organization}/{repository}" format
|
|
597
|
+
if repo_fullname.count("/") != 1:
|
|
598
|
+
return ""
|
|
599
|
+
|
|
600
|
+
# Ensure both organization and repo exist
|
|
601
|
+
owner, repo = repo_fullname.split("/")
|
|
602
|
+
if not owner or not repo:
|
|
603
|
+
return ""
|
|
604
|
+
|
|
605
|
+
return repo_fullname
|
|
606
|
+
|
|
607
|
+
|
|
227
608
|
@timeit
|
|
228
609
|
def cleanup(
|
|
229
610
|
neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
|
|
@@ -270,7 +651,7 @@ def sync_assume_role_events(
|
|
|
270
651
|
4. Run cleanup after processing all regions
|
|
271
652
|
|
|
272
653
|
The resulting graph contains direct relationships like:
|
|
273
|
-
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used,
|
|
654
|
+
(AWSUser|AWSRole|AWSPrincipal)-[:ASSUMED_ROLE {times_used, first_seen_in_time_window, last_used, lastupdated}]->(AWSRole)
|
|
274
655
|
|
|
275
656
|
:type neo4j_session: neo4j.Session
|
|
276
657
|
:param neo4j_session: The Neo4j session
|
|
@@ -316,8 +697,6 @@ def sync_assume_role_events(
|
|
|
316
697
|
# Transform AssumeRole events to role assumptions
|
|
317
698
|
assume_role_assumptions = transform_assume_role_events_to_role_assumptions(
|
|
318
699
|
events=assume_role_events,
|
|
319
|
-
region=region,
|
|
320
|
-
current_aws_account_id=current_aws_account_id,
|
|
321
700
|
)
|
|
322
701
|
|
|
323
702
|
# Load AssumeRole assumptions for this region
|
|
@@ -341,6 +720,180 @@ def sync_assume_role_events(
|
|
|
341
720
|
)
|
|
342
721
|
|
|
343
722
|
|
|
723
|
+
@timeit
def sync_saml_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithSAML events, one region at a time.

    For each region the function:
    1. Fetches the AssumeRoleWithSAML events with ``get_saml_role_events``
    2. Aggregates them with ``transform_saml_role_events_to_role_assumptions``
    3. Loads the aggregated records with ``load_saml_role_assumptions``

    The whole sync is skipped when ``common_job_parameters`` carries no truthy
    ``aws_cloudtrail_management_events_lookback_hours`` value.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; the lookback period is read from here
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # No lookback period configured: nothing to fetch
    if not lookback_hours:
        logger.info(
            "CloudTrail SAML management events sync skipped - no lookback period specified"
        )
        return

    logger.info(
        f"Syncing SAML events for {len(regions)} regions with {lookback_hours} hour lookback period"
    )

    # Number of aggregated records loaded for each processed region
    loaded_per_region: List[int] = []

    for region in regions:
        logger.info(f"Processing CloudTrail SAML events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithSAML events specifically for region {region}"
        )

        # Fetch, then aggregate
        saml_role_assumptions = transform_saml_role_events_to_role_assumptions(
            events=get_saml_role_events(
                boto3_session=boto3_session,
                region=region,
                lookback_hours=lookback_hours,
            ),
        )

        # Load this region's records before moving on to the next region
        load_saml_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=saml_role_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )
        loaded_per_region.append(len(saml_role_assumptions))
        logger.info(
            f"Loaded {len(saml_role_assumptions)} SAML role assumptions for region {region}"
        )

    total_saml_role_assumptions = sum(loaded_per_region)
    logger.info(
        f"CloudTrail SAML management events sync completed successfully. "
        f"Processed {total_saml_role_assumptions} total SAML role assumption events across {len(regions)} regions."
    )
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
@timeit
def sync_web_identity_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRoleWithWebIdentity events, one region at a time.

    For each region the function:
    1. Fetches the AssumeRoleWithWebIdentity events with ``get_web_identity_role_events``
    2. Aggregates them with ``transform_web_identity_role_events_to_role_assumptions``
    3. Loads the aggregated records with ``load_web_identity_role_assumptions``

    The whole sync is skipped when ``common_job_parameters`` carries no truthy
    ``aws_cloudtrail_management_events_lookback_hours`` value.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; the lookback period is read from here
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # No lookback period configured: nothing to fetch
    if not lookback_hours:
        logger.info(
            "CloudTrail WebIdentity management events sync skipped - no lookback period specified"
        )
        return

    logger.info(
        f"Syncing WebIdentity events for {len(regions)} regions with {lookback_hours} hour lookback period"
    )

    # Number of aggregated records loaded for each processed region
    loaded_per_region: List[int] = []

    for region in regions:
        logger.info(f"Processing CloudTrail WebIdentity events for region {region}")
        logger.info(
            f"Fetching AssumeRoleWithWebIdentity events specifically for region {region}"
        )

        # Fetch, then aggregate
        web_identity_role_assumptions = (
            transform_web_identity_role_events_to_role_assumptions(
                events=get_web_identity_role_events(
                    boto3_session=boto3_session,
                    region=region,
                    lookback_hours=lookback_hours,
                ),
            )
        )

        # Load this region's records before moving on to the next region
        load_web_identity_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=web_identity_role_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )
        loaded_per_region.append(len(web_identity_role_assumptions))
        logger.info(
            f"Loaded {len(web_identity_role_assumptions)} WebIdentity role assumptions for region {region}"
        )

    total_web_identity_role_assumptions = sum(loaded_per_region)
    logger.info(
        f"CloudTrail WebIdentity management events sync completed successfully. "
        f"Processed {total_web_identity_role_assumptions} total WebIdentity role assumption events across {len(regions)} regions."
    )
|
|
895
|
+
|
|
896
|
+
|
|
344
897
|
# Main sync function for when we decide to add more event types
|
|
345
898
|
@timeit
|
|
346
899
|
def sync(
|
|
@@ -353,7 +906,11 @@ def sync(
|
|
|
353
906
|
) -> None:
|
|
354
907
|
"""
|
|
355
908
|
Main sync function for CloudTrail management events.
|
|
909
|
+
|
|
910
|
+
Syncs AssumeRole, AssumeRoleWithSAML, and AssumeRoleWithWebIdentity events to create separate
|
|
911
|
+
relationship types in the graph for security analysis.
|
|
356
912
|
"""
|
|
913
|
+
# Sync regular AssumeRole events
|
|
357
914
|
sync_assume_role_events(
|
|
358
915
|
neo4j_session=neo4j_session,
|
|
359
916
|
boto3_session=boto3_session,
|
|
@@ -362,3 +919,23 @@ def sync(
|
|
|
362
919
|
update_tag=update_tag,
|
|
363
920
|
common_job_parameters=common_job_parameters,
|
|
364
921
|
)
|
|
922
|
+
|
|
923
|
+
# Sync SAML AssumeRoleWithSAML events
|
|
924
|
+
sync_saml_role_events(
|
|
925
|
+
neo4j_session=neo4j_session,
|
|
926
|
+
boto3_session=boto3_session,
|
|
927
|
+
regions=regions,
|
|
928
|
+
current_aws_account_id=current_aws_account_id,
|
|
929
|
+
update_tag=update_tag,
|
|
930
|
+
common_job_parameters=common_job_parameters,
|
|
931
|
+
)
|
|
932
|
+
|
|
933
|
+
# Sync WebIdentity AssumeRoleWithWebIdentity events
|
|
934
|
+
sync_web_identity_role_events(
|
|
935
|
+
neo4j_session=neo4j_session,
|
|
936
|
+
boto3_session=boto3_session,
|
|
937
|
+
regions=regions,
|
|
938
|
+
current_aws_account_id=current_aws_account_id,
|
|
939
|
+
update_tag=update_tag,
|
|
940
|
+
common_job_parameters=common_job_parameters,
|
|
941
|
+
)
|