cartography 0.106.0rc2__py3-none-any.whl → 0.107.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +131 -2
- cartography/config.py +42 -0
- cartography/driftdetect/cli.py +3 -2
- cartography/intel/airbyte/__init__.py +105 -0
- cartography/intel/airbyte/connections.py +120 -0
- cartography/intel/airbyte/destinations.py +81 -0
- cartography/intel/airbyte/organizations.py +59 -0
- cartography/intel/airbyte/sources.py +78 -0
- cartography/intel/airbyte/tags.py +64 -0
- cartography/intel/airbyte/users.py +106 -0
- cartography/intel/airbyte/util.py +122 -0
- cartography/intel/airbyte/workspaces.py +63 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/cloudtrail_management_events.py +364 -0
- cartography/intel/aws/codebuild.py +132 -0
- cartography/intel/aws/inspector.py +77 -48
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/sns.py +62 -2
- cartography/intel/entra/users.py +84 -42
- cartography/intel/scaleway/__init__.py +127 -0
- cartography/intel/scaleway/iam/__init__.py +0 -0
- cartography/intel/scaleway/iam/apikeys.py +71 -0
- cartography/intel/scaleway/iam/applications.py +71 -0
- cartography/intel/scaleway/iam/groups.py +71 -0
- cartography/intel/scaleway/iam/users.py +71 -0
- cartography/intel/scaleway/instances/__init__.py +0 -0
- cartography/intel/scaleway/instances/flexibleips.py +86 -0
- cartography/intel/scaleway/instances/instances.py +92 -0
- cartography/intel/scaleway/projects.py +79 -0
- cartography/intel/scaleway/storage/__init__.py +0 -0
- cartography/intel/scaleway/storage/snapshots.py +86 -0
- cartography/intel/scaleway/storage/volumes.py +84 -0
- cartography/intel/scaleway/utils.py +37 -0
- cartography/intel/sentinelone/__init__.py +63 -0
- cartography/intel/sentinelone/account.py +140 -0
- cartography/intel/sentinelone/agent.py +139 -0
- cartography/intel/sentinelone/api.py +113 -0
- cartography/intel/sentinelone/utils.py +9 -0
- cartography/models/airbyte/__init__.py +0 -0
- cartography/models/airbyte/connection.py +138 -0
- cartography/models/airbyte/destination.py +75 -0
- cartography/models/airbyte/organization.py +19 -0
- cartography/models/airbyte/source.py +75 -0
- cartography/models/airbyte/stream.py +74 -0
- cartography/models/airbyte/tag.py +69 -0
- cartography/models/airbyte/user.py +111 -0
- cartography/models/airbyte/workspace.py +46 -0
- cartography/models/aws/cloudtrail/management_events.py +64 -0
- cartography/models/aws/codebuild/__init__.py +0 -0
- cartography/models/aws/codebuild/project.py +49 -0
- cartography/models/aws/ecs/containers.py +19 -0
- cartography/models/aws/ecs/task_definitions.py +38 -0
- cartography/models/aws/inspector/findings.py +37 -0
- cartography/models/aws/inspector/packages.py +1 -31
- cartography/models/aws/sns/topic_subscription.py +74 -0
- cartography/models/entra/user.py +17 -51
- cartography/models/scaleway/__init__.py +0 -0
- cartography/models/scaleway/iam/__init__.py +0 -0
- cartography/models/scaleway/iam/apikey.py +96 -0
- cartography/models/scaleway/iam/application.py +52 -0
- cartography/models/scaleway/iam/group.py +95 -0
- cartography/models/scaleway/iam/user.py +60 -0
- cartography/models/scaleway/instance/__init__.py +0 -0
- cartography/models/scaleway/instance/flexibleip.py +52 -0
- cartography/models/scaleway/instance/instance.py +118 -0
- cartography/models/scaleway/organization.py +19 -0
- cartography/models/scaleway/project.py +48 -0
- cartography/models/scaleway/storage/__init__.py +0 -0
- cartography/models/scaleway/storage/snapshot.py +78 -0
- cartography/models/scaleway/storage/volume.py +51 -0
- cartography/models/sentinelone/__init__.py +1 -0
- cartography/models/sentinelone/account.py +40 -0
- cartography/models/sentinelone/agent.py +50 -0
- cartography/sync.py +11 -4
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/METADATA +20 -16
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/RECORD +81 -21
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/WHEEL +0 -0
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/entry_points.txt +0 -0
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from datetime import timedelta
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing import Dict
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
import boto3
|
|
10
|
+
import neo4j
|
|
11
|
+
|
|
12
|
+
from cartography.client.core.tx import load_matchlinks
|
|
13
|
+
from cartography.graph.job import GraphJob
|
|
14
|
+
from cartography.intel.aws.ec2.util import get_botocore_config
|
|
15
|
+
from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
|
|
16
|
+
from cartography.util import aws_handle_regions
|
|
17
|
+
from cartography.util import timeit
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@timeit
@aws_handle_regions
def get_assume_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Fetch CloudTrail AssumeRole events from the specified time period.

    Only events whose EventName is exactly "AssumeRole" are requested, so the
    SAML and WebIdentity variants are not returned.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type region: str
    :param region: The AWS region to fetch events from
    :type lookback_hours: int
    :param lookback_hours: Number of hours back to retrieve events from
    :rtype: List[Dict[str, Any]]
    :return: List of CloudTrail AssumeRole events (at most 10000 per call)
    """
    # Function-scope import: `timezone` is not among the names this module
    # imports from `datetime`.
    from datetime import timezone

    client = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Calculate time range. datetime.utcnow() is deprecated and returns a naive
    # value; an explicit UTC-aware timestamp removes any timezone ambiguity.
    end_time = datetime.now(timezone.utc)
    start_time = end_time - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRole events for region '{region}' "
        f"from {start_time} to {end_time} ({lookback_hours} hours)"
    )

    paginator = client.get_paginator("lookup_events")

    page_iterator = paginator.paginate(
        LookupAttributes=[
            {"AttributeKey": "EventName", "AttributeValue": "AssumeRole"}
        ],
        StartTime=start_time,
        EndTime=end_time,
        PaginationConfig={
            "MaxItems": 10000,  # Reasonable limit to prevent excessive API calls
            "PageSize": 50,  # CloudTrail API limit per page
        },
    )

    all_events: List[Dict[str, Any]] = []
    for page in page_iterator:
        all_events.extend(page.get("Events", []))

    logger.info(f"Retrieved {len(all_events)} AssumeRole events from region '{region}'")

    return all_events
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@timeit
def transform_assume_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.

    Pipeline:
    1. Parse the JSON payload stored under each event's "CloudTrailEvent" key
    2. Read the calling principal (userIdentity.arn) and the target role
       (requestParameters.roleArn); events missing either one are skipped
    3. Normalize both ARNs with _convert_assumed_role_arn_to_role_arn
    4. Aggregate by (source_principal, destination_principal), counting uses and
       tracking the earliest and latest "EventTime" seen

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRole events from lookup_events API
    :type region: str
    :param region: The AWS region where events were retrieved from (used for logging only)
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced (not used by the transform)
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated role assumption relationships ready for loading
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions for region '{region}'"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # `or {}` also covers a key that is present but null in the event JSON.
        source_principal = (cloudtrail_event.get("userIdentity") or {}).get("arn")
        if not source_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRole event due to missing UserIdentity.arn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        # Skip (rather than raise) when the target role is absent, so a single
        # malformed event cannot abort the transform for the whole region.
        destination_principal = (
            cloudtrail_event.get("requestParameters") or {}
        ).get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRole event due to missing requestParameters.roleArn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        normalized_source_principal = _convert_assumed_role_arn_to_role_arn(
            source_principal
        )
        normalized_destination_principal = _convert_assumed_role_arn_to_role_arn(
            destination_principal
        )
        event_time = event.get("EventTime")

        key = (normalized_source_principal, normalized_destination_principal)
        record = aggregated.get(key)

        if record is None:
            # First event seen for this (source, destination) pair.
            aggregated[key] = {
                "source_principal_arn": normalized_source_principal,
                "destination_principal_arn": normalized_destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
                "event_types": ["AssumeRole"],
                "assume_role_count": 1,
                "saml_count": 0,
                "web_identity_count": 0,
            }
            continue

        record["times_used"] += 1
        record["assume_role_count"] += 1  # All events are AssumeRole

        # Handle None values safely for time comparisons
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(aggregated.values())
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@timeit
def load_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated role assumption records to Neo4j via the MatchLink loader.

    The records are handed to load_matchlinks together with an
    AssumedRoleMatchLink schema, scoped to the AWSAccount identified by
    current_aws_account_id. The schema itself is defined outside this module;
    presumably it links principals that already exist in the graph rather than
    creating nodes - confirm against AssumedRoleMatchLink.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated records produced by the transform function
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Timestamp tag passed to the loader as `lastupdated`
    :rtype: None
    """
    load_matchlinks(
        neo4j_session,
        AssumedRoleMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(f"Successfully loaded {loaded_count} role assumption relationships")
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
|
|
204
|
+
"""
|
|
205
|
+
Convert an assumed role ARN to the original role ARN.
|
|
206
|
+
|
|
207
|
+
Example:
|
|
208
|
+
Input: "arn:aws:sts::123456789012:assumed-role/MyRole/session-name"
|
|
209
|
+
Output: "arn:aws:iam::123456789012:role/MyRole"
|
|
210
|
+
"""
|
|
211
|
+
|
|
212
|
+
# Split the ARN into parts
|
|
213
|
+
arn_parts = assumed_role_arn.split(":")
|
|
214
|
+
if len(arn_parts) >= 6 and arn_parts[2] == "sts" and "assumed-role" in arn_parts[5]:
|
|
215
|
+
# Extract account ID and role name
|
|
216
|
+
account_id = arn_parts[4]
|
|
217
|
+
resource_part = arn_parts[5] # "assumed-role/MyRole/session-name"
|
|
218
|
+
role_name = resource_part.split("/")[1] # Extract "MyRole"
|
|
219
|
+
|
|
220
|
+
# Construct the IAM role ARN
|
|
221
|
+
return f"arn:aws:iam::{account_id}:role/{role_name}"
|
|
222
|
+
|
|
223
|
+
# Return original ARN if conversion fails
|
|
224
|
+
return assumed_role_arn
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@timeit
def cleanup(
    neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
) -> None:
    """
    Run the cleanup job for CloudTrail management event relationships.

    Builds a GraphJob from the AssumedRoleMatchLink schema, scoped to the
    AWSAccount identified by current_aws_account_id, and runs it on the
    given session.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :rtype: None
    """
    logger.info("Running CloudTrail management events cleanup job.")

    GraphJob.from_matchlink(
        AssumedRoleMatchLink(),
        "AWSAccount",
        current_aws_account_id,
        update_tag,
    ).run(neo4j_session)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
@timeit
def sync_assume_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRole events into role assumption relationships.

    Steps:
    1. Read the lookback window from common_job_parameters; return early
       (doing nothing) when it is missing or falsy
    2. For every region: fetch AssumeRole events, aggregate them, load them
    3. Run cleanup once, after all regions have been processed

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Timestamp tag for tracking data freshness
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; the key
        "aws_cloudtrail_management_events_lookback_hours" enables this sync
    :rtype: None
    """
    # Extract lookback hours from common_job_parameters (set by CLI parameter)
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )
    if not lookback_hours:
        logger.info(
            "CloudTrail management events sync skipped - no lookback period specified"
        )
        return

    region_count = len(regions)
    logger.info(
        f"Syncing {region_count} regions with {lookback_hours} hour lookback period"
    )

    # Number of aggregated relationships loaded for each processed region.
    loaded_per_region: List[int] = []

    for region in regions:
        logger.info(f"Processing CloudTrail events for region {region}")
        logger.info(f"Fetching AssumeRole events specifically for region {region}")

        raw_events = get_assume_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        role_assumptions = transform_assume_role_events_to_role_assumptions(
            events=raw_events,
            region=region,
            current_aws_account_id=current_aws_account_id,
        )
        load_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=role_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_per_region.append(len(role_assumptions))
        logger.info(
            f"Loaded {loaded_per_region[-1]} AssumeRole assumptions for region {region}"
        )

    # Stale relationships are removed only once every region has been loaded.
    cleanup(neo4j_session, current_aws_account_id, update_tag)

    logger.info(
        f"CloudTrail management events sync completed successfully. "
        f"Processed {sum(loaded_per_region)} total role assumption events across {region_count} regions."
    )
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# Main sync function for when we decide to add more event types
|
|
345
|
+
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Entry point for the CloudTrail management events sync.

    Currently forwards every argument, unchanged, to sync_assume_role_events.
    """
    sync_assume_role_events(
        neo4j_session,
        boto3_session,
        regions,
        current_aws_account_id,
        update_tag,
        common_job_parameters,
    )
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
import boto3
|
|
7
|
+
import neo4j
|
|
8
|
+
|
|
9
|
+
from cartography.client.core.tx import load
|
|
10
|
+
from cartography.graph.job import GraphJob
|
|
11
|
+
from cartography.intel.aws.ec2.util import get_botocore_config
|
|
12
|
+
from cartography.models.aws.codebuild.project import CodeBuildProjectSchema
|
|
13
|
+
from cartography.util import aws_handle_regions
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@timeit
@aws_handle_regions
def get_all_codebuild_projects(
    boto3_session: boto3.Session, region: str
) -> List[Dict[str, Any]]:
    """
    Return the full description of every CodeBuild project in a region.

    Project names are listed page by page with the "list_projects" paginator,
    and each page of names is resolved to project details with
    batch_get_projects, in slices of at most 100 names.
    """
    codebuild = boto3_session.client(
        "codebuild", region_name=region, config=get_botocore_config()
    )
    name_pages = codebuild.get_paginator("list_projects").paginate()

    # AWS batch_get_projects accepts up to 100 project names per call as per AWS documentation.
    batch_size = 100

    collected = []
    for page in name_pages:
        names = page.get("projects", [])
        if names:
            for offset in range(0, len(names), batch_size):
                details = codebuild.batch_get_projects(
                    names=names[offset : offset + batch_size]
                )
                collected.extend(details.get("projects", []))
    return collected
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def transform_codebuild_projects(
    projects: List[Dict[str, Any]], region: str
) -> List[Dict[str, Any]]:
    """
    Reshape CodeBuild project descriptions into flat records for Neo4j.

    Every environment variable is rendered as a "NAME=value" string:
    - 'PLAINTEXT' variables keep their value.
    - Any other type (e.g., 'PARAMETER_STORE', 'SECRETS_MANAGER') is rendered
      with '<REDACTED>' in place of the value.

    The `region` argument is accepted but not used by the transformation.
    """

    def _render_env_var(var: Dict[str, Any]) -> str:
        # Only plaintext values are emitted; everything else is masked.
        if var.get("type") == "PLAINTEXT":
            shown = var.get("value")
        else:
            shown = "<REDACTED>"
        return f"{var.get('name')}={shown}"

    def _to_record(project: Dict[str, Any]) -> Dict[str, Any]:
        rendered_vars = [
            _render_env_var(var)
            for var in project.get("environment", {}).get("environmentVariables", [])
        ]
        return {
            "arn": project["arn"],
            "created": project.get("created"),
            "environmentVariables": rendered_vars,
            "sourceType": project.get("source", {}).get("type"),
            "sourceLocation": project.get("source", {}).get("location"),
        }

    return [_to_record(project) for project in projects]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@timeit
def load_codebuild_projects(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Load transformed CodeBuild project records into the graph.

    The records are passed to `load` with the CodeBuildProjectSchema, along
    with the update tag, region and account id as loader keyword arguments.
    """
    project_count = len(data)
    logger.info(
        f"Loading CodeBuild {project_count} projects for region '{region}' into graph."
    )

    # Keyword arguments forwarded to the loader alongside the records.
    load_kwargs = {
        "lastupdated": aws_update_tag,
        "Region": region,
        "AWS_ID": current_aws_account_id,
    }
    load(neo4j_session, CodeBuildProjectSchema(), data, **load_kwargs)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Run the cleanup job derived from the CodeBuildProjectSchema.

    :param neo4j_session: The Neo4j session the job runs on
    :param common_job_parameters: Parameters passed to GraphJob.from_node_schema
    """
    # Name the service this job actually cleans up; the message used to say "Efs".
    logger.debug("Running CodeBuild cleanup job.")
    GraphJob.from_node_schema(CodeBuildProjectSchema(), common_job_parameters).run(
        neo4j_session
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CodeBuild projects for every region, then run cleanup once.

    For each region the projects are fetched, transformed and loaded, in that
    order; cleanup runs after the last region.
    """
    for region in regions:
        logger.info(
            f"Syncing CodeBuild for region '{region}' in account '{current_aws_account_id}'.",
        )

        # fetch -> transform -> load, chained without intermediate names
        load_codebuild_projects(
            neo4j_session,
            transform_codebuild_projects(
                get_all_codebuild_projects(boto3_session, region),
                region,
            ),
            region,
            current_aws_account_id,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)
|
|
@@ -3,14 +3,17 @@ from typing import Any
|
|
|
3
3
|
from typing import Dict
|
|
4
4
|
from typing import Iterator
|
|
5
5
|
from typing import List
|
|
6
|
+
from typing import Set
|
|
6
7
|
from typing import Tuple
|
|
7
8
|
|
|
8
9
|
import boto3
|
|
9
10
|
import neo4j
|
|
10
11
|
|
|
11
12
|
from cartography.client.core.tx import load
|
|
13
|
+
from cartography.client.core.tx import load_matchlinks
|
|
12
14
|
from cartography.graph.job import GraphJob
|
|
13
15
|
from cartography.models.aws.inspector.findings import AWSInspectorFindingSchema
|
|
16
|
+
from cartography.models.aws.inspector.findings import InspectorFindingToPackageMatchLink
|
|
14
17
|
from cartography.models.aws.inspector.packages import AWSInspectorPackageSchema
|
|
15
18
|
from cartography.util import aws_handle_regions
|
|
16
19
|
from cartography.util import aws_paginate
|
|
@@ -107,9 +110,10 @@ def get_inspector_findings(
|
|
|
107
110
|
|
|
108
111
|
def transform_inspector_findings(
|
|
109
112
|
results: List[Dict[str, Any]],
|
|
110
|
-
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
|
113
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, str]]]:
|
|
111
114
|
findings_list: List[Dict] = []
|
|
112
|
-
|
|
115
|
+
packages_set: Set[frozenset] = set()
|
|
116
|
+
finding_to_package_map: List[Dict[str, str]] = []
|
|
113
117
|
|
|
114
118
|
for f in results:
|
|
115
119
|
finding: Dict = {}
|
|
@@ -163,55 +167,45 @@ def transform_inspector_findings(
|
|
|
163
167
|
"vendorUpdatedAt",
|
|
164
168
|
)
|
|
165
169
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
170
|
+
packages = transform_inspector_packages(f["packageVulnerabilityDetails"])
|
|
171
|
+
finding["vulnerablepackageids"] = list(packages.keys())
|
|
172
|
+
for package_id, package in packages.items():
|
|
173
|
+
finding_to_package_map.append(
|
|
174
|
+
{
|
|
175
|
+
"findingarn": finding["id"],
|
|
176
|
+
"packageid": package_id,
|
|
177
|
+
"remediation": package.get("remediation"),
|
|
178
|
+
"fixedInVersion": package.get("fixedInVersion"),
|
|
179
|
+
"filePath": package.get("filePath"),
|
|
180
|
+
"sourceLayerHash": package.get("sourceLayerHash"),
|
|
181
|
+
"sourceLambdaLayerArn": package.get("sourceLambdaLayerArn"),
|
|
182
|
+
}
|
|
183
|
+
)
|
|
184
|
+
packages_set.add(frozenset(package.items()))
|
|
174
185
|
findings_list.append(finding)
|
|
175
|
-
packages_list =
|
|
176
|
-
return findings_list, packages_list
|
|
177
|
-
|
|
186
|
+
packages_list = [dict(p) for p in packages_set]
|
|
187
|
+
return findings_list, packages_list, finding_to_package_map
|
|
178
188
|
|
|
179
|
-
def transform_inspector_packages(packages: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
180
|
-
packages_list: List[Dict] = []
|
|
181
|
-
for package_id in packages.keys():
|
|
182
|
-
packages_list.append(packages[package_id])
|
|
183
189
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def _process_packages(
|
|
190
|
+
def transform_inspector_packages(
    package_details: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Index the vulnerable packages of one finding by a synthetic package id.

    The id has the form "<name>|<epoch>:<version>-<release>.<arch>". The
    "<epoch>:", "-<release>" and ".<arch>" parts are left out when the
    corresponding field is missing or empty.

    :param package_details: Mapping with a "vulnerablePackages" list; each
        entry must contain "name" and "version"
    :return: Mapping of package id to a copy of the package dict with an
        added "id" key
    :raises KeyError: if "vulnerablePackages", "name" or "version" is missing
    """
    packages: Dict[str, Any] = {}
    for package in package_details["vulnerablePackages"]:
        # Following RPM package naming convention for consistency
        name = package["name"]  # Mandatory field
        # str() is applied before the emptiness test, so an epoch of 0 is kept.
        epoch = str(package.get("epoch", ""))
        version = package["version"]  # Mandatory field
        release = package.get("release", "")
        arch = package.get("arch", "")

        epoch_part = f"{epoch}:" if epoch else ""
        release_part = f"-{release}" if release else ""
        arch_part = f".{arch}" if arch else ""

        # Named `package_id` so the builtin `id` is not shadowed.
        package_id = f"{name}|{epoch_part}{version}{release_part}{arch_part}"
        packages[package_id] = {**package, "id": package_id}

    return packages
|
|
217
211
|
|
|
@@ -244,7 +238,6 @@ def load_inspector_findings(
|
|
|
244
238
|
def load_inspector_packages(
|
|
245
239
|
neo4j_session: neo4j.Session,
|
|
246
240
|
packages: List[Dict[str, Any]],
|
|
247
|
-
region: str,
|
|
248
241
|
aws_update_tag: int,
|
|
249
242
|
current_aws_account_id: str,
|
|
250
243
|
) -> None:
|
|
@@ -252,12 +245,28 @@ def load_inspector_packages(
|
|
|
252
245
|
neo4j_session,
|
|
253
246
|
AWSInspectorPackageSchema(),
|
|
254
247
|
packages,
|
|
255
|
-
Region=region,
|
|
256
248
|
AWS_ID=current_aws_account_id,
|
|
257
249
|
lastupdated=aws_update_tag,
|
|
258
250
|
)
|
|
259
251
|
|
|
260
252
|
|
|
253
|
+
@timeit
def load_inspector_finding_to_package_match_links(
    neo4j_session: neo4j.Session,
    finding_to_package_map: List[Dict[str, str]],
    aws_update_tag: int,
    current_aws_account_id: str,
) -> None:
    """
    Load finding-to-package link records through the MatchLink loader.

    The records are passed to load_matchlinks with the
    InspectorFindingToPackageMatchLink schema, scoped to the AWSAccount
    identified by current_aws_account_id.
    """
    # Sub-resource scope under which the loader receives these links.
    account_scope = {
        "_sub_resource_label": "AWSAccount",
        "_sub_resource_id": current_aws_account_id,
    }
    load_matchlinks(
        neo4j_session,
        InspectorFindingToPackageMatchLink(),
        finding_to_package_map,
        lastupdated=aws_update_tag,
        **account_scope,
    )
|
|
268
|
+
|
|
269
|
+
|
|
261
270
|
@timeit
|
|
262
271
|
def cleanup(
|
|
263
272
|
neo4j_session: neo4j.Session,
|
|
@@ -270,6 +279,14 @@ def cleanup(
|
|
|
270
279
|
GraphJob.from_node_schema(AWSInspectorPackageSchema(), common_job_parameters).run(
|
|
271
280
|
neo4j_session,
|
|
272
281
|
)
|
|
282
|
+
GraphJob.from_matchlink(
|
|
283
|
+
InspectorFindingToPackageMatchLink(),
|
|
284
|
+
"AWSAccount",
|
|
285
|
+
common_job_parameters["ACCOUNT_ID"],
|
|
286
|
+
common_job_parameters["UPDATE_TAG"],
|
|
287
|
+
).run(
|
|
288
|
+
neo4j_session,
|
|
289
|
+
)
|
|
273
290
|
|
|
274
291
|
|
|
275
292
|
def _sync_findings_for_account(
|
|
@@ -288,7 +305,9 @@ def _sync_findings_for_account(
|
|
|
288
305
|
logger.info(f"No findings to sync for account {account_id} in region {region}")
|
|
289
306
|
return
|
|
290
307
|
for f_batch in findings:
|
|
291
|
-
finding_data, package_data =
|
|
308
|
+
finding_data, package_data, finding_to_package_map = (
|
|
309
|
+
transform_inspector_findings(f_batch)
|
|
310
|
+
)
|
|
292
311
|
logger.info(f"Loading {len(finding_data)} findings from account {account_id}")
|
|
293
312
|
load_inspector_findings(
|
|
294
313
|
neo4j_session,
|
|
@@ -301,7 +320,15 @@ def _sync_findings_for_account(
|
|
|
301
320
|
load_inspector_packages(
|
|
302
321
|
neo4j_session,
|
|
303
322
|
package_data,
|
|
304
|
-
|
|
323
|
+
update_tag,
|
|
324
|
+
current_aws_account_id,
|
|
325
|
+
)
|
|
326
|
+
logger.info(
|
|
327
|
+
f"Loading {len(finding_to_package_map)} finding to package relationships"
|
|
328
|
+
)
|
|
329
|
+
load_inspector_finding_to_package_match_links(
|
|
330
|
+
neo4j_session,
|
|
331
|
+
finding_to_package_map,
|
|
305
332
|
update_tag,
|
|
306
333
|
current_aws_account_id,
|
|
307
334
|
)
|
|
@@ -337,5 +364,7 @@ def sync(
|
|
|
337
364
|
update_tag,
|
|
338
365
|
current_aws_account_id,
|
|
339
366
|
)
|
|
367
|
+
common_job_parameters["ACCOUNT_ID"] = current_aws_account_id
|
|
368
|
+
common_job_parameters["UPDATE_TAG"] = update_tag
|
|
340
369
|
|
|
341
370
|
cleanup(neo4j_session, common_job_parameters)
|