cartography 0.106.0rc2__py3-none-any.whl → 0.107.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (81) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +131 -2
  3. cartography/config.py +42 -0
  4. cartography/driftdetect/cli.py +3 -2
  5. cartography/intel/airbyte/__init__.py +105 -0
  6. cartography/intel/airbyte/connections.py +120 -0
  7. cartography/intel/airbyte/destinations.py +81 -0
  8. cartography/intel/airbyte/organizations.py +59 -0
  9. cartography/intel/airbyte/sources.py +78 -0
  10. cartography/intel/airbyte/tags.py +64 -0
  11. cartography/intel/airbyte/users.py +106 -0
  12. cartography/intel/airbyte/util.py +122 -0
  13. cartography/intel/airbyte/workspaces.py +63 -0
  14. cartography/intel/aws/__init__.py +1 -0
  15. cartography/intel/aws/cloudtrail_management_events.py +364 -0
  16. cartography/intel/aws/codebuild.py +132 -0
  17. cartography/intel/aws/inspector.py +77 -48
  18. cartography/intel/aws/resources.py +4 -0
  19. cartography/intel/aws/sns.py +62 -2
  20. cartography/intel/entra/users.py +84 -42
  21. cartography/intel/scaleway/__init__.py +127 -0
  22. cartography/intel/scaleway/iam/__init__.py +0 -0
  23. cartography/intel/scaleway/iam/apikeys.py +71 -0
  24. cartography/intel/scaleway/iam/applications.py +71 -0
  25. cartography/intel/scaleway/iam/groups.py +71 -0
  26. cartography/intel/scaleway/iam/users.py +71 -0
  27. cartography/intel/scaleway/instances/__init__.py +0 -0
  28. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  29. cartography/intel/scaleway/instances/instances.py +92 -0
  30. cartography/intel/scaleway/projects.py +79 -0
  31. cartography/intel/scaleway/storage/__init__.py +0 -0
  32. cartography/intel/scaleway/storage/snapshots.py +86 -0
  33. cartography/intel/scaleway/storage/volumes.py +84 -0
  34. cartography/intel/scaleway/utils.py +37 -0
  35. cartography/intel/sentinelone/__init__.py +63 -0
  36. cartography/intel/sentinelone/account.py +140 -0
  37. cartography/intel/sentinelone/agent.py +139 -0
  38. cartography/intel/sentinelone/api.py +113 -0
  39. cartography/intel/sentinelone/utils.py +9 -0
  40. cartography/models/airbyte/__init__.py +0 -0
  41. cartography/models/airbyte/connection.py +138 -0
  42. cartography/models/airbyte/destination.py +75 -0
  43. cartography/models/airbyte/organization.py +19 -0
  44. cartography/models/airbyte/source.py +75 -0
  45. cartography/models/airbyte/stream.py +74 -0
  46. cartography/models/airbyte/tag.py +69 -0
  47. cartography/models/airbyte/user.py +111 -0
  48. cartography/models/airbyte/workspace.py +46 -0
  49. cartography/models/aws/cloudtrail/management_events.py +64 -0
  50. cartography/models/aws/codebuild/__init__.py +0 -0
  51. cartography/models/aws/codebuild/project.py +49 -0
  52. cartography/models/aws/ecs/containers.py +19 -0
  53. cartography/models/aws/ecs/task_definitions.py +38 -0
  54. cartography/models/aws/inspector/findings.py +37 -0
  55. cartography/models/aws/inspector/packages.py +1 -31
  56. cartography/models/aws/sns/topic_subscription.py +74 -0
  57. cartography/models/entra/user.py +17 -51
  58. cartography/models/scaleway/__init__.py +0 -0
  59. cartography/models/scaleway/iam/__init__.py +0 -0
  60. cartography/models/scaleway/iam/apikey.py +96 -0
  61. cartography/models/scaleway/iam/application.py +52 -0
  62. cartography/models/scaleway/iam/group.py +95 -0
  63. cartography/models/scaleway/iam/user.py +60 -0
  64. cartography/models/scaleway/instance/__init__.py +0 -0
  65. cartography/models/scaleway/instance/flexibleip.py +52 -0
  66. cartography/models/scaleway/instance/instance.py +118 -0
  67. cartography/models/scaleway/organization.py +19 -0
  68. cartography/models/scaleway/project.py +48 -0
  69. cartography/models/scaleway/storage/__init__.py +0 -0
  70. cartography/models/scaleway/storage/snapshot.py +78 -0
  71. cartography/models/scaleway/storage/volume.py +51 -0
  72. cartography/models/sentinelone/__init__.py +1 -0
  73. cartography/models/sentinelone/account.py +40 -0
  74. cartography/models/sentinelone/agent.py +50 -0
  75. cartography/sync.py +11 -4
  76. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/METADATA +20 -16
  77. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/RECORD +81 -21
  78. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/WHEEL +0 -0
  79. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/entry_points.txt +0 -0
  80. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/licenses/LICENSE +0 -0
  81. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
1
+ import json
2
+ import logging
3
+ from datetime import datetime
4
+ from datetime import timedelta
5
+ from typing import Any
6
+ from typing import Dict
7
+ from typing import List
8
+
9
+ import boto3
10
+ import neo4j
11
+
12
+ from cartography.client.core.tx import load_matchlinks
13
+ from cartography.graph.job import GraphJob
14
+ from cartography.intel.aws.ec2.util import get_botocore_config
15
+ from cartography.models.aws.cloudtrail.management_events import AssumedRoleMatchLink
16
+ from cartography.util import aws_handle_regions
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@timeit
@aws_handle_regions
def get_assume_role_events(
    boto3_session: boto3.Session, region: str, lookback_hours: int
) -> List[Dict[str, Any]]:
    """
    Fetch CloudTrail AssumeRole events from the specified time period.

    Only events whose EventName is exactly "AssumeRole" are requested, so the
    SAML and WebIdentity variants are not returned. The lookup window ends at
    the current UTC time and starts ``lookback_hours`` earlier. At most
    10000 events are collected; a warning is logged when that cap is reached
    because older/newer events in the window may have been left out.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type region: str
    :param region: The AWS region to fetch events from
    :type lookback_hours: int
    :param lookback_hours: Number of hours back to retrieve events from
    :rtype: List[Dict[str, Any]]
    :return: List of CloudTrail AssumeRole events
    """
    # Imported here so the function is self-contained with respect to the
    # module-level import block.
    from datetime import timezone

    max_items = 10000  # Reasonable limit to prevent excessive API calls
    page_size = 50  # CloudTrail API limit per page

    client = boto3_session.client(
        "cloudtrail", region_name=region, config=get_botocore_config()
    )

    # Calculate time range with a timezone-aware timestamp; datetime.utcnow()
    # is deprecated and returns a naive value.
    end_time = datetime.now(timezone.utc)
    start_time = end_time - timedelta(hours=lookback_hours)

    logger.info(
        f"Fetching CloudTrail AssumeRole events for region '{region}' "
        f"from {start_time} to {end_time} ({lookback_hours} hours)"
    )

    paginator = client.get_paginator("lookup_events")

    page_iterator = paginator.paginate(
        LookupAttributes=[
            {"AttributeKey": "EventName", "AttributeValue": "AssumeRole"}
        ],
        StartTime=start_time,
        EndTime=end_time,
        PaginationConfig={
            "MaxItems": max_items,
            "PageSize": page_size,
        },
    )

    all_events: List[Dict[str, Any]] = []
    for page in page_iterator:
        all_events.extend(page.get("Events", []))

    if len(all_events) >= max_items:
        # Make the truncation visible instead of silently dropping events.
        logger.warning(
            f"Reached the limit of {max_items} AssumeRole events for region '{region}'; "
            "results for this time window may be incomplete"
        )

    logger.info(f"Retrieved {len(all_events)} AssumeRole events from region '{region}'")

    return all_events
75
+
76
+
77
@timeit
def transform_assume_role_events_to_role_assumptions(
    events: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
) -> List[Dict[str, Any]]:
    """
    Transform raw CloudTrail AssumeRole events into aggregated role assumption relationships.

    For every event the embedded ``CloudTrailEvent`` JSON document is parsed,
    the caller ARN (``userIdentity.arn``) and the target role ARN
    (``requestParameters.roleArn``) are extracted, both are normalized with
    ``_convert_assumed_role_arn_to_role_arn`` and the events are aggregated by
    (source_principal, destination_principal) pair.

    Events that lack either ARN (including events whose ``userIdentity`` or
    ``requestParameters`` is null) are skipped with a debug log entry rather
    than aborting the whole batch.

    Each returned dict has the keys ``source_principal_arn``,
    ``destination_principal_arn``, ``times_used``, ``first_seen_in_time_window``,
    ``last_used``, ``event_types``, ``assume_role_count``, ``saml_count`` and
    ``web_identity_count``.

    :type events: List[Dict[str, Any]]
    :param events: List of raw CloudTrail AssumeRole events from lookup_events API
    :type region: str
    :param region: The AWS region where events were retrieved from (used for logging only)
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced (not used by the transformation)
    :rtype: List[Dict[str, Any]]
    :return: List of aggregated role assumption relationships ready for loading
    """
    aggregated: Dict[tuple, Dict[str, Any]] = {}
    logger.info(
        f"Transforming {len(events)} CloudTrail AssumeRole events to role assumptions for region '{region}'"
    )

    for event in events:
        cloudtrail_event = json.loads(event["CloudTrailEvent"])

        # `or {}` covers both a missing key and an explicit JSON null.
        source_principal = (cloudtrail_event.get("userIdentity") or {}).get("arn")
        if not source_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRole event due to missing UserIdentity.arn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        destination_principal = (
            cloudtrail_event.get("requestParameters") or {}
        ).get("roleArn")
        if not destination_principal:
            logger.debug(
                f"Skipping CloudTrail AssumeRole event due to missing requestParameters.roleArn. Event: {event.get('EventId', 'unknown')}"
            )
            continue

        normalized_source_principal = _convert_assumed_role_arn_to_role_arn(
            source_principal
        )
        normalized_destination_principal = _convert_assumed_role_arn_to_role_arn(
            destination_principal
        )
        event_time = event.get("EventTime")

        key = (normalized_source_principal, normalized_destination_principal)
        record = aggregated.get(key)

        if record is None:
            aggregated[key] = {
                "source_principal_arn": normalized_source_principal,
                "destination_principal_arn": normalized_destination_principal,
                "times_used": 1,
                "first_seen_in_time_window": event_time,
                "last_used": event_time,
                "event_types": ["AssumeRole"],
                "assume_role_count": 1,
                "saml_count": 0,
                "web_identity_count": 0,
            }
            continue

        record["times_used"] += 1
        record["assume_role_count"] += 1  # All events are AssumeRole
        # Handle None values safely for time comparisons
        if event_time:
            existing_first = record["first_seen_in_time_window"]
            existing_last = record["last_used"]

            if existing_first is None or event_time < existing_first:
                record["first_seen_in_time_window"] = event_time
            if existing_last is None or event_time > existing_last:
                record["last_used"] = event_time

    return list(aggregated.values())
159
+
160
+
161
@timeit
def load_role_assumptions(
    neo4j_session: neo4j.Session,
    aggregated_role_assumptions: List[Dict[str, Any]],
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write aggregated role assumptions to Neo4j through the MatchLink loader.

    The records are handed to ``load_matchlinks`` together with an
    ``AssumedRoleMatchLink`` schema, scoped to the ``AWSAccount`` sub-resource
    identified by ``current_aws_account_id``. MatchLinks connect nodes that are
    expected to exist in the graph already; no nodes are created here.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type aggregated_role_assumptions: List[Dict[str, Any]]
    :param aggregated_role_assumptions: Aggregated role assumption records produced by the transform step
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type aws_update_tag: int
    :param aws_update_tag: Update tag passed to the loader as ``lastupdated``
    :rtype: None
    """
    load_matchlinks(
        neo4j_session,
        AssumedRoleMatchLink(),
        aggregated_role_assumptions,
        lastupdated=aws_update_tag,
        _sub_resource_label="AWSAccount",
        _sub_resource_id=current_aws_account_id,
    )

    loaded_count = len(aggregated_role_assumptions)
    logger.info(
        f"Successfully loaded {loaded_count} role assumption relationships"
    )
201
+
202
+
203
+ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
204
+ """
205
+ Convert an assumed role ARN to the original role ARN.
206
+
207
+ Example:
208
+ Input: "arn:aws:sts::123456789012:assumed-role/MyRole/session-name"
209
+ Output: "arn:aws:iam::123456789012:role/MyRole"
210
+ """
211
+
212
+ # Split the ARN into parts
213
+ arn_parts = assumed_role_arn.split(":")
214
+ if len(arn_parts) >= 6 and arn_parts[2] == "sts" and "assumed-role" in arn_parts[5]:
215
+ # Extract account ID and role name
216
+ account_id = arn_parts[4]
217
+ resource_part = arn_parts[5] # "assumed-role/MyRole/session-name"
218
+ role_name = resource_part.split("/")[1] # Extract "MyRole"
219
+
220
+ # Construct the IAM role ARN
221
+ return f"arn:aws:iam::{account_id}:role/{role_name}"
222
+
223
+ # Return original ARN if conversion fails
224
+ return assumed_role_arn
225
+
226
+
227
@timeit
def cleanup(
    neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
) -> None:
    """
    Run the cleanup job for relationships described by ``AssumedRoleMatchLink``.

    Builds a ``GraphJob`` from the MatchLink schema, scoped to the
    ``AWSAccount`` identified by ``current_aws_account_id`` and the given
    update tag, and executes it on the supplied session.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session to use for database operations
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Update tag of the current sync run
    :rtype: None
    """
    logger.info("Running CloudTrail management events cleanup job.")

    GraphJob.from_matchlink(
        AssumedRoleMatchLink(),
        "AWSAccount",
        current_aws_account_id,
        update_tag,
    ).run(neo4j_session)
252
+
253
+
254
@timeit
def sync_assume_role_events(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CloudTrail AssumeRole events into role assumption relationships.

    The lookback window is read from
    ``common_job_parameters["aws_cloudtrail_management_events_lookback_hours"]``;
    when it is missing or falsy the sync is skipped entirely (no fetch, no
    load, no cleanup). Otherwise, for every region in turn:

    1. Fetch the AssumeRole events for the lookback window
    2. Transform them into aggregated role assumption records
    3. Load those records into Neo4j

    After all regions have been processed the cleanup job runs once.

    :type neo4j_session: neo4j.Session
    :param neo4j_session: The Neo4j session
    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type regions: List[str]
    :param regions: List of AWS regions to sync
    :type current_aws_account_id: str
    :param current_aws_account_id: The AWS account ID being synced
    :type update_tag: int
    :param update_tag: Update tag of the current sync run
    :type common_job_parameters: Dict[str, Any]
    :param common_job_parameters: Job parameters; supplies the lookback hours
    :rtype: None
    """
    lookback_hours = common_job_parameters.get(
        "aws_cloudtrail_management_events_lookback_hours"
    )

    # Guard clause: the feature is opt-in through the lookback setting.
    if not lookback_hours:
        logger.info(
            "CloudTrail management events sync skipped - no lookback period specified"
        )
        return

    logger.info(
        f"Syncing {len(regions)} regions with {lookback_hours} hour lookback period"
    )

    loaded_total = 0

    for region in regions:
        logger.info(f"Processing CloudTrail events for region {region}")
        logger.info(f"Fetching AssumeRole events specifically for region {region}")

        raw_events = get_assume_role_events(
            boto3_session=boto3_session,
            region=region,
            lookback_hours=lookback_hours,
        )
        region_assumptions = transform_assume_role_events_to_role_assumptions(
            events=raw_events,
            region=region,
            current_aws_account_id=current_aws_account_id,
        )
        load_role_assumptions(
            neo4j_session=neo4j_session,
            aggregated_role_assumptions=region_assumptions,
            current_aws_account_id=current_aws_account_id,
            aws_update_tag=update_tag,
        )

        loaded_in_region = len(region_assumptions)
        loaded_total += loaded_in_region
        logger.info(
            f"Loaded {loaded_in_region} AssumeRole assumptions for region {region}"
        )

    # Stale relationships are removed only once every region has been loaded.
    cleanup(neo4j_session, current_aws_account_id, update_tag)

    logger.info(
        f"CloudTrail management events sync completed successfully. "
        f"Processed {loaded_total} total role assumption events across {len(regions)} regions."
    )
342
+
343
+
344
+ # Main sync function for when we decide to add more event types
345
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Entry point for the CloudTrail management events sync.

    Currently forwards all arguments unchanged to ``sync_assume_role_events``,
    the only event type handled by this module.
    """
    sync_assume_role_events(
        neo4j_session,
        boto3_session,
        regions,
        current_aws_account_id,
        update_tag,
        common_job_parameters,
    )
@@ -0,0 +1,132 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.codebuild.project import CodeBuildProjectSchema
13
+ from cartography.util import aws_handle_regions
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
@aws_handle_regions
def get_all_codebuild_projects(
    boto3_session: boto3.Session, region: str
) -> List[Dict[str, Any]]:
    """
    Return the full project descriptions of every CodeBuild project in a region.

    Project names are listed page by page with the ``list_projects`` paginator
    and then resolved to full descriptions through ``batch_get_projects`` in
    chunks of at most 100 names.

    :type boto3_session: boto3.Session
    :param boto3_session: The boto3 session to use for API calls
    :type region: str
    :param region: The AWS region to query
    :rtype: List[Dict[str, Any]]
    :return: The ``projects`` entries of all ``batch_get_projects`` responses
    """
    # AWS batch_get_projects accepts up to 100 project names per call as per AWS documentation.
    chunk_size = 100

    codebuild = boto3_session.client(
        "codebuild", region_name=region, config=get_botocore_config()
    )
    name_pages = codebuild.get_paginator("list_projects")

    collected = []
    for page in name_pages.paginate():
        names = page.get("projects", [])
        if names:
            for offset in range(0, len(names), chunk_size):
                details = codebuild.batch_get_projects(
                    names=names[offset : offset + chunk_size]
                )
                collected.extend(details.get("projects", []))
    return collected
44
+
45
+
46
def transform_codebuild_projects(
    projects: List[Dict[str, Any]], region: str
) -> List[Dict[str, Any]]:
    """
    Flatten CodeBuild project descriptions into the dicts that get loaded into Neo4j.

    Every environment variable is rendered as a ``NAME=VALUE`` string:
    - variables of type 'PLAINTEXT' keep their value;
    - every other type (e.g., 'PARAMETER_STORE', 'SECRETS_MANAGER') gets the
      value replaced by ``<REDACTED>``.

    ``region`` is accepted but not used by the transformation.

    :return: One dict per project with the keys ``arn``, ``created``,
        ``environmentVariables``, ``sourceType`` and ``sourceLocation``
    """

    def _render_env_var(var: Dict[str, Any]) -> str:
        # Only plaintext values are safe to surface; anything else is masked.
        if var.get("type") == "PLAINTEXT":
            shown = var.get("value")
        else:
            shown = "<REDACTED>"
        return f"{var.get('name')}={shown}"

    result = []
    for project in projects:
        raw_env_vars = project.get("environment", {}).get("environmentVariables", [])
        rendered_env_vars = list(map(_render_env_var, raw_env_vars))
        result.append(
            {
                "arn": project["arn"],
                "created": project.get("created"),
                "environmentVariables": rendered_env_vars,
                "sourceType": project.get("source", {}).get("type"),
                "sourceLocation": project.get("source", {}).get("location"),
            }
        )

    return result
73
+
74
+
75
@timeit
def load_codebuild_projects(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Load transformed CodeBuild projects into the graph using ``CodeBuildProjectSchema``.

    :param neo4j_session: The Neo4j session to use for database operations
    :param data: Transformed project dicts to load
    :param region: Region passed to the loader as ``Region``
    :param current_aws_account_id: Account ID passed to the loader as ``AWS_ID``
    :param aws_update_tag: Update tag passed to the loader as ``lastupdated``
    """
    project_count = len(data)
    logger.info(
        f"Loading CodeBuild {project_count} projects for region '{region}' into graph.",
    )

    loader_kwargs = {
        "lastupdated": aws_update_tag,
        "Region": region,
        "AWS_ID": current_aws_account_id,
    }
    load(neo4j_session, CodeBuildProjectSchema(), data, **loader_kwargs)
94
+
95
+
96
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Run the cleanup job derived from ``CodeBuildProjectSchema``.

    :param neo4j_session: The Neo4j session to use for database operations
    :param common_job_parameters: Job parameters forwarded to ``GraphJob.from_node_schema``
    """
    # The message previously said "Efs", a leftover from another module.
    logger.debug("Running CodeBuild cleanup job.")
    GraphJob.from_node_schema(CodeBuildProjectSchema(), common_job_parameters).run(
        neo4j_session
    )
105
+
106
+
107
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CodeBuild projects for every given region, then run cleanup once.

    Per region the projects are fetched, transformed and loaded; the cleanup
    job runs after the last region has been processed.

    :param neo4j_session: The Neo4j session to use for database operations
    :param boto3_session: The boto3 session to use for API calls
    :param regions: List of AWS regions to sync
    :param current_aws_account_id: The AWS account ID being synced
    :param update_tag: Update tag of the current sync run
    :param common_job_parameters: Job parameters forwarded to the cleanup job
    """
    for region in regions:
        logger.info(
            f"Syncing CodeBuild for region '{region}' in account '{current_aws_account_id}'.",
        )

        raw_projects = get_all_codebuild_projects(boto3_session, region)
        load_codebuild_projects(
            neo4j_session,
            transform_codebuild_projects(raw_projects, region),
            region,
            current_aws_account_id,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)
@@ -3,14 +3,17 @@ from typing import Any
3
3
  from typing import Dict
4
4
  from typing import Iterator
5
5
  from typing import List
6
+ from typing import Set
6
7
  from typing import Tuple
7
8
 
8
9
  import boto3
9
10
  import neo4j
10
11
 
11
12
  from cartography.client.core.tx import load
13
+ from cartography.client.core.tx import load_matchlinks
12
14
  from cartography.graph.job import GraphJob
13
15
  from cartography.models.aws.inspector.findings import AWSInspectorFindingSchema
16
+ from cartography.models.aws.inspector.findings import InspectorFindingToPackageMatchLink
14
17
  from cartography.models.aws.inspector.packages import AWSInspectorPackageSchema
15
18
  from cartography.util import aws_handle_regions
16
19
  from cartography.util import aws_paginate
@@ -107,9 +110,10 @@ def get_inspector_findings(
107
110
 
108
111
  def transform_inspector_findings(
109
112
  results: List[Dict[str, Any]],
110
- ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
113
+ ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, str]]]:
111
114
  findings_list: List[Dict] = []
112
- packages: Dict[str, Any] = {}
115
+ packages_set: Set[frozenset] = set()
116
+ finding_to_package_map: List[Dict[str, str]] = []
113
117
 
114
118
  for f in results:
115
119
  finding: Dict = {}
@@ -163,55 +167,45 @@ def transform_inspector_findings(
163
167
  "vendorUpdatedAt",
164
168
  )
165
169
 
166
- new_packages = _process_packages(
167
- f["packageVulnerabilityDetails"],
168
- f["awsAccountId"],
169
- f["findingArn"],
170
- )
171
- finding["vulnerablepackageids"] = list(new_packages.keys())
172
- packages = {**packages, **new_packages}
173
-
170
+ packages = transform_inspector_packages(f["packageVulnerabilityDetails"])
171
+ finding["vulnerablepackageids"] = list(packages.keys())
172
+ for package_id, package in packages.items():
173
+ finding_to_package_map.append(
174
+ {
175
+ "findingarn": finding["id"],
176
+ "packageid": package_id,
177
+ "remediation": package.get("remediation"),
178
+ "fixedInVersion": package.get("fixedInVersion"),
179
+ "filePath": package.get("filePath"),
180
+ "sourceLayerHash": package.get("sourceLayerHash"),
181
+ "sourceLambdaLayerArn": package.get("sourceLambdaLayerArn"),
182
+ }
183
+ )
184
+ packages_set.add(frozenset(package.items()))
174
185
  findings_list.append(finding)
175
- packages_list = transform_inspector_packages(packages)
176
- return findings_list, packages_list
177
-
186
+ packages_list = [dict(p) for p in packages_set]
187
+ return findings_list, packages_list, finding_to_package_map
178
188
 
179
- def transform_inspector_packages(packages: Dict[str, Any]) -> List[Dict[str, Any]]:
180
- packages_list: List[Dict] = []
181
- for package_id in packages.keys():
182
- packages_list.append(packages[package_id])
183
189
 
184
- return packages_list
185
-
186
-
187
- def _process_packages(
190
def transform_inspector_packages(
    package_details: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Index the vulnerable packages of one finding by a generated package id.

    The id has the form ``name|[epoch:]version[-release][.arch]``, following
    the RPM package naming convention. ``name`` and ``version`` are mandatory
    (a missing key raises ``KeyError``); ``epoch``, ``release`` and ``arch``
    are appended only when present and truthy.

    :param package_details: Dict holding a ``vulnerablePackages`` list
    :return: Mapping of package id to a copy of the package dict extended with
        an ``id`` key
    """
    indexed: Dict[str, Any] = {}
    for package in package_details["vulnerablePackages"]:
        name = package["name"]  # Mandatory field

        epoch_part = str(package.get("epoch", ""))
        epoch_part = f"{epoch_part}:" if epoch_part else epoch_part

        version = package["version"]  # Mandatory field

        release_part = package.get("release", "")
        release_part = f"-{release_part}" if release_part else release_part

        arch_part = package.get("arch", "")
        arch_part = f".{arch_part}" if arch_part else arch_part

        package_id = f"{name}|{epoch_part}{version}{release_part}{arch_part}"
        indexed[package_id] = {**package, "id": package_id}

    return indexed
217
211
 
@@ -244,7 +238,6 @@ def load_inspector_findings(
244
238
  def load_inspector_packages(
245
239
  neo4j_session: neo4j.Session,
246
240
  packages: List[Dict[str, Any]],
247
- region: str,
248
241
  aws_update_tag: int,
249
242
  current_aws_account_id: str,
250
243
  ) -> None:
@@ -252,12 +245,28 @@ def load_inspector_packages(
252
245
  neo4j_session,
253
246
  AWSInspectorPackageSchema(),
254
247
  packages,
255
- Region=region,
256
248
  AWS_ID=current_aws_account_id,
257
249
  lastupdated=aws_update_tag,
258
250
  )
259
251
 
260
252
 
253
@timeit
def load_inspector_finding_to_package_match_links(
    neo4j_session: neo4j.Session,
    finding_to_package_map: List[Dict[str, str]],
    aws_update_tag: int,
    current_aws_account_id: str,
) -> None:
    """
    Load finding-to-package links through the MatchLink loader.

    The records are passed to ``load_matchlinks`` with an
    ``InspectorFindingToPackageMatchLink`` schema, scoped to the ``AWSAccount``
    identified by ``current_aws_account_id``.

    :param neo4j_session: The Neo4j session to use for database operations
    :param finding_to_package_map: One dict per finding/package pair
    :param aws_update_tag: Update tag passed to the loader as ``lastupdated``
    :param current_aws_account_id: The AWS account ID being synced
    """
    scope = {
        "_sub_resource_label": "AWSAccount",
        "_sub_resource_id": current_aws_account_id,
    }
    load_matchlinks(
        neo4j_session,
        InspectorFindingToPackageMatchLink(),
        finding_to_package_map,
        lastupdated=aws_update_tag,
        **scope,
    )
268
+
269
+
261
270
  @timeit
262
271
  def cleanup(
263
272
  neo4j_session: neo4j.Session,
@@ -270,6 +279,14 @@ def cleanup(
270
279
  GraphJob.from_node_schema(AWSInspectorPackageSchema(), common_job_parameters).run(
271
280
  neo4j_session,
272
281
  )
282
+ GraphJob.from_matchlink(
283
+ InspectorFindingToPackageMatchLink(),
284
+ "AWSAccount",
285
+ common_job_parameters["ACCOUNT_ID"],
286
+ common_job_parameters["UPDATE_TAG"],
287
+ ).run(
288
+ neo4j_session,
289
+ )
273
290
 
274
291
 
275
292
  def _sync_findings_for_account(
@@ -288,7 +305,9 @@ def _sync_findings_for_account(
288
305
  logger.info(f"No findings to sync for account {account_id} in region {region}")
289
306
  return
290
307
  for f_batch in findings:
291
- finding_data, package_data = transform_inspector_findings(f_batch)
308
+ finding_data, package_data, finding_to_package_map = (
309
+ transform_inspector_findings(f_batch)
310
+ )
292
311
  logger.info(f"Loading {len(finding_data)} findings from account {account_id}")
293
312
  load_inspector_findings(
294
313
  neo4j_session,
@@ -301,7 +320,15 @@ def _sync_findings_for_account(
301
320
  load_inspector_packages(
302
321
  neo4j_session,
303
322
  package_data,
304
- region,
323
+ update_tag,
324
+ current_aws_account_id,
325
+ )
326
+ logger.info(
327
+ f"Loading {len(finding_to_package_map)} finding to package relationships"
328
+ )
329
+ load_inspector_finding_to_package_match_links(
330
+ neo4j_session,
331
+ finding_to_package_map,
305
332
  update_tag,
306
333
  current_aws_account_id,
307
334
  )
@@ -337,5 +364,7 @@ def sync(
337
364
  update_tag,
338
365
  current_aws_account_id,
339
366
  )
367
+ common_job_parameters["ACCOUNT_ID"] = current_aws_account_id
368
+ common_job_parameters["UPDATE_TAG"] = update_tag
340
369
 
341
370
  cleanup(neo4j_session, common_job_parameters)