cartography 0.109.0rc1__py3-none-any.whl → 0.110.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (58) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +22 -0
  3. cartography/config.py +13 -0
  4. cartography/data/indexes.cypher +0 -15
  5. cartography/intel/aws/cloudtrail_management_events.py +21 -0
  6. cartography/intel/aws/eventbridge.py +91 -0
  7. cartography/intel/aws/glue.py +117 -0
  8. cartography/intel/aws/identitycenter.py +71 -23
  9. cartography/intel/aws/kms.py +160 -200
  10. cartography/intel/aws/lambda_function.py +206 -190
  11. cartography/intel/aws/rds.py +243 -458
  12. cartography/intel/aws/resources.py +4 -0
  13. cartography/intel/aws/route53.py +334 -332
  14. cartography/intel/entra/__init__.py +43 -41
  15. cartography/intel/entra/applications.py +1 -2
  16. cartography/intel/entra/ou.py +1 -1
  17. cartography/intel/entra/resources.py +20 -0
  18. cartography/intel/trivy/__init__.py +73 -13
  19. cartography/intel/trivy/scanner.py +115 -92
  20. cartography/models/aws/eventbridge/__init__.py +0 -0
  21. cartography/models/aws/eventbridge/rule.py +77 -0
  22. cartography/models/aws/glue/__init__.py +0 -0
  23. cartography/models/aws/glue/connection.py +51 -0
  24. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  25. cartography/models/aws/kms/__init__.py +0 -0
  26. cartography/models/aws/kms/aliases.py +86 -0
  27. cartography/models/aws/kms/grants.py +65 -0
  28. cartography/models/aws/kms/keys.py +88 -0
  29. cartography/models/aws/lambda_function/__init__.py +0 -0
  30. cartography/models/aws/lambda_function/alias.py +74 -0
  31. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  32. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  33. cartography/models/aws/lambda_function/layer.py +72 -0
  34. cartography/models/aws/rds/__init__.py +0 -0
  35. cartography/models/aws/rds/cluster.py +89 -0
  36. cartography/models/aws/rds/instance.py +154 -0
  37. cartography/models/aws/rds/snapshot.py +108 -0
  38. cartography/models/aws/rds/subnet_group.py +101 -0
  39. cartography/models/aws/route53/__init__.py +0 -0
  40. cartography/models/aws/route53/dnsrecord.py +214 -0
  41. cartography/models/aws/route53/nameserver.py +63 -0
  42. cartography/models/aws/route53/subzone.py +40 -0
  43. cartography/models/aws/route53/zone.py +47 -0
  44. cartography/models/snipeit/asset.py +1 -0
  45. cartography/util.py +8 -1
  46. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/METADATA +2 -2
  47. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/RECORD +51 -32
  48. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  49. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  50. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  51. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  52. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  53. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  54. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  55. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/WHEEL +0 -0
  56. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/entry_points.txt +0 -0
  57. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/licenses/LICENSE +0 -0
  58. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.109.0rc1'
21
- __version_tuple__ = version_tuple = (0, 109, 0, 'rc1')
20
+ __version__ = version = '0.110.0rc1'
21
+ __version_tuple__ = version_tuple = (0, 110, 0, 'rc1')
cartography/cli.py CHANGED
@@ -254,6 +254,14 @@ class CLI:
254
254
  "The name of environment variable containing Entra Client Secret for Service Principal Authentication."
255
255
  ),
256
256
  )
257
+ parser.add_argument(
258
+ "--entra-best-effort-mode",
259
+ action="store_true",
260
+ help=(
261
+ "Enable Entra ID sync best effort mode. This will allow cartography to continue "
262
+ "syncing other Entra ID entities and delay raising an exception until the end of the sync."
263
+ ),
264
+ )
257
265
  parser.add_argument(
258
266
  "--aws-requested-syncs",
259
267
  type=str,
@@ -700,6 +708,15 @@ class CLI:
700
708
  "Required if you are using the Trivy module. Ignored otherwise."
701
709
  ),
702
710
  )
711
+ parser.add_argument(
712
+ "--trivy-results-dir",
713
+ type=str,
714
+ default=None,
715
+ help=(
716
+ "Path to a directory containing Trivy JSON results on disk. "
717
+ "Required if you are using the Trivy module with local results."
718
+ ),
719
+ )
703
720
  parser.add_argument(
704
721
  "--scaleway-org",
705
722
  type=str,
@@ -1089,6 +1106,9 @@ class CLI:
1089
1106
  if config.trivy_s3_prefix:
1090
1107
  logger.debug(f"Trivy S3 prefix: {config.trivy_s3_prefix}")
1091
1108
 
1109
+ if config.trivy_results_dir:
1110
+ logger.debug(f"Trivy results dir: {config.trivy_results_dir}")
1111
+
1092
1112
  # Scaleway config
1093
1113
  if config.scaleway_secret_key_env_var:
1094
1114
  logger.debug(
@@ -1118,6 +1138,8 @@ class CLI:
1118
1138
  config.sentinelone_api_token = os.environ.get(
1119
1139
  config.sentinelone_api_token_env_var
1120
1140
  )
1141
+ else:
1142
+ config.sentinelone_api_token = None
1121
1143
 
1122
1144
  # Run cartography
1123
1145
  try:
cartography/config.py CHANGED
@@ -51,6 +51,9 @@ class Config:
51
51
  :param entra_client_id: Client Id for connecting in a Service Principal Authentication approach. Optional.
52
52
  :type entra_client_secret: str
53
53
  :param entra_client_secret: Client Secret for connecting in a Service Principal Authentication approach. Optional.
54
+ :type entra_best_effort_mode: bool
55
+ :param entra_best_effort_mode: If True, Entra ID sync will continue on errors and raise an aggregated
56
+ exception at the end of the sync. If False (default), exceptions will be raised immediately.
54
57
  :type aws_requested_syncs: str
55
58
  :param aws_requested_syncs: Comma-separated list of AWS resources to sync. Optional.
56
59
  :type aws_guardduty_severity_threshold: str
@@ -152,6 +155,8 @@ class Config:
152
155
  :param trivy_s3_bucket: The S3 bucket name containing Trivy scan results. Optional.
153
156
  :type trivy_s3_prefix: str
154
157
  :param trivy_s3_prefix: The S3 prefix path containing Trivy scan results. Optional.
158
+ :type trivy_results_dir: str
159
+ :param trivy_results_dir: Local directory containing Trivy scan results. Optional.
155
160
  :type scaleway_access_key: str
156
161
  :param scaleway_access_key: Scaleway access key. Optional.
157
162
  :type scaleway_secret_key: str
@@ -162,6 +167,8 @@ class Config:
162
167
  :param sentinelone_api_url: SentinelOne API URL. Optional.
163
168
  :type sentinelone_api_token: string
164
169
  :param sentinelone_api_token: SentinelOne API token for authentication. Optional.
170
+ :type sentinelone_api_token_env_var: string
171
+ :param sentinelone_api_token_env_var: The name of an environment variable containing the SentinelOne API token. Optional.
165
172
  :type sentinelone_account_ids: list[str]
166
173
  :param sentinelone_account_ids: List of SentinelOne account IDs to sync. Optional.
167
174
  """
@@ -187,6 +194,7 @@ class Config:
187
194
  entra_tenant_id=None,
188
195
  entra_client_id=None,
189
196
  entra_client_secret=None,
197
+ entra_best_effort_mode=False,
190
198
  aws_requested_syncs=None,
191
199
  aws_guardduty_severity_threshold=None,
192
200
  analysis_job_directory=None,
@@ -243,11 +251,13 @@ class Config:
243
251
  airbyte_api_url=None,
244
252
  trivy_s3_bucket=None,
245
253
  trivy_s3_prefix=None,
254
+ trivy_results_dir=None,
246
255
  scaleway_access_key=None,
247
256
  scaleway_secret_key=None,
248
257
  scaleway_org=None,
249
258
  sentinelone_api_url=None,
250
259
  sentinelone_api_token=None,
260
+ sentinelone_api_token_env_var=None,
251
261
  sentinelone_account_ids=None,
252
262
  ):
253
263
  self.neo4j_uri = neo4j_uri
@@ -271,6 +281,7 @@ class Config:
271
281
  self.entra_tenant_id = entra_tenant_id
272
282
  self.entra_client_id = entra_client_id
273
283
  self.entra_client_secret = entra_client_secret
284
+ self.entra_best_effort_mode = entra_best_effort_mode
274
285
  self.aws_requested_syncs = aws_requested_syncs
275
286
  self.aws_guardduty_severity_threshold = aws_guardduty_severity_threshold
276
287
  self.analysis_job_directory = analysis_job_directory
@@ -327,9 +338,11 @@ class Config:
327
338
  self.airbyte_api_url = airbyte_api_url
328
339
  self.trivy_s3_bucket = trivy_s3_bucket
329
340
  self.trivy_s3_prefix = trivy_s3_prefix
341
+ self.trivy_results_dir = trivy_results_dir
330
342
  self.scaleway_access_key = scaleway_access_key
331
343
  self.scaleway_secret_key = scaleway_secret_key
332
344
  self.scaleway_org = scaleway_org
333
345
  self.sentinelone_api_url = sentinelone_api_url
334
346
  self.sentinelone_api_token = sentinelone_api_token
347
+ self.sentinelone_api_token_env_var = sentinelone_api_token_env_var
335
348
  self.sentinelone_account_ids = sentinelone_account_ids
cartography/data/indexes.cypher CHANGED
@@ -29,14 +29,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv4CidrBlock) ON (n.id);
29
29
  CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv4CidrBlock) ON (n.lastupdated);
30
30
  CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv6CidrBlock) ON (n.id);
31
31
  CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv6CidrBlock) ON (n.lastupdated);
32
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambda) ON (n.id);
33
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambda) ON (n.lastupdated);
34
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaEventSourceMapping) ON (n.id);
35
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaEventSourceMapping) ON (n.lastupdated);
36
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaFunctionAlias) ON (n.id);
37
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaFunctionAlias) ON (n.lastupdated);
38
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaLayer) ON (n.id);
39
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaLayer) ON (n.lastupdated);
40
32
  CREATE INDEX IF NOT EXISTS FOR (n:AWSPeeringConnection) ON (n.id);
41
33
  CREATE INDEX IF NOT EXISTS FOR (n:AWSPeeringConnection) ON (n.lastupdated);
42
34
  CREATE INDEX IF NOT EXISTS FOR (n:AWSPolicy) ON (n.id);
@@ -158,13 +150,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:IpRange) ON (n.id);
158
150
  CREATE INDEX IF NOT EXISTS FOR (n:IpRange) ON (n.lastupdated);
159
151
  CREATE INDEX IF NOT EXISTS FOR (n:JamfComputerGroup) ON (n.id);
160
152
  CREATE INDEX IF NOT EXISTS FOR (n:JamfComputerGroup) ON (n.lastupdated);
161
- CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.id);
162
- CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.arn);
163
- CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.lastupdated);
164
- CREATE INDEX IF NOT EXISTS FOR (n:KMSAlias) ON (n.id);
165
- CREATE INDEX IF NOT EXISTS FOR (n:KMSAlias) ON (n.lastupdated);
166
- CREATE INDEX IF NOT EXISTS FOR (n:KMSGrant) ON (n.id);
167
- CREATE INDEX IF NOT EXISTS FOR (n:KMSGrant) ON (n.lastupdated);
168
153
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.id);
169
154
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.name);
170
155
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.lastupdated);
cartography/intel/aws/cloudtrail_management_events.py CHANGED
@@ -223,6 +223,13 @@ def transform_assume_role_events_to_role_assumptions(
223
223
 
224
224
  cloudtrail_event = json.loads(event["CloudTrailEvent"])
225
225
 
226
+ # Skip events with null requestParameters since we can't extract roleArn
227
+ if not cloudtrail_event.get("requestParameters"):
228
+ logger.debug(
229
+ f"Skipping CloudTrail AssumeRole event due to missing requestParameters. Event: {event.get('EventId', 'unknown')}"
230
+ )
231
+ continue
232
+
226
233
  if cloudtrail_event.get("userIdentity", {}).get("arn"):
227
234
  source_principal = cloudtrail_event["userIdentity"]["arn"]
228
235
  destination_principal = cloudtrail_event["requestParameters"]["roleArn"]
@@ -298,6 +305,13 @@ def transform_saml_role_events_to_role_assumptions(
298
305
 
299
306
  cloudtrail_event = json.loads(event["CloudTrailEvent"])
300
307
 
308
+ # Skip events with null requestParameters since we can't extract roleArn
309
+ if not cloudtrail_event.get("requestParameters"):
310
+ logger.debug(
311
+ f"Skipping CloudTrail AssumeRoleWithSAML event due to missing requestParameters. Event: {event.get('EventId', 'unknown')}"
312
+ )
313
+ continue
314
+
301
315
  response_elements = cloudtrail_event.get("responseElements", {})
302
316
  assumed_role_user = response_elements.get("assumedRoleUser", {})
303
317
 
@@ -370,6 +384,13 @@ def transform_web_identity_role_events_to_role_assumptions(
370
384
 
371
385
  cloudtrail_event = json.loads(event["CloudTrailEvent"])
372
386
 
387
+ # Skip events with null requestParameters since we can't extract roleArn
388
+ if not cloudtrail_event.get("requestParameters"):
389
+ logger.debug(
390
+ f"Skipping CloudTrail AssumeRoleWithWebIdentity event due to missing requestParameters. Event: {event.get('EventId', 'unknown')}"
391
+ )
392
+ continue
393
+
373
394
  user_identity = cloudtrail_event.get("userIdentity", {})
374
395
 
375
396
  if user_identity.get("type") == "WebIdentityUser" and user_identity.get(
cartography/intel/aws/eventbridge.py ADDED
@@ -0,0 +1,91 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.eventbridge.rule import EventBridgeRuleSchema
13
+ from cartography.util import aws_handle_regions
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @timeit
20
+ @aws_handle_regions
21
+ def get_eventbridge_rules(
22
+ boto3_session: boto3.Session, region: str
23
+ ) -> List[Dict[str, Any]]:
24
+ client = boto3_session.client(
25
+ "events", region_name=region, config=get_botocore_config()
26
+ )
27
+ paginator = client.get_paginator("list_rules")
28
+ rules = []
29
+
30
+ for page in paginator.paginate():
31
+ rules.extend(page.get("Rules", []))
32
+
33
+ return rules
34
+
35
+
36
+ @timeit
37
+ def load_eventbridge_rules(
38
+ neo4j_session: neo4j.Session,
39
+ data: List[Dict[str, Any]],
40
+ region: str,
41
+ current_aws_account_id: str,
42
+ aws_update_tag: int,
43
+ ) -> None:
44
+ logger.info(
45
+ f"Loading EventBridge {len(data)} rules for region '{region}' into graph.",
46
+ )
47
+ load(
48
+ neo4j_session,
49
+ EventBridgeRuleSchema(),
50
+ data,
51
+ lastupdated=aws_update_tag,
52
+ Region=region,
53
+ AWS_ID=current_aws_account_id,
54
+ )
55
+
56
+
57
+ @timeit
58
+ def cleanup(
59
+ neo4j_session: neo4j.Session,
60
+ common_job_parameters: Dict[str, Any],
61
+ ) -> None:
62
+ logger.debug("Running EventBridge cleanup job.")
63
+ GraphJob.from_node_schema(EventBridgeRuleSchema(), common_job_parameters).run(
64
+ neo4j_session
65
+ )
66
+
67
+
68
+ @timeit
69
+ def sync(
70
+ neo4j_session: neo4j.Session,
71
+ boto3_session: boto3.session.Session,
72
+ regions: List[str],
73
+ current_aws_account_id: str,
74
+ update_tag: int,
75
+ common_job_parameters: Dict[str, Any],
76
+ ) -> None:
77
+ for region in regions:
78
+ logger.info(
79
+ f"Syncing EventBridge for region '{region}' in account '{current_aws_account_id}'.",
80
+ )
81
+
82
+ rules = get_eventbridge_rules(boto3_session, region)
83
+ load_eventbridge_rules(
84
+ neo4j_session,
85
+ rules,
86
+ region,
87
+ current_aws_account_id,
88
+ update_tag,
89
+ )
90
+
91
+ cleanup(neo4j_session, common_job_parameters)
cartography/intel/aws/glue.py ADDED
@@ -0,0 +1,117 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.glue.connection import GlueConnectionSchema
13
+ from cartography.util import aws_handle_regions
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @timeit
20
+ @aws_handle_regions
21
+ def get_glue_connections(
22
+ boto3_session: boto3.Session, region: str
23
+ ) -> List[Dict[str, Any]]:
24
+ client = boto3_session.client(
25
+ "glue", region_name=region, config=get_botocore_config()
26
+ )
27
+ paginator = client.get_paginator("get_connections")
28
+ connections = []
29
+ for page in paginator.paginate():
30
+ connections.extend(page.get("ConnectionList", []))
31
+
32
+ return connections
33
+
34
+
35
+ def transform_glue_connections(
36
+ connections: List[Dict[str, Any]], region: str
37
+ ) -> List[Dict[str, Any]]:
38
+ """
39
+ Transform Glue connection data for ingestion
40
+ """
41
+ transformed_connections = []
42
+ for connection in connections:
43
+ transformed_connection = {
44
+ "Name": connection["Name"],
45
+ "Description": connection.get("Description"),
46
+ "ConnectionType": connection.get("ConnectionType"),
47
+ "Status": connection.get("Status"),
48
+ "StatusReason": connection.get("StatusReason"),
49
+ "AuthenticationType": connection.get("AuthenticationConfiguration", {}).get(
50
+ "AuthenticationType"
51
+ ),
52
+ "SecretArn": connection.get("AuthenticationConfiguration", {}).get(
53
+ "SecretArn"
54
+ ),
55
+ "Region": region,
56
+ }
57
+ transformed_connections.append(transformed_connection)
58
+ return transformed_connections
59
+
60
+
61
+ @timeit
62
+ def load_glue_connections(
63
+ neo4j_session: neo4j.Session,
64
+ data: List[Dict[str, Any]],
65
+ region: str,
66
+ current_aws_account_id: str,
67
+ aws_update_tag: int,
68
+ ) -> None:
69
+ logger.info(
70
+ f"Loading Glue {len(data)} connections for region '{region}' into graph.",
71
+ )
72
+ load(
73
+ neo4j_session,
74
+ GlueConnectionSchema(),
75
+ data,
76
+ lastupdated=aws_update_tag,
77
+ Region=region,
78
+ AWS_ID=current_aws_account_id,
79
+ )
80
+
81
+
82
+ @timeit
83
+ def cleanup(
84
+ neo4j_session: neo4j.Session,
85
+ common_job_parameters: Dict[str, Any],
86
+ ) -> None:
87
+ logger.debug("Running Glue cleanup job.")
88
+ GraphJob.from_node_schema(GlueConnectionSchema(), common_job_parameters).run(
89
+ neo4j_session
90
+ )
91
+
92
+
93
+ @timeit
94
+ def sync(
95
+ neo4j_session: neo4j.Session,
96
+ boto3_session: boto3.session.Session,
97
+ regions: List[str],
98
+ current_aws_account_id: str,
99
+ update_tag: int,
100
+ common_job_parameters: Dict[str, Any],
101
+ ) -> None:
102
+ for region in regions:
103
+ logger.info(
104
+ f"Syncing Glue for region '{region}' in account '{current_aws_account_id}'.",
105
+ )
106
+
107
+ connections = get_glue_connections(boto3_session, region)
108
+ transformed_connections = transform_glue_connections(connections, region)
109
+ load_glue_connections(
110
+ neo4j_session,
111
+ transformed_connections,
112
+ region,
113
+ current_aws_account_id,
114
+ update_tag,
115
+ )
116
+
117
+ cleanup(neo4j_session, common_job_parameters)
cartography/intel/aws/identitycenter.py CHANGED
@@ -7,6 +7,7 @@ import boto3
7
7
  import neo4j
8
8
 
9
9
  from cartography.client.core.tx import load
10
+ from cartography.client.core.tx import load_matchlinks
10
11
  from cartography.graph.job import GraphJob
11
12
  from cartography.models.aws.identitycenter.awsidentitycenter import (
12
13
  AWSIdentityCenterInstanceSchema,
@@ -14,9 +15,11 @@ from cartography.models.aws.identitycenter.awsidentitycenter import (
14
15
  from cartography.models.aws.identitycenter.awspermissionset import (
15
16
  AWSPermissionSetSchema,
16
17
  )
18
+ from cartography.models.aws.identitycenter.awspermissionset import (
19
+ RoleAssignmentAllowedByMatchLink,
20
+ )
17
21
  from cartography.models.aws.identitycenter.awsssouser import AWSSSOUserSchema
18
22
  from cartography.util import aws_handle_regions
19
- from cartography.util import run_cleanup_job
20
23
  from cartography.util import timeit
21
24
 
22
25
  logger = logging.getLogger(__name__)
@@ -120,6 +123,8 @@ def load_permission_sets(
120
123
  InstanceArn=instance_arn,
121
124
  Region=region,
122
125
  AWS_ID=aws_account_id,
126
+ _sub_resource_label="AWSAccount",
127
+ _sub_resource_id=aws_account_id,
123
128
  )
124
129
 
125
130
 
@@ -220,31 +225,64 @@ def get_role_assignments(
220
225
  return role_assignments
221
226
 
222
227
 
228
+ @timeit
229
+ def get_permset_roles(
230
+ neo4j_session: neo4j.Session,
231
+ role_assignments: List[Dict[str, Any]],
232
+ ) -> List[Dict[str, Any]]:
233
+ """
234
+ Enrich role assignments with exact role ARNs by querying existing permission set relationships.
235
+ Uses the ASSIGNED_TO_ROLE relationships created when permission sets were loaded.
236
+ """
237
+ # Get unique permission set ARNs from role assignments
238
+ permset_ids = list({ra["PermissionSetArn"] for ra in role_assignments})
239
+
240
+ query = """
241
+ MATCH (role:AWSRole)<-[:ASSIGNED_TO_ROLE]-(permset:AWSPermissionSet)
242
+ WHERE permset.arn IN $PermSetIds
243
+ RETURN permset.arn AS PermissionSetArn, role.arn AS RoleArn
244
+ """
245
+ result = neo4j_session.run(query, PermSetIds=permset_ids)
246
+ permset_to_role = [record.data() for record in result]
247
+
248
+ # Create mapping from permission set ARN to role ARN
249
+ permset_to_role_map = {
250
+ entry["PermissionSetArn"]: entry["RoleArn"] for entry in permset_to_role
251
+ }
252
+
253
+ # Enrich role assignments with exact role ARNs
254
+ enriched_assignments = []
255
+ for assignment in role_assignments:
256
+ role_arn = permset_to_role_map.get(assignment["PermissionSetArn"])
257
+ enriched_assignments.append(
258
+ {
259
+ **assignment,
260
+ "RoleArn": role_arn,
261
+ }
262
+ )
263
+
264
+ return enriched_assignments
265
+
266
+
223
267
  @timeit
224
268
  def load_role_assignments(
225
269
  neo4j_session: neo4j.Session,
226
270
  role_assignments: List[Dict],
271
+ aws_account_id: str,
227
272
  aws_update_tag: int,
228
273
  ) -> None:
229
274
  """
230
- Load role assignments into the graph
275
+ Load role assignments into the graph using MatchLink schema
231
276
  """
232
277
  logger.info(f"Loading {len(role_assignments)} role assignments")
233
- if role_assignments:
234
- neo4j_session.run(
235
- """
236
- UNWIND $role_assignments AS ra
237
- MATCH (acc:AWSAccount{id:ra.AccountId}) -[:RESOURCE]->
238
- (role:AWSRole)<-[:ASSIGNED_TO_ROLE]-
239
- (permset:AWSPermissionSet {id: ra.PermissionSetArn})
240
- MATCH (sso:AWSSSOUser {id: ra.UserId})
241
- MERGE (role)-[r:ALLOWED_BY]->(sso)
242
- SET r.lastupdated = $aws_update_tag,
243
- r.permission_set_arn = ra.PermissionSetArn
244
- """,
245
- role_assignments=role_assignments,
246
- aws_update_tag=aws_update_tag,
247
- )
278
+ load_matchlinks(
279
+ neo4j_session,
280
+ RoleAssignmentAllowedByMatchLink(),
281
+ role_assignments,
282
+ lastupdated=aws_update_tag,
283
+ _sub_resource_label="AWSAccount",
284
+ _sub_resource_id=aws_account_id,
285
+ )
248
286
 
249
287
 
250
288
  @timeit
@@ -262,11 +300,14 @@ def cleanup(
262
300
  GraphJob.from_node_schema(AWSSSOUserSchema(), common_job_parameters).run(
263
301
  neo4j_session,
264
302
  )
265
- run_cleanup_job(
266
- "aws_import_identity_center_cleanup.json",
267
- neo4j_session,
268
- common_job_parameters,
269
- )
303
+
304
+ # Clean up role assignment MatchLinks
305
+ GraphJob.from_matchlink(
306
+ RoleAssignmentAllowedByMatchLink(),
307
+ "AWSAccount",
308
+ common_job_parameters["AWS_ID"],
309
+ common_job_parameters["UPDATE_TAG"],
310
+ ).run(neo4j_session)
270
311
 
271
312
 
272
313
  @timeit
@@ -327,9 +368,16 @@ def sync_identity_center_instances(
327
368
  instance_arn,
328
369
  region,
329
370
  )
330
- load_role_assignments(
371
+
372
+ # Enrich role assignments with exact role ARNs using permission set relationships
373
+ enriched_role_assignments = get_permset_roles(
331
374
  neo4j_session,
332
375
  role_assignments,
376
+ )
377
+ load_role_assignments(
378
+ neo4j_session,
379
+ enriched_role_assignments,
380
+ current_aws_account_id,
333
381
  update_tag,
334
382
  )
335
383