cartography 0.108.0rc2__py3-none-any.whl → 0.109.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (63) hide show
  1. cartography/_version.py +2 -2
  2. cartography/data/indexes.cypher +0 -17
  3. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  4. cartography/intel/aws/cloudtrail_management_events.py +36 -3
  5. cartography/intel/aws/ecr.py +55 -80
  6. cartography/intel/aws/glue.py +117 -0
  7. cartography/intel/aws/identitycenter.py +71 -23
  8. cartography/intel/aws/kms.py +160 -200
  9. cartography/intel/aws/lambda_function.py +206 -190
  10. cartography/intel/aws/rds.py +243 -458
  11. cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
  12. cartography/intel/aws/resources.py +2 -0
  13. cartography/intel/aws/route53.py +334 -332
  14. cartography/intel/aws/secretsmanager.py +62 -44
  15. cartography/intel/entra/groups.py +29 -1
  16. cartography/intel/gcp/__init__.py +10 -0
  17. cartography/intel/gcp/compute.py +19 -42
  18. cartography/models/aws/ecr/__init__.py +0 -0
  19. cartography/models/aws/ecr/image.py +41 -0
  20. cartography/models/aws/ecr/repository.py +72 -0
  21. cartography/models/aws/ecr/repository_image.py +95 -0
  22. cartography/models/aws/glue/__init__.py +0 -0
  23. cartography/models/aws/glue/connection.py +51 -0
  24. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  25. cartography/models/aws/kms/__init__.py +0 -0
  26. cartography/models/aws/kms/aliases.py +86 -0
  27. cartography/models/aws/kms/grants.py +65 -0
  28. cartography/models/aws/kms/keys.py +88 -0
  29. cartography/models/aws/lambda_function/__init__.py +0 -0
  30. cartography/models/aws/lambda_function/alias.py +74 -0
  31. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  32. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  33. cartography/models/aws/lambda_function/layer.py +72 -0
  34. cartography/models/aws/rds/__init__.py +0 -0
  35. cartography/models/aws/rds/cluster.py +89 -0
  36. cartography/models/aws/rds/instance.py +154 -0
  37. cartography/models/aws/rds/snapshot.py +108 -0
  38. cartography/models/aws/rds/subnet_group.py +101 -0
  39. cartography/models/aws/route53/__init__.py +0 -0
  40. cartography/models/aws/route53/dnsrecord.py +214 -0
  41. cartography/models/aws/route53/nameserver.py +63 -0
  42. cartography/models/aws/route53/subzone.py +40 -0
  43. cartography/models/aws/route53/zone.py +47 -0
  44. cartography/models/aws/secretsmanager/secret.py +106 -0
  45. cartography/models/entra/group.py +26 -0
  46. cartography/models/entra/user.py +6 -0
  47. cartography/models/gcp/compute/__init__.py +0 -0
  48. cartography/models/gcp/compute/vpc.py +50 -0
  49. cartography/util.py +8 -1
  50. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc2.dist-info}/METADATA +2 -2
  51. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc2.dist-info}/RECORD +55 -34
  52. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  53. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  54. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  55. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  56. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  57. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  58. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  59. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  60. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc2.dist-info}/WHEEL +0 -0
  61. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc2.dist-info}/entry_points.txt +0 -0
  62. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc2.dist-info}/licenses/LICENSE +0 -0
  63. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc2.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.108.0rc2'
21
- __version_tuple__ = version_tuple = (0, 108, 0, 'rc2')
20
+ __version__ = version = '0.109.0rc2'
21
+ __version_tuple__ = version_tuple = (0, 109, 0, 'rc2')
@@ -29,14 +29,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv4CidrBlock) ON (n.id);
29
29
  CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv4CidrBlock) ON (n.lastupdated);
30
30
  CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv6CidrBlock) ON (n.id);
31
31
  CREATE INDEX IF NOT EXISTS FOR (n:AWSIpv6CidrBlock) ON (n.lastupdated);
32
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambda) ON (n.id);
33
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambda) ON (n.lastupdated);
34
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaEventSourceMapping) ON (n.id);
35
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaEventSourceMapping) ON (n.lastupdated);
36
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaFunctionAlias) ON (n.id);
37
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaFunctionAlias) ON (n.lastupdated);
38
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaLayer) ON (n.id);
39
- CREATE INDEX IF NOT EXISTS FOR (n:AWSLambdaLayer) ON (n.lastupdated);
40
32
  CREATE INDEX IF NOT EXISTS FOR (n:AWSPeeringConnection) ON (n.id);
41
33
  CREATE INDEX IF NOT EXISTS FOR (n:AWSPeeringConnection) ON (n.lastupdated);
42
34
  CREATE INDEX IF NOT EXISTS FOR (n:AWSPolicy) ON (n.id);
@@ -158,13 +150,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:IpRange) ON (n.id);
158
150
  CREATE INDEX IF NOT EXISTS FOR (n:IpRange) ON (n.lastupdated);
159
151
  CREATE INDEX IF NOT EXISTS FOR (n:JamfComputerGroup) ON (n.id);
160
152
  CREATE INDEX IF NOT EXISTS FOR (n:JamfComputerGroup) ON (n.lastupdated);
161
- CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.id);
162
- CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.arn);
163
- CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.lastupdated);
164
- CREATE INDEX IF NOT EXISTS FOR (n:KMSAlias) ON (n.id);
165
- CREATE INDEX IF NOT EXISTS FOR (n:KMSAlias) ON (n.lastupdated);
166
- CREATE INDEX IF NOT EXISTS FOR (n:KMSGrant) ON (n.id);
167
- CREATE INDEX IF NOT EXISTS FOR (n:KMSGrant) ON (n.lastupdated);
168
153
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.id);
169
154
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.name);
170
155
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.lastupdated);
@@ -259,8 +244,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.id);
259
244
  CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.name);
260
245
  CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.arn);
261
246
  CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.lastupdated);
262
- CREATE INDEX IF NOT EXISTS FOR (n:SecretsManagerSecret) ON (n.id);
263
- CREATE INDEX IF NOT EXISTS FOR (n:SecretsManagerSecret) ON (n.lastupdated);
264
247
  CREATE INDEX IF NOT EXISTS FOR (n:SecurityHub) ON (n.id);
265
248
  CREATE INDEX IF NOT EXISTS FOR (n:SecurityHub) ON (n.lastupdated);
266
249
  CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.id);
@@ -1,17 +1,5 @@
1
1
  {
2
2
  "statements": [
3
- {
4
- "query": "MATCH (n:GCPVpc) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)",
5
- "iterative": true,
6
- "iterationsize": 100,
7
- "__comment__": "Delete GCP VPCs that no longer exist and detach them from all previously connected nodes."
8
- },
9
- {
10
- "query": "MATCH (:GCPVpc)<-[r:RESOURCE]-(:GCPProject) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
11
- "iterative": true,
12
- "iterationsize": 100,
13
- "__comment__": "Remove GCP VPC-to-Project relationships that are out of date."
14
- },
15
3
  {
16
4
  "query": "MATCH (:GCPInstance)-[r:MEMBER_OF_GCP_VPC]->(:GCPVpc) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
17
5
  "iterative": true,
@@ -381,13 +381,15 @@ def transform_web_identity_role_events_to_role_assumptions(
381
381
 
382
382
  # Only process GitHub Actions events
383
383
  if "token.actions.githubusercontent.com" in identity_provider:
384
- # GitHub repo fullname is directly in userName (e.g., "sublimagesec/sublimage")
385
- github_repo = user_identity.get("userName", "")
386
- if not github_repo:
384
+ # Extract GitHub repo fullname from userName format: "repo:{organization}/{repository}:{context}"
385
+ user_name = user_identity.get("userName", "")
386
+ if not user_name:
387
387
  logger.debug(
388
388
  f"Missing userName in GitHub WebIdentity event: {event.get('EventId', 'unknown')}"
389
389
  )
390
390
  continue
391
+
392
+ github_repo = _extract_github_repo_from_username(user_name)
391
393
  key = (github_repo, destination_principal)
392
394
 
393
395
  if key in github_aggregated:
@@ -572,6 +574,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
572
574
  return assumed_role_arn
573
575
 
574
576
 
577
+ def _extract_github_repo_from_username(user_name: str) -> str:
578
+ """
579
+ Extract GitHub repository fullname from CloudTrail userName field.
580
+
581
+ GitHub Actions CloudTrail events have userName in the format:
582
+ "repo:{organization}/{repository}:{context}"
583
+ """
584
+ if not user_name:
585
+ return ""
586
+
587
+ parts = user_name.split(":")
588
+
589
+ # Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
590
+ if len(parts) < 3 or parts[0] != "repo":
591
+ return ""
592
+
593
+ # Extract "{organization}/{repository}"
594
+ repo_fullname = parts[1]
595
+
596
+ # Validate it looks like "{organization}/{repository}" format
597
+ if repo_fullname.count("/") != 1:
598
+ return ""
599
+
600
+ # Ensure both organization and repo exist
601
+ owner, repo = repo_fullname.split("/")
602
+ if not owner or not repo:
603
+ return ""
604
+
605
+ return repo_fullname
606
+
607
+
575
608
  @timeit
576
609
  def cleanup(
577
610
  neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
@@ -6,9 +6,12 @@ from typing import List
6
6
  import boto3
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.aws.ecr.image import ECRImageSchema
12
+ from cartography.models.aws.ecr.repository import ECRRepositorySchema
13
+ from cartography.models.aws.ecr.repository_image import ECRRepositoryImageSchema
9
14
  from cartography.util import aws_handle_regions
10
- from cartography.util import batch
11
- from cartography.util import run_cleanup_job
12
15
  from cartography.util import timeit
13
16
  from cartography.util import to_asynchronous
14
17
  from cartography.util import to_synchronous
@@ -74,33 +77,17 @@ def load_ecr_repositories(
74
77
  current_aws_account_id: str,
75
78
  aws_update_tag: int,
76
79
  ) -> None:
77
- query = """
78
- UNWIND $Repositories as ecr_repo
79
- MERGE (repo:ECRRepository{id: ecr_repo.repositoryArn})
80
- ON CREATE SET repo.firstseen = timestamp(),
81
- repo.arn = ecr_repo.repositoryArn,
82
- repo.name = ecr_repo.repositoryName,
83
- repo.region = $Region,
84
- repo.created_at = ecr_repo.createdAt
85
- SET repo.lastupdated = $aws_update_tag,
86
- repo.uri = ecr_repo.repositoryUri
87
- WITH repo
88
-
89
- MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
90
- MERGE (owner)-[r:RESOURCE]->(repo)
91
- ON CREATE SET r.firstseen = timestamp()
92
- SET r.lastupdated = $aws_update_tag
93
- """
94
80
  logger.info(
95
81
  f"Loading {len(repos)} ECR repositories for region {region} into graph.",
96
82
  )
97
- neo4j_session.run(
98
- query,
99
- Repositories=repos,
83
+ load(
84
+ neo4j_session,
85
+ ECRRepositorySchema(),
86
+ repos,
87
+ lastupdated=aws_update_tag,
100
88
  Region=region,
101
- aws_update_tag=aws_update_tag,
102
- AWS_ACCOUNT_ID=current_aws_account_id,
103
- ).consume() # See issue #440
89
+ AWS_ID=current_aws_account_id,
90
+ )
104
91
 
105
92
 
106
93
  @timeit
@@ -114,8 +101,13 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
114
101
  for repo_uri in sorted(repo_data.keys()):
115
102
  repo_images = repo_data[repo_uri]
116
103
  for img in repo_images:
117
- if "imageDigest" in img and img["imageDigest"]:
104
+ digest = img.get("imageDigest")
105
+ if digest:
106
+ tag = img.get("imageTag")
107
+ uri = repo_uri + (f":{tag}" if tag else "")
118
108
  img["repo_uri"] = repo_uri
109
+ img["uri"] = uri
110
+ img["id"] = uri
119
111
  repo_images_list.append(img)
120
112
  else:
121
113
  logger.warning(
@@ -127,74 +119,51 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
127
119
  return repo_images_list
128
120
 
129
121
 
130
- def _load_ecr_repo_img_tx(
131
- tx: neo4j.Transaction,
132
- repo_images_list: List[Dict],
133
- aws_update_tag: int,
134
- region: str,
135
- ) -> None:
136
- query = """
137
- UNWIND $RepoList as repo_img
138
- MERGE (ri:ECRRepositoryImage{id: repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, '')})
139
- ON CREATE SET ri.firstseen = timestamp()
140
- SET ri.lastupdated = $aws_update_tag,
141
- ri.tag = repo_img.imageTag,
142
- ri.uri = repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, ''),
143
- ri.image_size_bytes = repo_img.imageSizeInBytes,
144
- ri.image_pushed_at = repo_img.imagePushedAt,
145
- ri.image_manifest_media_type = repo_img.imageManifestMediaType,
146
- ri.artifact_media_type = repo_img.artifactMediaType,
147
- ri.last_recorded_pull_time = repo_img.lastRecordedPullTime
148
- WITH ri, repo_img
149
-
150
- MERGE (img:ECRImage{id: repo_img.imageDigest})
151
- ON CREATE SET img.firstseen = timestamp(),
152
- img.digest = repo_img.imageDigest
153
- SET img.lastupdated = $aws_update_tag,
154
- img.region = $Region
155
- WITH ri, img, repo_img
156
-
157
- MERGE (ri)-[r1:IMAGE]->(img)
158
- ON CREATE SET r1.firstseen = timestamp()
159
- SET r1.lastupdated = $aws_update_tag
160
- WITH ri, repo_img
161
-
162
- MATCH (repo:ECRRepository{uri: repo_img.repo_uri})
163
- MERGE (repo)-[r2:REPO_IMAGE]->(ri)
164
- ON CREATE SET r2.firstseen = timestamp()
165
- SET r2.lastupdated = $aws_update_tag
166
- """
167
- tx.run(
168
- query,
169
- RepoList=repo_images_list,
170
- Region=region,
171
- aws_update_tag=aws_update_tag,
172
- )
173
-
174
-
175
122
  @timeit
176
123
  def load_ecr_repository_images(
177
124
  neo4j_session: neo4j.Session,
178
125
  repo_images_list: List[Dict],
179
126
  region: str,
127
+ current_aws_account_id: str,
180
128
  aws_update_tag: int,
181
129
  ) -> None:
182
130
  logger.info(
183
131
  f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
184
132
  )
185
- for repo_image_batch in batch(repo_images_list, size=10000):
186
- neo4j_session.write_transaction(
187
- _load_ecr_repo_img_tx,
188
- repo_image_batch,
189
- aws_update_tag,
190
- region,
191
- )
133
+ image_digests = {img["imageDigest"] for img in repo_images_list}
134
+ ecr_images = [{"imageDigest": d} for d in image_digests]
135
+
136
+ load(
137
+ neo4j_session,
138
+ ECRImageSchema(),
139
+ ecr_images,
140
+ lastupdated=aws_update_tag,
141
+ Region=region,
142
+ AWS_ID=current_aws_account_id,
143
+ )
144
+
145
+ load(
146
+ neo4j_session,
147
+ ECRRepositoryImageSchema(),
148
+ repo_images_list,
149
+ lastupdated=aws_update_tag,
150
+ Region=region,
151
+ AWS_ID=current_aws_account_id,
152
+ )
192
153
 
193
154
 
194
155
  @timeit
195
156
  def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
196
157
  logger.debug("Running ECR cleanup job.")
197
- run_cleanup_job("aws_import_ecr_cleanup.json", neo4j_session, common_job_parameters)
158
+ GraphJob.from_node_schema(ECRRepositorySchema(), common_job_parameters).run(
159
+ neo4j_session
160
+ )
161
+ GraphJob.from_node_schema(ECRRepositoryImageSchema(), common_job_parameters).run(
162
+ neo4j_session
163
+ )
164
+ GraphJob.from_node_schema(ECRImageSchema(), common_job_parameters).run(
165
+ neo4j_session
166
+ )
198
167
 
199
168
 
200
169
  def _get_image_data(
@@ -251,5 +220,11 @@ def sync(
251
220
  update_tag,
252
221
  )
253
222
  repo_images_list = transform_ecr_repository_images(image_data)
254
- load_ecr_repository_images(neo4j_session, repo_images_list, region, update_tag)
223
+ load_ecr_repository_images(
224
+ neo4j_session,
225
+ repo_images_list,
226
+ region,
227
+ current_aws_account_id,
228
+ update_tag,
229
+ )
255
230
  cleanup(neo4j_session, common_job_parameters)
@@ -0,0 +1,117 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.glue.connection import GlueConnectionSchema
13
+ from cartography.util import aws_handle_regions
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @timeit
20
+ @aws_handle_regions
21
+ def get_glue_connections(
22
+ boto3_session: boto3.Session, region: str
23
+ ) -> List[Dict[str, Any]]:
24
+ client = boto3_session.client(
25
+ "glue", region_name=region, config=get_botocore_config()
26
+ )
27
+ paginator = client.get_paginator("get_connections")
28
+ connections = []
29
+ for page in paginator.paginate():
30
+ connections.extend(page.get("ConnectionList", []))
31
+
32
+ return connections
33
+
34
+
35
+ def transform_glue_connections(
36
+ connections: List[Dict[str, Any]], region: str
37
+ ) -> List[Dict[str, Any]]:
38
+ """
39
+ Transform Glue connection data for ingestion
40
+ """
41
+ transformed_connections = []
42
+ for connection in connections:
43
+ transformed_connection = {
44
+ "Name": connection["Name"],
45
+ "Description": connection.get("Description"),
46
+ "ConnectionType": connection.get("ConnectionType"),
47
+ "Status": connection.get("Status"),
48
+ "StatusReason": connection.get("StatusReason"),
49
+ "AuthenticationType": connection.get("AuthenticationConfiguration", {}).get(
50
+ "AuthenticationType"
51
+ ),
52
+ "SecretArn": connection.get("AuthenticationConfiguration", {}).get(
53
+ "SecretArn"
54
+ ),
55
+ "Region": region,
56
+ }
57
+ transformed_connections.append(transformed_connection)
58
+ return transformed_connections
59
+
60
+
61
+ @timeit
62
+ def load_glue_connections(
63
+ neo4j_session: neo4j.Session,
64
+ data: List[Dict[str, Any]],
65
+ region: str,
66
+ current_aws_account_id: str,
67
+ aws_update_tag: int,
68
+ ) -> None:
69
+ logger.info(
70
+ f"Loading Glue {len(data)} connections for region '{region}' into graph.",
71
+ )
72
+ load(
73
+ neo4j_session,
74
+ GlueConnectionSchema(),
75
+ data,
76
+ lastupdated=aws_update_tag,
77
+ Region=region,
78
+ AWS_ID=current_aws_account_id,
79
+ )
80
+
81
+
82
+ @timeit
83
+ def cleanup(
84
+ neo4j_session: neo4j.Session,
85
+ common_job_parameters: Dict[str, Any],
86
+ ) -> None:
87
+ logger.debug("Running Glue cleanup job.")
88
+ GraphJob.from_node_schema(GlueConnectionSchema(), common_job_parameters).run(
89
+ neo4j_session
90
+ )
91
+
92
+
93
+ @timeit
94
+ def sync(
95
+ neo4j_session: neo4j.Session,
96
+ boto3_session: boto3.session.Session,
97
+ regions: List[str],
98
+ current_aws_account_id: str,
99
+ update_tag: int,
100
+ common_job_parameters: Dict[str, Any],
101
+ ) -> None:
102
+ for region in regions:
103
+ logger.info(
104
+ f"Syncing Glue for region '{region}' in account '{current_aws_account_id}'.",
105
+ )
106
+
107
+ connections = get_glue_connections(boto3_session, region)
108
+ transformed_connections = transform_glue_connections(connections, region)
109
+ load_glue_connections(
110
+ neo4j_session,
111
+ transformed_connections,
112
+ region,
113
+ current_aws_account_id,
114
+ update_tag,
115
+ )
116
+
117
+ cleanup(neo4j_session, common_job_parameters)
@@ -7,6 +7,7 @@ import boto3
7
7
  import neo4j
8
8
 
9
9
  from cartography.client.core.tx import load
10
+ from cartography.client.core.tx import load_matchlinks
10
11
  from cartography.graph.job import GraphJob
11
12
  from cartography.models.aws.identitycenter.awsidentitycenter import (
12
13
  AWSIdentityCenterInstanceSchema,
@@ -14,9 +15,11 @@ from cartography.models.aws.identitycenter.awsidentitycenter import (
14
15
  from cartography.models.aws.identitycenter.awspermissionset import (
15
16
  AWSPermissionSetSchema,
16
17
  )
18
+ from cartography.models.aws.identitycenter.awspermissionset import (
19
+ RoleAssignmentAllowedByMatchLink,
20
+ )
17
21
  from cartography.models.aws.identitycenter.awsssouser import AWSSSOUserSchema
18
22
  from cartography.util import aws_handle_regions
19
- from cartography.util import run_cleanup_job
20
23
  from cartography.util import timeit
21
24
 
22
25
  logger = logging.getLogger(__name__)
@@ -120,6 +123,8 @@ def load_permission_sets(
120
123
  InstanceArn=instance_arn,
121
124
  Region=region,
122
125
  AWS_ID=aws_account_id,
126
+ _sub_resource_label="AWSAccount",
127
+ _sub_resource_id=aws_account_id,
123
128
  )
124
129
 
125
130
 
@@ -220,31 +225,64 @@ def get_role_assignments(
220
225
  return role_assignments
221
226
 
222
227
 
228
+ @timeit
229
+ def get_permset_roles(
230
+ neo4j_session: neo4j.Session,
231
+ role_assignments: List[Dict[str, Any]],
232
+ ) -> List[Dict[str, Any]]:
233
+ """
234
+ Enrich role assignments with exact role ARNs by querying existing permission set relationships.
235
+ Uses the ASSIGNED_TO_ROLE relationships created when permission sets were loaded.
236
+ """
237
+ # Get unique permission set ARNs from role assignments
238
+ permset_ids = list({ra["PermissionSetArn"] for ra in role_assignments})
239
+
240
+ query = """
241
+ MATCH (role:AWSRole)<-[:ASSIGNED_TO_ROLE]-(permset:AWSPermissionSet)
242
+ WHERE permset.arn IN $PermSetIds
243
+ RETURN permset.arn AS PermissionSetArn, role.arn AS RoleArn
244
+ """
245
+ result = neo4j_session.run(query, PermSetIds=permset_ids)
246
+ permset_to_role = [record.data() for record in result]
247
+
248
+ # Create mapping from permission set ARN to role ARN
249
+ permset_to_role_map = {
250
+ entry["PermissionSetArn"]: entry["RoleArn"] for entry in permset_to_role
251
+ }
252
+
253
+ # Enrich role assignments with exact role ARNs
254
+ enriched_assignments = []
255
+ for assignment in role_assignments:
256
+ role_arn = permset_to_role_map.get(assignment["PermissionSetArn"])
257
+ enriched_assignments.append(
258
+ {
259
+ **assignment,
260
+ "RoleArn": role_arn,
261
+ }
262
+ )
263
+
264
+ return enriched_assignments
265
+
266
+
223
267
  @timeit
224
268
  def load_role_assignments(
225
269
  neo4j_session: neo4j.Session,
226
270
  role_assignments: List[Dict],
271
+ aws_account_id: str,
227
272
  aws_update_tag: int,
228
273
  ) -> None:
229
274
  """
230
- Load role assignments into the graph
275
+ Load role assignments into the graph using MatchLink schema
231
276
  """
232
277
  logger.info(f"Loading {len(role_assignments)} role assignments")
233
- if role_assignments:
234
- neo4j_session.run(
235
- """
236
- UNWIND $role_assignments AS ra
237
- MATCH (acc:AWSAccount{id:ra.AccountId}) -[:RESOURCE]->
238
- (role:AWSRole)<-[:ASSIGNED_TO_ROLE]-
239
- (permset:AWSPermissionSet {id: ra.PermissionSetArn})
240
- MATCH (sso:AWSSSOUser {id: ra.UserId})
241
- MERGE (role)-[r:ALLOWED_BY]->(sso)
242
- SET r.lastupdated = $aws_update_tag,
243
- r.permission_set_arn = ra.PermissionSetArn
244
- """,
245
- role_assignments=role_assignments,
246
- aws_update_tag=aws_update_tag,
247
- )
278
+ load_matchlinks(
279
+ neo4j_session,
280
+ RoleAssignmentAllowedByMatchLink(),
281
+ role_assignments,
282
+ lastupdated=aws_update_tag,
283
+ _sub_resource_label="AWSAccount",
284
+ _sub_resource_id=aws_account_id,
285
+ )
248
286
 
249
287
 
250
288
  @timeit
@@ -262,11 +300,14 @@ def cleanup(
262
300
  GraphJob.from_node_schema(AWSSSOUserSchema(), common_job_parameters).run(
263
301
  neo4j_session,
264
302
  )
265
- run_cleanup_job(
266
- "aws_import_identity_center_cleanup.json",
267
- neo4j_session,
268
- common_job_parameters,
269
- )
303
+
304
+ # Clean up role assignment MatchLinks
305
+ GraphJob.from_matchlink(
306
+ RoleAssignmentAllowedByMatchLink(),
307
+ "AWSAccount",
308
+ common_job_parameters["AWS_ID"],
309
+ common_job_parameters["UPDATE_TAG"],
310
+ ).run(neo4j_session)
270
311
 
271
312
 
272
313
  @timeit
@@ -327,9 +368,16 @@ def sync_identity_center_instances(
327
368
  instance_arn,
328
369
  region,
329
370
  )
330
- load_role_assignments(
371
+
372
+ # Enrich role assignments with exact role ARNs using permission set relationships
373
+ enriched_role_assignments = get_permset_roles(
331
374
  neo4j_session,
332
375
  role_assignments,
376
+ )
377
+ load_role_assignments(
378
+ neo4j_session,
379
+ enriched_role_assignments,
380
+ current_aws_account_id,
333
381
  update_tag,
334
382
  )
335
383