cartography 0.108.0rc2-py3-none-any.whl → 0.109.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (26)
  1. cartography/_version.py +2 -2
  2. cartography/data/indexes.cypher +0 -2
  3. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  4. cartography/intel/aws/cloudtrail_management_events.py +36 -3
  5. cartography/intel/aws/ecr.py +55 -80
  6. cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
  7. cartography/intel/aws/secretsmanager.py +62 -44
  8. cartography/intel/entra/groups.py +29 -1
  9. cartography/intel/gcp/__init__.py +10 -0
  10. cartography/intel/gcp/compute.py +19 -42
  11. cartography/models/aws/ecr/__init__.py +0 -0
  12. cartography/models/aws/ecr/image.py +41 -0
  13. cartography/models/aws/ecr/repository.py +72 -0
  14. cartography/models/aws/ecr/repository_image.py +95 -0
  15. cartography/models/aws/secretsmanager/secret.py +106 -0
  16. cartography/models/entra/group.py +26 -0
  17. cartography/models/entra/user.py +6 -0
  18. cartography/models/gcp/compute/__init__.py +0 -0
  19. cartography/models/gcp/compute/vpc.py +50 -0
  20. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/METADATA +1 -1
  21. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/RECORD +25 -19
  22. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  23. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/WHEEL +0 -0
  24. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/entry_points.txt +0 -0
  25. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/licenses/LICENSE +0 -0
  26. {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.108.0rc2'
-__version_tuple__ = version_tuple = (0, 108, 0, 'rc2')
+__version__ = version = '0.109.0rc1'
+__version_tuple__ = version_tuple = (0, 109, 0, 'rc1')
cartography/data/indexes.cypher CHANGED
@@ -259,8 +259,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.id);
 CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.name);
 CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.arn);
 CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.lastupdated);
-CREATE INDEX IF NOT EXISTS FOR (n:SecretsManagerSecret) ON (n.id);
-CREATE INDEX IF NOT EXISTS FOR (n:SecretsManagerSecret) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:SecurityHub) ON (n.id);
 CREATE INDEX IF NOT EXISTS FOR (n:SecurityHub) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.id);
cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json CHANGED
@@ -1,17 +1,5 @@
 {
     "statements": [
-        {
-            "query": "MATCH (n:GCPVpc) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)",
-            "iterative": true,
-            "iterationsize": 100,
-            "__comment__": "Delete GCP VPCs that no longer exist and detach them from all previously connected nodes."
-        },
-        {
-            "query": "MATCH (:GCPVpc)<-[r:RESOURCE]-(:GCPProject) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
-            "iterative": true,
-            "iterationsize": 100,
-            "__comment__": "Remove GCP VPC-to-Project relationships that are out of date."
-        },
         {
             "query": "MATCH (:GCPInstance)-[r:MEMBER_OF_GCP_VPC]->(:GCPVpc) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
             "iterative": true,
cartography/intel/aws/cloudtrail_management_events.py CHANGED
@@ -381,13 +381,15 @@ def transform_web_identity_role_events_to_role_assumptions(
 
         # Only process GitHub Actions events
         if "token.actions.githubusercontent.com" in identity_provider:
-            # GitHub repo fullname is directly in userName (e.g., "sublimagesec/sublimage")
-            github_repo = user_identity.get("userName", "")
-            if not github_repo:
+            # Extract GitHub repo fullname from userName format: "repo:{organization}/{repository}:{context}"
+            user_name = user_identity.get("userName", "")
+            if not user_name:
                 logger.debug(
                     f"Missing userName in GitHub WebIdentity event: {event.get('EventId', 'unknown')}"
                 )
                 continue
+
+            github_repo = _extract_github_repo_from_username(user_name)
 
             key = (github_repo, destination_principal)
 
             if key in github_aggregated:
@@ -572,6 +574,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
     return assumed_role_arn
 
 
+def _extract_github_repo_from_username(user_name: str) -> str:
+    """
+    Extract GitHub repository fullname from CloudTrail userName field.
+
+    GitHub Actions CloudTrail events have userName in the format:
+    "repo:{organization}/{repository}:{context}"
+    """
+    if not user_name:
+        return ""
+
+    parts = user_name.split(":")
+
+    # Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
+    if len(parts) < 3 or parts[0] != "repo":
+        return ""
+
+    # Extract "{organization}/{repository}"
+    repo_fullname = parts[1]
+
+    # Validate it looks like "{organization}/{repository}" format
+    if repo_fullname.count("/") != 1:
+        return ""
+
+    # Ensure both organization and repo exist
+    owner, repo = repo_fullname.split("/")
+    if not owner or not repo:
+        return ""
+
+    return repo_fullname
+
+
 @timeit
 def cleanup(
     neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
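As a quick sanity check on the new helper's parsing rules (the userName values below are hypothetical examples, not taken from this release):

    # The repo fullname is the second colon-separated field and must contain exactly one "/"
    _extract_github_repo_from_username("repo:my-org/my-repo:ref:refs/heads/main")  # -> "my-org/my-repo"
    _extract_github_repo_from_username("repo:my-org:environment:prod")             # -> "" (no "/{repository}")
    _extract_github_repo_from_username("my-org/my-repo")                           # -> "" (missing "repo:" prefix)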
cartography/intel/aws/ecr.py CHANGED
@@ -6,9 +6,12 @@ from typing import List
 import boto3
 import neo4j
 
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.ecr.image import ECRImageSchema
+from cartography.models.aws.ecr.repository import ECRRepositorySchema
+from cartography.models.aws.ecr.repository_image import ECRRepositoryImageSchema
 from cartography.util import aws_handle_regions
-from cartography.util import batch
-from cartography.util import run_cleanup_job
 from cartography.util import timeit
 from cartography.util import to_asynchronous
 from cartography.util import to_synchronous
@@ -74,33 +77,17 @@ def load_ecr_repositories(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    query = """
-    UNWIND $Repositories as ecr_repo
-    MERGE (repo:ECRRepository{id: ecr_repo.repositoryArn})
-    ON CREATE SET repo.firstseen = timestamp(),
-    repo.arn = ecr_repo.repositoryArn,
-    repo.name = ecr_repo.repositoryName,
-    repo.region = $Region,
-    repo.created_at = ecr_repo.createdAt
-    SET repo.lastupdated = $aws_update_tag,
-    repo.uri = ecr_repo.repositoryUri
-    WITH repo
-
-    MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (owner)-[r:RESOURCE]->(repo)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
     logger.info(
         f"Loading {len(repos)} ECR repositories for region {region} into graph.",
     )
-    neo4j_session.run(
-        query,
-        Repositories=repos,
+    load(
+        neo4j_session,
+        ECRRepositorySchema(),
+        repos,
+        lastupdated=aws_update_tag,
         Region=region,
-        aws_update_tag=aws_update_tag,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-    ).consume()  # See issue #440
+        AWS_ID=current_aws_account_id,
+    )
 
 
 @timeit
@@ -114,8 +101,13 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
     for repo_uri in sorted(repo_data.keys()):
        repo_images = repo_data[repo_uri]
        for img in repo_images:
-            if "imageDigest" in img and img["imageDigest"]:
+            digest = img.get("imageDigest")
+            if digest:
+                tag = img.get("imageTag")
+                uri = repo_uri + (f":{tag}" if tag else "")
                 img["repo_uri"] = repo_uri
+                img["uri"] = uri
+                img["id"] = uri
                 repo_images_list.append(img)
             else:
                 logger.warning(
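For illustration, the new uri/id fields are the repository URI plus an optional ":tag" suffix; a minimal sketch with hypothetical values:

    repo_uri = "123456789012.dkr.ecr.us-east-1.amazonaws.com/app"  # hypothetical repository URI
    for img in [{"imageDigest": "sha256:abc", "imageTag": "v1.2"}, {"imageDigest": "sha256:def"}]:
        tag = img.get("imageTag")
        print(repo_uri + (f":{tag}" if tag else ""))
    # 123456789012.dkr.ecr.us-east-1.amazonaws.com/app:v1.2
    # 123456789012.dkr.ecr.us-east-1.amazonaws.com/app      (untagged image)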
@@ -127,74 +119,51 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
     return repo_images_list
 
 
-def _load_ecr_repo_img_tx(
-    tx: neo4j.Transaction,
-    repo_images_list: List[Dict],
-    aws_update_tag: int,
-    region: str,
-) -> None:
-    query = """
-    UNWIND $RepoList as repo_img
-    MERGE (ri:ECRRepositoryImage{id: repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, '')})
-    ON CREATE SET ri.firstseen = timestamp()
-    SET ri.lastupdated = $aws_update_tag,
-    ri.tag = repo_img.imageTag,
-    ri.uri = repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, ''),
-    ri.image_size_bytes = repo_img.imageSizeInBytes,
-    ri.image_pushed_at = repo_img.imagePushedAt,
-    ri.image_manifest_media_type = repo_img.imageManifestMediaType,
-    ri.artifact_media_type = repo_img.artifactMediaType,
-    ri.last_recorded_pull_time = repo_img.lastRecordedPullTime
-    WITH ri, repo_img
-
-    MERGE (img:ECRImage{id: repo_img.imageDigest})
-    ON CREATE SET img.firstseen = timestamp(),
-    img.digest = repo_img.imageDigest
-    SET img.lastupdated = $aws_update_tag,
-    img.region = $Region
-    WITH ri, img, repo_img
-
-    MERGE (ri)-[r1:IMAGE]->(img)
-    ON CREATE SET r1.firstseen = timestamp()
-    SET r1.lastupdated = $aws_update_tag
-    WITH ri, repo_img
-
-    MATCH (repo:ECRRepository{uri: repo_img.repo_uri})
-    MERGE (repo)-[r2:REPO_IMAGE]->(ri)
-    ON CREATE SET r2.firstseen = timestamp()
-    SET r2.lastupdated = $aws_update_tag
-    """
-    tx.run(
-        query,
-        RepoList=repo_images_list,
-        Region=region,
-        aws_update_tag=aws_update_tag,
-    )
-
-
 @timeit
 def load_ecr_repository_images(
     neo4j_session: neo4j.Session,
     repo_images_list: List[Dict],
     region: str,
+    current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
     logger.info(
         f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
     )
-    for repo_image_batch in batch(repo_images_list, size=10000):
-        neo4j_session.write_transaction(
-            _load_ecr_repo_img_tx,
-            repo_image_batch,
-            aws_update_tag,
-            region,
-        )
+    image_digests = {img["imageDigest"] for img in repo_images_list}
+    ecr_images = [{"imageDigest": d} for d in image_digests]
+
+    load(
+        neo4j_session,
+        ECRImageSchema(),
+        ecr_images,
+        lastupdated=aws_update_tag,
+        Region=region,
+        AWS_ID=current_aws_account_id,
+    )
+
+    load(
+        neo4j_session,
+        ECRRepositoryImageSchema(),
+        repo_images_list,
+        lastupdated=aws_update_tag,
+        Region=region,
+        AWS_ID=current_aws_account_id,
+    )
 
 
 @timeit
 def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
     logger.debug("Running ECR cleanup job.")
-    run_cleanup_job("aws_import_ecr_cleanup.json", neo4j_session, common_job_parameters)
+    GraphJob.from_node_schema(ECRRepositorySchema(), common_job_parameters).run(
+        neo4j_session
+    )
+    GraphJob.from_node_schema(ECRRepositoryImageSchema(), common_job_parameters).run(
+        neo4j_session
+    )
+    GraphJob.from_node_schema(ECRImageSchema(), common_job_parameters).run(
+        neo4j_session
+    )
 
 
 def _get_image_data(
@@ -251,5 +220,11 @@ def sync(
         update_tag,
     )
     repo_images_list = transform_ecr_repository_images(image_data)
-    load_ecr_repository_images(neo4j_session, repo_images_list, region, update_tag)
+    load_ecr_repository_images(
+        neo4j_session,
+        repo_images_list,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )
     cleanup(neo4j_session, common_job_parameters)
cartography/intel/aws/resourcegroupstaggingapi.py CHANGED
@@ -1,5 +1,6 @@
 import logging
 from string import Template
+from typing import Any
 from typing import Dict
 from typing import List
 
@@ -56,6 +57,35 @@ def get_short_id_from_lb2_arn(alb_arn: str) -> str:
     return alb_arn.split("/")[-2]
 
 
+def get_resource_type_from_arn(arn: str) -> str:
+    """Return the resource type format expected by the Tagging API.
+
+    The Resource Groups Tagging API requires resource types in the form
+    ``service:resource``. Most ARNs embed the resource type in the fifth segment
+    after the service name. Load balancer ARNs add an extra ``app`` or ``net``
+    component that must be preserved. S3 and SQS ARNs only contain the service
+    name. This helper extracts the appropriate string so that ARNs can be
+    grouped correctly for API calls.
+    """
+
+    parts = arn.split(":", 5)
+    service = parts[2]
+    if service in {"s3", "sqs"}:
+        return service
+
+    resource = parts[5]
+    if service == "elasticloadbalancing" and resource.startswith("loadbalancer/"):
+        segments = resource.split("/")
+        if len(segments) > 2 and segments[1] in {"app", "net"}:
+            resource_type = f"{segments[0]}/{segments[1]}"
+        else:
+            resource_type = segments[0]
+    else:
+        resource_type = resource.split("/")[0].split(":")[0]
+
+    return f"{service}:{resource_type}" if resource_type else service
+
+
 # We maintain a mapping from AWS resource types to their associated labels and unique identifiers.
 # label: the node label used in cartography for this resource type
 # property: the field of this node that uniquely identified this resource type
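A few hypothetical ARNs illustrating what the new helper returns (account ids and names made up for the example):

    get_resource_type_from_arn("arn:aws:ec2:us-east-1:123456789012:instance/i-0abc123")
    # -> "ec2:instance"
    get_resource_type_from_arn("arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/my-alb/50dc6c49")
    # -> "elasticloadbalancing:loadbalancer/app"
    get_resource_type_from_arn("arn:aws:s3:::my-bucket")
    # -> "s3"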
@@ -158,27 +188,27 @@ TAG_RESOURCE_TYPE_MAPPINGS: Dict = {
 @aws_handle_regions
 def get_tags(
     boto3_session: boto3.session.Session,
-    resource_type: str,
+    resource_types: list[str],
     region: str,
-) -> List[Dict]:
-    """
-    Create boto3 client and retrieve tag data.
-    """
-    # this is a temporary workaround to populate AWS tags for IAM roles.
-    # resourcegroupstaggingapi does not support IAM roles and no ETA is provided
-    # TODO: when resourcegroupstaggingapi supports iam:role, remove this condition block
-    if resource_type == "iam:role":
-        return get_role_tags(boto3_session)
+) -> list[dict[str, Any]]:
+    """Retrieve tag data for the provided resource types."""
+    resources: list[dict[str, Any]] = []
+
+    if "iam:role" in resource_types:
+        resources.extend(get_role_tags(boto3_session))
+        resource_types = [rt for rt in resource_types if rt != "iam:role"]
+
+    if not resource_types:
+        return resources
 
     client = boto3_session.client("resourcegroupstaggingapi", region_name=region)
     paginator = client.get_paginator("get_resources")
-    resources: List[Dict] = []
-    for page in paginator.paginate(
-        # Only ingest tags for resources that Cartography supports.
-        # This is just a starting list; there may be others supported by this API.
-        ResourceTypeFilters=[resource_type],
-    ):
-        resources.extend(page["ResourceTagMappingList"])
+
+    # Batch resource types into groups of 100
+    # (https://docs.aws.amazon.com/resourcegroupstagging/latest/APIReference/API_GetResources.html)
+    for resource_types_batch in batch(resource_types, size=100):
+        for page in paginator.paginate(ResourceTypeFilters=resource_types_batch):
+            resources.extend(page["ResourceTagMappingList"])
     return resources
 
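The chunking relies on a batch() helper from cartography.util; a minimal self-contained sketch of the same idea (assumed behavior, not necessarily the library's exact implementation):

    from itertools import islice

    def batch(items, size=100):
        # Yield successive lists of at most `size` items.
        it = iter(items)
        while chunk := list(islice(it, size)):
            yield chunk

    # e.g. 250 resource types -> 3 GetResources calls (100 + 100 + 50)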
@@ -210,6 +240,9 @@ def _load_tags_tx(
         r.firstseen = timestamp()
     """,
     )
+    if not tag_data:
+        return
+
     query = INGEST_TAG_TEMPLATE.safe_substitute(
         resource_label=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["label"],
         property=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["property"],
@@ -262,6 +295,26 @@ def compute_resource_id(tag_mapping: Dict, resource_type: str) -> str:
     return resource_id
 
 
+def _group_tag_data_by_resource_type(
+    tag_data: List[Dict],
+    tag_resource_type_mappings: Dict,
+) -> Dict[str, List[Dict]]:
+    """Group raw tag data by the resource types Cartography supports."""
+
+    grouped: Dict[str, List[Dict]] = {rtype: [] for rtype in tag_resource_type_mappings}
+    for mapping in tag_data:
+        rtype = get_resource_type_from_arn(mapping["ResourceARN"])
+        if rtype in grouped:
+            grouped[rtype].append(mapping)
+        else:
+            logger.debug(
+                "Unknown tag resource type %s from ARN %s",
+                rtype,
+                mapping["ResourceARN"],
+            )
+    return grouped
+
+
 @timeit
 def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
     run_cleanup_job(
@@ -285,8 +338,14 @@ def sync(
     logger.info(
         f"Syncing AWS tags for account {current_aws_account_id} and region {region}",
     )
+    all_tag_data = get_tags(
+        boto3_session, list(tag_resource_type_mappings.keys()), region
+    )
+    grouped = _group_tag_data_by_resource_type(
+        all_tag_data, tag_resource_type_mappings
+    )
     for resource_type in tag_resource_type_mappings.keys():
-        tag_data = get_tags(boto3_session, resource_type, region)
+        tag_data = grouped.get(resource_type, [])
         transform_tags(tag_data, resource_type)  # type: ignore
         logger.info(
             f"Loading {len(tag_data)} tags for resource type {resource_type}",
cartography/intel/aws/secretsmanager.py CHANGED
@@ -7,6 +7,7 @@ import neo4j
 
 from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
+from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
 from cartography.models.aws.secretsmanager.secret_version import (
     SecretsManagerSecretVersionSchema,
 )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import dict_date_to_epoch
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit
 
 logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[D
     return secrets
 
 
+def transform_secrets(
+    secrets: List[Dict],
+) -> List[Dict]:
+    """
+    Transform AWS Secrets Manager Secrets to match the data model.
+    """
+    transformed_data = []
+    for secret in secrets:
+        # Start with a copy of the original secret data
+        transformed = dict(secret)
+
+        # Convert date fields to epoch timestamps
+        transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
+        transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
+        transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
+        transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
+        transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
+
+        # Flatten nested RotationRules.AutomaticallyAfterDays property
+        if "RotationRules" in secret and secret["RotationRules"]:
+            rotation_rules = secret["RotationRules"]
+            if "AutomaticallyAfterDays" in rotation_rules:
+                transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
+                    "AutomaticallyAfterDays"
+                ]
+
+        transformed_data.append(transformed)
+
+    return transformed_data
+
+
 @timeit
 def load_secrets(
     neo4j_session: neo4j.Session,
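Roughly, transform_secrets() turns a ListSecrets entry like the hypothetical one below into a flat dict the schema can load (assuming dict_date_to_epoch converts datetimes to epoch seconds):

    # in  (hypothetical): {"ARN": "arn:aws:secretsmanager:...", "Name": "db-password",
    #                      "CreatedDate": datetime(2024, 1, 1, tzinfo=timezone.utc),
    #                      "RotationRules": {"AutomaticallyAfterDays": 30}}
    # out: {"ARN": "arn:aws:secretsmanager:...", "Name": "db-password",
    #       "CreatedDate": 1704067200, ...,
    #       "RotationRulesAutomaticallyAfterDays": 30}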
@@ -40,48 +71,33 @@ def load_secrets(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    ingest_secrets = """
-    UNWIND $Secrets as secret
-    MERGE (s:SecretsManagerSecret{id: secret.ARN})
-    ON CREATE SET s.firstseen = timestamp()
-    SET s.name = secret.Name, s.arn = secret.ARN, s.description = secret.Description,
-    s.kms_key_id = secret.KmsKeyId, s.rotation_enabled = secret.RotationEnabled,
-    s.rotation_lambda_arn = secret.RotationLambdaARN,
-    s.rotation_rules_automatically_after_days = secret.RotationRules.AutomaticallyAfterDays,
-    s.last_rotated_date = secret.LastRotatedDate, s.last_changed_date = secret.LastChangedDate,
-    s.last_accessed_date = secret.LastAccessedDate, s.deleted_date = secret.DeletedDate,
-    s.owning_service = secret.OwningService, s.created_date = secret.CreatedDate,
-    s.primary_region = secret.PrimaryRegion, s.region = $Region,
-    s.lastupdated = $aws_update_tag
-    WITH s
-    MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (owner)-[r:RESOURCE]->(s)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
-    for secret in data:
-        secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
-        secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
-        secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
-        secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
-        secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
-
-    neo4j_session.run(
-        ingest_secrets,
-        Secrets=data,
+    """
+    Load transformed secrets into Neo4j using the data model.
+    Expects data to already be transformed by transform_secrets().
+    """
+    logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
+
+    # Load using the schema-based approach
+    load(
+        neo4j_session,
+        SecretsManagerSecretSchema(),
+        data,
+        lastupdated=aws_update_tag,
         Region=region,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-        aws_update_tag=aws_update_tag,
+        AWS_ID=current_aws_account_id,
     )
 
 
 @timeit
 def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-    run_cleanup_job(
-        "aws_import_secrets_cleanup.json",
-        neo4j_session,
-        common_job_parameters,
+    """
+    Run Secrets cleanup job using the data model.
+    """
+    logger.debug("Running Secrets cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        SecretsManagerSecretSchema(), common_job_parameters
     )
+    cleanup_job.run(neo4j_session)
 
 
 @timeit
@@ -121,8 +137,6 @@ def get_secret_versions(
 
 def transform_secret_versions(
     versions: List[Dict],
-    region: str,
-    aws_account_id: str,
 ) -> List[Dict]:
     """
     Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
     )
     secrets = get_secret_list(boto3_session, region)
 
-    load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
+    transformed_secrets = transform_secrets(secrets)
+
+    load_secrets(
+        neo4j_session,
+        transformed_secrets,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )
 
     all_versions = []
     for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
         )
         all_versions.extend(versions)
 
-    transformed_data = transform_secret_versions(
-        all_versions,
-        region,
-        current_aws_account_id,
-    )
+    transformed_data = transform_secret_versions(all_versions)
 
     load_secret_versions(
         neo4j_session,
cartography/intel/entra/groups.py CHANGED
@@ -59,10 +59,29 @@ async def get_group_members(
     return user_ids, group_ids
 
 
+@timeit
+async def get_group_owners(client: GraphServiceClient, group_id: str) -> list[str]:
+    """Get owner user IDs for a given group."""
+    owner_ids: list[str] = []
+    request_builder = client.groups.by_group_id(group_id).owners
+    page = await request_builder.get()
+    while page:
+        if page.value:
+            for obj in page.value:
+                odata_type = getattr(obj, "odata_type", "")
+                if odata_type == "#microsoft.graph.user":
+                    owner_ids.append(obj.id)
+        if not page.odata_next_link:
+            break
+        page = await request_builder.with_url(page.odata_next_link).get()
+    return owner_ids
+
+
 def transform_groups(
     groups: list[Group],
     user_member_map: dict[str, list[str]],
     group_member_map: dict[str, list[str]],
+    group_owner_map: dict[str, list[str]],
 ) -> list[dict[str, Any]]:
     """Transform API responses into dictionaries for ingestion."""
     result: list[dict[str, Any]] = []
@@ -82,6 +101,7 @@ def transform_groups(
             "deleted_date_time": g.deleted_date_time,
             "member_ids": user_member_map.get(g.id, []),
             "member_group_ids": group_member_map.get(g.id, []),
+            "owner_ids": group_owner_map.get(g.id, []),
         }
         result.append(transformed)
     return result
@@ -134,6 +154,12 @@ async def sync_entra_groups(
 
     user_member_map: dict[str, list[str]] = {}
     group_member_map: dict[str, list[str]] = {}
+    group_owner_map: dict[str, list[str]] = {}
+
+    for group in groups:
+        owners = await get_group_owners(client, group.id)
+        group_owner_map[group.id] = owners
+
     for group in groups:
         try:
             users, subgroups = await get_group_members(client, group.id)
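Note that owners are fetched with one awaited Graph call per group; if that ever became a bottleneck, a hypothetical concurrent variant (not part of this release) could fan out with asyncio.gather:

    import asyncio

    async def get_all_group_owners(client, group_ids):
        # Hypothetical helper: fetch owners for all groups concurrently.
        results = await asyncio.gather(*(get_group_owners(client, gid) for gid in group_ids))
        return dict(zip(group_ids, results))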
@@ -144,7 +170,9 @@ async def sync_entra_groups(
             user_member_map[group.id] = []
             group_member_map[group.id] = []
 
-    transformed_groups = transform_groups(groups, user_member_map, group_member_map)
+    transformed_groups = transform_groups(
+        groups, user_member_map, group_member_map, group_owner_map
+    )
 
     load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
     load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)