cartography-0.108.0rc1-py3-none-any.whl → cartography-0.109.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)
  1. cartography/_version.py +2 -2
  2. cartography/data/indexes.cypher +0 -2
  3. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  4. cartography/intel/aws/cloudtrail.py +17 -4
  5. cartography/intel/aws/cloudtrail_management_events.py +593 -16
  6. cartography/intel/aws/cloudwatch.py +73 -4
  7. cartography/intel/aws/ec2/subnets.py +37 -63
  8. cartography/intel/aws/ecr.py +55 -80
  9. cartography/intel/aws/elasticache.py +102 -79
  10. cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
  11. cartography/intel/aws/secretsmanager.py +62 -44
  12. cartography/intel/entra/groups.py +29 -1
  13. cartography/intel/gcp/__init__.py +10 -0
  14. cartography/intel/gcp/compute.py +19 -42
  15. cartography/models/aws/cloudtrail/management_events.py +95 -6
  16. cartography/models/aws/cloudtrail/trail.py +21 -0
  17. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  18. cartography/models/aws/ec2/subnets.py +65 -0
  19. cartography/models/aws/ecr/__init__.py +0 -0
  20. cartography/models/aws/ecr/image.py +41 -0
  21. cartography/models/aws/ecr/repository.py +72 -0
  22. cartography/models/aws/ecr/repository_image.py +95 -0
  23. cartography/models/aws/elasticache/__init__.py +0 -0
  24. cartography/models/aws/elasticache/cluster.py +65 -0
  25. cartography/models/aws/elasticache/topic.py +67 -0
  26. cartography/models/aws/secretsmanager/secret.py +106 -0
  27. cartography/models/entra/group.py +26 -0
  28. cartography/models/entra/user.py +6 -0
  29. cartography/models/gcp/compute/__init__.py +0 -0
  30. cartography/models/gcp/compute/vpc.py +50 -0
  31. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/METADATA +1 -1
  32. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/RECORD +36 -25
  33. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  34. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/WHEEL +0 -0
  35. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/entry_points.txt +0 -0
  36. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/licenses/LICENSE +0 -0
  37. {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/top_level.txt +0 -0
cartography/intel/aws/elasticache.py
@@ -1,118 +1,132 @@
  import logging
- from typing import Dict
- from typing import List
- from typing import Set
+ from typing import Any

  import boto3
  import neo4j

+ from cartography.client.core.tx import load
+ from cartography.graph.job import GraphJob
+ from cartography.models.aws.elasticache.cluster import ElasticacheClusterSchema
+ from cartography.models.aws.elasticache.topic import ElasticacheTopicSchema
  from cartography.stats import get_stats_client
  from cartography.util import aws_handle_regions
  from cartography.util import merge_module_sync_metadata
- from cartography.util import run_cleanup_job
  from cartography.util import timeit

  logger = logging.getLogger(__name__)
  stat_handler = get_stats_client(__name__)


- def _get_topic(cluster: Dict) -> Dict:
-     return cluster["NotificationConfiguration"]
-
-
- def transform_elasticache_topics(cluster_data: List[Dict]) -> List[Dict]:
-     """
-     Collect unique TopicArns from the cluster data
-     """
-     seen: Set[str] = set()
-     topics: List[Dict] = []
-     for cluster in cluster_data:
-         topic = _get_topic(cluster)
-         topic_arn = topic["TopicArn"]
-         if topic_arn not in seen:
-             seen.add(topic_arn)
-             topics.append(topic)
-     return topics
-
-
  @timeit
  @aws_handle_regions
  def get_elasticache_clusters(
      boto3_session: boto3.session.Session,
      region: str,
- ) -> List[Dict]:
-     logger.debug(f"Getting ElastiCache Clusters in region '{region}'.")
+ ) -> list[dict[str, Any]]:
      client = boto3_session.client("elasticache", region_name=region)
      paginator = client.get_paginator("describe_cache_clusters")
-     clusters: List[Dict] = []
+     clusters: list[dict[str, Any]] = []
      for page in paginator.paginate():
-         clusters.extend(page["CacheClusters"])
+         clusters.extend(page.get("CacheClusters", []))
      return clusters


+ def transform_elasticache_clusters(
+     clusters: list[dict[str, Any]], region: str
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+     cluster_data: list[dict[str, Any]] = []
+     topics: dict[str, dict[str, Any]] = {}
+
+     for cluster in clusters:
+         notification = cluster.get("NotificationConfiguration", {})
+         topic_arn = notification.get("TopicArn")
+         cluster_record = {
+             "ARN": cluster["ARN"],
+             "CacheClusterId": cluster["CacheClusterId"],
+             "CacheNodeType": cluster.get("CacheNodeType"),
+             "Engine": cluster.get("Engine"),
+             "EngineVersion": cluster.get("EngineVersion"),
+             "CacheClusterStatus": cluster.get("CacheClusterStatus"),
+             "NumCacheNodes": cluster.get("NumCacheNodes"),
+             "PreferredAvailabilityZone": cluster.get("PreferredAvailabilityZone"),
+             "PreferredMaintenanceWindow": cluster.get("PreferredMaintenanceWindow"),
+             "CacheClusterCreateTime": cluster.get("CacheClusterCreateTime"),
+             "CacheSubnetGroupName": cluster.get("CacheSubnetGroupName"),
+             "AutoMinorVersionUpgrade": cluster.get("AutoMinorVersionUpgrade"),
+             "ReplicationGroupId": cluster.get("ReplicationGroupId"),
+             "SnapshotRetentionLimit": cluster.get("SnapshotRetentionLimit"),
+             "SnapshotWindow": cluster.get("SnapshotWindow"),
+             "AuthTokenEnabled": cluster.get("AuthTokenEnabled"),
+             "TransitEncryptionEnabled": cluster.get("TransitEncryptionEnabled"),
+             "AtRestEncryptionEnabled": cluster.get("AtRestEncryptionEnabled"),
+             "TopicArn": topic_arn,
+             "Region": region,
+         }
+         cluster_data.append(cluster_record)
+
+         if topic_arn:
+             topics.setdefault(
+                 topic_arn,
+                 {
+                     "TopicArn": topic_arn,
+                     "TopicStatus": notification.get("TopicStatus"),
+                     "cluster_arns": [],
+                 },
+             )["cluster_arns"].append(cluster["ARN"])
+
+     return cluster_data, list(topics.values())
+
+
  @timeit
  def load_elasticache_clusters(
      neo4j_session: neo4j.Session,
-     clusters: List[Dict],
+     clusters: list[dict[str, Any]],
      region: str,
      aws_account_id: str,
      update_tag: int,
  ) -> None:
-     query = """
-     UNWIND $clusters as elasticache_cluster
-     MERGE (cluster:ElasticacheCluster{id:elasticache_cluster.ARN})
-     ON CREATE SET cluster.firstseen = timestamp(),
-     cluster.arn = elasticache_cluster.ARN,
-     cluster.topic_arn = elasticache_cluster.NotificationConfiguration.TopicArn,
-     cluster.id = elasticache_cluster.CacheClusterId,
-     cluster.region = $region
-     SET cluster.lastupdated = $aws_update_tag
-
-     WITH cluster, elasticache_cluster
-     MATCH (owner:AWSAccount{id: $aws_account_id})
-     MERGE (owner)-[r3:RESOURCE]->(cluster)
-     ON CREATE SET r3.firstseen = timestamp()
-     SET r3.lastupdated = $aws_update_tag
-
-     WITH elasticache_cluster, owner
-     WHERE NOT elasticache_cluster.NotificationConfiguration IS NULL
-     MERGE (topic:ElasticacheTopic{id: elasticache_cluster.NotificationConfiguration.TopicArn})
-     ON CREATE SET topic.firstseen = timestamp(),
-     topic.arn = elasticache_cluster.NotificationConfiguration.TopicArn
-     SET topic.lastupdated = $aws_update_tag,
-     topic.status = elasticache_cluster.NotificationConfiguration.Status
-
-     MERGE (topic)-[r:CACHE_CLUSTER]->(cluster)
-     ON CREATE SET r.firstseen = timestamp()
-     SET r.lastupdated = $aws_update_tag
-     WITH cluster, topic
-
-     MERGE (owner)-[r2:RESOURCE]->(topic)
-     ON CREATE SET r2.firstseen = timestamp()
-     SET r2.lastupdated = $aws_update_tag
-     """
      logger.info(
-         f"Loading f{len(clusters)} ElastiCache clusters for region '{region}' into graph.",
+         f"Loading {len(clusters)} ElastiCache clusters for region '{region}' into graph."
      )
-     neo4j_session.run(
-         query,
-         clusters=clusters,
-         region=region,
-         aws_update_tag=update_tag,
-         aws_account_id=aws_account_id,
+     load(
+         neo4j_session,
+         ElasticacheClusterSchema(),
+         clusters,
+         lastupdated=update_tag,
+         Region=region,
+         AWS_ID=aws_account_id,
      )


  @timeit
- def cleanup(
+ def load_elasticache_topics(
      neo4j_session: neo4j.Session,
-     current_aws_account_id: str,
+     topics: list[dict[str, Any]],
+     aws_account_id: str,
      update_tag: int,
  ) -> None:
-     run_cleanup_job(
-         "aws_import_elasticache_cleanup.json",
+     if not topics:
+         return
+     logger.info(f"Loading {len(topics)} ElastiCache topics into graph.")
+     load(
          neo4j_session,
-         {"UPDATE_TAG": update_tag, "AWS_ID": current_aws_account_id},
+         ElasticacheTopicSchema(),
+         topics,
+         lastupdated=update_tag,
+         AWS_ID=aws_account_id,
+     )
+
+
+ @timeit
+ def cleanup(
+     neo4j_session: neo4j.Session,
+     common_job_parameters: dict[str, Any],
+ ) -> None:
+     GraphJob.from_node_schema(ElasticacheClusterSchema(), common_job_parameters).run(
+         neo4j_session
+     )
+     GraphJob.from_node_schema(ElasticacheTopicSchema(), common_job_parameters).run(
+         neo4j_session
      )


@@ -120,24 +134,33 @@ def cleanup(
  def sync(
      neo4j_session: neo4j.Session,
      boto3_session: boto3.session.Session,
-     regions: List[str],
+     regions: list[str],
      current_aws_account_id: str,
      update_tag: int,
-     common_job_parameters: Dict,
+     common_job_parameters: dict[str, Any],
  ) -> None:
      for region in regions:
          logger.info(
-             f"Syncing ElastiCache clusters for region '{region}' in account {current_aws_account_id}",
+             "Syncing ElastiCache clusters for region '%s' in account '%s'.",
+             region,
+             current_aws_account_id,
          )
-         clusters = get_elasticache_clusters(boto3_session, region)
+         raw_clusters = get_elasticache_clusters(boto3_session, region)
+         cluster_data, topic_data = transform_elasticache_clusters(raw_clusters, region)
          load_elasticache_clusters(
              neo4j_session,
-             clusters,
+             cluster_data,
              region,
              current_aws_account_id,
              update_tag,
          )
-     cleanup(neo4j_session, current_aws_account_id, update_tag)
+         load_elasticache_topics(
+             neo4j_session,
+             topic_data,
+             current_aws_account_id,
+             update_tag,
+         )
+     cleanup(neo4j_session, common_job_parameters)
      merge_module_sync_metadata(
          neo4j_session,
          group_type="AWSAccount",
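
Note on the ElastiCache changes above: the handwritten Cypher ingest is replaced by cartography's schema-based load() and GraphJob-driven cleanup, and topic handling moves into transform_elasticache_clusters(), which flattens each cluster record and dedupes notification topics by ARN while tracking which clusters reference them. A minimal sketch of the new transform's behavior, with invented sample data shaped like describe_cache_clusters output:

raw = [
    {
        "ARN": "arn:aws:elasticache:us-east-1:111122223333:cluster:c1",
        "CacheClusterId": "c1",
        "NotificationConfiguration": {
            "TopicArn": "arn:aws:sns:us-east-1:111122223333:alerts",
            "TopicStatus": "active",
        },
    },
    # No NotificationConfiguration: TopicArn comes back None, no topic record.
    {
        "ARN": "arn:aws:elasticache:us-east-1:111122223333:cluster:c2",
        "CacheClusterId": "c2",
    },
]
cluster_data, topics = transform_elasticache_clusters(raw, "us-east-1")
assert cluster_data[0]["TopicArn"] == "arn:aws:sns:us-east-1:111122223333:alerts"
assert topics == [
    {
        "TopicArn": "arn:aws:sns:us-east-1:111122223333:alerts",
        "TopicStatus": "active",
        "cluster_arns": ["arn:aws:elasticache:us-east-1:111122223333:cluster:c1"],
    },
]

Unlike the removed transform_elasticache_topics(), which indexed NotificationConfiguration unconditionally and could raise KeyError on clusters without one, the new transform uses .get() throughout.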
cartography/intel/aws/resourcegroupstaggingapi.py
@@ -1,5 +1,6 @@
  import logging
  from string import Template
+ from typing import Any
  from typing import Dict
  from typing import List

@@ -56,6 +57,35 @@ def get_short_id_from_lb2_arn(alb_arn: str) -> str:
      return alb_arn.split("/")[-2]


+ def get_resource_type_from_arn(arn: str) -> str:
+     """Return the resource type format expected by the Tagging API.
+
+     The Resource Groups Tagging API requires resource types in the form
+     ``service:resource``. Most ARNs embed the resource type in the fifth segment
+     after the service name. Load balancer ARNs add an extra ``app`` or ``net``
+     component that must be preserved. S3 and SQS ARNs only contain the service
+     name. This helper extracts the appropriate string so that ARNs can be
+     grouped correctly for API calls.
+     """
+
+     parts = arn.split(":", 5)
+     service = parts[2]
+     if service in {"s3", "sqs"}:
+         return service
+
+     resource = parts[5]
+     if service == "elasticloadbalancing" and resource.startswith("loadbalancer/"):
+         segments = resource.split("/")
+         if len(segments) > 2 and segments[1] in {"app", "net"}:
+             resource_type = f"{segments[0]}/{segments[1]}"
+         else:
+             resource_type = segments[0]
+     else:
+         resource_type = resource.split("/")[0].split(":")[0]
+
+     return f"{service}:{resource_type}" if resource_type else service
+
+
  # We maintain a mapping from AWS resource types to their associated labels and unique identifiers.
  # label: the node label used in cartography for this resource type
  # property: the field of this node that uniquely identified this resource type
@@ -158,27 +188,27 @@ TAG_RESOURCE_TYPE_MAPPINGS: Dict = {
  @aws_handle_regions
  def get_tags(
      boto3_session: boto3.session.Session,
-     resource_type: str,
+     resource_types: list[str],
      region: str,
- ) -> List[Dict]:
-     """
-     Create boto3 client and retrieve tag data.
-     """
-     # this is a temporary workaround to populate AWS tags for IAM roles.
-     # resourcegroupstaggingapi does not support IAM roles and no ETA is provided
-     # TODO: when resourcegroupstaggingapi supports iam:role, remove this condition block
-     if resource_type == "iam:role":
-         return get_role_tags(boto3_session)
+ ) -> list[dict[str, Any]]:
+     """Retrieve tag data for the provided resource types."""
+     resources: list[dict[str, Any]] = []
+
+     if "iam:role" in resource_types:
+         resources.extend(get_role_tags(boto3_session))
+         resource_types = [rt for rt in resource_types if rt != "iam:role"]
+
+     if not resource_types:
+         return resources

      client = boto3_session.client("resourcegroupstaggingapi", region_name=region)
      paginator = client.get_paginator("get_resources")
-     resources: List[Dict] = []
-     for page in paginator.paginate(
-         # Only ingest tags for resources that Cartography supports.
-         # This is just a starting list; there may be others supported by this API.
-         ResourceTypeFilters=[resource_type],
-     ):
-         resources.extend(page["ResourceTagMappingList"])
+
+     # Batch resource types into groups of 100
+     # (https://docs.aws.amazon.com/resourcegroupstagging/latest/APIReference/API_GetResources.html)
+     for resource_types_batch in batch(resource_types, size=100):
+         for page in paginator.paginate(ResourceTypeFilters=resource_types_batch):
+             resources.extend(page["ResourceTagMappingList"])
      return resources


@@ -210,6 +240,9 @@ def _load_tags_tx(
          r.firstseen = timestamp()
          """,
      )
+     if not tag_data:
+         return
+
      query = INGEST_TAG_TEMPLATE.safe_substitute(
          resource_label=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["label"],
          property=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["property"],
@@ -262,6 +295,26 @@ def compute_resource_id(tag_mapping: Dict, resource_type: str) -> str:
      return resource_id


+ def _group_tag_data_by_resource_type(
+     tag_data: List[Dict],
+     tag_resource_type_mappings: Dict,
+ ) -> Dict[str, List[Dict]]:
+     """Group raw tag data by the resource types Cartography supports."""
+
+     grouped: Dict[str, List[Dict]] = {rtype: [] for rtype in tag_resource_type_mappings}
+     for mapping in tag_data:
+         rtype = get_resource_type_from_arn(mapping["ResourceARN"])
+         if rtype in grouped:
+             grouped[rtype].append(mapping)
+         else:
+             logger.debug(
+                 "Unknown tag resource type %s from ARN %s",
+                 rtype,
+                 mapping["ResourceARN"],
+             )
+     return grouped
+
+
  @timeit
  def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
      run_cleanup_job(
@@ -285,8 +338,14 @@ def sync(
      logger.info(
          f"Syncing AWS tags for account {current_aws_account_id} and region {region}",
      )
+     all_tag_data = get_tags(
+         boto3_session, list(tag_resource_type_mappings.keys()), region
+     )
+     grouped = _group_tag_data_by_resource_type(
+         all_tag_data, tag_resource_type_mappings
+     )
      for resource_type in tag_resource_type_mappings.keys():
-         tag_data = get_tags(boto3_session, resource_type, region)
+         tag_data = grouped.get(resource_type, [])
          transform_tags(tag_data, resource_type)  # type: ignore
          logger.info(
              f"Loading {len(tag_data)} tags for resource type {resource_type}",
cartography/intel/aws/secretsmanager.py
@@ -7,6 +7,7 @@ import neo4j

  from cartography.client.core.tx import load
  from cartography.graph.job import GraphJob
+ from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
  from cartography.models.aws.secretsmanager.secret_version import (
      SecretsManagerSecretVersionSchema,
  )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
  from cartography.util import aws_handle_regions
  from cartography.util import dict_date_to_epoch
  from cartography.util import merge_module_sync_metadata
- from cartography.util import run_cleanup_job
  from cartography.util import timeit

  logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
      return secrets


+ def transform_secrets(
+     secrets: List[Dict],
+ ) -> List[Dict]:
+     """
+     Transform AWS Secrets Manager Secrets to match the data model.
+     """
+     transformed_data = []
+     for secret in secrets:
+         # Start with a copy of the original secret data
+         transformed = dict(secret)
+
+         # Convert date fields to epoch timestamps
+         transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
+         transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
+         transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
+         transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
+         transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
+
+         # Flatten nested RotationRules.AutomaticallyAfterDays property
+         if "RotationRules" in secret and secret["RotationRules"]:
+             rotation_rules = secret["RotationRules"]
+             if "AutomaticallyAfterDays" in rotation_rules:
+                 transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
+                     "AutomaticallyAfterDays"
+                 ]
+
+         transformed_data.append(transformed)
+
+     return transformed_data
+
+
  @timeit
  def load_secrets(
      neo4j_session: neo4j.Session,
@@ -40,48 +71,33 @@ def load_secrets(
      current_aws_account_id: str,
      aws_update_tag: int,
  ) -> None:
-     ingest_secrets = """
-     UNWIND $Secrets as secret
-     MERGE (s:SecretsManagerSecret{id: secret.ARN})
-     ON CREATE SET s.firstseen = timestamp()
-     SET s.name = secret.Name, s.arn = secret.ARN, s.description = secret.Description,
-     s.kms_key_id = secret.KmsKeyId, s.rotation_enabled = secret.RotationEnabled,
-     s.rotation_lambda_arn = secret.RotationLambdaARN,
-     s.rotation_rules_automatically_after_days = secret.RotationRules.AutomaticallyAfterDays,
-     s.last_rotated_date = secret.LastRotatedDate, s.last_changed_date = secret.LastChangedDate,
-     s.last_accessed_date = secret.LastAccessedDate, s.deleted_date = secret.DeletedDate,
-     s.owning_service = secret.OwningService, s.created_date = secret.CreatedDate,
-     s.primary_region = secret.PrimaryRegion, s.region = $Region,
-     s.lastupdated = $aws_update_tag
-     WITH s
-     MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-     MERGE (owner)-[r:RESOURCE]->(s)
-     ON CREATE SET r.firstseen = timestamp()
-     SET r.lastupdated = $aws_update_tag
-     """
-     for secret in data:
-         secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
-         secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
-         secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
-         secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
-         secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
-
-     neo4j_session.run(
-         ingest_secrets,
-         Secrets=data,
+     """
+     Load transformed secrets into Neo4j using the data model.
+     Expects data to already be transformed by transform_secrets().
+     """
+     logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
+
+     # Load using the schema-based approach
+     load(
+         neo4j_session,
+         SecretsManagerSecretSchema(),
+         data,
+         lastupdated=aws_update_tag,
          Region=region,
-         AWS_ACCOUNT_ID=current_aws_account_id,
-         aws_update_tag=aws_update_tag,
+         AWS_ID=current_aws_account_id,
      )


  @timeit
  def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-     run_cleanup_job(
-         "aws_import_secrets_cleanup.json",
-         neo4j_session,
-         common_job_parameters,
+     """
+     Run Secrets cleanup job using the data model.
+     """
+     logger.debug("Running Secrets cleanup job.")
+     cleanup_job = GraphJob.from_node_schema(
+         SecretsManagerSecretSchema(), common_job_parameters
      )
+     cleanup_job.run(neo4j_session)


  @timeit
@@ -121,8 +137,6 @@ def get_secret_versions(

  def transform_secret_versions(
      versions: List[Dict],
-     region: str,
-     aws_account_id: str,
  ) -> List[Dict]:
      """
      Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
      )
      secrets = get_secret_list(boto3_session, region)

-     load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
+     transformed_secrets = transform_secrets(secrets)
+
+     load_secrets(
+         neo4j_session,
+         transformed_secrets,
+         region,
+         current_aws_account_id,
+         update_tag,
+     )

      all_versions = []
      for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
          )
          all_versions.extend(versions)

-     transformed_data = transform_secret_versions(
-         all_versions,
-         region,
-         current_aws_account_id,
-     )
+     transformed_data = transform_secret_versions(all_versions)

      load_secret_versions(
          neo4j_session,
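
The Secrets Manager module follows the same pattern: a dedicated transform_secrets() step now precedes a schema-based load, converting date fields and flattening RotationRules.AutomaticallyAfterDays. A small illustration with an invented record, assuming cartography.util.dict_date_to_epoch's usual behavior of returning epoch seconds, or None when the key is absent:

from datetime import datetime, timezone

secret = {
    "ARN": "arn:aws:secretsmanager:us-east-1:111122223333:secret:db-pass-AbCdEf",
    "Name": "db-pass",
    "CreatedDate": datetime(2024, 1, 1, tzinfo=timezone.utc),
    "RotationRules": {"AutomaticallyAfterDays": 30},
}
(transformed,) = transform_secrets([secret])
assert transformed["CreatedDate"] == 1704067200  # epoch seconds
assert transformed["LastRotatedDate"] is None  # absent dates become None
assert transformed["RotationRulesAutomaticallyAfterDays"] == 30

Also of note: transform_secret_versions() drops its unused region and aws_account_id parameters, and the JSON cleanup job aws_import_secrets_cleanup.json (deleted in this release, per the file list) is superseded by GraphJob.from_node_schema().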
cartography/intel/entra/groups.py
@@ -59,10 +59,29 @@ async def get_group_members(
      return user_ids, group_ids


+ @timeit
+ async def get_group_owners(client: GraphServiceClient, group_id: str) -> list[str]:
+     """Get owner user IDs for a given group."""
+     owner_ids: list[str] = []
+     request_builder = client.groups.by_group_id(group_id).owners
+     page = await request_builder.get()
+     while page:
+         if page.value:
+             for obj in page.value:
+                 odata_type = getattr(obj, "odata_type", "")
+                 if odata_type == "#microsoft.graph.user":
+                     owner_ids.append(obj.id)
+         if not page.odata_next_link:
+             break
+         page = await request_builder.with_url(page.odata_next_link).get()
+     return owner_ids
+
+
  def transform_groups(
      groups: list[Group],
      user_member_map: dict[str, list[str]],
      group_member_map: dict[str, list[str]],
+     group_owner_map: dict[str, list[str]],
  ) -> list[dict[str, Any]]:
      """Transform API responses into dictionaries for ingestion."""
      result: list[dict[str, Any]] = []
@@ -82,6 +101,7 @@ def transform_groups(
              "deleted_date_time": g.deleted_date_time,
              "member_ids": user_member_map.get(g.id, []),
              "member_group_ids": group_member_map.get(g.id, []),
+             "owner_ids": group_owner_map.get(g.id, []),
          }
          result.append(transformed)
      return result
@@ -134,6 +154,12 @@ async def sync_entra_groups(

      user_member_map: dict[str, list[str]] = {}
      group_member_map: dict[str, list[str]] = {}
+     group_owner_map: dict[str, list[str]] = {}
+
+     for group in groups:
+         owners = await get_group_owners(client, group.id)
+         group_owner_map[group.id] = owners
+
      for group in groups:
          try:
              users, subgroups = await get_group_members(client, group.id)
@@ -144,7 +170,9 @@ async def sync_entra_groups(
              user_member_map[group.id] = []
              group_member_map[group.id] = []

-     transformed_groups = transform_groups(groups, user_member_map, group_member_map)
+     transformed_groups = transform_groups(
+         groups, user_member_map, group_member_map, group_owner_map
+     )

      load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
      load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
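
The new get_group_owners() pages through the Graph owners endpoint via odata_next_link and keeps only #microsoft.graph.user entries, so service-principal owners are ignored. A hedged sketch of calling it standalone with the msgraph SDK this module already depends on; the tenant, app, and group identifiers are placeholders:

import asyncio

from azure.identity.aio import ClientSecretCredential
from msgraph import GraphServiceClient

from cartography.intel.entra.groups import get_group_owners


async def main() -> None:
    # Placeholder credentials for illustration only.
    credential = ClientSecretCredential("<tenant-id>", "<client-id>", "<client-secret>")
    client = GraphServiceClient(
        credentials=credential,
        scopes=["https://graph.microsoft.com/.default"],
    )
    print(await get_group_owners(client, "<group-object-id>"))


asyncio.run(main())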
cartography/intel/gcp/__init__.py
@@ -391,6 +391,7 @@ def _sync_multiple_projects(
      # Compute data sync
      for project in projects:
          project_id = project["projectId"]
+         common_job_parameters["PROJECT_ID"] = project_id
          logger.info("Syncing GCP project %s for Compute.", project_id)
          _sync_single_project_compute(
              neo4j_session,
@@ -399,10 +400,12 @@
              gcp_update_tag,
              common_job_parameters,
          )
+         del common_job_parameters["PROJECT_ID"]

      # Storage data sync
      for project in projects:
          project_id = project["projectId"]
+         common_job_parameters["PROJECT_ID"] = project_id
          logger.info("Syncing GCP project %s for Storage", project_id)
          _sync_single_project_storage(
              neo4j_session,
@@ -411,10 +414,12 @@
              gcp_update_tag,
              common_job_parameters,
          )
+         del common_job_parameters["PROJECT_ID"]

      # GKE data sync
      for project in projects:
          project_id = project["projectId"]
+         common_job_parameters["PROJECT_ID"] = project_id
          logger.info("Syncing GCP project %s for GKE", project_id)
          _sync_single_project_gke(
              neo4j_session,
@@ -423,10 +428,12 @@
              gcp_update_tag,
              common_job_parameters,
          )
+         del common_job_parameters["PROJECT_ID"]

      # DNS data sync
      for project in projects:
          project_id = project["projectId"]
+         common_job_parameters["PROJECT_ID"] = project_id
          logger.info("Syncing GCP project %s for DNS", project_id)
          _sync_single_project_dns(
              neo4j_session,
@@ -435,14 +442,17 @@
              gcp_update_tag,
              common_job_parameters,
          )
+         del common_job_parameters["PROJECT_ID"]

      # IAM data sync
      for project in projects:
          project_id = project["projectId"]
+         common_job_parameters["PROJECT_ID"] = project_id
          logger.info("Syncing GCP project %s for IAM", project_id)
          _sync_single_project_iam(
              neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters
          )
+         del common_job_parameters["PROJECT_ID"]


  @timeit
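
The GCP change is one pattern applied to all five per-project loops in _sync_multiple_projects(): PROJECT_ID is placed into common_job_parameters only for the duration of a single project's sync, so that project-scoped cleanup jobs (presumably including the new models/gcp/compute/vpc.py schema, which replaces gcp_compute_vpc_cleanup.json in this release) see the right project, then deleted before the next iteration. Distilled to one loop, using the IAM call whose full argument list is visible above:

for project in projects:
    project_id = project["projectId"]
    # Scope downstream cleanup jobs to this project only.
    common_job_parameters["PROJECT_ID"] = project_id
    _sync_single_project_iam(
        neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters
    )
    # Remove the key so later code cannot observe a stale project id.
    del common_job_parameters["PROJECT_ID"]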