cartography 0.107.0rc2__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (58)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -0
  3. cartography/config.py +5 -0
  4. cartography/data/indexes.cypher +0 -10
  5. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  6. cartography/intel/aws/__init__.py +1 -0
  7. cartography/intel/aws/cloudtrail.py +17 -4
  8. cartography/intel/aws/cloudtrail_management_events.py +560 -16
  9. cartography/intel/aws/cloudwatch.py +150 -4
  10. cartography/intel/aws/ec2/security_groups.py +140 -122
  11. cartography/intel/aws/ec2/snapshots.py +47 -84
  12. cartography/intel/aws/ec2/subnets.py +37 -63
  13. cartography/intel/aws/ecr.py +55 -80
  14. cartography/intel/aws/ecs.py +17 -0
  15. cartography/intel/aws/elasticache.py +102 -79
  16. cartography/intel/aws/guardduty.py +275 -0
  17. cartography/intel/aws/resources.py +2 -0
  18. cartography/intel/aws/secretsmanager.py +62 -44
  19. cartography/intel/github/repos.py +370 -28
  20. cartography/intel/sentinelone/__init__.py +8 -2
  21. cartography/intel/sentinelone/application.py +248 -0
  22. cartography/intel/sentinelone/utils.py +20 -1
  23. cartography/models/aws/cloudtrail/management_events.py +95 -6
  24. cartography/models/aws/cloudtrail/trail.py +21 -0
  25. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  26. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  27. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  28. cartography/models/aws/ec2/security_group_rules.py +109 -0
  29. cartography/models/aws/ec2/security_groups.py +90 -0
  30. cartography/models/aws/ec2/snapshots.py +58 -0
  31. cartography/models/aws/ec2/subnet_instance.py +2 -0
  32. cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
  33. cartography/models/aws/ec2/subnets.py +65 -0
  34. cartography/models/aws/ec2/volumes.py +20 -0
  35. cartography/models/aws/ecr/__init__.py +0 -0
  36. cartography/models/aws/ecr/image.py +41 -0
  37. cartography/models/aws/ecr/repository.py +72 -0
  38. cartography/models/aws/ecr/repository_image.py +95 -0
  39. cartography/models/aws/ecs/tasks.py +24 -1
  40. cartography/models/aws/elasticache/__init__.py +0 -0
  41. cartography/models/aws/elasticache/cluster.py +65 -0
  42. cartography/models/aws/elasticache/topic.py +67 -0
  43. cartography/models/aws/guardduty/__init__.py +1 -0
  44. cartography/models/aws/guardduty/findings.py +102 -0
  45. cartography/models/aws/secretsmanager/secret.py +106 -0
  46. cartography/models/github/dependencies.py +74 -0
  47. cartography/models/github/manifests.py +49 -0
  48. cartography/models/sentinelone/application.py +44 -0
  49. cartography/models/sentinelone/application_version.py +96 -0
  50. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/METADATA +3 -3
  51. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/RECORD +55 -36
  52. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  53. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  54. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  55. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/WHEEL +0 -0
  56. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/entry_points.txt +0 -0
  57. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/licenses/LICENSE +0 -0
  58. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/top_level.txt +0 -0
cartography/intel/aws/elasticache.py
@@ -1,118 +1,132 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
4
- from typing import Set
2
+ from typing import Any
5
3
 
6
4
  import boto3
7
5
  import neo4j
8
6
 
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
9
+ from cartography.models.aws.elasticache.cluster import ElasticacheClusterSchema
10
+ from cartography.models.aws.elasticache.topic import ElasticacheTopicSchema
9
11
  from cartography.stats import get_stats_client
10
12
  from cartography.util import aws_handle_regions
11
13
  from cartography.util import merge_module_sync_metadata
12
- from cartography.util import run_cleanup_job
13
14
  from cartography.util import timeit
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
  stat_handler = get_stats_client(__name__)
17
18
 
18
19
 
19
- def _get_topic(cluster: Dict) -> Dict:
20
- return cluster["NotificationConfiguration"]
21
-
22
-
23
- def transform_elasticache_topics(cluster_data: List[Dict]) -> List[Dict]:
24
- """
25
- Collect unique TopicArns from the cluster data
26
- """
27
- seen: Set[str] = set()
28
- topics: List[Dict] = []
29
- for cluster in cluster_data:
30
- topic = _get_topic(cluster)
31
- topic_arn = topic["TopicArn"]
32
- if topic_arn not in seen:
33
- seen.add(topic_arn)
34
- topics.append(topic)
35
- return topics
36
-
37
-
38
20
  @timeit
39
21
  @aws_handle_regions
40
22
  def get_elasticache_clusters(
41
23
  boto3_session: boto3.session.Session,
42
24
  region: str,
43
- ) -> List[Dict]:
44
- logger.debug(f"Getting ElastiCache Clusters in region '{region}'.")
25
+ ) -> list[dict[str, Any]]:
45
26
  client = boto3_session.client("elasticache", region_name=region)
46
27
  paginator = client.get_paginator("describe_cache_clusters")
47
- clusters: List[Dict] = []
28
+ clusters: list[dict[str, Any]] = []
48
29
  for page in paginator.paginate():
49
- clusters.extend(page["CacheClusters"])
30
+ clusters.extend(page.get("CacheClusters", []))
50
31
  return clusters
51
32
 
52
33
 
34
+ def transform_elasticache_clusters(
35
+ clusters: list[dict[str, Any]], region: str
36
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
37
+ cluster_data: list[dict[str, Any]] = []
38
+ topics: dict[str, dict[str, Any]] = {}
39
+
40
+ for cluster in clusters:
41
+ notification = cluster.get("NotificationConfiguration", {})
42
+ topic_arn = notification.get("TopicArn")
43
+ cluster_record = {
44
+ "ARN": cluster["ARN"],
45
+ "CacheClusterId": cluster["CacheClusterId"],
46
+ "CacheNodeType": cluster.get("CacheNodeType"),
47
+ "Engine": cluster.get("Engine"),
48
+ "EngineVersion": cluster.get("EngineVersion"),
49
+ "CacheClusterStatus": cluster.get("CacheClusterStatus"),
50
+ "NumCacheNodes": cluster.get("NumCacheNodes"),
51
+ "PreferredAvailabilityZone": cluster.get("PreferredAvailabilityZone"),
52
+ "PreferredMaintenanceWindow": cluster.get("PreferredMaintenanceWindow"),
53
+ "CacheClusterCreateTime": cluster.get("CacheClusterCreateTime"),
54
+ "CacheSubnetGroupName": cluster.get("CacheSubnetGroupName"),
55
+ "AutoMinorVersionUpgrade": cluster.get("AutoMinorVersionUpgrade"),
56
+ "ReplicationGroupId": cluster.get("ReplicationGroupId"),
57
+ "SnapshotRetentionLimit": cluster.get("SnapshotRetentionLimit"),
58
+ "SnapshotWindow": cluster.get("SnapshotWindow"),
59
+ "AuthTokenEnabled": cluster.get("AuthTokenEnabled"),
60
+ "TransitEncryptionEnabled": cluster.get("TransitEncryptionEnabled"),
61
+ "AtRestEncryptionEnabled": cluster.get("AtRestEncryptionEnabled"),
62
+ "TopicArn": topic_arn,
63
+ "Region": region,
64
+ }
65
+ cluster_data.append(cluster_record)
66
+
67
+ if topic_arn:
68
+ topics.setdefault(
69
+ topic_arn,
70
+ {
71
+ "TopicArn": topic_arn,
72
+ "TopicStatus": notification.get("TopicStatus"),
73
+ "cluster_arns": [],
74
+ },
75
+ )["cluster_arns"].append(cluster["ARN"])
76
+
77
+ return cluster_data, list(topics.values())
78
+
79
+
53
80
  @timeit
54
81
  def load_elasticache_clusters(
55
82
  neo4j_session: neo4j.Session,
56
- clusters: List[Dict],
83
+ clusters: list[dict[str, Any]],
57
84
  region: str,
58
85
  aws_account_id: str,
59
86
  update_tag: int,
60
87
  ) -> None:
61
- query = """
62
- UNWIND $clusters as elasticache_cluster
63
- MERGE (cluster:ElasticacheCluster{id:elasticache_cluster.ARN})
64
- ON CREATE SET cluster.firstseen = timestamp(),
65
- cluster.arn = elasticache_cluster.ARN,
66
- cluster.topic_arn = elasticache_cluster.NotificationConfiguration.TopicArn,
67
- cluster.id = elasticache_cluster.CacheClusterId,
68
- cluster.region = $region
69
- SET cluster.lastupdated = $aws_update_tag
70
-
71
- WITH cluster, elasticache_cluster
72
- MATCH (owner:AWSAccount{id: $aws_account_id})
73
- MERGE (owner)-[r3:RESOURCE]->(cluster)
74
- ON CREATE SET r3.firstseen = timestamp()
75
- SET r3.lastupdated = $aws_update_tag
76
-
77
- WITH elasticache_cluster, owner
78
- WHERE NOT elasticache_cluster.NotificationConfiguration IS NULL
79
- MERGE (topic:ElasticacheTopic{id: elasticache_cluster.NotificationConfiguration.TopicArn})
80
- ON CREATE SET topic.firstseen = timestamp(),
81
- topic.arn = elasticache_cluster.NotificationConfiguration.TopicArn
82
- SET topic.lastupdated = $aws_update_tag,
83
- topic.status = elasticache_cluster.NotificationConfiguration.Status
84
-
85
- MERGE (topic)-[r:CACHE_CLUSTER]->(cluster)
86
- ON CREATE SET r.firstseen = timestamp()
87
- SET r.lastupdated = $aws_update_tag
88
- WITH cluster, topic
89
-
90
- MERGE (owner)-[r2:RESOURCE]->(topic)
91
- ON CREATE SET r2.firstseen = timestamp()
92
- SET r2.lastupdated = $aws_update_tag
93
- """
94
88
  logger.info(
95
- f"Loading f{len(clusters)} ElastiCache clusters for region '{region}' into graph.",
89
+ f"Loading {len(clusters)} ElastiCache clusters for region '{region}' into graph."
96
90
  )
97
- neo4j_session.run(
98
- query,
99
- clusters=clusters,
100
- region=region,
101
- aws_update_tag=update_tag,
102
- aws_account_id=aws_account_id,
91
+ load(
92
+ neo4j_session,
93
+ ElasticacheClusterSchema(),
94
+ clusters,
95
+ lastupdated=update_tag,
96
+ Region=region,
97
+ AWS_ID=aws_account_id,
103
98
  )
104
99
 
105
100
 
106
101
  @timeit
107
- def cleanup(
102
+ def load_elasticache_topics(
108
103
  neo4j_session: neo4j.Session,
109
- current_aws_account_id: str,
104
+ topics: list[dict[str, Any]],
105
+ aws_account_id: str,
110
106
  update_tag: int,
111
107
  ) -> None:
112
- run_cleanup_job(
113
- "aws_import_elasticache_cleanup.json",
108
+ if not topics:
109
+ return
110
+ logger.info(f"Loading {len(topics)} ElastiCache topics into graph.")
111
+ load(
114
112
  neo4j_session,
115
- {"UPDATE_TAG": update_tag, "AWS_ID": current_aws_account_id},
113
+ ElasticacheTopicSchema(),
114
+ topics,
115
+ lastupdated=update_tag,
116
+ AWS_ID=aws_account_id,
117
+ )
118
+
119
+
120
+ @timeit
121
+ def cleanup(
122
+ neo4j_session: neo4j.Session,
123
+ common_job_parameters: dict[str, Any],
124
+ ) -> None:
125
+ GraphJob.from_node_schema(ElasticacheClusterSchema(), common_job_parameters).run(
126
+ neo4j_session
127
+ )
128
+ GraphJob.from_node_schema(ElasticacheTopicSchema(), common_job_parameters).run(
129
+ neo4j_session
116
130
  )
117
131
 
118
132
 
@@ -120,24 +134,33 @@ def cleanup(
120
134
  def sync(
121
135
  neo4j_session: neo4j.Session,
122
136
  boto3_session: boto3.session.Session,
123
- regions: List[str],
137
+ regions: list[str],
124
138
  current_aws_account_id: str,
125
139
  update_tag: int,
126
- common_job_parameters: Dict,
140
+ common_job_parameters: dict[str, Any],
127
141
  ) -> None:
128
142
  for region in regions:
129
143
  logger.info(
130
- f"Syncing ElastiCache clusters for region '{region}' in account {current_aws_account_id}",
144
+ "Syncing ElastiCache clusters for region '%s' in account '%s'.",
145
+ region,
146
+ current_aws_account_id,
131
147
  )
132
- clusters = get_elasticache_clusters(boto3_session, region)
148
+ raw_clusters = get_elasticache_clusters(boto3_session, region)
149
+ cluster_data, topic_data = transform_elasticache_clusters(raw_clusters, region)
133
150
  load_elasticache_clusters(
134
151
  neo4j_session,
135
- clusters,
152
+ cluster_data,
136
153
  region,
137
154
  current_aws_account_id,
138
155
  update_tag,
139
156
  )
140
- cleanup(neo4j_session, current_aws_account_id, update_tag)
157
+ load_elasticache_topics(
158
+ neo4j_session,
159
+ topic_data,
160
+ current_aws_account_id,
161
+ update_tag,
162
+ )
163
+ cleanup(neo4j_session, common_job_parameters)
141
164
  merge_module_sync_metadata(
142
165
  neo4j_session,
143
166
  group_type="AWSAccount",
cartography/intel/aws/guardduty.py (new file)
@@ -0,0 +1,275 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import boto3.session
8
+ import neo4j
9
+
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
12
+ from cartography.models.aws.guardduty.findings import GuardDutyFindingSchema
13
+ from cartography.stats import get_stats_client
14
+ from cartography.util import aws_handle_regions
15
+ from cartography.util import aws_paginate
16
+ from cartography.util import merge_module_sync_metadata
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+ stat_handler = get_stats_client(__name__)
21
+
22
+
23
+ def _get_severity_range_for_threshold(
24
+ severity_threshold: str | None,
25
+ ) -> List[str] | None:
26
+ """
27
+ Convert severity threshold string to GuardDuty numeric severity range.
28
+
29
+ GuardDuty severity mappings:
30
+ - LOW: 1.0-3.9
31
+ - MEDIUM: 4.0-6.9
32
+ - HIGH: 7.0-8.9
33
+ - CRITICAL: 9.0-10.0
34
+
35
+ :param severity_threshold: Severity threshold (LOW, MEDIUM, HIGH, CRITICAL)
36
+ :return: List of numeric severity ranges to include, or None for no filtering
37
+ """
38
+ if not severity_threshold:
39
+ return None
40
+
41
+ threshold_upper = severity_threshold.upper().strip()
42
+
43
+ # Map threshold to numeric ranges - include threshold level and above
44
+ if threshold_upper == "LOW":
45
+ return ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] # All severities
46
+ elif threshold_upper == "MEDIUM":
47
+ return ["4", "5", "6", "7", "8", "9", "10"] # MEDIUM and above
48
+ elif threshold_upper == "HIGH":
49
+ return ["7", "8", "9", "10"] # HIGH and CRITICAL only
50
+ elif threshold_upper == "CRITICAL":
51
+ return ["9", "10"] # CRITICAL only
52
+ else:
53
+ return None
54
+
55
+
56
+ @aws_handle_regions
57
+ def get_detectors(
58
+ boto3_session: boto3.session.Session,
59
+ region: str,
60
+ ) -> List[str]:
61
+ """
62
+ Get GuardDuty detector IDs for all detectors in a region.
63
+ """
64
+ client = boto3_session.client("guardduty", region_name=region)
65
+
66
+ # Get all detector IDs in this region
67
+ detectors_response = client.list_detectors()
68
+ detector_ids = detectors_response.get("DetectorIds", [])
69
+
70
+ if not detector_ids:
71
+ logger.info(f"No GuardDuty detectors found in region {region}")
72
+ return []
73
+
74
+ logger.info(f"Found {len(detector_ids)} GuardDuty detectors in region {region}")
75
+ return detector_ids
76
+
77
+
78
+ @aws_handle_regions
79
+ @timeit
80
+ def get_findings(
81
+ boto3_session: boto3.session.Session,
82
+ region: str,
83
+ detector_id: str,
84
+ severity_threshold: str | None = None,
85
+ ) -> List[Dict[str, Any]]:
86
+ """
87
+ Get GuardDuty findings for a specific detector.
88
+ Only fetches unarchived findings to avoid including closed/resolved findings.
89
+ Optionally filters by severity threshold.
90
+ """
91
+ client = boto3_session.client("guardduty", region_name=region)
92
+
93
+ # Build FindingCriteria - always exclude archived findings
94
+ criteria = {"service.archived": {"Equals": ["false"]}}
95
+
96
+ # Add severity filtering if threshold is provided
97
+ severity_range = _get_severity_range_for_threshold(severity_threshold)
98
+ if severity_range:
99
+ min_severity = min(
100
+ float(s) for s in severity_range
101
+ ) # get min severity from range
102
+ # I chose to ignore the type error here because the AWS API has fields that require different types
103
+ criteria["severity"] = {"GreaterThanOrEqual": int(min_severity)} # type: ignore
104
+
105
+ # Get all finding IDs for this detector with filtering
106
+ finding_ids = list(
107
+ aws_paginate(
108
+ client,
109
+ "list_findings",
110
+ "FindingIds",
111
+ DetectorId=detector_id,
112
+ FindingCriteria={"Criterion": criteria},
113
+ )
114
+ )
115
+
116
+ if not finding_ids:
117
+ logger.info(f"No findings found for detector {detector_id} in region {region}")
118
+ return []
119
+
120
+ findings_data = []
121
+
122
+ # Process findings in batches (GuardDuty API limit is 50)
123
+ batch_size = 50
124
+ for i in range(0, len(finding_ids), batch_size):
125
+ batch_ids = finding_ids[i : i + batch_size]
126
+
127
+ findings_response = client.get_findings(
128
+ DetectorId=detector_id, FindingIds=batch_ids
129
+ )
130
+
131
+ findings_batch = findings_response.get("Findings", [])
132
+ findings_data.extend(findings_batch)
133
+
134
+ logger.info(
135
+ f"Retrieved {len(findings_data)} findings for detector {detector_id} in region {region}"
136
+ )
137
+ return findings_data
138
+
139
+
140
+ def transform_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
141
+ """Transform GuardDuty findings from API response to schema format."""
142
+ transformed: List[Dict[str, Any]] = []
143
+ for f in findings:
144
+ item: Dict[str, Any] = {
145
+ "id": f["Id"],
146
+ "arn": f.get("Arn"),
147
+ "type": f.get("Type"),
148
+ "severity": f.get("Severity"),
149
+ "title": f.get("Title"),
150
+ "description": f.get("Description"),
151
+ "confidence": f.get("Confidence"),
152
+ "eventfirstseen": f.get("EventFirstSeen"),
153
+ "eventlastseen": f.get("EventLastSeen"),
154
+ "accountid": f.get("AccountId"),
155
+ "region": f.get("Region"),
156
+ "detectorid": f.get("DetectorId"),
157
+ "archived": f.get("Archived"),
158
+ }
159
+
160
+ # Handle nested resource information
161
+ resource = f.get("Resource", {})
162
+ item["resource_type"] = resource.get("ResourceType")
163
+
164
+ # Extract resource ID based on resource type
165
+ if item["resource_type"] == "Instance":
166
+ details = resource.get("InstanceDetails", {})
167
+ item["resource_id"] = details.get("InstanceId")
168
+ elif item["resource_type"] == "S3Bucket":
169
+ buckets = resource.get("S3BucketDetails") or []
170
+ if buckets:
171
+ item["resource_id"] = buckets[0].get("Name")
172
+ else:
173
+ item["resource_id"] = None
174
+
175
+ transformed.append(item)
176
+
177
+ return transformed
178
+
179
+
180
+ @timeit
181
+ def load_guardduty_findings(
182
+ neo4j_session: neo4j.Session,
183
+ data: List[Dict[str, Any]],
184
+ region: str,
185
+ aws_account_id: str,
186
+ update_tag: int,
187
+ ) -> None:
188
+ """
189
+ Load GuardDuty findings information into the graph.
190
+ """
191
+ logger.info(
192
+ f"Loading {len(data)} GuardDuty findings for region {region} into graph."
193
+ )
194
+
195
+ load(
196
+ neo4j_session,
197
+ GuardDutyFindingSchema(),
198
+ data,
199
+ lastupdated=update_tag,
200
+ Region=region,
201
+ AWS_ID=aws_account_id,
202
+ )
203
+
204
+
205
+ @timeit
206
+ def cleanup_guardduty(
207
+ neo4j_session: neo4j.Session, common_job_parameters: Dict
208
+ ) -> None:
209
+ """
210
+ Run GuardDuty cleanup job.
211
+ """
212
+ logger.debug("Running GuardDuty cleanup job.")
213
+ cleanup_job = GraphJob.from_node_schema(
214
+ GuardDutyFindingSchema(), common_job_parameters
215
+ )
216
+ cleanup_job.run(neo4j_session)
217
+
218
+
219
+ @timeit
220
+ def sync(
221
+ neo4j_session: neo4j.Session,
222
+ boto3_session: boto3.session.Session,
223
+ regions: List[str],
224
+ current_aws_account_id: str,
225
+ update_tag: int,
226
+ common_job_parameters: Dict,
227
+ ) -> None:
228
+ """
229
+ Sync GuardDuty findings for all regions.
230
+ Severity threshold filter is obtained from common_job_parameters.
231
+ """
232
+ # Get severity threshold from common job parameters
233
+ severity_threshold = common_job_parameters.get("aws_guardduty_severity_threshold")
234
+ for region in regions:
235
+ logger.info(
236
+ f"Syncing GuardDuty findings for {region} in account {current_aws_account_id}"
237
+ )
238
+
239
+ # Get all detectors in the region
240
+ detector_ids = get_detectors(boto3_session, region)
241
+
242
+ if not detector_ids:
243
+ logger.info(f"No GuardDuty detectors found in region {region}, skipping.")
244
+ continue
245
+
246
+ all_findings = []
247
+
248
+ # Get findings for each detector
249
+ for detector_id in detector_ids:
250
+ findings = get_findings(
251
+ boto3_session, region, detector_id, severity_threshold
252
+ )
253
+ all_findings.extend(findings)
254
+
255
+ transformed_findings = transform_findings(all_findings)
256
+
257
+ load_guardduty_findings(
258
+ neo4j_session,
259
+ transformed_findings,
260
+ region,
261
+ current_aws_account_id,
262
+ update_tag,
263
+ )
264
+
265
+ # Cleanup and metadata update (outside region loop)
266
+ cleanup_guardduty(neo4j_session, common_job_parameters)
267
+
268
+ merge_module_sync_metadata(
269
+ neo4j_session,
270
+ group_type="AWSAccount",
271
+ group_id=current_aws_account_id,
272
+ synced_type="GuardDutyFinding",
273
+ update_tag=update_tag,
274
+ stat_handler=stat_handler,
275
+ )
cartography/intel/aws/resources.py
@@ -18,6 +18,7 @@ from . import eks
18
18
  from . import elasticache
19
19
  from . import elasticsearch
20
20
  from . import emr
21
+ from . import guardduty
21
22
  from . import iam
22
23
  from . import identitycenter
23
24
  from . import inspector
@@ -111,5 +112,6 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
111
112
  "cloudtrail_management_events": cloudtrail_management_events.sync,
112
113
  "cloudwatch": cloudwatch.sync,
113
114
  "efs": efs.sync,
115
+ "guardduty": guardduty.sync,
114
116
  "codebuild": codebuild.sync,
115
117
  }
cartography/intel/aws/secretsmanager.py
@@ -7,6 +7,7 @@ import neo4j
7
7
 
8
8
  from cartography.client.core.tx import load
9
9
  from cartography.graph.job import GraphJob
10
+ from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
10
11
  from cartography.models.aws.secretsmanager.secret_version import (
11
12
  SecretsManagerSecretVersionSchema,
12
13
  )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
14
15
  from cartography.util import aws_handle_regions
15
16
  from cartography.util import dict_date_to_epoch
16
17
  from cartography.util import merge_module_sync_metadata
17
- from cartography.util import run_cleanup_job
18
18
  from cartography.util import timeit
19
19
 
20
20
  logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[D
32
32
  return secrets
33
33
 
34
34
 
35
+ def transform_secrets(
36
+ secrets: List[Dict],
37
+ ) -> List[Dict]:
38
+ """
39
+ Transform AWS Secrets Manager Secrets to match the data model.
40
+ """
41
+ transformed_data = []
42
+ for secret in secrets:
43
+ # Start with a copy of the original secret data
44
+ transformed = dict(secret)
45
+
46
+ # Convert date fields to epoch timestamps
47
+ transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
48
+ transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
49
+ transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
50
+ transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
51
+ transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
52
+
53
+ # Flatten nested RotationRules.AutomaticallyAfterDays property
54
+ if "RotationRules" in secret and secret["RotationRules"]:
55
+ rotation_rules = secret["RotationRules"]
56
+ if "AutomaticallyAfterDays" in rotation_rules:
57
+ transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
58
+ "AutomaticallyAfterDays"
59
+ ]
60
+
61
+ transformed_data.append(transformed)
62
+
63
+ return transformed_data
64
+
65
+
35
66
  @timeit
36
67
  def load_secrets(
37
68
  neo4j_session: neo4j.Session,
@@ -40,48 +71,33 @@ def load_secrets(
40
71
  current_aws_account_id: str,
41
72
  aws_update_tag: int,
42
73
  ) -> None:
43
- ingest_secrets = """
44
- UNWIND $Secrets as secret
45
- MERGE (s:SecretsManagerSecret{id: secret.ARN})
46
- ON CREATE SET s.firstseen = timestamp()
47
- SET s.name = secret.Name, s.arn = secret.ARN, s.description = secret.Description,
48
- s.kms_key_id = secret.KmsKeyId, s.rotation_enabled = secret.RotationEnabled,
49
- s.rotation_lambda_arn = secret.RotationLambdaARN,
50
- s.rotation_rules_automatically_after_days = secret.RotationRules.AutomaticallyAfterDays,
51
- s.last_rotated_date = secret.LastRotatedDate, s.last_changed_date = secret.LastChangedDate,
52
- s.last_accessed_date = secret.LastAccessedDate, s.deleted_date = secret.DeletedDate,
53
- s.owning_service = secret.OwningService, s.created_date = secret.CreatedDate,
54
- s.primary_region = secret.PrimaryRegion, s.region = $Region,
55
- s.lastupdated = $aws_update_tag
56
- WITH s
57
- MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
58
- MERGE (owner)-[r:RESOURCE]->(s)
59
- ON CREATE SET r.firstseen = timestamp()
60
- SET r.lastupdated = $aws_update_tag
61
- """
62
- for secret in data:
63
- secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
64
- secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
65
- secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
66
- secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
67
- secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
68
-
69
- neo4j_session.run(
70
- ingest_secrets,
71
- Secrets=data,
74
+ """
75
+ Load transformed secrets into Neo4j using the data model.
76
+ Expects data to already be transformed by transform_secrets().
77
+ """
78
+ logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
79
+
80
+ # Load using the schema-based approach
81
+ load(
82
+ neo4j_session,
83
+ SecretsManagerSecretSchema(),
84
+ data,
85
+ lastupdated=aws_update_tag,
72
86
  Region=region,
73
- AWS_ACCOUNT_ID=current_aws_account_id,
74
- aws_update_tag=aws_update_tag,
87
+ AWS_ID=current_aws_account_id,
75
88
  )
76
89
 
77
90
 
78
91
  @timeit
79
92
  def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
80
- run_cleanup_job(
81
- "aws_import_secrets_cleanup.json",
82
- neo4j_session,
83
- common_job_parameters,
93
+ """
94
+ Run Secrets cleanup job using the data model.
95
+ """
96
+ logger.debug("Running Secrets cleanup job.")
97
+ cleanup_job = GraphJob.from_node_schema(
98
+ SecretsManagerSecretSchema(), common_job_parameters
84
99
  )
100
+ cleanup_job.run(neo4j_session)
85
101
 
86
102
 
87
103
  @timeit
@@ -121,8 +137,6 @@ def get_secret_versions(
121
137
 
122
138
  def transform_secret_versions(
123
139
  versions: List[Dict],
124
- region: str,
125
- aws_account_id: str,
126
140
  ) -> List[Dict]:
127
141
  """
128
142
  Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
203
217
  )
204
218
  secrets = get_secret_list(boto3_session, region)
205
219
 
206
- load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
220
+ transformed_secrets = transform_secrets(secrets)
221
+
222
+ load_secrets(
223
+ neo4j_session,
224
+ transformed_secrets,
225
+ region,
226
+ current_aws_account_id,
227
+ update_tag,
228
+ )
207
229
 
208
230
  all_versions = []
209
231
  for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
216
238
  )
217
239
  all_versions.extend(versions)
218
240
 
219
- transformed_data = transform_secret_versions(
220
- all_versions,
221
- region,
222
- current_aws_account_id,
223
- )
241
+ transformed_data = transform_secret_versions(all_versions)
224
242
 
225
243
  load_secret_versions(
226
244
  neo4j_session,