cartography 0.107.0rc2__py3-none-any.whl → 0.108.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartography/_version.py +2 -2
- cartography/cli.py +10 -0
- cartography/config.py +5 -0
- cartography/data/indexes.cypher +0 -8
- cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/cloudwatch.py +77 -0
- cartography/intel/aws/ec2/security_groups.py +140 -122
- cartography/intel/aws/ec2/snapshots.py +47 -84
- cartography/intel/aws/ec2/subnets.py +1 -1
- cartography/intel/aws/ecs.py +17 -0
- cartography/intel/aws/guardduty.py +275 -0
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/github/repos.py +370 -28
- cartography/intel/sentinelone/__init__.py +8 -2
- cartography/intel/sentinelone/application.py +248 -0
- cartography/intel/sentinelone/utils.py +20 -1
- cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
- cartography/models/aws/ec2/networkinterfaces.py +2 -0
- cartography/models/aws/ec2/security_group_rules.py +109 -0
- cartography/models/aws/ec2/security_groups.py +90 -0
- cartography/models/aws/ec2/snapshots.py +58 -0
- cartography/models/aws/ec2/subnet_instance.py +2 -0
- cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
- cartography/models/aws/ec2/volumes.py +20 -0
- cartography/models/aws/ecs/tasks.py +24 -1
- cartography/models/aws/guardduty/__init__.py +1 -0
- cartography/models/aws/guardduty/findings.py +102 -0
- cartography/models/github/dependencies.py +74 -0
- cartography/models/github/manifests.py +49 -0
- cartography/models/sentinelone/application.py +44 -0
- cartography/models/sentinelone/application_version.py +96 -0
- {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/METADATA +3 -3
- {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/RECORD +38 -28
- cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
- cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
- {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/top_level.txt +0 -0
cartography/intel/aws/ec2/snapshots.py
CHANGED

@@ -1,4 +1,5 @@
 import logging
+from typing import Any
 from typing import Dict
 from typing import List

@@ -6,8 +7,11 @@ import boto3
 import neo4j
 from botocore.exceptions import ClientError

+from cartography.client.core.tx import load
+from cartography.client.core.tx import read_list_of_values_tx
+from cartography.graph.job import GraphJob
+from cartography.models.aws.ec2.snapshots import EBSSnapshotSchema
 from cartography.util import aws_handle_regions
-from cartography.util import run_cleanup_job
 from cartography.util import timeit

 logger = logging.getLogger(__name__)
@@ -24,12 +28,13 @@ def get_snapshots_in_use(
     WHERE v.region = $Region
     RETURN v.snapshotid as snapshot
     """
-    results =
+    results = read_list_of_values_tx(
+        neo4j_session,
         query,
         AWS_ACCOUNT_ID=current_aws_account_id,
         Region=region,
     )
-    return [
+    return [str(snapshot) for snapshot in results if snapshot]


 @timeit
@@ -45,7 +50,6 @@ def get_snapshots(
     for page in paginator.paginate(OwnerIds=["self"]):
         snapshots.extend(page["Snapshots"])

-    # fetch in-use snapshots not in self_owned snapshots
     self_owned_snapshot_ids = {s["SnapshotId"] for s in snapshots}
     other_snapshot_ids = set(in_use_snapshot_ids) - self_owned_snapshot_ids
     if other_snapshot_ids:
@@ -55,8 +59,7 @@ def get_snapshots(
         except ClientError as e:
             if e.response["Error"]["Code"] == "InvalidSnapshot.NotFound":
                 logger.warning(
-                    f"Failed to retrieve page of in-use,
-                    not owned snapshots. Continuing anyway. Error - {e}",
+                    f"Failed to retrieve page of in-use, not owned snapshots. Continuing anyway. Error - {e}"
                 )
             else:
                 raise
@@ -64,93 +67,53 @@ def get_snapshots(
     return snapshots


+def transform_snapshots(snapshots: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    transformed: List[Dict[str, Any]] = []
+    for snap in snapshots:
+        transformed.append(
+            {
+                "SnapshotId": snap["SnapshotId"],
+                "Description": snap.get("Description"),
+                "Encrypted": snap.get("Encrypted"),
+                "Progress": snap.get("Progress"),
+                "StartTime": snap.get("StartTime"),
+                "State": snap.get("State"),
+                "StateMessage": snap.get("StateMessage"),
+                "VolumeId": snap.get("VolumeId"),
+                "VolumeSize": snap.get("VolumeSize"),
+                "OutpostArn": snap.get("OutpostArn"),
+                "DataEncryptionKeyId": snap.get("DataEncryptionKeyId"),
+                "KmsKeyId": snap.get("KmsKeyId"),
+            }
+        )
+    return transformed
+
+
 @timeit
 def load_snapshots(
     neo4j_session: neo4j.Session,
-    data: List[Dict],
+    data: List[Dict[str, Any]],
     region: str,
     current_aws_account_id: str,
     update_tag: int,
 ) -> None:
-
-
-
-
-
-    s.progress = snapshot.Progress, s.starttime = snapshot.StartTime, s.state = snapshot.State,
-    s.statemessage = snapshot.StateMessage, s.volumeid = snapshot.VolumeId, s.volumesize = snapshot.VolumeSize,
-    s.outpostarn = snapshot.OutpostArn, s.dataencryptionkeyid = snapshot.DataEncryptionKeyId,
-    s.kmskeyid = snapshot.KmsKeyId, s.region=$Region
-    WITH s
-    MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (aa)-[r:RESOURCE]->(s)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $update_tag
-    """
-
-    for snapshot in data:
-        snapshot["StartTime"] = str(snapshot["StartTime"])
-
-    neo4j_session.run(
-        ingest_snapshots,
-        snapshots_list=data,
-        AWS_ACCOUNT_ID=current_aws_account_id,
+    load(
+        neo4j_session,
+        EBSSnapshotSchema(),
+        data,
+        lastupdated=update_tag,
         Region=region,
-
-    )
-
-
-@timeit
-def get_snapshot_volumes(snapshots: List[Dict]) -> List[Dict]:
-    snapshot_volumes: List[Dict] = []
-    for snapshot in snapshots:
-        if snapshot.get("VolumeId"):
-            snapshot_volumes.append(snapshot)
-
-    return snapshot_volumes
-
-
-@timeit
-def load_snapshot_volume_relations(
-    neo4j_session: neo4j.Session,
-    data: List[Dict],
-    current_aws_account_id: str,
-    update_tag: int,
-) -> None:
-    ingest_volumes = """
-    UNWIND $snapshot_volumes_list as volume
-    MERGE (v:EBSVolume{id: volume.VolumeId})
-    ON CREATE SET v.firstseen = timestamp()
-    SET v.lastupdated = $update_tag, v.snapshotid = volume.SnapshotId
-    WITH v, volume
-    MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (aa)-[r:RESOURCE]->(v)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $update_tag
-    WITH v, volume
-    MATCH (s:EBSSnapshot{id: volume.SnapshotId})
-    MERGE (s)-[r:CREATED_FROM]->(v)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $update_tag
-    """
-
-    neo4j_session.run(
-        ingest_volumes,
-        snapshot_volumes_list=data,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-        update_tag=update_tag,
+        AWS_ID=current_aws_account_id,
     )


 @timeit
 def cleanup_snapshots(
     neo4j_session: neo4j.Session,
-    common_job_parameters: Dict,
+    common_job_parameters: Dict[str, Any],
 ) -> None:
-
-
-        neo4j_session,
-        common_job_parameters,
+    GraphJob.from_node_schema(EBSSnapshotSchema(), common_job_parameters).run(
+        neo4j_session
     )

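The hand-written UNWIND/MERGE Cypher above is replaced by a call to load() with EBSSnapshotSchema, which is defined in the new cartography/models/aws/ec2/snapshots.py (+58 lines, not expanded in this extract). As a rough sketch of what such a schema typically looks like under cartography's declarative data-model conventions (the class names, property list, and module paths below are assumptions based on other cartography models, not the actual contents of the new file):

from dataclasses import dataclass

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties
from cartography.models.core.relationships import CartographyRelSchema
from cartography.models.core.relationships import LinkDirection
from cartography.models.core.relationships import TargetNodeMatcher
from cartography.models.core.relationships import make_target_node_matcher


@dataclass(frozen=True)
class EBSSnapshotNodeProperties(CartographyNodeProperties):
    # PropertyRef("X") reads field X from each transformed dict;
    # set_in_kwargs=True reads it from the load() kwargs (e.g. Region, lastupdated).
    id: PropertyRef = PropertyRef("SnapshotId")
    snapshotid: PropertyRef = PropertyRef("SnapshotId", extra_index=True)
    description: PropertyRef = PropertyRef("Description")
    encrypted: PropertyRef = PropertyRef("Encrypted")
    state: PropertyRef = PropertyRef("State")
    volumeid: PropertyRef = PropertyRef("VolumeId")
    volumesize: PropertyRef = PropertyRef("VolumeSize")
    kmskeyid: PropertyRef = PropertyRef("KmsKeyId")
    region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)


@dataclass(frozen=True)
class EBSSnapshotToAWSAccountRelProperties(CartographyRelProperties):
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)


@dataclass(frozen=True)
class EBSSnapshotToAWSAccountRel(CartographyRelSchema):
    # (:AWSAccount)-[:RESOURCE]->(:EBSSnapshot), matched on the AWS_ID kwarg
    target_node_label: str = "AWSAccount"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("AWS_ID", set_in_kwargs=True)},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: EBSSnapshotToAWSAccountRelProperties = (
        EBSSnapshotToAWSAccountRelProperties()
    )


@dataclass(frozen=True)
class EBSSnapshotSchema(CartographyNodeSchema):
    label: str = "EBSSnapshot"
    properties: EBSSnapshotNodeProperties = EBSSnapshotNodeProperties()
    sub_resource_relationship: EBSSnapshotToAWSAccountRel = EBSSnapshotToAWSAccountRel()
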
@@ -161,7 +124,7 @@ def sync_ebs_snapshots(
     regions: List[str],
     current_aws_account_id: str,
     update_tag: int,
-    common_job_parameters: Dict,
+    common_job_parameters: Dict[str, Any],
 ) -> None:
     for region in regions:
         logger.debug(
@@ -174,12 +137,12 @@ def sync_ebs_snapshots(
             region,
             current_aws_account_id,
         )
-
-
-
-        load_snapshot_volume_relations(
+        raw_data = get_snapshots(boto3_session, region, snapshots_in_use)
+        transformed_data = transform_snapshots(raw_data)
+        load_snapshots(
             neo4j_session,
-
+            transformed_data,
+            region,
             current_aws_account_id,
             update_tag,
         )
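With the old get_snapshot_volumes/load_snapshot_volume_relations path removed, sync_ebs_snapshots now runs a plain get → transform → load pipeline. A minimal sketch of what the new transform step produces (sample values are illustrative only; only SnapshotId is required, every other field falls back to None via .get()):

raw = [
    {
        "SnapshotId": "snap-0123456789abcdef0",
        "VolumeId": "vol-0123456789abcdef0",
        "State": "completed",
        "Encrypted": True,
    }
]
shaped = transform_snapshots(raw)
# shaped[0]["KmsKeyId"] is None here; load_snapshots() then hands these dicts to
# load() with EBSSnapshotSchema(), so the Neo4j write is schema-driven instead of
# the hand-written Cypher that was deleted above.
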
cartography/intel/aws/ec2/subnets.py
CHANGED

@@ -53,7 +53,7 @@ def load_subnets(
     snet.state = subnet.State, snet.assignipv6addressoncreation = subnet.AssignIpv6AddressOnCreation,
     snet.map_public_ip_on_launch = subnet.MapPublicIpOnLaunch, snet.subnet_arn = subnet.SubnetArn,
     snet.availability_zone = subnet.AvailabilityZone, snet.availability_zone_id = subnet.AvailabilityZoneId,
-    snet.
+    snet.subnet_id = subnet.SubnetId
     """

     ingest_subnet_vpc_relations = """

cartography/intel/aws/ecs.py
CHANGED
@@ -169,6 +169,22 @@ def _get_containers_from_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
     return containers


+def transform_ecs_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """
+    Extract network interface ID from task attachments.
+    """
+    for task in tasks:
+        for attachment in task.get("attachments", []):
+            if attachment.get("type") == "ElasticNetworkInterface":
+                details = attachment.get("details", [])
+                for detail in details:
+                    if detail.get("name") == "networkInterfaceId":
+                        task["networkInterfaceId"] = detail.get("value")
+                        break
+                break
+    return tasks
+
+
 @timeit
 def load_ecs_clusters(
     neo4j_session: neo4j.Session,
@@ -407,6 +423,7 @@ def _sync_ecs_task_and_container_defns(
         boto3_session,
         region,
     )
+    tasks = transform_ecs_tasks(tasks)
     containers = _get_containers_from_tasks(tasks)
     load_ecs_tasks(
         neo4j_session,
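transform_ecs_tasks mutates each task dict in place, copying the first networkInterfaceId it finds in an ElasticNetworkInterface attachment up to the top level; this mirrors the shape of the ECS DescribeTasks response. A small sketch with made-up values:

task = {
    "taskArn": "arn:aws:ecs:us-east-1:123456789012:task/example",
    "attachments": [
        {
            "type": "ElasticNetworkInterface",
            "details": [
                {"name": "subnetId", "value": "subnet-0a1b2c3d"},
                {"name": "networkInterfaceId", "value": "eni-0a1b2c3d4e5f67890"},
            ],
        },
    ],
}
transform_ecs_tasks([task])
# task["networkInterfaceId"] == "eni-0a1b2c3d4e5f67890"; tasks without an ENI
# attachment pass through unchanged, and load_ecs_tasks() receives the same list.
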
cartography/intel/aws/guardduty.py
ADDED

@@ -0,0 +1,275 @@
+import logging
+from typing import Any
+from typing import Dict
+from typing import List
+
+import boto3
+import boto3.session
+import neo4j
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.guardduty.findings import GuardDutyFindingSchema
+from cartography.stats import get_stats_client
+from cartography.util import aws_handle_regions
+from cartography.util import aws_paginate
+from cartography.util import merge_module_sync_metadata
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+stat_handler = get_stats_client(__name__)
+
+
+def _get_severity_range_for_threshold(
+    severity_threshold: str | None,
+) -> List[str] | None:
+    """
+    Convert severity threshold string to GuardDuty numeric severity range.
+
+    GuardDuty severity mappings:
+    - LOW: 1.0-3.9
+    - MEDIUM: 4.0-6.9
+    - HIGH: 7.0-8.9
+    - CRITICAL: 9.0-10.0
+
+    :param severity_threshold: Severity threshold (LOW, MEDIUM, HIGH, CRITICAL)
+    :return: List of numeric severity ranges to include, or None for no filtering
+    """
+    if not severity_threshold:
+        return None
+
+    threshold_upper = severity_threshold.upper().strip()
+
+    # Map threshold to numeric ranges - include threshold level and above
+    if threshold_upper == "LOW":
+        return ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]  # All severities
+    elif threshold_upper == "MEDIUM":
+        return ["4", "5", "6", "7", "8", "9", "10"]  # MEDIUM and above
+    elif threshold_upper == "HIGH":
+        return ["7", "8", "9", "10"]  # HIGH and CRITICAL only
+    elif threshold_upper == "CRITICAL":
+        return ["9", "10"]  # CRITICAL only
+    else:
+        return None
+
+
+@aws_handle_regions
+def get_detectors(
+    boto3_session: boto3.session.Session,
+    region: str,
+) -> List[str]:
+    """
+    Get GuardDuty detector IDs for all detectors in a region.
+    """
+    client = boto3_session.client("guardduty", region_name=region)
+
+    # Get all detector IDs in this region
+    detectors_response = client.list_detectors()
+    detector_ids = detectors_response.get("DetectorIds", [])
+
+    if not detector_ids:
+        logger.info(f"No GuardDuty detectors found in region {region}")
+        return []
+
+    logger.info(f"Found {len(detector_ids)} GuardDuty detectors in region {region}")
+    return detector_ids
+
+
+@aws_handle_regions
+@timeit
+def get_findings(
+    boto3_session: boto3.session.Session,
+    region: str,
+    detector_id: str,
+    severity_threshold: str | None = None,
+) -> List[Dict[str, Any]]:
+    """
+    Get GuardDuty findings for a specific detector.
+    Only fetches unarchived findings to avoid including closed/resolved findings.
+    Optionally filters by severity threshold.
+    """
+    client = boto3_session.client("guardduty", region_name=region)
+
+    # Build FindingCriteria - always exclude archived findings
+    criteria = {"service.archived": {"Equals": ["false"]}}
+
+    # Add severity filtering if threshold is provided
+    severity_range = _get_severity_range_for_threshold(severity_threshold)
+    if severity_range:
+        min_severity = min(
+            float(s) for s in severity_range
+        )  # get min severity from range
+        # I chose to ignore the type error here because the AWS API has fields that require different types
+        criteria["severity"] = {"GreaterThanOrEqual": int(min_severity)}  # type: ignore
+
+    # Get all finding IDs for this detector with filtering
+    finding_ids = list(
+        aws_paginate(
+            client,
+            "list_findings",
+            "FindingIds",
+            DetectorId=detector_id,
+            FindingCriteria={"Criterion": criteria},
+        )
+    )
+
+    if not finding_ids:
+        logger.info(f"No findings found for detector {detector_id} in region {region}")
+        return []
+
+    findings_data = []
+
+    # Process findings in batches (GuardDuty API limit is 50)
+    batch_size = 50
+    for i in range(0, len(finding_ids), batch_size):
+        batch_ids = finding_ids[i : i + batch_size]
+
+        findings_response = client.get_findings(
+            DetectorId=detector_id, FindingIds=batch_ids
+        )
+
+        findings_batch = findings_response.get("Findings", [])
+        findings_data.extend(findings_batch)
+
+    logger.info(
+        f"Retrieved {len(findings_data)} findings for detector {detector_id} in region {region}"
+    )
+    return findings_data
+
+
+def transform_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Transform GuardDuty findings from API response to schema format."""
+    transformed: List[Dict[str, Any]] = []
+    for f in findings:
+        item: Dict[str, Any] = {
+            "id": f["Id"],
+            "arn": f.get("Arn"),
+            "type": f.get("Type"),
+            "severity": f.get("Severity"),
+            "title": f.get("Title"),
+            "description": f.get("Description"),
+            "confidence": f.get("Confidence"),
+            "eventfirstseen": f.get("EventFirstSeen"),
+            "eventlastseen": f.get("EventLastSeen"),
+            "accountid": f.get("AccountId"),
+            "region": f.get("Region"),
+            "detectorid": f.get("DetectorId"),
+            "archived": f.get("Archived"),
+        }
+
+        # Handle nested resource information
+        resource = f.get("Resource", {})
+        item["resource_type"] = resource.get("ResourceType")
+
+        # Extract resource ID based on resource type
+        if item["resource_type"] == "Instance":
+            details = resource.get("InstanceDetails", {})
+            item["resource_id"] = details.get("InstanceId")
+        elif item["resource_type"] == "S3Bucket":
+            buckets = resource.get("S3BucketDetails") or []
+            if buckets:
+                item["resource_id"] = buckets[0].get("Name")
+        else:
+            item["resource_id"] = None
+
+        transformed.append(item)
+
+    return transformed
+
+
+@timeit
+def load_guardduty_findings(
+    neo4j_session: neo4j.Session,
+    data: List[Dict[str, Any]],
+    region: str,
+    aws_account_id: str,
+    update_tag: int,
+) -> None:
+    """
+    Load GuardDuty findings information into the graph.
+    """
+    logger.info(
+        f"Loading {len(data)} GuardDuty findings for region {region} into graph."
+    )
+
+    load(
+        neo4j_session,
+        GuardDutyFindingSchema(),
+        data,
+        lastupdated=update_tag,
+        Region=region,
+        AWS_ID=aws_account_id,
+    )
+
+
+@timeit
+def cleanup_guardduty(
+    neo4j_session: neo4j.Session, common_job_parameters: Dict
+) -> None:
+    """
+    Run GuardDuty cleanup job.
+    """
+    logger.debug("Running GuardDuty cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        GuardDutyFindingSchema(), common_job_parameters
+    )
+    cleanup_job.run(neo4j_session)
+
+
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    boto3_session: boto3.session.Session,
+    regions: List[str],
+    current_aws_account_id: str,
+    update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Sync GuardDuty findings for all regions.
+    Severity threshold filter is obtained from common_job_parameters.
+    """
+    # Get severity threshold from common job parameters
+    severity_threshold = common_job_parameters.get("aws_guardduty_severity_threshold")
+    for region in regions:
+        logger.info(
+            f"Syncing GuardDuty findings for {region} in account {current_aws_account_id}"
+        )
+
+        # Get all detectors in the region
+        detector_ids = get_detectors(boto3_session, region)
+
+        if not detector_ids:
+            logger.info(f"No GuardDuty detectors found in region {region}, skipping.")
+            continue
+
+        all_findings = []
+
+        # Get findings for each detector
+        for detector_id in detector_ids:
+            findings = get_findings(
+                boto3_session, region, detector_id, severity_threshold
+            )
+            all_findings.extend(findings)
+
+        transformed_findings = transform_findings(all_findings)
+
+        load_guardduty_findings(
+            neo4j_session,
+            transformed_findings,
+            region,
+            current_aws_account_id,
+            update_tag,
+        )
+
+    # Cleanup and metadata update (outside region loop)
+    cleanup_guardduty(neo4j_session, common_job_parameters)
+
+    merge_module_sync_metadata(
+        neo4j_session,
+        group_type="AWSAccount",
+        group_id=current_aws_account_id,
+        synced_type="GuardDutyFinding",
+        update_tag=update_tag,
+        stat_handler=stat_handler,
+    )

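The severity handling in get_findings reduces the threshold to a single GreaterThanOrEqual criterion. A worked example of what is actually sent to list_findings for a HIGH threshold (the values follow directly from _get_severity_range_for_threshold above):

severity_range = _get_severity_range_for_threshold("HIGH")  # ["7", "8", "9", "10"]
min_severity = min(float(s) for s in severity_range)        # 7.0
criteria = {
    "service.archived": {"Equals": ["false"]},
    "severity": {"GreaterThanOrEqual": int(min_severity)},  # 7
}
# get_findings() then calls list_findings with FindingCriteria={"Criterion": criteria},
# so archived findings and anything below severity 7.0 never reach transform_findings().
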
cartography/intel/aws/resources.py
CHANGED

@@ -18,6 +18,7 @@ from . import eks
 from . import elasticache
 from . import elasticsearch
 from . import emr
+from . import guardduty
 from . import iam
 from . import identitycenter
 from . import inspector
@@ -111,5 +112,6 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
     "cloudtrail_management_events": cloudtrail_management_events.sync,
     "cloudwatch": cloudwatch.sync,
     "efs": efs.sync,
+    "guardduty": guardduty.sync,
     "codebuild": codebuild.sync,
 }
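With "guardduty" registered in RESOURCE_FUNCTIONS, the new module runs as part of the AWS sync like any other resource type. A minimal sketch of driving it directly (the Neo4j connection details and the UPDATE_TAG/AWS_ID parameter keys are assumptions for illustration; "aws_guardduty_severity_threshold" is the key sync() reads, per the module above):

import boto3
import neo4j

from cartography.intel.aws import guardduty

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
with driver.session() as session:
    guardduty.sync(
        session,
        boto3.Session(),
        regions=["us-east-1"],
        current_aws_account_id="123456789012",
        update_tag=1700000000,
        common_job_parameters={
            "UPDATE_TAG": 1700000000,
            "AWS_ID": "123456789012",
            "aws_guardduty_severity_threshold": "HIGH",
        },
    )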