cartography 0.107.0rc3-py3-none-any.whl → 0.108.0rc2-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- cartography/_version.py +2 -2
- cartography/cli.py +10 -0
- cartography/config.py +5 -0
- cartography/data/indexes.cypher +0 -8
- cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/cloudtrail.py +17 -4
- cartography/intel/aws/cloudtrail_management_events.py +560 -16
- cartography/intel/aws/cloudwatch.py +73 -4
- cartography/intel/aws/ec2/security_groups.py +140 -122
- cartography/intel/aws/ec2/snapshots.py +47 -84
- cartography/intel/aws/ec2/subnets.py +37 -63
- cartography/intel/aws/elasticache.py +102 -79
- cartography/intel/aws/guardduty.py +275 -0
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/github/repos.py +370 -28
- cartography/models/aws/cloudtrail/management_events.py +95 -6
- cartography/models/aws/cloudtrail/trail.py +21 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/ec2/security_group_rules.py +109 -0
- cartography/models/aws/ec2/security_groups.py +90 -0
- cartography/models/aws/ec2/snapshots.py +58 -0
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ec2/volumes.py +20 -0
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/guardduty/__init__.py +1 -0
- cartography/models/aws/guardduty/findings.py +102 -0
- cartography/models/github/dependencies.py +74 -0
- cartography/models/github/manifests.py +49 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/METADATA +3 -3
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/RECORD +37 -26
- cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
- cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/WHEEL +0 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/entry_points.txt +0 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc2.dist-info}/top_level.txt +0 -0
cartography/intel/aws/ec2/subnets.py

@@ -1,17 +1,17 @@
 import logging
-from typing import Dict
-from typing import List
+from typing import Any
 
 import boto3
 import neo4j
 
+from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
 from cartography.models.aws.ec2.auto_scaling_groups import (
     EC2SubnetAutoScalingGroupSchema,
 )
 from cartography.models.aws.ec2.subnet_instance import EC2SubnetInstanceSchema
+from cartography.models.aws.ec2.subnets import EC2SubnetSchema
 from cartography.util import aws_handle_regions
-from cartography.util import run_cleanup_job
 from cartography.util import timeit
 
 from .util import get_botocore_config
@@ -21,86 +21,53 @@ logger = logging.getLogger(__name__)
 
 @timeit
 @aws_handle_regions
-def get_subnet_data(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
+def get_subnet_data(
+    boto3_session: boto3.session.Session, region: str
+) -> list[dict[str, Any]]:
     client = boto3_session.client(
         "ec2",
         region_name=region,
         config=get_botocore_config(),
     )
     paginator = client.get_paginator("describe_subnets")
-    subnets: List[Dict] = []
+    subnets: list[dict[str, Any]] = []
     for page in paginator.paginate():
         subnets.extend(page["Subnets"])
     return subnets
 
 
+def transform_subnet_data(subnets: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Transform subnet data into a loadable format."""
+    transformed: list[dict[str, Any]] = []
+    for subnet in subnets:
+        transformed.append(subnet.copy())
+    return transformed
+
+
 @timeit
 def load_subnets(
     neo4j_session: neo4j.Session,
-    data: List[Dict],
+    data: list[dict[str, Any]],
     region: str,
     aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    [… 7 removed lines not rendered in this diff view (start of the old ingest_subnets Cypher statement) …]
-    snet.map_customer_owned_ip_on_launch = subnet.MapCustomerOwnedIpOnLaunch,
-    snet.state = subnet.State, snet.assignipv6addressoncreation = subnet.AssignIpv6AddressOnCreation,
-    snet.map_public_ip_on_launch = subnet.MapPublicIpOnLaunch, snet.subnet_arn = subnet.SubnetArn,
-    snet.availability_zone = subnet.AvailabilityZone, snet.availability_zone_id = subnet.AvailabilityZoneId,
-    snet.subnet_id = subnet.SubnetId
-    """
-
-    ingest_subnet_vpc_relations = """
-    UNWIND $subnets as subnet
-    MATCH (snet:EC2Subnet{subnetid: subnet.SubnetId}), (vpc:AWSVpc{id: subnet.VpcId})
-    MERGE (snet)-[r:MEMBER_OF_AWS_VPC]->(vpc)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
-
-    ingest_subnet_aws_account_relations = """
-    UNWIND $subnets as subnet
-    MATCH (snet:EC2Subnet{subnetid: subnet.SubnetId}), (aws:AWSAccount{id: $aws_account_id})
-    MERGE (aws)-[r:RESOURCE]->(snet)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
-
-    neo4j_session.run(
-        ingest_subnets,
-        subnets=data,
-        aws_update_tag=aws_update_tag,
-        region=region,
-        aws_account_id=aws_account_id,
-    )
-    neo4j_session.run(
-        ingest_subnet_vpc_relations,
-        subnets=data,
-        aws_update_tag=aws_update_tag,
-        region=region,
-        aws_account_id=aws_account_id,
-    )
-    neo4j_session.run(
-        ingest_subnet_aws_account_relations,
-        subnets=data,
-        aws_update_tag=aws_update_tag,
-        region=region,
-        aws_account_id=aws_account_id,
+    load(
+        neo4j_session,
+        EC2SubnetSchema(),
+        data,
+        Region=region,
+        AWS_ID=aws_account_id,
+        lastupdated=aws_update_tag,
     )
 
 
 @timeit
-def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-    [… 2 removed lines not rendered in this diff view …]
+def cleanup_subnets(
+    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
+) -> None:
+    GraphJob.from_node_schema(EC2SubnetSchema(), common_job_parameters).run(
         neo4j_session,
-        common_job_parameters,
     )
     GraphJob.from_node_schema(EC2SubnetInstanceSchema(), common_job_parameters).run(
         neo4j_session,
@@ -115,10 +82,10 @@ def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
 def sync_subnets(
     neo4j_session: neo4j.Session,
     boto3_session: boto3.session.Session,
-    regions: List[str],
+    regions: list[str],
     current_aws_account_id: str,
     update_tag: int,
-    common_job_parameters: Dict,
+    common_job_parameters: dict[str, Any],
 ) -> None:
     for region in regions:
         logger.info(
@@ -127,5 +94,12 @@ def sync_subnets(
             current_aws_account_id,
         )
         data = get_subnet_data(boto3_session, region)
-        [… 1 removed line not rendered in this diff view …]
+        transformed = transform_subnet_data(data)
+        load_subnets(
+            neo4j_session,
+            transformed,
+            region,
+            current_aws_account_id,
+            update_tag,
+        )
     cleanup_subnets(neo4j_session, common_job_parameters)
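
Note on the pattern: this rewrite replaces three handwritten Cypher statements (node ingest, VPC relationship, account relationship) with a single schema-driven load() against EC2SubnetSchema, and swaps the JSON-based run_cleanup_job for GraphJob.from_node_schema. A minimal sketch of driving the refactored module end to end, assuming a local Neo4j instance; the Bolt URI, region, account ID, update tag, and the cleanup-parameter keys are illustrative placeholders, not values taken from the package:

    import boto3
    import neo4j

    from cartography.intel.aws.ec2 import subnets

    UPDATE_TAG = 1700000000  # placeholder sync timestamp
    driver = neo4j.GraphDatabase.driver("bolt://localhost:7687")
    with driver.session() as session:
        subnets.sync_subnets(
            session,
            boto3.session.Session(),
            ["us-east-1"],
            "123456789012",
            UPDATE_TAG,
            # GraphJob-based cleanup scopes deletions by these parameters
            # (key names assumed from cartography's usual conventions).
            {"UPDATE_TAG": UPDATE_TAG, "AWS_ID": "123456789012"},
        )

After the load, cleanup_subnets removes subnet nodes whose lastupdated no longer matches the current tag, which is the standard GraphJob cleanup semantics.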
cartography/intel/aws/elasticache.py

@@ -1,118 +1,132 @@
 import logging
-from typing import Dict
-from typing import List
-from typing import Set
+from typing import Any
 
 import boto3
 import neo4j
 
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.elasticache.cluster import ElasticacheClusterSchema
+from cartography.models.aws.elasticache.topic import ElasticacheTopicSchema
 from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit
 
 logger = logging.getLogger(__name__)
 stat_handler = get_stats_client(__name__)
 
 
-def _get_topic(cluster: Dict) -> Dict:
-    return cluster["NotificationConfiguration"]
-
-
-def transform_elasticache_topics(cluster_data: List[Dict]) -> List[Dict]:
-    """
-    Collect unique TopicArns from the cluster data
-    """
-    seen: Set[str] = set()
-    topics: List[Dict] = []
-    for cluster in cluster_data:
-        topic = _get_topic(cluster)
-        topic_arn = topic["TopicArn"]
-        if topic_arn not in seen:
-            seen.add(topic_arn)
-            topics.append(topic)
-    return topics
-
-
 @timeit
 @aws_handle_regions
 def get_elasticache_clusters(
     boto3_session: boto3.session.Session,
     region: str,
-) -> List[Dict]:
-    logger.debug(f"Getting ElastiCache Clusters in region '{region}'.")
+) -> list[dict[str, Any]]:
     client = boto3_session.client("elasticache", region_name=region)
     paginator = client.get_paginator("describe_cache_clusters")
-    clusters: List[Dict] = []
+    clusters: list[dict[str, Any]] = []
     for page in paginator.paginate():
-        clusters.extend(page["CacheClusters"])
+        clusters.extend(page.get("CacheClusters", []))
     return clusters
 
 
+def transform_elasticache_clusters(
+    clusters: list[dict[str, Any]], region: str
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    cluster_data: list[dict[str, Any]] = []
+    topics: dict[str, dict[str, Any]] = {}
+
+    for cluster in clusters:
+        notification = cluster.get("NotificationConfiguration", {})
+        topic_arn = notification.get("TopicArn")
+        cluster_record = {
+            "ARN": cluster["ARN"],
+            "CacheClusterId": cluster["CacheClusterId"],
+            "CacheNodeType": cluster.get("CacheNodeType"),
+            "Engine": cluster.get("Engine"),
+            "EngineVersion": cluster.get("EngineVersion"),
+            "CacheClusterStatus": cluster.get("CacheClusterStatus"),
+            "NumCacheNodes": cluster.get("NumCacheNodes"),
+            "PreferredAvailabilityZone": cluster.get("PreferredAvailabilityZone"),
+            "PreferredMaintenanceWindow": cluster.get("PreferredMaintenanceWindow"),
+            "CacheClusterCreateTime": cluster.get("CacheClusterCreateTime"),
+            "CacheSubnetGroupName": cluster.get("CacheSubnetGroupName"),
+            "AutoMinorVersionUpgrade": cluster.get("AutoMinorVersionUpgrade"),
+            "ReplicationGroupId": cluster.get("ReplicationGroupId"),
+            "SnapshotRetentionLimit": cluster.get("SnapshotRetentionLimit"),
+            "SnapshotWindow": cluster.get("SnapshotWindow"),
+            "AuthTokenEnabled": cluster.get("AuthTokenEnabled"),
+            "TransitEncryptionEnabled": cluster.get("TransitEncryptionEnabled"),
+            "AtRestEncryptionEnabled": cluster.get("AtRestEncryptionEnabled"),
+            "TopicArn": topic_arn,
+            "Region": region,
+        }
+        cluster_data.append(cluster_record)
+
+        if topic_arn:
+            topics.setdefault(
+                topic_arn,
+                {
+                    "TopicArn": topic_arn,
+                    "TopicStatus": notification.get("TopicStatus"),
+                    "cluster_arns": [],
+                },
+            )["cluster_arns"].append(cluster["ARN"])
+
+    return cluster_data, list(topics.values())
+
+
 @timeit
 def load_elasticache_clusters(
     neo4j_session: neo4j.Session,
-    clusters: List[Dict],
+    clusters: list[dict[str, Any]],
     region: str,
     aws_account_id: str,
     update_tag: int,
 ) -> None:
-    query = """
-    UNWIND $clusters as elasticache_cluster
-    MERGE (cluster:ElasticacheCluster{id:elasticache_cluster.ARN})
-    ON CREATE SET cluster.firstseen = timestamp(),
-    cluster.arn = elasticache_cluster.ARN,
-    cluster.topic_arn = elasticache_cluster.NotificationConfiguration.TopicArn,
-    cluster.id = elasticache_cluster.CacheClusterId,
-    cluster.region = $region
-    SET cluster.lastupdated = $aws_update_tag
-
-    WITH cluster, elasticache_cluster
-    MATCH (owner:AWSAccount{id: $aws_account_id})
-    MERGE (owner)-[r3:RESOURCE]->(cluster)
-    ON CREATE SET r3.firstseen = timestamp()
-    SET r3.lastupdated = $aws_update_tag
-
-    WITH elasticache_cluster, owner
-    WHERE NOT elasticache_cluster.NotificationConfiguration IS NULL
-    MERGE (topic:ElasticacheTopic{id: elasticache_cluster.NotificationConfiguration.TopicArn})
-    ON CREATE SET topic.firstseen = timestamp(),
-    topic.arn = elasticache_cluster.NotificationConfiguration.TopicArn
-    SET topic.lastupdated = $aws_update_tag,
-    topic.status = elasticache_cluster.NotificationConfiguration.Status
-
-    MERGE (topic)-[r:CACHE_CLUSTER]->(cluster)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    WITH cluster, topic
-
-    MERGE (owner)-[r2:RESOURCE]->(topic)
-    ON CREATE SET r2.firstseen = timestamp()
-    SET r2.lastupdated = $aws_update_tag
-    """
     logger.info(
-        f"Loading […]
+        f"Loading {len(clusters)} ElastiCache clusters for region '{region}' into graph."
     )
-    [… 6 removed lines not rendered in this diff view …]
+    load(
+        neo4j_session,
+        ElasticacheClusterSchema(),
+        clusters,
+        lastupdated=update_tag,
+        Region=region,
+        AWS_ID=aws_account_id,
    )
 
 
 @timeit
-def […]
+def load_elasticache_topics(
     neo4j_session: neo4j.Session,
-    […]
+    topics: list[dict[str, Any]],
+    aws_account_id: str,
     update_tag: int,
 ) -> None:
-    [… 2 removed lines not rendered in this diff view …]
+    if not topics:
+        return
+    logger.info(f"Loading {len(topics)} ElastiCache topics into graph.")
+    load(
         neo4j_session,
-        […]
+        ElasticacheTopicSchema(),
+        topics,
+        lastupdated=update_tag,
+        AWS_ID=aws_account_id,
+    )
+
+
+@timeit
+def cleanup(
+    neo4j_session: neo4j.Session,
+    common_job_parameters: dict[str, Any],
+) -> None:
+    GraphJob.from_node_schema(ElasticacheClusterSchema(), common_job_parameters).run(
+        neo4j_session
+    )
+    GraphJob.from_node_schema(ElasticacheTopicSchema(), common_job_parameters).run(
+        neo4j_session
     )
 
 
@@ -120,24 +134,33 @@ def cleanup(
 def sync(
     neo4j_session: neo4j.Session,
     boto3_session: boto3.session.Session,
-    regions: List[str],
+    regions: list[str],
     current_aws_account_id: str,
     update_tag: int,
-    common_job_parameters: Dict,
+    common_job_parameters: dict[str, Any],
 ) -> None:
     for region in regions:
         logger.info(
-            […]
+            "Syncing ElastiCache clusters for region '%s' in account '%s'.",
+            region,
+            current_aws_account_id,
         )
-        […]
+        raw_clusters = get_elasticache_clusters(boto3_session, region)
+        cluster_data, topic_data = transform_elasticache_clusters(raw_clusters, region)
         load_elasticache_clusters(
             neo4j_session,
-            […]
+            cluster_data,
             region,
             current_aws_account_id,
             update_tag,
         )
-        […]
+        load_elasticache_topics(
+            neo4j_session,
+            topic_data,
+            current_aws_account_id,
+            update_tag,
+        )
+    cleanup(neo4j_session, common_job_parameters)
     merge_module_sync_metadata(
         neo4j_session,
         group_type="AWSAccount",
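
Unlike the removed transform_elasticache_topics, which only deduplicated TopicArns, the new transform_elasticache_clusters returns two lists: flattened cluster records and unique topic records, where each topic carries the ARNs of the clusters that publish to it. A quick sanity check of that split, using a hypothetical two-cluster describe_cache_clusters payload trimmed to the required fields (optional fields default to None via .get()):

    from cartography.intel.aws.elasticache import transform_elasticache_clusters

    topic = {"TopicArn": "arn:aws:sns:us-east-1:123456789012:t", "TopicStatus": "active"}
    fake_clusters = [
        {"ARN": "arn:aws:elasticache:us-east-1:123456789012:cluster:a",
         "CacheClusterId": "a", "NotificationConfiguration": topic},
        {"ARN": "arn:aws:elasticache:us-east-1:123456789012:cluster:b",
         "CacheClusterId": "b", "NotificationConfiguration": topic},
    ]
    clusters, topics = transform_elasticache_clusters(fake_clusters, "us-east-1")
    assert len(clusters) == 2  # one record per cluster
    assert len(topics) == 1    # the shared topic is deduplicated by ARN
    assert topics[0]["cluster_arns"] == [c["ARN"] for c in clusters]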
cartography/intel/aws/guardduty.py (new file)

@@ -0,0 +1,275 @@
+import logging
+from typing import Any
+from typing import Dict
+from typing import List
+
+import boto3
+import boto3.session
+import neo4j
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.guardduty.findings import GuardDutyFindingSchema
+from cartography.stats import get_stats_client
+from cartography.util import aws_handle_regions
+from cartography.util import aws_paginate
+from cartography.util import merge_module_sync_metadata
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+stat_handler = get_stats_client(__name__)
+
+
+def _get_severity_range_for_threshold(
+    severity_threshold: str | None,
+) -> List[str] | None:
+    """
+    Convert severity threshold string to GuardDuty numeric severity range.
+
+    GuardDuty severity mappings:
+    - LOW: 1.0-3.9
+    - MEDIUM: 4.0-6.9
+    - HIGH: 7.0-8.9
+    - CRITICAL: 9.0-10.0
+
+    :param severity_threshold: Severity threshold (LOW, MEDIUM, HIGH, CRITICAL)
+    :return: List of numeric severity ranges to include, or None for no filtering
+    """
+    if not severity_threshold:
+        return None
+
+    threshold_upper = severity_threshold.upper().strip()
+
+    # Map threshold to numeric ranges - include threshold level and above
+    if threshold_upper == "LOW":
+        return ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]  # All severities
+    elif threshold_upper == "MEDIUM":
+        return ["4", "5", "6", "7", "8", "9", "10"]  # MEDIUM and above
+    elif threshold_upper == "HIGH":
+        return ["7", "8", "9", "10"]  # HIGH and CRITICAL only
+    elif threshold_upper == "CRITICAL":
+        return ["9", "10"]  # CRITICAL only
+    else:
+        return None
+
+
+@aws_handle_regions
+def get_detectors(
+    boto3_session: boto3.session.Session,
+    region: str,
+) -> List[str]:
+    """
+    Get GuardDuty detector IDs for all detectors in a region.
+    """
+    client = boto3_session.client("guardduty", region_name=region)
+
+    # Get all detector IDs in this region
+    detectors_response = client.list_detectors()
+    detector_ids = detectors_response.get("DetectorIds", [])
+
+    if not detector_ids:
+        logger.info(f"No GuardDuty detectors found in region {region}")
+        return []
+
+    logger.info(f"Found {len(detector_ids)} GuardDuty detectors in region {region}")
+    return detector_ids
+
+
+@aws_handle_regions
+@timeit
+def get_findings(
+    boto3_session: boto3.session.Session,
+    region: str,
+    detector_id: str,
+    severity_threshold: str | None = None,
+) -> List[Dict[str, Any]]:
+    """
+    Get GuardDuty findings for a specific detector.
+    Only fetches unarchived findings to avoid including closed/resolved findings.
+    Optionally filters by severity threshold.
+    """
+    client = boto3_session.client("guardduty", region_name=region)
+
+    # Build FindingCriteria - always exclude archived findings
+    criteria = {"service.archived": {"Equals": ["false"]}}
+
+    # Add severity filtering if threshold is provided
+    severity_range = _get_severity_range_for_threshold(severity_threshold)
+    if severity_range:
+        min_severity = min(
+            float(s) for s in severity_range
+        )  # get min severity from range
+        # I chose to ignore the type error here because the AWS API has fields that require different types
+        criteria["severity"] = {"GreaterThanOrEqual": int(min_severity)}  # type: ignore
+
+    # Get all finding IDs for this detector with filtering
+    finding_ids = list(
+        aws_paginate(
+            client,
+            "list_findings",
+            "FindingIds",
+            DetectorId=detector_id,
+            FindingCriteria={"Criterion": criteria},
+        )
+    )
+
+    if not finding_ids:
+        logger.info(f"No findings found for detector {detector_id} in region {region}")
+        return []
+
+    findings_data = []
+
+    # Process findings in batches (GuardDuty API limit is 50)
+    batch_size = 50
+    for i in range(0, len(finding_ids), batch_size):
+        batch_ids = finding_ids[i : i + batch_size]
+
+        findings_response = client.get_findings(
+            DetectorId=detector_id, FindingIds=batch_ids
+        )
+
+        findings_batch = findings_response.get("Findings", [])
+        findings_data.extend(findings_batch)
+
+    logger.info(
+        f"Retrieved {len(findings_data)} findings for detector {detector_id} in region {region}"
+    )
+    return findings_data
+
+
+def transform_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Transform GuardDuty findings from API response to schema format."""
+    transformed: List[Dict[str, Any]] = []
+    for f in findings:
+        item: Dict[str, Any] = {
+            "id": f["Id"],
+            "arn": f.get("Arn"),
+            "type": f.get("Type"),
+            "severity": f.get("Severity"),
+            "title": f.get("Title"),
+            "description": f.get("Description"),
+            "confidence": f.get("Confidence"),
+            "eventfirstseen": f.get("EventFirstSeen"),
+            "eventlastseen": f.get("EventLastSeen"),
+            "accountid": f.get("AccountId"),
+            "region": f.get("Region"),
+            "detectorid": f.get("DetectorId"),
+            "archived": f.get("Archived"),
+        }
+
+        # Handle nested resource information
+        resource = f.get("Resource", {})
+        item["resource_type"] = resource.get("ResourceType")
+
+        # Extract resource ID based on resource type
+        if item["resource_type"] == "Instance":
+            details = resource.get("InstanceDetails", {})
+            item["resource_id"] = details.get("InstanceId")
+        elif item["resource_type"] == "S3Bucket":
+            buckets = resource.get("S3BucketDetails") or []
+            if buckets:
+                item["resource_id"] = buckets[0].get("Name")
+        else:
+            item["resource_id"] = None
+
+        transformed.append(item)
+
+    return transformed
+
+
+@timeit
+def load_guardduty_findings(
+    neo4j_session: neo4j.Session,
+    data: List[Dict[str, Any]],
+    region: str,
+    aws_account_id: str,
+    update_tag: int,
+) -> None:
+    """
+    Load GuardDuty findings information into the graph.
+    """
+    logger.info(
+        f"Loading {len(data)} GuardDuty findings for region {region} into graph."
+    )
+
+    load(
+        neo4j_session,
+        GuardDutyFindingSchema(),
+        data,
+        lastupdated=update_tag,
+        Region=region,
+        AWS_ID=aws_account_id,
+    )
+
+
+@timeit
+def cleanup_guardduty(
+    neo4j_session: neo4j.Session, common_job_parameters: Dict
+) -> None:
+    """
+    Run GuardDuty cleanup job.
+    """
+    logger.debug("Running GuardDuty cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        GuardDutyFindingSchema(), common_job_parameters
+    )
+    cleanup_job.run(neo4j_session)
+
+
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    boto3_session: boto3.session.Session,
+    regions: List[str],
+    current_aws_account_id: str,
+    update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Sync GuardDuty findings for all regions.
+    Severity threshold filter is obtained from common_job_parameters.
+    """
+    # Get severity threshold from common job parameters
+    severity_threshold = common_job_parameters.get("aws_guardduty_severity_threshold")
+    for region in regions:
+        logger.info(
+            f"Syncing GuardDuty findings for {region} in account {current_aws_account_id}"
+        )
+
+        # Get all detectors in the region
+        detector_ids = get_detectors(boto3_session, region)
+
+        if not detector_ids:
+            logger.info(f"No GuardDuty detectors found in region {region}, skipping.")
+            continue
+
+        all_findings = []
+
+        # Get findings for each detector
+        for detector_id in detector_ids:
+            findings = get_findings(
+                boto3_session, region, detector_id, severity_threshold
+            )
+            all_findings.extend(findings)
+
+        transformed_findings = transform_findings(all_findings)
+
+        load_guardduty_findings(
+            neo4j_session,
+            transformed_findings,
+            region,
+            current_aws_account_id,
+            update_tag,
+        )
+
+    # Cleanup and metadata update (outside region loop)
+    cleanup_guardduty(neo4j_session, common_job_parameters)
+
+    merge_module_sync_metadata(
+        neo4j_session,
+        group_type="AWSAccount",
+        group_id=current_aws_account_id,
+        synced_type="GuardDutyFinding",
+        update_tag=update_tag,
+        stat_handler=stat_handler,
+    )
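
One detail worth calling out in the new GuardDuty module: _get_severity_range_for_threshold returns a list of numeric levels, but get_findings collapses it to its minimum and sends a single GreaterThanOrEqual criterion, so each threshold effectively acts as a lower bound on severity. A small check of that contract (it pokes a private helper, so treat it as an illustration rather than a supported API):

    from cartography.intel.aws.guardduty import _get_severity_range_for_threshold

    for threshold, floor in [("LOW", 1.0), ("MEDIUM", 4.0), ("HIGH", 7.0), ("CRITICAL", 9.0)]:
        severity_range = _get_severity_range_for_threshold(threshold)
        # get_findings() takes min(...) of this range to build the API filter
        assert min(float(s) for s in severity_range) == floor

    assert _get_severity_range_for_threshold(None) is None     # no filtering
    assert _get_severity_range_for_threshold("bogus") is None  # unknown labels also disable filtering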
cartography/intel/aws/resources.py

@@ -18,6 +18,7 @@ from . import eks
 from . import elasticache
 from . import elasticsearch
 from . import emr
+from . import guardduty
 from . import iam
 from . import identitycenter
 from . import inspector
@@ -111,5 +112,6 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
     "cloudtrail_management_events": cloudtrail_management_events.sync,
     "cloudwatch": cloudwatch.sync,
     "efs": efs.sync,
+    "guardduty": guardduty.sync,
     "codebuild": codebuild.sync,
 }
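
With this two-line registration, GuardDuty becomes dispatchable like any other AWS module: the orchestrator looks the sync function up in RESOURCE_FUNCTIONS and passes it the shared sessions plus common_job_parameters, from which the new module reads its severity filter. A hypothetical parameter dict illustrating that wiring (the UPDATE_TAG/AWS_ID keys are assumed from cartography's usual cleanup conventions; aws_guardduty_severity_threshold comes from the module above):

    from cartography.intel.aws.resources import RESOURCE_FUNCTIONS

    guardduty_sync = RESOURCE_FUNCTIONS["guardduty"]  # cartography.intel.aws.guardduty.sync
    common_job_parameters = {
        "UPDATE_TAG": 1700000000,                    # assumed cleanup-scoping key
        "AWS_ID": "123456789012",                    # assumed cleanup-scoping key
        "aws_guardduty_severity_threshold": "HIGH",  # sync only HIGH and CRITICAL findings
    }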