cartography-0.108.0rc1-py3-none-any.whl → cartography-0.109.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of cartography has been flagged as potentially problematic by the registry.
- cartography/_version.py +2 -2
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/intel/aws/cloudtrail.py +17 -4
- cartography/intel/aws/cloudtrail_management_events.py +593 -16
- cartography/intel/aws/cloudwatch.py +73 -4
- cartography/intel/aws/ec2/subnets.py +37 -63
- cartography/intel/aws/ecr.py +55 -80
- cartography/intel/aws/elasticache.py +102 -79
- cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
- cartography/intel/aws/secretsmanager.py +62 -44
- cartography/intel/entra/groups.py +29 -1
- cartography/intel/gcp/__init__.py +10 -0
- cartography/intel/gcp/compute.py +19 -42
- cartography/models/aws/cloudtrail/management_events.py +95 -6
- cartography/models/aws/cloudtrail/trail.py +21 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +41 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/entra/group.py +26 -0
- cartography/models/entra/user.py +6 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/METADATA +1 -1
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/RECORD +36 -25
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0rc1.dist-info}/top_level.txt +0 -0
cartography/intel/aws/elasticache.py

@@ -1,118 +1,132 @@
 import logging
-from typing import Dict
-from typing import List
-from typing import Set
+from typing import Any

 import boto3
 import neo4j

+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.elasticache.cluster import ElasticacheClusterSchema
+from cartography.models.aws.elasticache.topic import ElasticacheTopicSchema
 from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit

 logger = logging.getLogger(__name__)
 stat_handler = get_stats_client(__name__)


-def _get_topic(cluster: Dict) -> Dict:
-    return cluster["NotificationConfiguration"]
-
-
-def transform_elasticache_topics(cluster_data: List[Dict]) -> List[Dict]:
-    """
-    Collect unique TopicArns from the cluster data
-    """
-    seen: Set[str] = set()
-    topics: List[Dict] = []
-    for cluster in cluster_data:
-        topic = _get_topic(cluster)
-        topic_arn = topic["TopicArn"]
-        if topic_arn not in seen:
-            seen.add(topic_arn)
-            topics.append(topic)
-    return topics
-
-
 @timeit
 @aws_handle_regions
 def get_elasticache_clusters(
     boto3_session: boto3.session.Session,
     region: str,
-) -> List[Dict]:
-    logger.debug(f"Getting ElastiCache Clusters in region '{region}'.")
+) -> list[dict[str, Any]]:
     client = boto3_session.client("elasticache", region_name=region)
     paginator = client.get_paginator("describe_cache_clusters")
-    clusters: List[Dict] = []
+    clusters: list[dict[str, Any]] = []
     for page in paginator.paginate():
-        clusters.extend(page["CacheClusters"])
+        clusters.extend(page.get("CacheClusters", []))
     return clusters


+def transform_elasticache_clusters(
+    clusters: list[dict[str, Any]], region: str
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    cluster_data: list[dict[str, Any]] = []
+    topics: dict[str, dict[str, Any]] = {}
+
+    for cluster in clusters:
+        notification = cluster.get("NotificationConfiguration", {})
+        topic_arn = notification.get("TopicArn")
+        cluster_record = {
+            "ARN": cluster["ARN"],
+            "CacheClusterId": cluster["CacheClusterId"],
+            "CacheNodeType": cluster.get("CacheNodeType"),
+            "Engine": cluster.get("Engine"),
+            "EngineVersion": cluster.get("EngineVersion"),
+            "CacheClusterStatus": cluster.get("CacheClusterStatus"),
+            "NumCacheNodes": cluster.get("NumCacheNodes"),
+            "PreferredAvailabilityZone": cluster.get("PreferredAvailabilityZone"),
+            "PreferredMaintenanceWindow": cluster.get("PreferredMaintenanceWindow"),
+            "CacheClusterCreateTime": cluster.get("CacheClusterCreateTime"),
+            "CacheSubnetGroupName": cluster.get("CacheSubnetGroupName"),
+            "AutoMinorVersionUpgrade": cluster.get("AutoMinorVersionUpgrade"),
+            "ReplicationGroupId": cluster.get("ReplicationGroupId"),
+            "SnapshotRetentionLimit": cluster.get("SnapshotRetentionLimit"),
+            "SnapshotWindow": cluster.get("SnapshotWindow"),
+            "AuthTokenEnabled": cluster.get("AuthTokenEnabled"),
+            "TransitEncryptionEnabled": cluster.get("TransitEncryptionEnabled"),
+            "AtRestEncryptionEnabled": cluster.get("AtRestEncryptionEnabled"),
+            "TopicArn": topic_arn,
+            "Region": region,
+        }
+        cluster_data.append(cluster_record)
+
+        if topic_arn:
+            topics.setdefault(
+                topic_arn,
+                {
+                    "TopicArn": topic_arn,
+                    "TopicStatus": notification.get("TopicStatus"),
+                    "cluster_arns": [],
+                },
+            )["cluster_arns"].append(cluster["ARN"])
+
+    return cluster_data, list(topics.values())
+
+
 @timeit
 def load_elasticache_clusters(
     neo4j_session: neo4j.Session,
-    clusters: List[Dict],
+    clusters: list[dict[str, Any]],
     region: str,
     aws_account_id: str,
     update_tag: int,
 ) -> None:
-    query = """
-    UNWIND $clusters as elasticache_cluster
-    MERGE (cluster:ElasticacheCluster{id:elasticache_cluster.ARN})
-    ON CREATE SET cluster.firstseen = timestamp(),
-    cluster.arn = elasticache_cluster.ARN,
-    cluster.topic_arn = elasticache_cluster.NotificationConfiguration.TopicArn,
-    cluster.id = elasticache_cluster.CacheClusterId,
-    cluster.region = $region
-    SET cluster.lastupdated = $aws_update_tag
-
-    WITH cluster, elasticache_cluster
-    MATCH (owner:AWSAccount{id: $aws_account_id})
-    MERGE (owner)-[r3:RESOURCE]->(cluster)
-    ON CREATE SET r3.firstseen = timestamp()
-    SET r3.lastupdated = $aws_update_tag
-
-    WITH elasticache_cluster, owner
-    WHERE NOT elasticache_cluster.NotificationConfiguration IS NULL
-    MERGE (topic:ElasticacheTopic{id: elasticache_cluster.NotificationConfiguration.TopicArn})
-    ON CREATE SET topic.firstseen = timestamp(),
-    topic.arn = elasticache_cluster.NotificationConfiguration.TopicArn
-    SET topic.lastupdated = $aws_update_tag,
-    topic.status = elasticache_cluster.NotificationConfiguration.Status
-
-    MERGE (topic)-[r:CACHE_CLUSTER]->(cluster)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    WITH cluster, topic
-
-    MERGE (owner)-[r2:RESOURCE]->(topic)
-    ON CREATE SET r2.firstseen = timestamp()
-    SET r2.lastupdated = $aws_update_tag
-    """
     logger.info(
-        f"Loading
+        f"Loading {len(clusters)} ElastiCache clusters for region '{region}' into graph."
     )
-    neo4j_session.run(
-        query,
-        clusters=clusters,
-        region=region,
-        aws_account_id=aws_account_id,
-        aws_update_tag=update_tag,
+    load(
+        neo4j_session,
+        ElasticacheClusterSchema(),
+        clusters,
+        lastupdated=update_tag,
+        Region=region,
+        AWS_ID=aws_account_id,
     )


 @timeit
-def
+def load_elasticache_topics(
     neo4j_session: neo4j.Session,
-
+    topics: list[dict[str, Any]],
+    aws_account_id: str,
     update_tag: int,
 ) -> None:
-
-
+    if not topics:
+        return
+    logger.info(f"Loading {len(topics)} ElastiCache topics into graph.")
+    load(
         neo4j_session,
-
+        ElasticacheTopicSchema(),
+        topics,
+        lastupdated=update_tag,
+        AWS_ID=aws_account_id,
+    )
+
+
+@timeit
+def cleanup(
+    neo4j_session: neo4j.Session,
+    common_job_parameters: dict[str, Any],
+) -> None:
+    GraphJob.from_node_schema(ElasticacheClusterSchema(), common_job_parameters).run(
+        neo4j_session
+    )
+    GraphJob.from_node_schema(ElasticacheTopicSchema(), common_job_parameters).run(
+        neo4j_session
     )

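The setdefault(...)["cluster_arns"].append(...) chain in the new transform_elasticache_clusters above deduplicates SNS topics shared across clusters while still recording every associated cluster ARN. A minimal sketch of that behavior, assuming the 0.109 module is importable; the cluster records and ARNs below are invented for illustration:

    # Minimal sketch (invented sample data): two clusters share one SNS topic,
    # a third has no NotificationConfiguration at all.
    from cartography.intel.aws.elasticache import transform_elasticache_clusters

    clusters = [
        {
            "ARN": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-a",
            "CacheClusterId": "redis-a",
            "NotificationConfiguration": {
                "TopicArn": "arn:aws:sns:us-east-1:123456789012:alerts",
                "TopicStatus": "active",
            },
        },
        {
            "ARN": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-b",
            "CacheClusterId": "redis-b",
            "NotificationConfiguration": {
                "TopicArn": "arn:aws:sns:us-east-1:123456789012:alerts",
                "TopicStatus": "active",
            },
        },
        {
            "ARN": "arn:aws:elasticache:us-east-1:123456789012:cluster:redis-c",
            "CacheClusterId": "redis-c",
        },
    ]

    cluster_data, topics = transform_elasticache_clusters(clusters, "us-east-1")
    assert len(cluster_data) == 3  # every cluster yields a record
    assert len(topics) == 1        # the shared topic is deduplicated...
    assert len(topics[0]["cluster_arns"]) == 2  # ...but keeps both cluster ARNs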
@@ -120,24 +134,33 @@ def cleanup(
 def sync(
     neo4j_session: neo4j.Session,
     boto3_session: boto3.session.Session,
-    regions: List[str],
+    regions: list[str],
     current_aws_account_id: str,
     update_tag: int,
-    common_job_parameters: Dict,
+    common_job_parameters: dict[str, Any],
 ) -> None:
     for region in regions:
         logger.info(
-            f"Syncing ElastiCache clusters for region '{region}' in account '{current_aws_account_id}'.",
+            "Syncing ElastiCache clusters for region '%s' in account '%s'.",
+            region,
+            current_aws_account_id,
         )
-        clusters = get_elasticache_clusters(boto3_session, region)
+        raw_clusters = get_elasticache_clusters(boto3_session, region)
+        cluster_data, topic_data = transform_elasticache_clusters(raw_clusters, region)
         load_elasticache_clusters(
             neo4j_session,
-            clusters,
+            cluster_data,
             region,
             current_aws_account_id,
             update_tag,
         )
-    cleanup(neo4j_session, common_job_parameters)
+        load_elasticache_topics(
+            neo4j_session,
+            topic_data,
+            current_aws_account_id,
+            update_tag,
+        )
+    cleanup(neo4j_session, common_job_parameters)
     merge_module_sync_metadata(
         neo4j_session,
         group_type="AWSAccount",
cartography/intel/aws/resourcegroupstaggingapi.py

@@ -1,5 +1,6 @@
 import logging
 from string import Template
+from typing import Any
 from typing import Dict
 from typing import List

@@ -56,6 +57,35 @@ def get_short_id_from_lb2_arn(alb_arn: str) -> str:
     return alb_arn.split("/")[-2]


+def get_resource_type_from_arn(arn: str) -> str:
+    """Return the resource type format expected by the Tagging API.
+
+    The Resource Groups Tagging API requires resource types in the form
+    ``service:resource``. Most ARNs embed the resource type in the fifth segment
+    after the service name. Load balancer ARNs add an extra ``app`` or ``net``
+    component that must be preserved. S3 and SQS ARNs only contain the service
+    name. This helper extracts the appropriate string so that ARNs can be
+    grouped correctly for API calls.
+    """
+
+    parts = arn.split(":", 5)
+    service = parts[2]
+    if service in {"s3", "sqs"}:
+        return service
+
+    resource = parts[5]
+    if service == "elasticloadbalancing" and resource.startswith("loadbalancer/"):
+        segments = resource.split("/")
+        if len(segments) > 2 and segments[1] in {"app", "net"}:
+            resource_type = f"{segments[0]}/{segments[1]}"
+        else:
+            resource_type = segments[0]
+    else:
+        resource_type = resource.split("/")[0].split(":")[0]
+
+    return f"{service}:{resource_type}" if resource_type else service
+
+
 # We maintain a mapping from AWS resource types to their associated labels and unique identifiers.
 # label: the node label used in cartography for this resource type
 # property: the field of this node that uniquely identified this resource type
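The docstring above is easiest to check against concrete inputs. A few expected outputs of get_resource_type_from_arn, traced directly from the code shown in the hunk; the sample ARNs are invented:

    from cartography.intel.aws.resourcegroupstaggingapi import get_resource_type_from_arn

    # S3 and SQS ARNs carry no usable resource segment: service name only.
    assert get_resource_type_from_arn("arn:aws:s3:::my-bucket") == "s3"
    assert get_resource_type_from_arn(
        "arn:aws:sqs:us-east-1:123456789012:my-queue"
    ) == "sqs"

    # ALB/NLB ARNs keep the extra app/net component.
    assert get_resource_type_from_arn(
        "arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/my-alb/50dc6c495c0c9188"
    ) == "elasticloadbalancing:loadbalancer/app"

    # Everything else reduces to the first resource-type token.
    assert get_resource_type_from_arn(
        "arn:aws:ec2:us-east-1:123456789012:instance/i-0abc1234def567890"
    ) == "ec2:instance"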
@@ -158,27 +188,27 @@ TAG_RESOURCE_TYPE_MAPPINGS: Dict = {
 @aws_handle_regions
 def get_tags(
     boto3_session: boto3.session.Session,
-    resource_types: List[str],
+    resource_types: list[str],
     region: str,
-) -> List[Dict]:
-    """
-
-
-
-
-
-
-
+) -> list[dict[str, Any]]:
+    """Retrieve tag data for the provided resource types."""
+    resources: list[dict[str, Any]] = []
+
+    if "iam:role" in resource_types:
+        resources.extend(get_role_tags(boto3_session))
+        resource_types = [rt for rt in resource_types if rt != "iam:role"]
+
+    if not resource_types:
+        return resources

     client = boto3_session.client("resourcegroupstaggingapi", region_name=region)
     paginator = client.get_paginator("get_resources")
-
-
-
-
-        ResourceTypeFilters=
-
-        resources.extend(page["ResourceTagMappingList"])
+
+    # Batch resource types into groups of 100
+    # (https://docs.aws.amazon.com/resourcegroupstagging/latest/APIReference/API_GetResources.html)
+    for resource_types_batch in batch(resource_types, size=100):
+        for page in paginator.paginate(ResourceTypeFilters=resource_types_batch):
+            resources.extend(page["ResourceTagMappingList"])
     return resources

@@ -210,6 +240,9 @@ def _load_tags_tx(
         r.firstseen = timestamp()
     """,
     )
+    if not tag_data:
+        return
+
     query = INGEST_TAG_TEMPLATE.safe_substitute(
         resource_label=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["label"],
         property=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["property"],
@@ -262,6 +295,26 @@ def compute_resource_id(tag_mapping: Dict, resource_type: str) -> str:
     return resource_id


+def _group_tag_data_by_resource_type(
+    tag_data: List[Dict],
+    tag_resource_type_mappings: Dict,
+) -> Dict[str, List[Dict]]:
+    """Group raw tag data by the resource types Cartography supports."""
+
+    grouped: Dict[str, List[Dict]] = {rtype: [] for rtype in tag_resource_type_mappings}
+    for mapping in tag_data:
+        rtype = get_resource_type_from_arn(mapping["ResourceARN"])
+        if rtype in grouped:
+            grouped[rtype].append(mapping)
+        else:
+            logger.debug(
+                "Unknown tag resource type %s from ARN %s",
+                rtype,
+                mapping["ResourceARN"],
+            )
+    return grouped
+
+
 @timeit
 def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
     run_cleanup_job(
@@ -285,8 +338,14 @@ def sync(
     logger.info(
         f"Syncing AWS tags for account {current_aws_account_id} and region {region}",
     )
+    all_tag_data = get_tags(
+        boto3_session, list(tag_resource_type_mappings.keys()), region
+    )
+    grouped = _group_tag_data_by_resource_type(
+        all_tag_data, tag_resource_type_mappings
+    )
     for resource_type in tag_resource_type_mappings.keys():
-        tag_data = get_tags(boto3_session, [resource_type], region)
+        tag_data = grouped.get(resource_type, [])
         transform_tags(tag_data, resource_type)  # type: ignore
         logger.info(
             f"Loading {len(tag_data)} tags for resource type {resource_type}",
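get_tags now fetches all resource types in one pass, chunked by a batch helper to respect the 100-filter limit the comment cites for GetResources. The helper's implementation is not shown in this excerpt (it presumably comes from cartography.util), so the following is only a stand-in with the semantics the call site assumes:

    # Stand-in sketch for the batch() helper used above; the real
    # implementation is not part of this diff, so treat this as assumed shape.
    from typing import List, Sequence, TypeVar

    T = TypeVar("T")

    def batch(items: Sequence[T], size: int = 100) -> List[List[T]]:
        """Chunk a sequence into lists of at most `size` elements."""
        return [list(items[i : i + size]) for i in range(0, len(items), size)]

    assert batch(["a", "b", "c"], size=2) == [["a", "b"], ["c"]]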
cartography/intel/aws/secretsmanager.py

@@ -7,6 +7,7 @@ import neo4j

 from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
+from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
 from cartography.models.aws.secretsmanager.secret_version import (
     SecretsManagerSecretVersionSchema,
 )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import dict_date_to_epoch
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit

 logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
     return secrets


+def transform_secrets(
+    secrets: List[Dict],
+) -> List[Dict]:
+    """
+    Transform AWS Secrets Manager Secrets to match the data model.
+    """
+    transformed_data = []
+    for secret in secrets:
+        # Start with a copy of the original secret data
+        transformed = dict(secret)
+
+        # Convert date fields to epoch timestamps
+        transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
+        transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
+        transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
+        transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
+        transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
+
+        # Flatten nested RotationRules.AutomaticallyAfterDays property
+        if "RotationRules" in secret and secret["RotationRules"]:
+            rotation_rules = secret["RotationRules"]
+            if "AutomaticallyAfterDays" in rotation_rules:
+                transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
+                    "AutomaticallyAfterDays"
+                ]
+
+        transformed_data.append(transformed)
+
+    return transformed_data
+
+
 @timeit
 def load_secrets(
     neo4j_session: neo4j.Session,
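A sketch of the shape change transform_secrets performs, assuming dict_date_to_epoch returns epoch seconds for present datetime fields and None for absent ones (which is what the unconditional assignments above rely on); the sample secret is invented:

    from datetime import datetime, timezone

    from cartography.intel.aws.secretsmanager import transform_secrets

    secret = {
        "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:db-pass-AbCdEf",
        "Name": "db-pass",
        "CreatedDate": datetime(2024, 1, 1, tzinfo=timezone.utc),
        "RotationRules": {"AutomaticallyAfterDays": 30},
    }

    (transformed,) = transform_secrets([secret])
    assert isinstance(transformed["CreatedDate"], int)  # datetime -> epoch (assumed int)
    assert transformed["LastRotatedDate"] is None       # absent date -> None (assumed)
    assert transformed["RotationRulesAutomaticallyAfterDays"] == 30  # flattened
    assert transformed["Name"] == "db-pass"             # original keys preserved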
@@ -40,48 +71,33 @@ def load_secrets(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
-        s.lastupdated = $aws_update_tag
-    WITH s
-    MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (owner)-[r:RESOURCE]->(s)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
-    for secret in data:
-        secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
-        secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
-        secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
-        secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
-        secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
-
-    neo4j_session.run(
-        ingest_secrets,
-        Secrets=data,
+    """
+    Load transformed secrets into Neo4j using the data model.
+    Expects data to already be transformed by transform_secrets().
+    """
+    logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
+
+    # Load using the schema-based approach
+    load(
+        neo4j_session,
+        SecretsManagerSecretSchema(),
+        data,
+        lastupdated=aws_update_tag,
         Region=region,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-        aws_update_tag=aws_update_tag,
+        AWS_ID=current_aws_account_id,
     )


 @timeit
 def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-    run_cleanup_job(
-        "aws_import_secrets_cleanup.json",
-        neo4j_session,
-        common_job_parameters,
+    """
+    Run Secrets cleanup job using the data model.
+    """
+    logger.debug("Running Secrets cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        SecretsManagerSecretSchema(), common_job_parameters
     )
+    cleanup_job.run(neo4j_session)

@@ -121,8 +137,6 @@ def get_secret_versions(

 def transform_secret_versions(
     versions: List[Dict],
-    region: str,
-    aws_account_id: str,
 ) -> List[Dict]:
     """
     Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
     )
     secrets = get_secret_list(boto3_session, region)

-
+    transformed_secrets = transform_secrets(secrets)
+
+    load_secrets(
+        neo4j_session,
+        transformed_secrets,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )

     all_versions = []
     for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
     )
     all_versions.extend(versions)

-    transformed_data = transform_secret_versions(
-        all_versions,
-        region,
-        current_aws_account_id,
-    )
+    transformed_data = transform_secret_versions(all_versions)

     load_secret_versions(
         neo4j_session,
cartography/intel/entra/groups.py

@@ -59,10 +59,29 @@ async def get_group_members(
     return user_ids, group_ids


+@timeit
+async def get_group_owners(client: GraphServiceClient, group_id: str) -> list[str]:
+    """Get owner user IDs for a given group."""
+    owner_ids: list[str] = []
+    request_builder = client.groups.by_group_id(group_id).owners
+    page = await request_builder.get()
+    while page:
+        if page.value:
+            for obj in page.value:
+                odata_type = getattr(obj, "odata_type", "")
+                if odata_type == "#microsoft.graph.user":
+                    owner_ids.append(obj.id)
+        if not page.odata_next_link:
+            break
+        page = await request_builder.with_url(page.odata_next_link).get()
+    return owner_ids
+
+
 def transform_groups(
     groups: list[Group],
     user_member_map: dict[str, list[str]],
     group_member_map: dict[str, list[str]],
+    group_owner_map: dict[str, list[str]],
 ) -> list[dict[str, Any]]:
     """Transform API responses into dictionaries for ingestion."""
     result: list[dict[str, Any]] = []
@@ -82,6 +101,7 @@ def transform_groups(
             "deleted_date_time": g.deleted_date_time,
             "member_ids": user_member_map.get(g.id, []),
             "member_group_ids": group_member_map.get(g.id, []),
+            "owner_ids": group_owner_map.get(g.id, []),
         }
         result.append(transformed)
     return result
@@ -134,6 +154,12 @@ async def sync_entra_groups(

     user_member_map: dict[str, list[str]] = {}
     group_member_map: dict[str, list[str]] = {}
+    group_owner_map: dict[str, list[str]] = {}
+
+    for group in groups:
+        owners = await get_group_owners(client, group.id)
+        group_owner_map[group.id] = owners
+
     for group in groups:
         try:
             users, subgroups = await get_group_members(client, group.id)
@@ -144,7 +170,9 @@ async def sync_entra_groups(
             user_member_map[group.id] = []
             group_member_map[group.id] = []

-    transformed_groups = transform_groups(groups, user_member_map, group_member_map)
+    transformed_groups = transform_groups(
+        groups, user_member_map, group_member_map, group_owner_map
+    )

     load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
     load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
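A standalone usage sketch for the new owner lookup. The credential wiring follows the msgraph SDK's usual constructor, but the IDs and secret are placeholders and the exact client setup in cartography's Entra module is not shown in this excerpt:

    # Hypothetical driver for get_group_owners (placeholder credentials/IDs).
    import asyncio

    from azure.identity.aio import ClientSecretCredential
    from msgraph import GraphServiceClient

    from cartography.intel.entra.groups import get_group_owners

    async def main() -> None:
        credential = ClientSecretCredential(
            tenant_id="<tenant-id>", client_id="<client-id>", client_secret="<secret>"
        )
        client = GraphServiceClient(
            credentials=credential, scopes=["https://graph.microsoft.com/.default"]
        )
        # Only directory objects typed #microsoft.graph.user are returned;
        # group and service-principal owners are skipped by the odata_type check.
        owners = await get_group_owners(client, "<group-object-id>")
        print(owners)

    asyncio.run(main())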
cartography/intel/gcp/__init__.py

@@ -391,6 +391,7 @@ def _sync_multiple_projects(
     # Compute data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for Compute.", project_id)
         _sync_single_project_compute(
             neo4j_session,
@@ -399,10 +400,12 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # Storage data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for Storage", project_id)
         _sync_single_project_storage(
             neo4j_session,
@@ -411,10 +414,12 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # GKE data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for GKE", project_id)
         _sync_single_project_gke(
             neo4j_session,
@@ -423,10 +428,12 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # DNS data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for DNS", project_id)
         _sync_single_project_dns(
             neo4j_session,
@@ -435,14 +442,17 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # IAM data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
        logger.info("Syncing GCP project %s for IAM", project_id)
         _sync_single_project_iam(
             neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters
         )
+        del common_job_parameters["PROJECT_ID"]


 @timeit
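These paired set/del lines scope PROJECT_ID into common_job_parameters only while each project's sync runs, presumably so the new schema-based cleanup jobs (such as the GCPVpc cleanup replacing gcp_compute_vpc_cleanup.json) can limit deletions to the current project. Note that the del is skipped if a per-project sync raises; a context-manager variant of the same idea, shown only as a defensive sketch rather than what the diff implements:

    # Sketch only: scoped PROJECT_ID that is removed even when a sync raises.
    from contextlib import contextmanager
    from typing import Any, Dict, Iterator

    @contextmanager
    def scoped_project_id(params: Dict[str, Any], project_id: str) -> Iterator[None]:
        params["PROJECT_ID"] = project_id
        try:
            yield
        finally:
            del params["PROJECT_ID"]

    # for project in projects:
    #     with scoped_project_id(common_job_parameters, project["projectId"]):
    #         _sync_single_project_compute(...)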