cartography 0.108.0rc2__py3-none-any.whl → 0.109.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cartography might be problematic.
- cartography/_version.py +2 -2
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/intel/aws/cloudtrail_management_events.py +36 -3
- cartography/intel/aws/ecr.py +55 -80
- cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
- cartography/intel/aws/secretsmanager.py +62 -44
- cartography/intel/entra/groups.py +29 -1
- cartography/intel/gcp/__init__.py +10 -0
- cartography/intel/gcp/compute.py +19 -42
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +41 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/entra/group.py +26 -0
- cartography/models/entra/user.py +6 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/METADATA +1 -1
- {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/RECORD +25 -19
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.108.0rc2.dist-info → cartography-0.109.0rc1.dist-info}/top_level.txt +0 -0
cartography/_version.py
CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '0.108.0rc2'
-__version_tuple__ = version_tuple = (0, 108, 0, 'rc2')
+__version__ = version = '0.109.0rc1'
+__version_tuple__ = version_tuple = (0, 109, 0, 'rc1')
cartography/data/indexes.cypher
CHANGED
@@ -259,8 +259,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.id);
 CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.name);
 CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.arn);
 CREATE INDEX IF NOT EXISTS FOR (n:S3Bucket) ON (n.lastupdated);
-CREATE INDEX IF NOT EXISTS FOR (n:SecretsManagerSecret) ON (n.id);
-CREATE INDEX IF NOT EXISTS FOR (n:SecretsManagerSecret) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:SecurityHub) ON (n.id);
 CREATE INDEX IF NOT EXISTS FOR (n:SecurityHub) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.id);
cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json
CHANGED
@@ -1,17 +1,5 @@
 {
     "statements": [
-        {
-            "query": "MATCH (n:GCPVpc) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)",
-            "iterative": true,
-            "iterationsize": 100,
-            "__comment__": "Delete GCP VPCs that no longer exist and detach them from all previously connected nodes."
-        },
-        {
-            "query": "MATCH (:GCPVpc)<-[r:RESOURCE]-(:GCPProject) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
-            "iterative": true,
-            "iterationsize": 100,
-            "__comment__": "Remove GCP VPC-to-Project relationships that are out of date."
-        },
         {
             "query": "MATCH (:GCPInstance)-[r:MEMBER_OF_GCP_VPC]->(:GCPVpc) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
             "iterative": true,
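The two deleted statements handled stale GCPVpc nodes and their RESOURCE relationships to GCPProject. Given that this release adds cartography/models/gcp/compute/vpc.py (see the file list above), that cleanup is presumably now generated from the node schema instead of hand-written JSON, following the same pattern the ECR and Secrets Manager modules adopt below. A sketch of that pattern, where the schema class name is an assumption (the real name lives in the new vpc.py):

    from cartography.graph.job import GraphJob

    # Generates and runs the same "delete stale nodes and relationships"
    # cleanup that the removed JSON statements expressed by hand.
    GraphJob.from_node_schema(GCPVPCSchema(), common_job_parameters).run(neo4j_session)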
cartography/intel/aws/cloudtrail_management_events.py
CHANGED
@@ -381,13 +381,15 @@ def transform_web_identity_role_events_to_role_assumptions(

         # Only process GitHub Actions events
         if "token.actions.githubusercontent.com" in identity_provider:
-            # GitHub repo fullname
-            github_repo = user_identity.get("userName", "")
-            if not github_repo:
+            # Extract GitHub repo fullname from userName format: "repo:{organization}/{repository}:{context}"
+            user_name = user_identity.get("userName", "")
+            if not user_name:
                 logger.debug(
                     f"Missing userName in GitHub WebIdentity event: {event.get('EventId', 'unknown')}"
                 )
                 continue
+
+            github_repo = _extract_github_repo_from_username(user_name)
             key = (github_repo, destination_principal)

             if key in github_aggregated:
@@ -572,6 +574,37 @@ def _convert_assumed_role_arn_to_role_arn(assumed_role_arn: str) -> str:
     return assumed_role_arn


+def _extract_github_repo_from_username(user_name: str) -> str:
+    """
+    Extract GitHub repository fullname from CloudTrail userName field.
+
+    GitHub Actions CloudTrail events have userName in the format:
+    "repo:{organization}/{repository}:{context}"
+    """
+    if not user_name:
+        return ""
+
+    parts = user_name.split(":")
+
+    # Need at least 3 parts: ["repo", "{organization}/{repository}", "{context}"]
+    if len(parts) < 3 or parts[0] != "repo":
+        return ""
+
+    # Extract "{organization}/{repository}"
+    repo_fullname = parts[1]
+
+    # Validate it looks like "{organization}/{repository}" format
+    if repo_fullname.count("/") != 1:
+        return ""
+
+    # Ensure both organization and repo exist
+    owner, repo = repo_fullname.split("/")
+    if not owner or not repo:
+        return ""
+
+    return repo_fullname
+
+
 @timeit
 def cleanup(
     neo4j_session: neo4j.Session, current_aws_account_id: str, update_tag: int
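A quick sketch of how the new helper behaves, with expected return values inferred from the parsing logic above:

    _extract_github_repo_from_username("repo:my-org/my-repo:ref:refs/heads/main")
    # -> "my-org/my-repo" (extra ":" segments in the context part are fine)

    _extract_github_repo_from_username("repo:my-org/my-repo")    # -> "" (no context part)
    _extract_github_repo_from_username("user:my-org/my-repo:x")  # -> "" (wrong prefix)
    _extract_github_repo_from_username("repo:not-a-fullname:x")  # -> "" (no "/" in fullname)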
cartography/intel/aws/ecr.py
CHANGED
@@ -6,9 +6,12 @@ from typing import List
 import boto3
 import neo4j

+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.ecr.image import ECRImageSchema
+from cartography.models.aws.ecr.repository import ECRRepositorySchema
+from cartography.models.aws.ecr.repository_image import ECRRepositoryImageSchema
 from cartography.util import aws_handle_regions
-from cartography.util import batch
-from cartography.util import run_cleanup_job
 from cartography.util import timeit
 from cartography.util import to_asynchronous
 from cartography.util import to_synchronous
@@ -74,33 +77,17 @@ def load_ecr_repositories(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    query = """
-    UNWIND $Repositories as ecr_repo
-    MERGE (repo:ECRRepository{id: ecr_repo.repositoryArn})
-    ON CREATE SET repo.firstseen = timestamp(),
-        repo.arn = ecr_repo.repositoryArn,
-        repo.name = ecr_repo.repositoryName,
-        repo.region = $Region,
-        repo.created_at = ecr_repo.createdAt
-    SET repo.lastupdated = $aws_update_tag,
-        repo.uri = ecr_repo.repositoryUri
-    WITH repo
-
-    MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (owner)-[r:RESOURCE]->(repo)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
     logger.info(
         f"Loading {len(repos)} ECR repositories for region {region} into graph.",
     )
-    neo4j_session.run(
-        query,
-        Repositories=repos,
+    load(
+        neo4j_session,
+        ECRRepositorySchema(),
+        repos,
+        lastupdated=aws_update_tag,
         Region=region,
-        aws_update_tag=aws_update_tag,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-    ).consume()  # See issue #440
+        AWS_ID=current_aws_account_id,
+    )


 @timeit
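This is the recurring refactor in this release: hand-written MERGE Cypher is replaced by a declarative node schema plus the generic load() helper, which generates the ingestion query from the schema (including the RESOURCE relationship to the AWSAccount and the lastupdated stamp that cleanup relies on later). The call shape, as used above, with extra keyword arguments becoming query parameters the schema references:

    load(
        neo4j_session,
        ECRRepositorySchema(),       # declarative node + relationship definitions
        repos,                       # list of dicts straight from the AWS API
        lastupdated=aws_update_tag,  # stamped on nodes/rels; stale ones are cleaned up
        Region=region,
        AWS_ID=current_aws_account_id,
    )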
@@ -114,8 +101,13 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
     for repo_uri in sorted(repo_data.keys()):
         repo_images = repo_data[repo_uri]
         for img in repo_images:
-            if img.get("imageDigest"):
+            digest = img.get("imageDigest")
+            if digest:
+                tag = img.get("imageTag")
+                uri = repo_uri + (f":{tag}" if tag else "")
                 img["repo_uri"] = repo_uri
+                img["uri"] = uri
+                img["id"] = uri
                 repo_images_list.append(img)
             else:
                 logger.warning(
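The transform now precomputes the node id and uri in Python rather than leaving that concatenation to Cypher COALESCE (compare the removed query in the next hunk). For example:

    repo_uri = "000000000000.dkr.ecr.us-east-1.amazonaws.com/my-repo"

    # Tagged image: uri/id carry the tag
    img = {"imageDigest": "sha256:abc", "imageTag": "v1.2"}
    # -> img["uri"] == img["id"] == ".../my-repo:v1.2"

    # Untagged image: uri/id fall back to the bare repository URI
    img = {"imageDigest": "sha256:abc"}
    # -> img["uri"] == img["id"] == ".../my-repo"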
@@ -127,74 +119,51 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
     return repo_images_list


-def _load_ecr_repo_img_tx(
-    tx: neo4j.Transaction,
-    repo_images_list: List[Dict],
-    aws_update_tag: int,
-    region: str,
-) -> None:
-    query = """
-    UNWIND $RepoList as repo_img
-    MERGE (ri:ECRRepositoryImage{id: repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, '')})
-    ON CREATE SET ri.firstseen = timestamp()
-    SET ri.lastupdated = $aws_update_tag,
-        ri.tag = repo_img.imageTag,
-        ri.uri = repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, ''),
-        ri.image_size_bytes = repo_img.imageSizeInBytes,
-        ri.image_pushed_at = repo_img.imagePushedAt,
-        ri.image_manifest_media_type = repo_img.imageManifestMediaType,
-        ri.artifact_media_type = repo_img.artifactMediaType,
-        ri.last_recorded_pull_time = repo_img.lastRecordedPullTime
-    WITH ri, repo_img
-
-    MERGE (img:ECRImage{id: repo_img.imageDigest})
-    ON CREATE SET img.firstseen = timestamp(),
-        img.digest = repo_img.imageDigest
-    SET img.lastupdated = $aws_update_tag,
-        img.region = $Region
-    WITH ri, img, repo_img
-
-    MERGE (ri)-[r1:IMAGE]->(img)
-    ON CREATE SET r1.firstseen = timestamp()
-    SET r1.lastupdated = $aws_update_tag
-    WITH ri, repo_img
-
-    MATCH (repo:ECRRepository{uri: repo_img.repo_uri})
-    MERGE (repo)-[r2:REPO_IMAGE]->(ri)
-    ON CREATE SET r2.firstseen = timestamp()
-    SET r2.lastupdated = $aws_update_tag
-    """
-    tx.run(
-        query,
-        RepoList=repo_images_list,
-        Region=region,
-        aws_update_tag=aws_update_tag,
-    )
-
-
 @timeit
 def load_ecr_repository_images(
     neo4j_session: neo4j.Session,
     repo_images_list: List[Dict],
     region: str,
+    current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
     logger.info(
         f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
     )
-    for image_batch in batch(repo_images_list, size=10000):
-        neo4j_session.write_transaction(
-            _load_ecr_repo_img_tx,
-            image_batch,
-            aws_update_tag,
-            region,
-        )
+    image_digests = {img["imageDigest"] for img in repo_images_list}
+    ecr_images = [{"imageDigest": d} for d in image_digests]
+
+    load(
+        neo4j_session,
+        ECRImageSchema(),
+        ecr_images,
+        lastupdated=aws_update_tag,
+        Region=region,
+        AWS_ID=current_aws_account_id,
+    )
+
+    load(
+        neo4j_session,
+        ECRRepositoryImageSchema(),
+        repo_images_list,
+        lastupdated=aws_update_tag,
+        Region=region,
+        AWS_ID=current_aws_account_id,
+    )


 @timeit
 def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
     logger.debug("Running ECR cleanup job.")
-
+    GraphJob.from_node_schema(ECRRepositorySchema(), common_job_parameters).run(
+        neo4j_session
+    )
+    GraphJob.from_node_schema(ECRRepositoryImageSchema(), common_job_parameters).run(
+        neo4j_session
+    )
+    GraphJob.from_node_schema(ECRImageSchema(), common_job_parameters).run(
+        neo4j_session
+    )


 def _get_image_data(
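Note the de-duplication before the first load(): the same image digest can appear under several tags and repositories, but ECRImage nodes are keyed by digest alone, so the list is reduced to unique digests first:

    repo_images_list = [
        {"imageDigest": "sha256:abc", "imageTag": "v1"},
        {"imageDigest": "sha256:abc", "imageTag": "latest"},  # same digest, second tag
    ]
    image_digests = {img["imageDigest"] for img in repo_images_list}
    # -> {"sha256:abc"}: one ECRImage node, two ECRRepositoryImage nodes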
@@ -251,5 +220,11 @@ def sync(
         update_tag,
     )
     repo_images_list = transform_ecr_repository_images(image_data)
-    load_ecr_repository_images(neo4j_session, repo_images_list, region, update_tag)
+    load_ecr_repository_images(
+        neo4j_session,
+        repo_images_list,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )
     cleanup(neo4j_session, common_job_parameters)
cartography/intel/aws/resourcegroupstaggingapi.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 from string import Template
+from typing import Any
 from typing import Dict
 from typing import List

@@ -56,6 +57,35 @@ def get_short_id_from_lb2_arn(alb_arn: str) -> str:
     return alb_arn.split("/")[-2]


+def get_resource_type_from_arn(arn: str) -> str:
+    """Return the resource type format expected by the Tagging API.
+
+    The Resource Groups Tagging API requires resource types in the form
+    ``service:resource``. Most ARNs embed the resource type in the fifth segment
+    after the service name. Load balancer ARNs add an extra ``app`` or ``net``
+    component that must be preserved. S3 and SQS ARNs only contain the service
+    name. This helper extracts the appropriate string so that ARNs can be
+    grouped correctly for API calls.
+    """
+
+    parts = arn.split(":", 5)
+    service = parts[2]
+    if service in {"s3", "sqs"}:
+        return service
+
+    resource = parts[5]
+    if service == "elasticloadbalancing" and resource.startswith("loadbalancer/"):
+        segments = resource.split("/")
+        if len(segments) > 2 and segments[1] in {"app", "net"}:
+            resource_type = f"{segments[0]}/{segments[1]}"
+        else:
+            resource_type = segments[0]
+    else:
+        resource_type = resource.split("/")[0].split(":")[0]
+
+    return f"{service}:{resource_type}" if resource_type else service
+
+
 # We maintain a mapping from AWS resource types to their associated labels and unique identifiers.
 # label: the node label used in cartography for this resource type
 # property: the field of this node that uniquely identified this resource type
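Worked examples of the helper, one per branch above:

    get_resource_type_from_arn("arn:aws:s3:::my-bucket")
    # -> "s3" (service-only form)

    get_resource_type_from_arn("arn:aws:ec2:us-east-1:123456789012:instance/i-0abcd")
    # -> "ec2:instance" (fifth segment, up to the first "/" or ":")

    get_resource_type_from_arn(
        "arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/my-alb/50dc6c495c0c9188"
    )
    # -> "elasticloadbalancing:loadbalancer/app" (app/net component preserved)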
@@ -158,27 +188,27 @@ TAG_RESOURCE_TYPE_MAPPINGS: Dict = {
 @aws_handle_regions
 def get_tags(
     boto3_session: boto3.session.Session,
-    resource_types: List[str],
+    resource_types: list[str],
     region: str,
-) -> List[Dict]:
-    """
-
-
-
-
-
-
-    """
+) -> list[dict[str, Any]]:
+    """Retrieve tag data for the provided resource types."""
+    resources: list[dict[str, Any]] = []
+
+    if "iam:role" in resource_types:
+        resources.extend(get_role_tags(boto3_session))
+        resource_types = [rt for rt in resource_types if rt != "iam:role"]
+
+    if not resource_types:
+        return resources

     client = boto3_session.client("resourcegroupstaggingapi", region_name=region)
     paginator = client.get_paginator("get_resources")
-    resources = []
-    for page in paginator.paginate(
-
-
-        ResourceTypeFilters=resource_types,
-    ):
-        resources.extend(page["ResourceTagMappingList"])
+
+    # Batch resource types into groups of 100
+    # (https://docs.aws.amazon.com/resourcegroupstagging/latest/APIReference/API_GetResources.html)
+    for resource_types_batch in batch(resource_types, size=100):
+        for page in paginator.paginate(ResourceTypeFilters=resource_types_batch):
+            resources.extend(page["ResourceTagMappingList"])
     return resources
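The rewrite turns one Tagging API call per resource type into a single pass over batches of up to 100 types, with iam:role pulled out first (role tags presumably come from the IAM API via get_role_tags rather than the Tagging API). cartography.util.batch itself is not shown in this diff; a minimal equivalent of the chunking it is used for:

    from typing import Any, List

    def chunked(items: List[Any], size: int) -> List[List[Any]]:
        # Consecutive slices of at most `size` elements, mirroring
        # batch(resource_types, size=100) in the new get_tags above.
        return [items[i : i + size] for i in range(0, len(items), size)]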
@@ -210,6 +240,9 @@ def _load_tags_tx(
             r.firstseen = timestamp()
     """,
     )
+    if not tag_data:
+        return
+
     query = INGEST_TAG_TEMPLATE.safe_substitute(
         resource_label=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["label"],
         property=TAG_RESOURCE_TYPE_MAPPINGS[resource_type]["property"],
@@ -262,6 +295,26 @@ def compute_resource_id(tag_mapping: Dict, resource_type: str) -> str:
     return resource_id


+def _group_tag_data_by_resource_type(
+    tag_data: List[Dict],
+    tag_resource_type_mappings: Dict,
+) -> Dict[str, List[Dict]]:
+    """Group raw tag data by the resource types Cartography supports."""
+
+    grouped: Dict[str, List[Dict]] = {rtype: [] for rtype in tag_resource_type_mappings}
+    for mapping in tag_data:
+        rtype = get_resource_type_from_arn(mapping["ResourceARN"])
+        if rtype in grouped:
+            grouped[rtype].append(mapping)
+        else:
+            logger.debug(
+                "Unknown tag resource type %s from ARN %s",
+                rtype,
+                mapping["ResourceARN"],
+            )
+    return grouped
+
+
 @timeit
 def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
     run_cleanup_job(
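Together with get_resource_type_from_arn, this turns the flat GetResources response into per-type buckets so that sync can keep its existing per-type loading loop. For example:

    tag_data = [
        {"ResourceARN": "arn:aws:s3:::my-bucket", "Tags": []},
        {"ResourceARN": "arn:aws:ec2:us-east-1:123456789012:instance/i-0abcd", "Tags": []},
    ]
    grouped = _group_tag_data_by_resource_type(tag_data, TAG_RESOURCE_TYPE_MAPPINGS)
    # grouped["s3"] and grouped["ec2:instance"] each hold one mapping;
    # ARNs whose type is not in the mapping table are logged and dropped.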
@@ -285,8 +338,14 @@ def sync(
     logger.info(
         f"Syncing AWS tags for account {current_aws_account_id} and region {region}",
     )
+    all_tag_data = get_tags(
+        boto3_session, list(tag_resource_type_mappings.keys()), region
+    )
+    grouped = _group_tag_data_by_resource_type(
+        all_tag_data, tag_resource_type_mappings
+    )
     for resource_type in tag_resource_type_mappings.keys():
-        tag_data = get_tags(boto3_session, [resource_type], region)
+        tag_data = grouped.get(resource_type, [])
         transform_tags(tag_data, resource_type)  # type: ignore
         logger.info(
             f"Loading {len(tag_data)} tags for resource type {resource_type}",
cartography/intel/aws/secretsmanager.py
CHANGED
@@ -7,6 +7,7 @@ import neo4j

 from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
+from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
 from cartography.models.aws.secretsmanager.secret_version import (
     SecretsManagerSecretVersionSchema,
 )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import dict_date_to_epoch
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit

 logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
     return secrets


+def transform_secrets(
+    secrets: List[Dict],
+) -> List[Dict]:
+    """
+    Transform AWS Secrets Manager Secrets to match the data model.
+    """
+    transformed_data = []
+    for secret in secrets:
+        # Start with a copy of the original secret data
+        transformed = dict(secret)
+
+        # Convert date fields to epoch timestamps
+        transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
+        transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
+        transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
+        transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
+        transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
+
+        # Flatten nested RotationRules.AutomaticallyAfterDays property
+        if "RotationRules" in secret and secret["RotationRules"]:
+            rotation_rules = secret["RotationRules"]
+            if "AutomaticallyAfterDays" in rotation_rules:
+                transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
+                    "AutomaticallyAfterDays"
+                ]
+
+        transformed_data.append(transformed)
+
+    return transformed_data
+
+
 @timeit
 def load_secrets(
     neo4j_session: neo4j.Session,
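A sketch of what the transform does to one entry from list_secrets (dict_date_to_epoch converts datetime fields to integer epoch seconds; the exact field set depends on the API response):

    from datetime import datetime, timezone

    secret = {
        "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:db-pass-AbCdEf",
        "Name": "db-pass",
        "CreatedDate": datetime(2024, 1, 1, tzinfo=timezone.utc),
        "RotationRules": {"AutomaticallyAfterDays": 30},
    }
    out = transform_secrets([secret])[0]
    # out["CreatedDate"] == 1704067200 (epoch seconds)
    # out["RotationRulesAutomaticallyAfterDays"] == 30, flattened so the
    # schema can map it to a plain node property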
@@ -40,48 +71,33 @@ def load_secrets(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    ingest_secrets = """
-
-
-
-
-
-
-
-
-
-
-
-        s.lastupdated = $aws_update_tag
-    WITH s
-    MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (owner)-[r:RESOURCE]->(s)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
-    for secret in data:
-        secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
-        secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
-        secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
-        secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
-        secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
-
-    neo4j_session.run(
-        ingest_secrets,
-        Secrets=data,
+    """
+    Load transformed secrets into Neo4j using the data model.
+    Expects data to already be transformed by transform_secrets().
+    """
+    logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
+
+    # Load using the schema-based approach
+    load(
+        neo4j_session,
+        SecretsManagerSecretSchema(),
+        data,
+        lastupdated=aws_update_tag,
         Region=region,
-
-        aws_update_tag=aws_update_tag,
+        AWS_ID=current_aws_account_id,
     )


 @timeit
 def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-    run_cleanup_job(
-        "aws_import_secrets_cleanup.json",
-        neo4j_session,
-        common_job_parameters,
+    """
+    Run Secrets cleanup job using the data model.
+    """
+    logger.debug("Running Secrets cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        SecretsManagerSecretSchema(), common_job_parameters
     )
+    cleanup_job.run(neo4j_session)


 @timeit
@@ -121,8 +137,6 @@ def get_secret_versions(

 def transform_secret_versions(
     versions: List[Dict],
-    region: str,
-    aws_account_id: str,
 ) -> List[Dict]:
     """
     Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
     )
     secrets = get_secret_list(boto3_session, region)

-    load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
+    transformed_secrets = transform_secrets(secrets)
+
+    load_secrets(
+        neo4j_session,
+        transformed_secrets,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )

     all_versions = []
     for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
     )
     all_versions.extend(versions)

-    transformed_data = transform_secret_versions(
-        all_versions,
-        region,
-        current_aws_account_id,
-    )
+    transformed_data = transform_secret_versions(all_versions)

     load_secret_versions(
         neo4j_session,
cartography/intel/entra/groups.py
CHANGED
@@ -59,10 +59,29 @@ async def get_group_members(
     return user_ids, group_ids


+@timeit
+async def get_group_owners(client: GraphServiceClient, group_id: str) -> list[str]:
+    """Get owner user IDs for a given group."""
+    owner_ids: list[str] = []
+    request_builder = client.groups.by_group_id(group_id).owners
+    page = await request_builder.get()
+    while page:
+        if page.value:
+            for obj in page.value:
+                odata_type = getattr(obj, "odata_type", "")
+                if odata_type == "#microsoft.graph.user":
+                    owner_ids.append(obj.id)
+        if not page.odata_next_link:
+            break
+        page = await request_builder.with_url(page.odata_next_link).get()
+    return owner_ids
+
+
 def transform_groups(
     groups: list[Group],
     user_member_map: dict[str, list[str]],
     group_member_map: dict[str, list[str]],
+    group_owner_map: dict[str, list[str]],
 ) -> list[dict[str, Any]]:
     """Transform API responses into dictionaries for ingestion."""
     result: list[dict[str, Any]] = []
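The owners endpoint can return users, groups, or service principals, hence the odata_type filter: only user owners are collected. A usage sketch under the same msgraph SDK assumptions as the module itself:

    group_owner_map: dict[str, list[str]] = {}
    for group in groups:
        # One Graph request (plus pagination) per group.
        group_owner_map[group.id] = await get_group_owners(client, group.id)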
@@ -82,6 +101,7 @@ def transform_groups(
             "deleted_date_time": g.deleted_date_time,
             "member_ids": user_member_map.get(g.id, []),
             "member_group_ids": group_member_map.get(g.id, []),
+            "owner_ids": group_owner_map.get(g.id, []),
         }
         result.append(transformed)
     return result
@@ -134,6 +154,12 @@ async def sync_entra_groups(

     user_member_map: dict[str, list[str]] = {}
     group_member_map: dict[str, list[str]] = {}
+    group_owner_map: dict[str, list[str]] = {}
+
+    for group in groups:
+        owners = await get_group_owners(client, group.id)
+        group_owner_map[group.id] = owners
+
     for group in groups:
         try:
             users, subgroups = await get_group_members(client, group.id)
@@ -144,7 +170,9 @@ async def sync_entra_groups(
             user_member_map[group.id] = []
             group_member_map[group.id] = []

-    transformed_groups = transform_groups(groups, user_member_map, group_member_map)
+    transformed_groups = transform_groups(
+        groups, user_member_map, group_member_map, group_owner_map
+    )

     load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
     load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)