cartography 0.104.0rc2__py3-none-any.whl → 0.105.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +26 -1
- cartography/client/aws/__init__.py +19 -0
- cartography/client/aws/ecr.py +51 -0
- cartography/config.py +8 -0
- cartography/data/indexes.cypher +0 -3
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
- cartography/graph/cleanupbuilder.py +151 -41
- cartography/intel/aws/acm.py +124 -0
- cartography/intel/aws/cloudtrail.py +3 -38
- cartography/intel/aws/ecr.py +8 -2
- cartography/intel/aws/iam.py +1 -1
- cartography/intel/aws/lambda_function.py +1 -1
- cartography/intel/aws/resources.py +2 -2
- cartography/intel/aws/s3.py +195 -4
- cartography/intel/aws/secretsmanager.py +19 -5
- cartography/intel/aws/sqs.py +36 -90
- cartography/intel/entra/__init__.py +11 -0
- cartography/intel/entra/groups.py +151 -0
- cartography/intel/entra/ou.py +21 -5
- cartography/intel/trivy/__init__.py +161 -0
- cartography/intel/trivy/scanner.py +363 -0
- cartography/models/aws/acm/certificate.py +75 -0
- cartography/models/aws/cloudtrail/trail.py +24 -0
- cartography/models/aws/s3/notification.py +24 -0
- cartography/models/aws/secretsmanager/secret_version.py +0 -2
- cartography/models/aws/sqs/__init__.py +0 -0
- cartography/models/aws/sqs/queue.py +89 -0
- cartography/models/core/nodes.py +15 -2
- cartography/models/entra/group.py +91 -0
- cartography/models/trivy/__init__.py +0 -0
- cartography/models/trivy/findings.py +66 -0
- cartography/models/trivy/fix.py +66 -0
- cartography/models/trivy/package.py +71 -0
- cartography/sync.py +2 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/METADATA +3 -2
- {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/RECORD +42 -30
- cartography/intel/aws/efs.py +0 -93
- cartography/models/aws/efs/mount_target.py +0 -52
- /cartography/models/aws/{efs → acm}/__init__.py +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/WHEEL +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import neo4j
|
|
5
|
+
from azure.identity import ClientSecretCredential
|
|
6
|
+
from msgraph import GraphServiceClient
|
|
7
|
+
from msgraph.generated.models.directory_object import DirectoryObject
|
|
8
|
+
from msgraph.generated.models.group import Group
|
|
9
|
+
|
|
10
|
+
from cartography.client.core.tx import load
|
|
11
|
+
from cartography.graph.job import GraphJob
|
|
12
|
+
from cartography.intel.entra.users import load_tenant
|
|
13
|
+
from cartography.models.entra.group import EntraGroupSchema
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@timeit
async def get_entra_groups(client: GraphServiceClient) -> list[Group]:
    """
    Fetch every group the client can list from Microsoft Graph.

    The first request asks for pages of up to 999 groups (``top=999``); each
    subsequent page is fetched through the previous page's ``odata_next_link``
    until a page has no such link.

    :param client: Graph client used to issue the requests.
    :return: All groups gathered across the pages, in the order received.
    """
    query = client.groups.GroupsRequestBuilderGetQueryParameters(top=999)
    first_page_config = client.groups.GroupsRequestBuilderGetRequestConfiguration(
        query_parameters=query,
    )

    collected: list[Group] = []
    response = await client.groups.get(request_configuration=first_page_config)
    while response:
        # A page may carry no value; treat that as an empty page.
        collected.extend(response.value or [])
        next_link = response.odata_next_link
        if not next_link:
            break
        response = await client.groups.with_url(next_link).get()

    return collected
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@timeit
async def get_group_members(
    client: GraphServiceClient, group_id: str
) -> tuple[list[str], list[str]]:
    """
    Collect the IDs of the users and groups that are members of one group.

    Every page of the group's ``members`` collection is read. Only
    ``DirectoryObject`` instances are considered, and they are sorted into
    users or subgroups by their ``odata_type``; any other type is ignored.

    :param client: Graph client used to issue the requests.
    :param group_id: ID of the group whose members are listed.
    :return: ``(user_ids, group_ids)`` in the order the members were returned.
    """
    user_type = "#microsoft.graph.user"
    group_type = "#microsoft.graph.group"
    ids_by_type: dict[str, list[str]] = {user_type: [], group_type: []}

    members_builder = client.groups.by_group_id(group_id).members
    response = await members_builder.get()
    while response:
        for member in response.value or []:
            if not isinstance(member, DirectoryObject):
                continue
            # Members whose type is neither user nor group have no bucket.
            bucket = ids_by_type.get(getattr(member, "odata_type", ""))
            if bucket is not None:
                bucket.append(member.id)
        next_link = response.odata_next_link
        if not next_link:
            break
        response = await members_builder.with_url(next_link).get()

    return ids_by_type[user_type], ids_by_type[group_type]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def transform_groups(
    groups: list[Group],
    user_member_map: dict[str, list[str]],
    group_member_map: dict[str, list[str]],
) -> list[dict[str, Any]]:
    """
    Flatten group objects into plain dictionaries ready for ingestion.

    Each output dictionary copies a fixed set of attributes from the group and
    adds ``member_ids`` / ``member_group_ids`` looked up by the group's ``id``.
    A group missing from a map gets an empty list for that key.

    :param groups: Group objects to convert.
    :param user_member_map: Group ID -> IDs of member users.
    :param group_member_map: Group ID -> IDs of member groups.
    :return: One dictionary per input group, in input order.
    """
    copied_attributes = (
        "id",
        "display_name",
        "description",
        "mail",
        "mail_nickname",
        "mail_enabled",
        "security_enabled",
        "group_types",
        "visibility",
        "is_assignable_to_role",
        "created_date_time",
        "deleted_date_time",
    )

    records: list[dict[str, Any]] = []
    for group in groups:
        record: dict[str, Any] = {
            attribute: getattr(group, attribute) for attribute in copied_attributes
        }
        record["member_ids"] = user_member_map.get(group.id, [])
        record["member_group_ids"] = group_member_map.get(group.id, [])
        records.append(record)
    return records
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@timeit
def load_groups(
    neo4j_session: neo4j.Session,
    groups: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write the transformed groups to the graph using ``EntraGroupSchema``.

    :param neo4j_session: Session the load runs on.
    :param groups: Group dictionaries to load.
    :param update_tag: Passed to ``load`` as ``lastupdated``.
    :param tenant_id: Passed to ``load`` as ``TENANT_ID``.
    """
    group_count = len(groups)
    logger.info(f"Loading {group_count} Entra groups")

    schema = EntraGroupSchema()
    load(neo4j_session, schema, groups, lastupdated=update_tag, TENANT_ID=tenant_id)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@timeit
def cleanup_groups(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Build the cleanup job derived from ``EntraGroupSchema`` and run it.

    :param neo4j_session: Session the job runs on.
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``.
    """
    cleanup_job = GraphJob.from_node_schema(EntraGroupSchema(), common_job_parameters)
    cleanup_job.run(neo4j_session)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@timeit
async def sync_entra_groups(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra groups and their memberships into the graph.

    Steps: authenticate with a client secret, fetch all groups, fetch each
    group's members one group at a time, transform, then load the tenant,
    load the groups, and run the group cleanup job.

    :param neo4j_session: Session used for loading and cleanup.
    :param tenant_id: Entra tenant ID; also used as the loaded tenant's ``id``.
    :param client_id: Application (client) ID used to authenticate.
    :param client_secret: Secret for ``client_id``.
    :param update_tag: Tag stamped on the loaded data.
    :param common_job_parameters: Parameters forwarded to the cleanup job.
    """
    graph_client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )

    groups = await get_entra_groups(graph_client)

    user_member_map: dict[str, list[str]] = {}
    group_member_map: dict[str, list[str]] = {}
    for group in groups:
        # Best effort: a failure for one group is logged and the sync continues.
        # NOTE(review): the failed group is recorded with empty membership and
        # cleanup still runs afterwards - confirm that is the intended outcome.
        try:
            users, subgroups = await get_group_members(graph_client, group.id)
        except Exception as e:
            logger.error(f"Failed to fetch members for group {group.id}: {e}")
            users, subgroups = [], []
        user_member_map[group.id] = users
        group_member_map[group.id] = subgroups

    transformed_groups = transform_groups(groups, user_member_map, group_member_map)

    load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
    load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
    cleanup_groups(neo4j_session, common_job_parameters)
|
cartography/intel/entra/ou.py
CHANGED
|
@@ -22,12 +22,28 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
|
|
|
22
22
|
Get all OUs from Microsoft Graph API with pagination support
|
|
23
23
|
"""
|
|
24
24
|
all_units: list[AdministrativeUnit] = []
|
|
25
|
-
request = client.directory.administrative_units.request()
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
26
|
+
# Initialize first page request
|
|
27
|
+
current_request = client.directory.administrative_units
|
|
28
|
+
|
|
29
|
+
while current_request:
|
|
30
|
+
try:
|
|
31
|
+
response = await current_request.get()
|
|
32
|
+
if response and response.value:
|
|
33
|
+
all_units.extend(response.value)
|
|
34
|
+
|
|
35
|
+
# Handle next page using OData link
|
|
36
|
+
if response.odata_next_link:
|
|
37
|
+
current_request = client.directory.administrative_units.with_url(
|
|
38
|
+
response.odata_next_link
|
|
39
|
+
)
|
|
40
|
+
else:
|
|
41
|
+
current_request = None
|
|
42
|
+
else:
|
|
43
|
+
current_request = None
|
|
44
|
+
except Exception as e:
|
|
45
|
+
logger.error(f"Failed to retrieve administrative units: {str(e)}")
|
|
46
|
+
current_request = None
|
|
31
47
|
|
|
32
48
|
return all_units
|
|
33
49
|
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import boto3
|
|
5
|
+
from neo4j import Session
|
|
6
|
+
|
|
7
|
+
from cartography.client.aws import list_accounts
|
|
8
|
+
from cartography.client.aws.ecr import get_ecr_images
|
|
9
|
+
from cartography.config import Config
|
|
10
|
+
from cartography.intel.trivy.scanner import cleanup
|
|
11
|
+
from cartography.intel.trivy.scanner import get_json_files_in_s3
|
|
12
|
+
from cartography.intel.trivy.scanner import sync_single_image_from_s3
|
|
13
|
+
from cartography.stats import get_stats_client
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
stat_handler = get_stats_client("trivy.scanner")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@timeit
def get_scan_targets(
    neo4j_session: Session,
    account_ids: list[str] | None = None,
) -> set[str]:
    """
    Return the set of ECR image URIs found in the graph.

    When ``account_ids`` is empty or None, the accounts come from
    ``list_accounts(neo4j_session)``; otherwise only the given accounts are
    queried.
    """
    accounts = account_ids if account_ids else list_accounts(neo4j_session)

    # Each row from get_ecr_images has five fields; the third is the image URI.
    return {
        image_uri
        for account in accounts
        for _, _, image_uri, _, _ in get_ecr_images(neo4j_session, account)
    }
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_intersection(
    images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
) -> list[tuple[str, str]]:
    """
    Get the intersection of ECR images in the graph and S3 scan results.

    An S3 object key is expected to look like ``<prefix><image_uri>.json``.
    Keys that do not start with ``trivy_s3_prefix`` or do not end with
    ``.json`` are ignored, so they can never be truncated into a string that
    happens to match an image URI.

    Args:
        images_in_graph: Set of ECR images in the graph
        json_files: Set of S3 object keys for JSON files
        trivy_s3_prefix: S3 prefix path containing scan results

    Returns:
        List of tuples (image_uri, s3_object_key)
    """
    json_suffix = ".json"
    prefix_len = len(trivy_s3_prefix)
    suffix_len = len(json_suffix)

    intersection: list[tuple[str, str]] = []
    for s3_object_key in json_files:
        # Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
        # Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
        has_expected_shape = s3_object_key.startswith(
            trivy_s3_prefix
        ) and s3_object_key.endswith(json_suffix)
        if not has_expected_shape:
            continue

        # Remove the prefix and the .json suffix
        image_uri = s3_object_key[prefix_len:-suffix_len]

        if image_uri in images_in_graph:
            intersection.append((image_uri, s3_object_key))

    return intersection
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@timeit
def sync_trivy_aws_ecr_from_s3(
    neo4j_session: Session,
    trivy_s3_bucket: str,
    trivy_s3_prefix: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
    boto3_session: boto3.Session,
) -> None:
    """
    Sync Trivy scan results stored in S3 for the ECR images known to the graph.

    Only images that are both in the graph and have a JSON scan result under
    the given bucket/prefix are synced. Cleanup runs after all of them have
    been processed.

    Args:
        neo4j_session: Neo4j session for database operations
        trivy_s3_bucket: S3 bucket containing scan results
        trivy_s3_prefix: S3 prefix path containing scan results
        update_tag: Update tag for tracking
        common_job_parameters: Common job parameters for cleanup
        boto3_session: boto3 session for S3 operations

    Raises:
        ValueError: If no image in the graph has a matching scan result in S3.
    """
    logger.info(
        f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
    )

    images_in_graph: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_s3(
        trivy_s3_bucket, trivy_s3_prefix, boto3_session
    )
    matches: list[tuple[str, str]] = _get_intersection(
        images_in_graph, json_files, trivy_s3_prefix
    )

    if not matches:
        # Abort before cleanup so that nothing is removed from the graph when
        # no scan result could be matched to an image.
        no_results_message = (
            "Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
            f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
            "Skipping Trivy sync to avoid potential data loss. "
            "Please check the S3 bucket and prefix configuration. We expect the json files in s3 to be named "
            "`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
            "a folder, it MUST end with a trailing slash '/'. "
        )
        logger.error(no_results_message)
        logger.error(f"JSON files in S3: {json_files}")
        raise ValueError("No ECR images with S3 json scan results found.")

    logger.info(f"Processing {len(matches)} ECR images with S3 scan results")
    for image_uri, s3_object_key in matches:
        sync_single_image_from_s3(
            neo4j_session,
            image_uri,
            update_tag,
            trivy_s3_bucket,
            s3_object_key,
            boto3_session,
        )

    cleanup(neo4j_session, common_job_parameters)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@timeit
def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
    """
    Entry point for ingesting Trivy scan results from S3.

    Does nothing (beyond logging) when ``config.trivy_s3_bucket`` is not set.
    A ``None`` ``config.trivy_s3_prefix`` is replaced with an empty string on
    the config object itself before the sync starts.

    Args:
        neo4j_session: Neo4j session for database operations
        config: Configuration object containing S3 settings
    """
    # Without a bucket there is nothing to ingest.
    if not config.trivy_s3_bucket:
        logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
        return

    # An absent prefix means "the whole bucket".
    if config.trivy_s3_prefix is None:
        config.trivy_s3_prefix = ""

    job_parameters = {"UPDATE_TAG": config.update_tag}
    s3_session = boto3.Session()

    sync_trivy_aws_ecr_from_s3(
        neo4j_session,
        config.trivy_s3_bucket,
        config.trivy_s3_prefix,
        config.update_tag,
        job_parameters,
        s3_session,
    )

    # Support other Trivy resource types here e.g. if Google Cloud has images.
|