cartography 0.109.0rc1__py3-none-any.whl → 0.110.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +22 -0
- cartography/config.py +13 -0
- cartography/data/indexes.cypher +0 -15
- cartography/intel/aws/cloudtrail_management_events.py +21 -0
- cartography/intel/aws/eventbridge.py +91 -0
- cartography/intel/aws/glue.py +117 -0
- cartography/intel/aws/identitycenter.py +71 -23
- cartography/intel/aws/kms.py +160 -200
- cartography/intel/aws/lambda_function.py +206 -190
- cartography/intel/aws/rds.py +243 -458
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/route53.py +334 -332
- cartography/intel/entra/__init__.py +43 -41
- cartography/intel/entra/applications.py +1 -2
- cartography/intel/entra/ou.py +1 -1
- cartography/intel/entra/resources.py +20 -0
- cartography/intel/trivy/__init__.py +73 -13
- cartography/intel/trivy/scanner.py +115 -92
- cartography/models/aws/eventbridge/__init__.py +0 -0
- cartography/models/aws/eventbridge/rule.py +77 -0
- cartography/models/aws/glue/__init__.py +0 -0
- cartography/models/aws/glue/connection.py +51 -0
- cartography/models/aws/identitycenter/awspermissionset.py +44 -0
- cartography/models/aws/kms/__init__.py +0 -0
- cartography/models/aws/kms/aliases.py +86 -0
- cartography/models/aws/kms/grants.py +65 -0
- cartography/models/aws/kms/keys.py +88 -0
- cartography/models/aws/lambda_function/__init__.py +0 -0
- cartography/models/aws/lambda_function/alias.py +74 -0
- cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
- cartography/models/aws/lambda_function/lambda_function.py +89 -0
- cartography/models/aws/lambda_function/layer.py +72 -0
- cartography/models/aws/rds/__init__.py +0 -0
- cartography/models/aws/rds/cluster.py +89 -0
- cartography/models/aws/rds/instance.py +154 -0
- cartography/models/aws/rds/snapshot.py +108 -0
- cartography/models/aws/rds/subnet_group.py +101 -0
- cartography/models/aws/route53/__init__.py +0 -0
- cartography/models/aws/route53/dnsrecord.py +214 -0
- cartography/models/aws/route53/nameserver.py +63 -0
- cartography/models/aws/route53/subzone.py +40 -0
- cartography/models/aws/route53/zone.py +47 -0
- cartography/models/snipeit/asset.py +1 -0
- cartography/util.py +8 -1
- {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/METADATA +2 -2
- {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/RECORD +51 -32
- cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
- cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
- cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
- cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
- {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import datetime
|
|
2
3
|
import logging
|
|
4
|
+
from traceback import TracebackException
|
|
5
|
+
from typing import Awaitable
|
|
6
|
+
from typing import Callable
|
|
3
7
|
|
|
4
8
|
import neo4j
|
|
5
9
|
|
|
6
10
|
from cartography.config import Config
|
|
7
|
-
from cartography.intel.entra.
|
|
8
|
-
from cartography.intel.entra.groups import sync_entra_groups
|
|
9
|
-
from cartography.intel.entra.ou import sync_entra_ous
|
|
10
|
-
from cartography.intel.entra.users import sync_entra_users
|
|
11
|
+
from cartography.intel.entra.resources import RESOURCE_FUNCTIONS
|
|
11
12
|
from cartography.util import timeit
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
@@ -39,45 +40,46 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
async def main() -> None:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
neo4j_session,
|
|
45
|
-
config.entra_tenant_id,
|
|
46
|
-
config.entra_client_id,
|
|
47
|
-
config.entra_client_secret,
|
|
48
|
-
config.update_tag,
|
|
49
|
-
common_job_parameters,
|
|
50
|
-
)
|
|
43
|
+
failed_stages = []
|
|
44
|
+
exception_tracebacks = []
|
|
51
45
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
46
|
+
async def run_stage(name: str, func: Callable[..., Awaitable[None]]) -> None:
    """
    Await a single Entra sync stage and apply the configured failure policy.

    The stage function is called with the Neo4j session, the Entra tenant
    id, client id and client secret, the update tag and the common job
    parameters, all taken from the enclosing scope.

    When ``config.entra_best_effort_mode`` is set, a failure is recorded in
    the enclosing ``failed_stages`` and ``exception_tracebacks`` lists and
    swallowed so the caller can continue. Otherwise the failure is logged
    and re-raised.

    Args:
        name: Human-readable stage name used in log messages.
        func: Awaitable sync function for the stage.
    """
    try:
        await func(
            neo4j_session,
            config.entra_tenant_id,
            config.entra_client_id,
            config.entra_client_secret,
            config.update_tag,
            common_job_parameters,
        )
    except Exception as exc:
        # Strict mode: surface the failure immediately.
        if not config.entra_best_effort_mode:
            logger.error("Error during Entra sync", exc_info=True)
            raise

        # Best-effort mode: remember what failed and keep going.
        failed_at = datetime.datetime.now()
        failed_stages.append(name)
        formatted_trace = "".join(TracebackException.from_exception(exc).format())
        exception_tracebacks.append(
            f"{failed_at} - Exception for stage {name}\n{formatted_trace}"
        )
        logger.warning(
            f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
            "on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
            exc_info=True,
        )
|
|
61
73
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
neo4j_session,
|
|
65
|
-
config.entra_tenant_id,
|
|
66
|
-
config.entra_client_id,
|
|
67
|
-
config.entra_client_secret,
|
|
68
|
-
config.update_tag,
|
|
69
|
-
common_job_parameters,
|
|
70
|
-
)
|
|
74
|
+
for name, func in RESOURCE_FUNCTIONS:
|
|
75
|
+
await run_stage(name, func)
|
|
71
76
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
config.update_tag,
|
|
79
|
-
common_job_parameters,
|
|
80
|
-
)
|
|
77
|
+
if failed_stages:
|
|
78
|
+
logger.error(
|
|
79
|
+
f"Entra sync failed for the following stages: {', '.join(failed_stages)}. "
|
|
80
|
+
"See the logs for more details.",
|
|
81
|
+
)
|
|
82
|
+
raise Exception("\n".join(exception_tracebacks))
|
|
81
83
|
|
|
82
|
-
# Execute
|
|
84
|
+
# Execute all syncs in sequence
|
|
83
85
|
asyncio.run(main())
|
|
@@ -172,12 +172,11 @@ async def get_app_role_assignments(
|
|
|
172
172
|
)
|
|
173
173
|
continue
|
|
174
174
|
except Exception as e:
|
|
175
|
-
# Only catch truly unexpected errors - these should be rare
|
|
176
175
|
logger.error(
|
|
177
176
|
f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
|
|
178
177
|
exc_info=True,
|
|
179
178
|
)
|
|
180
|
-
|
|
179
|
+
raise
|
|
181
180
|
|
|
182
181
|
logger.info(f"Retrieved {len(assignments)} app role assignments total")
|
|
183
182
|
return assignments
|
cartography/intel/entra/ou.py
CHANGED
|
@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
|
|
|
43
43
|
current_request = None
|
|
44
44
|
except Exception as e:
|
|
45
45
|
logger.error(f"Failed to retrieve administrative units: {str(e)}")
|
|
46
|
-
|
|
46
|
+
raise
|
|
47
47
|
|
|
48
48
|
return all_units
|
|
49
49
|
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from cartography.intel.entra.applications import sync_entra_applications
|
|
2
|
+
from cartography.intel.entra.groups import sync_entra_groups
|
|
3
|
+
from cartography.intel.entra.ou import sync_entra_ous
|
|
4
|
+
from cartography.intel.entra.users import sync_entra_users
|
|
5
|
+
|
|
6
|
+
# Ordered registry of the Entra sync stages.
# This is a list so that we sync these resources in order.
# Data shape: [("resource_name", sync_function), ...]
# The resource name is a short label for the stage; the sync function is the
# coroutine function that performs it.
# Each sync function will be called with the following arguments:
# - neo4j_session
# - config.entra_tenant_id
# - config.entra_client_id
# - config.entra_client_secret
# - config.update_tag
# - common_job_parameters
RESOURCE_FUNCTIONS = [
    ("users", sync_entra_users),
    ("groups", sync_entra_groups),
    ("ous", sync_entra_ous),
    ("applications", sync_entra_applications),
]
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
3
|
from typing import Any
|
|
3
4
|
|
|
@@ -8,7 +9,9 @@ from cartography.client.aws import list_accounts
|
|
|
8
9
|
from cartography.client.aws.ecr import get_ecr_images
|
|
9
10
|
from cartography.config import Config
|
|
10
11
|
from cartography.intel.trivy.scanner import cleanup
|
|
12
|
+
from cartography.intel.trivy.scanner import get_json_files_in_dir
|
|
11
13
|
from cartography.intel.trivy.scanner import get_json_files_in_s3
|
|
14
|
+
from cartography.intel.trivy.scanner import sync_single_image_from_file
|
|
12
15
|
from cartography.intel.trivy.scanner import sync_single_image_from_s3
|
|
13
16
|
from cartography.stats import get_stats_client
|
|
14
17
|
from cartography.util import timeit
|
|
@@ -39,13 +42,13 @@ def get_scan_targets(
|
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
def _get_intersection(
|
|
42
|
-
|
|
45
|
+
image_uris: set[str], json_files: set[str], trivy_s3_prefix: str
|
|
43
46
|
) -> list[tuple[str, str]]:
|
|
44
47
|
"""
|
|
45
48
|
Get the intersection of ECR images in the graph and S3 scan results.
|
|
46
49
|
|
|
47
50
|
Args:
|
|
48
|
-
|
|
51
|
+
image_uris: Set of ECR images in the graph
|
|
49
52
|
json_files: Set of S3 object keys for JSON files
|
|
50
53
|
trivy_s3_prefix: S3 prefix path containing scan results
|
|
51
54
|
|
|
@@ -60,7 +63,7 @@ def _get_intersection(
|
|
|
60
63
|
# Remove the prefix and the .json suffix
|
|
61
64
|
image_uri = s3_object_key[prefix_len:-5]
|
|
62
65
|
|
|
63
|
-
if image_uri in
|
|
66
|
+
if image_uri in image_uris:
|
|
64
67
|
intersection.append((image_uri, s3_object_key))
|
|
65
68
|
|
|
66
69
|
return intersection
|
|
@@ -90,12 +93,12 @@ def sync_trivy_aws_ecr_from_s3(
|
|
|
90
93
|
f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
|
|
91
94
|
)
|
|
92
95
|
|
|
93
|
-
|
|
96
|
+
image_uris: set[str] = get_scan_targets(neo4j_session)
|
|
94
97
|
json_files: set[str] = get_json_files_in_s3(
|
|
95
98
|
trivy_s3_bucket, trivy_s3_prefix, boto3_session
|
|
96
99
|
)
|
|
97
100
|
intersection: list[tuple[str, str]] = _get_intersection(
|
|
98
|
-
|
|
101
|
+
image_uris, json_files, trivy_s3_prefix
|
|
99
102
|
)
|
|
100
103
|
|
|
101
104
|
if len(intersection) == 0:
|
|
@@ -124,21 +127,79 @@ def sync_trivy_aws_ecr_from_s3(
|
|
|
124
127
|
cleanup(neo4j_session, common_job_parameters)
|
|
125
128
|
|
|
126
129
|
|
|
130
|
+
@timeit
def sync_trivy_aws_ecr_from_dir(
    neo4j_session: Session,
    results_dir: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Trivy scan results from local files for AWS ECR images.

    Every ``.json`` file found under ``results_dir`` is opened once to read
    its ``ArtifactName``. Files that cannot be decoded as JSON, or whose
    top-level value is not a JSON object, are logged and skipped. Files whose
    ``ArtifactName`` is set but is not one of the scan targets in the graph
    are skipped. All remaining files are synced, including files that carry
    no ``ArtifactName`` at all. Cleanup runs after all files are processed.

    Args:
        neo4j_session: Neo4j session for database operations
        results_dir: Directory that is searched for Trivy JSON result files
        update_tag: Update tag for tracking
        common_job_parameters: Parameters passed to the cleanup job

    Raises:
        ValueError: If no JSON files are found under ``results_dir``.
    """
    logger.info(f"Using Trivy scan results from {results_dir}")

    image_uris: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_dir(results_dir)

    if not json_files:
        logger.error(
            f"Trivy sync was configured, but no json files were found in {results_dir}."
        )
        raise ValueError("No Trivy json results found on disk")

    logger.info(f"Processing {len(json_files)} local Trivy result files")

    for file_path in json_files:
        # Peek at the artifact name before handing the file to the loader.
        try:
            with open(file_path, encoding="utf-8") as f:
                trivy_data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            logger.error(f"Failed to read artifact name from {file_path}: {e}")
            continue

        # Any stray .json file may live in the directory; a list or scalar at
        # the top level has no .get() and must not abort the whole sync.
        if not isinstance(trivy_data, dict):
            logger.error(
                f"Failed to read artifact name from {file_path}: "
                "top-level JSON value is not an object"
            )
            continue

        artifact_name = trivy_data.get("ArtifactName")
        if artifact_name and artifact_name not in image_uris:
            logger.debug(
                f"Skipping results for {artifact_name} since the image is not present in the graph"
            )
            continue

        # Either the image is in the graph or the file names no artifact;
        # in both cases the file is synced.
        sync_single_image_from_file(
            neo4j_session,
            file_path,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)
|
|
177
|
+
|
|
178
|
+
|
|
127
179
|
@timeit
|
|
128
180
|
def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
|
|
129
|
-
"""
|
|
130
|
-
Start Trivy scan ingestion from S3.
|
|
181
|
+
"""Start Trivy scan ingestion from S3 or local files.
|
|
131
182
|
|
|
132
183
|
Args:
|
|
133
184
|
neo4j_session: Neo4j session for database operations
|
|
134
|
-
config: Configuration object containing S3
|
|
185
|
+
config: Configuration object containing S3 or directory paths
|
|
135
186
|
"""
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
187
|
+
if not config.trivy_s3_bucket and not config.trivy_results_dir:
|
|
188
|
+
logger.info("Trivy configuration not provided. Skipping Trivy ingestion.")
|
|
189
|
+
return
|
|
190
|
+
|
|
191
|
+
if config.trivy_results_dir:
|
|
192
|
+
common_job_parameters = {
|
|
193
|
+
"UPDATE_TAG": config.update_tag,
|
|
194
|
+
}
|
|
195
|
+
sync_trivy_aws_ecr_from_dir(
|
|
196
|
+
neo4j_session,
|
|
197
|
+
config.trivy_results_dir,
|
|
198
|
+
config.update_tag,
|
|
199
|
+
common_job_parameters,
|
|
200
|
+
)
|
|
139
201
|
return
|
|
140
202
|
|
|
141
|
-
# Default to empty string if s3 prefix is not provided
|
|
142
203
|
if config.trivy_s3_prefix is None:
|
|
143
204
|
config.trivy_s3_prefix = ""
|
|
144
205
|
|
|
@@ -146,7 +207,6 @@ def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
|
|
|
146
207
|
"UPDATE_TAG": config.update_tag,
|
|
147
208
|
}
|
|
148
209
|
|
|
149
|
-
# Get ECR images to scan
|
|
150
210
|
boto3_session = boto3.Session()
|
|
151
211
|
|
|
152
212
|
sync_trivy_aws_ecr_from_s3(
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import os
|
|
3
4
|
from typing import Any
|
|
4
5
|
|
|
5
6
|
import boto3
|
|
@@ -127,6 +128,90 @@ def transform_scan_results(
|
|
|
127
128
|
return findings_list, packages_list, fixes_list
|
|
128
129
|
|
|
129
130
|
|
|
131
|
+
def _parse_trivy_data(
|
|
132
|
+
trivy_data: dict, source: str
|
|
133
|
+
) -> tuple[str | None, list[dict], str]:
|
|
134
|
+
"""
|
|
135
|
+
Parse Trivy scan data and extract common fields.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
trivy_data: Raw JSON Trivy data
|
|
139
|
+
source: Source identifier for error messages (file path or S3 URI)
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
Tuple of (artifact_name, results, image_digest)
|
|
143
|
+
"""
|
|
144
|
+
# Extract artifact name if present (only for file-based)
|
|
145
|
+
artifact_name = trivy_data.get("ArtifactName")
|
|
146
|
+
|
|
147
|
+
if "Results" not in trivy_data:
|
|
148
|
+
logger.error(
|
|
149
|
+
f"Scan data did not contain a `Results` key for {source}. This indicates a malformed scan result."
|
|
150
|
+
)
|
|
151
|
+
raise ValueError(f"Missing 'Results' key in scan data for {source}")
|
|
152
|
+
|
|
153
|
+
results = trivy_data["Results"]
|
|
154
|
+
if not results:
|
|
155
|
+
stat_handler.incr("image_scan_no_results_count")
|
|
156
|
+
logger.info(f"No vulnerabilities found for {source}")
|
|
157
|
+
|
|
158
|
+
if "Metadata" not in trivy_data or not trivy_data["Metadata"]:
|
|
159
|
+
raise ValueError(f"Missing 'Metadata' in scan data for {source}")
|
|
160
|
+
|
|
161
|
+
repo_digests = trivy_data["Metadata"].get("RepoDigests", [])
|
|
162
|
+
if not repo_digests:
|
|
163
|
+
raise ValueError(f"Missing 'RepoDigests' in scan metadata for {source}")
|
|
164
|
+
|
|
165
|
+
repo_digest = repo_digests[0]
|
|
166
|
+
if "@" not in repo_digest:
|
|
167
|
+
raise ValueError(f"Invalid repo digest format in {source}: {repo_digest}")
|
|
168
|
+
|
|
169
|
+
image_digest = repo_digest.split("@")[1]
|
|
170
|
+
if not image_digest:
|
|
171
|
+
raise ValueError(f"Empty image digest for {source}")
|
|
172
|
+
|
|
173
|
+
return artifact_name, results, image_digest
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@timeit
def sync_single_image(
    neo4j_session: Session,
    trivy_data: dict,
    source: str,
    update_tag: int,
) -> None:
    """
    Load one image's Trivy scan report into Neo4j.

    The report is parsed, transformed into findings, packages and fixes, and
    each of those lists is loaded in turn. Any failure is logged with the
    source identifier and re-raised.

    Args:
        neo4j_session: Neo4j session for database operations
        trivy_data: Raw Trivy JSON data
        source: Source identifier for logging (file path or image URI)
        update_tag: Update tag for tracking
    """
    try:
        # The artifact name (first element) is not needed here.
        parsed = _parse_trivy_data(trivy_data, source)
        scan_results = parsed[1]
        digest = parsed[2]

        # Single transformation pass producing all three payloads.
        findings, packages, fixes = transform_scan_results(scan_results, digest)
        stat_handler.incr("image_scan_cve_count", len(findings))

        # Write the payloads to the graph.
        load_scan_vulns(neo4j_session, findings, update_tag=update_tag)
        load_scan_packages(neo4j_session, packages, update_tag=update_tag)
        load_scan_fixes(neo4j_session, fixes, update_tag=update_tag)
        stat_handler.incr("images_processed_count")
    except Exception as exc:
        logger.error(f"Failed to process scan results for {source}: {exc}")
        raise
|
|
213
|
+
|
|
214
|
+
|
|
130
215
|
@timeit
|
|
131
216
|
def get_json_files_in_s3(
|
|
132
217
|
s3_bucket: str, s3_prefix: str, boto3_session: boto3.Session
|
|
@@ -177,6 +262,18 @@ def get_json_files_in_s3(
|
|
|
177
262
|
return results
|
|
178
263
|
|
|
179
264
|
|
|
265
|
+
@timeit
def get_json_files_in_dir(results_dir: str) -> set[str]:
    """
    Collect the paths of all ``.json`` files below a directory.

    The directory tree is walked recursively; a file qualifies when its name
    ends with ``.json``.

    Args:
        results_dir: Root directory to search

    Returns:
        Set of file paths, each joined from the containing directory and the
        file name.
    """
    json_paths = {
        os.path.join(dirpath, entry)
        for dirpath, _subdirs, entries in os.walk(results_dir)
        for entry in entries
        if entry.endswith(".json")
    }
    logger.info(f"Found {len(json_paths)} json files in {results_dir}")
    return json_paths
|
|
275
|
+
|
|
276
|
+
|
|
180
277
|
@timeit
|
|
181
278
|
def cleanup(neo4j_session: Session, common_job_parameters: dict[str, Any]) -> None:
|
|
182
279
|
"""
|
|
@@ -245,58 +342,6 @@ def load_scan_fixes(
|
|
|
245
342
|
)
|
|
246
343
|
|
|
247
344
|
|
|
248
|
-
@timeit
|
|
249
|
-
def read_scan_results_from_s3(
|
|
250
|
-
boto3_session: boto3.Session,
|
|
251
|
-
s3_bucket: str,
|
|
252
|
-
s3_object_key: str,
|
|
253
|
-
image_uri: str,
|
|
254
|
-
) -> tuple[list[dict], str | None]:
|
|
255
|
-
"""
|
|
256
|
-
Read and parse Trivy scan results from S3.
|
|
257
|
-
|
|
258
|
-
Args:
|
|
259
|
-
boto3_session: boto3 session for S3 operations
|
|
260
|
-
s3_bucket: S3 bucket containing scan results
|
|
261
|
-
s3_object_key: S3 object key for the scan results
|
|
262
|
-
image_uri: ECR image URI (for logging purposes)
|
|
263
|
-
|
|
264
|
-
Returns:
|
|
265
|
-
Tuple of (list of scan result dictionaries from the "Results" key, image digest)
|
|
266
|
-
"""
|
|
267
|
-
s3_client = boto3_session.client("s3")
|
|
268
|
-
|
|
269
|
-
# Read JSON scan results from S3
|
|
270
|
-
logger.debug(f"Reading scan results from S3: s3://{s3_bucket}/{s3_object_key}")
|
|
271
|
-
response = s3_client.get_object(Bucket=s3_bucket, Key=s3_object_key)
|
|
272
|
-
scan_data_json = response["Body"].read().decode("utf-8")
|
|
273
|
-
|
|
274
|
-
# Parse JSON data
|
|
275
|
-
trivy_data = json.loads(scan_data_json)
|
|
276
|
-
|
|
277
|
-
# Extract results using the same logic as binary scanning
|
|
278
|
-
if "Results" in trivy_data and trivy_data["Results"]:
|
|
279
|
-
results = trivy_data["Results"]
|
|
280
|
-
else:
|
|
281
|
-
stat_handler.incr("image_scan_no_results_count")
|
|
282
|
-
logger.warning(
|
|
283
|
-
f"S3 scan data did not contain a `Results` key for URI = {image_uri}; continuing."
|
|
284
|
-
)
|
|
285
|
-
results = []
|
|
286
|
-
|
|
287
|
-
image_digest = None
|
|
288
|
-
if "Metadata" in trivy_data and trivy_data["Metadata"]:
|
|
289
|
-
repo_digests = trivy_data["Metadata"].get("RepoDigests", [])
|
|
290
|
-
if repo_digests:
|
|
291
|
-
# Sample input: 000000000000.dkr.ecr.us-east-1.amazonaws.com/test-repository@sha256:88016
|
|
292
|
-
# Sample output: sha256:88016
|
|
293
|
-
repo_digest = repo_digests[0]
|
|
294
|
-
if "@" in repo_digest:
|
|
295
|
-
image_digest = repo_digest.split("@")[1]
|
|
296
|
-
|
|
297
|
-
return results, image_digest
|
|
298
|
-
|
|
299
|
-
|
|
300
345
|
@timeit
|
|
301
346
|
def sync_single_image_from_s3(
|
|
302
347
|
neo4j_session: Session,
|
|
@@ -317,47 +362,25 @@ def sync_single_image_from_s3(
|
|
|
317
362
|
s3_object_key: S3 object key for this image's scan results
|
|
318
363
|
boto3_session: boto3 session for S3 operations
|
|
319
364
|
"""
|
|
320
|
-
|
|
321
|
-
# Read and parse scan results from S3
|
|
322
|
-
results, image_digest = read_scan_results_from_s3(
|
|
323
|
-
boto3_session,
|
|
324
|
-
s3_bucket,
|
|
325
|
-
s3_object_key,
|
|
326
|
-
image_uri,
|
|
327
|
-
)
|
|
328
|
-
if not image_digest:
|
|
329
|
-
logger.warning(f"No image digest found for {image_uri}; skipping over.")
|
|
330
|
-
return
|
|
365
|
+
s3_client = boto3_session.client("s3")
|
|
331
366
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
image_digest,
|
|
336
|
-
)
|
|
367
|
+
logger.debug(f"Reading scan results from S3: s3://{s3_bucket}/{s3_object_key}")
|
|
368
|
+
response = s3_client.get_object(Bucket=s3_bucket, Key=s3_object_key)
|
|
369
|
+
scan_data_json = response["Body"].read().decode("utf-8")
|
|
337
370
|
|
|
338
|
-
|
|
339
|
-
|
|
371
|
+
trivy_data = json.loads(scan_data_json)
|
|
372
|
+
sync_single_image(neo4j_session, trivy_data, image_uri, update_tag)
|
|
340
373
|
|
|
341
|
-
# Load the transformed data using existing functions
|
|
342
|
-
load_scan_vulns(
|
|
343
|
-
neo4j_session,
|
|
344
|
-
findings_list,
|
|
345
|
-
update_tag=update_tag,
|
|
346
|
-
)
|
|
347
|
-
load_scan_packages(
|
|
348
|
-
neo4j_session,
|
|
349
|
-
packages_list,
|
|
350
|
-
update_tag=update_tag,
|
|
351
|
-
)
|
|
352
|
-
load_scan_fixes(
|
|
353
|
-
neo4j_session,
|
|
354
|
-
fixes_list,
|
|
355
|
-
update_tag=update_tag,
|
|
356
|
-
)
|
|
357
|
-
stat_handler.incr("images_processed_count")
|
|
358
374
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
375
|
+
@timeit
def sync_single_image_from_file(
    neo4j_session: Session,
    file_path: str,
    update_tag: int,
) -> None:
    """
    Load a Trivy JSON report from a local file and sync it to Neo4j.

    Args:
        neo4j_session: Neo4j session for database operations
        file_path: Path of the Trivy JSON report; also used as the source
            identifier in log messages
        update_tag: Update tag for tracking
    """
    logger.debug(f"Reading scan results from file: {file_path}")
    with open(file_path, encoding="utf-8") as report_file:
        report = json.load(report_file)

    sync_single_image(neo4j_session, report, file_path, update_tag)
|
|
File without changes
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
|
+
from cartography.models.core.relationships import LinkDirection
|
|
9
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import OtherRelationships
|
|
11
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class EventBridgeRuleNodeProperties(CartographyNodeProperties):
    """Properties stored on an EventBridgeRule node."""

    # Both id and arn are taken from the same "Arn" field.
    # presumably each PropertyRef name is a key of the ingested rule record — TODO confirm
    id: PropertyRef = PropertyRef("Arn")
    arn: PropertyRef = PropertyRef("Arn", extra_index=True)
    name: PropertyRef = PropertyRef("Name")
    # set_in_kwargs: presumably supplied once per load call rather than per record — verify against caller
    region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
    event_pattern: PropertyRef = PropertyRef("EventPattern")
    state: PropertyRef = PropertyRef("State")
    description: PropertyRef = PropertyRef("Description")
    schedule_expression: PropertyRef = PropertyRef("ScheduleExpression")
    # Same "RoleArn" field that EventBridgeRuleToAWSRoleRel matches on.
    role_arn: PropertyRef = PropertyRef("RoleArn")
    managed_by: PropertyRef = PropertyRef("ManagedBy")
    event_bus_name: PropertyRef = PropertyRef("EventBusName")
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class EventBridgeRuleToAwsAccountRelProperties(CartographyRelProperties):
    """Properties of the relationship between an EventBridgeRule and its AWSAccount."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class EventBridgeRuleToAWSAccountRel(CartographyRelSchema):
    """RESOURCE relationship between an EventBridgeRule and an AWSAccount."""

    target_node_label: str = "AWSAccount"
    # The account is matched on its id against the AWS_ID keyword argument.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("AWS_ID", set_in_kwargs=True)},
    )
    # INWARD: presumably (:AWSAccount)-[:RESOURCE]->(:EventBridgeRule) — TODO confirm
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: EventBridgeRuleToAwsAccountRelProperties = (
        EventBridgeRuleToAwsAccountRelProperties()
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class EventBridgeRuleToAWSRoleRelProperties(CartographyRelProperties):
    """Properties of the relationship between an EventBridgeRule and an AWSRole."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class EventBridgeRuleToAWSRoleRel(CartographyRelSchema):
    """ASSOCIATED_WITH relationship between an EventBridgeRule and an AWSRole."""

    target_node_label: str = "AWSRole"
    # The role is matched on its arn against the rule's "RoleArn" field.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"arn": PropertyRef("RoleArn")},
    )
    # OUTWARD: presumably (:EventBridgeRule)-[:ASSOCIATED_WITH]->(:AWSRole) — TODO confirm
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "ASSOCIATED_WITH"
    properties: EventBridgeRuleToAWSRoleRelProperties = (
        EventBridgeRuleToAWSRoleRelProperties()
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class EventBridgeRuleSchema(CartographyNodeSchema):
    """Node schema for EventBridgeRule: its properties, its AWSAccount link and its AWSRole link."""

    label: str = "EventBridgeRule"
    properties: EventBridgeRuleNodeProperties = EventBridgeRuleNodeProperties()
    # The rule is attached to its AWSAccount as the sub-resource relationship.
    sub_resource_relationship: EventBridgeRuleToAWSAccountRel = (
        EventBridgeRuleToAWSAccountRel()
    )
    # Additional relationships: the role referenced by the rule's RoleArn.
    other_relationships: OtherRelationships = OtherRelationships(
        [
            EventBridgeRuleToAWSRoleRel(),
        ]
    )
|
|
File without changes
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
|
+
from cartography.models.core.relationships import LinkDirection
|
|
9
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class GlueConnectionNodeProperties(CartographyNodeProperties):
    """Properties stored on a GlueConnection node."""

    # presumably each PropertyRef name is a key of the ingested connection record — TODO confirm
    id: PropertyRef = PropertyRef("Name")
    # NOTE(review): arn is populated from "Name", the same field as id, so it
    # will not hold an ARN — confirm this is intentional.
    arn: PropertyRef = PropertyRef("Name", extra_index=True)
    # set_in_kwargs: presumably supplied once per load call rather than per record — verify against caller
    region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
    description: PropertyRef = PropertyRef("Description")
    connection_type: PropertyRef = PropertyRef("ConnectionType")
    status: PropertyRef = PropertyRef("Status")
    status_reason: PropertyRef = PropertyRef("StatusReason")
    authentication_type: PropertyRef = PropertyRef("AuthenticationType")
    secret_arn: PropertyRef = PropertyRef("SecretArn")
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True)
class GlueConnectionToAwsAccountRelProperties(CartographyRelProperties):
    """Properties of the relationship between a GlueConnection and its AWSAccount."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class GlueConnectionToAWSAccountRel(CartographyRelSchema):
    """RESOURCE relationship between a GlueConnection and an AWSAccount."""

    target_node_label: str = "AWSAccount"
    # The account is matched on its id against the AWS_ID keyword argument.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("AWS_ID", set_in_kwargs=True)},
    )
    # INWARD: presumably (:AWSAccount)-[:RESOURCE]->(:GlueConnection) — TODO confirm
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: GlueConnectionToAwsAccountRelProperties = (
        GlueConnectionToAwsAccountRelProperties()
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class GlueConnectionSchema(CartographyNodeSchema):
    """Node schema for GlueConnection: its properties and its AWSAccount link."""

    label: str = "GlueConnection"
    properties: GlueConnectionNodeProperties = GlueConnectionNodeProperties()
    # The connection is attached to its AWSAccount as the sub-resource relationship.
    sub_resource_relationship: GlueConnectionToAWSAccountRel = (
        GlueConnectionToAWSAccountRel()
    )
|