cartography 0.104.0rc3__py3-none-any.whl → 0.105.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +26 -1
- cartography/client/aws/__init__.py +19 -0
- cartography/client/aws/ecr.py +51 -0
- cartography/config.py +8 -0
- cartography/data/indexes.cypher +0 -3
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
- cartography/graph/cleanupbuilder.py +151 -41
- cartography/intel/aws/acm.py +124 -0
- cartography/intel/aws/cloudtrail.py +3 -38
- cartography/intel/aws/ecr.py +8 -2
- cartography/intel/aws/iam.py +1 -1
- cartography/intel/aws/lambda_function.py +1 -1
- cartography/intel/aws/resources.py +2 -2
- cartography/intel/aws/s3.py +195 -4
- cartography/intel/aws/sqs.py +36 -90
- cartography/intel/entra/__init__.py +11 -0
- cartography/intel/entra/groups.py +151 -0
- cartography/intel/entra/ou.py +21 -5
- cartography/intel/trivy/__init__.py +161 -0
- cartography/intel/trivy/scanner.py +363 -0
- cartography/models/aws/acm/certificate.py +75 -0
- cartography/models/aws/cloudtrail/trail.py +24 -0
- cartography/models/aws/s3/notification.py +24 -0
- cartography/models/aws/secretsmanager/secret_version.py +0 -2
- cartography/models/aws/sqs/__init__.py +0 -0
- cartography/models/aws/sqs/queue.py +89 -0
- cartography/models/core/nodes.py +15 -2
- cartography/models/entra/group.py +91 -0
- cartography/models/trivy/__init__.py +0 -0
- cartography/models/trivy/findings.py +66 -0
- cartography/models/trivy/fix.py +66 -0
- cartography/models/trivy/package.py +71 -0
- cartography/sync.py +2 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.105.0.dist-info}/METADATA +3 -2
- {cartography-0.104.0rc3.dist-info → cartography-0.105.0.dist-info}/RECORD +41 -29
- cartography/intel/aws/efs.py +0 -93
- cartography/models/aws/efs/mount_target.py +0 -52
- /cartography/models/aws/{efs → acm}/__init__.py +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.105.0.dist-info}/WHEEL +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.105.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.105.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.105.0.dist-info}/top_level.txt +0 -0
cartography/intel/entra/ou.py
CHANGED
|
@@ -22,12 +22,28 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
|
|
|
22
22
|
Get all OUs from Microsoft Graph API with pagination support
|
|
23
23
|
"""
|
|
24
24
|
all_units: list[AdministrativeUnit] = []
|
|
25
|
-
request = client.directory.administrative_units.request()
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
26
|
+
# Initialize first page request
|
|
27
|
+
current_request = client.directory.administrative_units
|
|
28
|
+
|
|
29
|
+
while current_request:
|
|
30
|
+
try:
|
|
31
|
+
response = await current_request.get()
|
|
32
|
+
if response and response.value:
|
|
33
|
+
all_units.extend(response.value)
|
|
34
|
+
|
|
35
|
+
# Handle next page using OData link
|
|
36
|
+
if response.odata_next_link:
|
|
37
|
+
current_request = client.directory.administrative_units.with_url(
|
|
38
|
+
response.odata_next_link
|
|
39
|
+
)
|
|
40
|
+
else:
|
|
41
|
+
current_request = None
|
|
42
|
+
else:
|
|
43
|
+
current_request = None
|
|
44
|
+
except Exception as e:
|
|
45
|
+
logger.error(f"Failed to retrieve administrative units: {str(e)}")
|
|
46
|
+
current_request = None
|
|
31
47
|
|
|
32
48
|
return all_units
|
|
33
49
|
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import boto3
|
|
5
|
+
from neo4j import Session
|
|
6
|
+
|
|
7
|
+
from cartography.client.aws import list_accounts
|
|
8
|
+
from cartography.client.aws.ecr import get_ecr_images
|
|
9
|
+
from cartography.config import Config
|
|
10
|
+
from cartography.intel.trivy.scanner import cleanup
|
|
11
|
+
from cartography.intel.trivy.scanner import get_json_files_in_s3
|
|
12
|
+
from cartography.intel.trivy.scanner import sync_single_image_from_s3
|
|
13
|
+
from cartography.stats import get_stats_client
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
stat_handler = get_stats_client("trivy.scanner")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@timeit
def get_scan_targets(
    neo4j_session: Session,
    account_ids: list[str] | None = None,
) -> set[str]:
    """
    Collect the URIs of the ECR images recorded in the graph.

    Args:
        neo4j_session: Neo4j session passed through to the graph lookups
        account_ids: Account ids to look up; when None or empty, the accounts
            returned by `list_accounts` are used instead

    Returns:
        Set of image URIs gathered from `get_ecr_images` over every account
    """
    accounts = account_ids if account_ids else list_accounts(neo4j_session)
    # Every record is unpacked as a 5-tuple; only the third field (the image
    # URI) is kept.
    return {
        uri
        for account in accounts
        for _, _, uri, _, _ in get_ecr_images(neo4j_session, account)
    }
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_intersection(
|
|
42
|
+
images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
|
|
43
|
+
) -> list[tuple[str, str]]:
|
|
44
|
+
"""
|
|
45
|
+
Get the intersection of ECR images in the graph and S3 scan results.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
images_in_graph: Set of ECR images in the graph
|
|
49
|
+
json_files: Set of S3 object keys for JSON files
|
|
50
|
+
trivy_s3_prefix: S3 prefix path containing scan results
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
List of tuples (image_uri, s3_object_key)
|
|
54
|
+
"""
|
|
55
|
+
intersection = []
|
|
56
|
+
prefix_len = len(trivy_s3_prefix)
|
|
57
|
+
for s3_object_key in json_files:
|
|
58
|
+
# Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
|
|
59
|
+
# Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
|
|
60
|
+
# Remove the prefix and the .json suffix
|
|
61
|
+
image_uri = s3_object_key[prefix_len:-5]
|
|
62
|
+
|
|
63
|
+
if image_uri in images_in_graph:
|
|
64
|
+
intersection.append((image_uri, s3_object_key))
|
|
65
|
+
|
|
66
|
+
return intersection
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@timeit
def sync_trivy_aws_ecr_from_s3(
    neo4j_session: Session,
    trivy_s3_bucket: str,
    trivy_s3_prefix: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
    boto3_session: boto3.Session,
) -> None:
    """
    Load Trivy scan results stored in S3 for the ECR images in the graph.

    Scan targets from the graph are matched against the JSON object keys
    listed under the bucket/prefix; every match is synced individually and the
    Trivy cleanup runs once at the end.

    Args:
        neo4j_session: Neo4j session for database operations
        trivy_s3_bucket: S3 bucket containing scan results
        trivy_s3_prefix: S3 prefix path containing scan results
        update_tag: Update tag for tracking
        common_job_parameters: Common job parameters for cleanup
        boto3_session: boto3 session for S3 operations

    Raises:
        ValueError: If no image in the graph has a matching JSON file in S3.
            Raised before any load or cleanup step runs.
    """
    logger.info(
        f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
    )

    graph_images: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_s3(
        trivy_s3_bucket,
        trivy_s3_prefix,
        boto3_session,
    )
    matches: list[tuple[str, str]] = _get_intersection(
        graph_images,
        json_files,
        trivy_s3_prefix,
    )

    if not matches:
        # Bail out before cleanup so that a misconfigured bucket/prefix does
        # not result in existing data being removed.
        logger.error(
            "Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
            f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
            "Skipping Trivy sync to avoid potential data loss. "
            "Please check the S3 bucket and prefix configuration. We expect the json files in s3 to be named "
            "`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
            "a folder, it MUST end with a trailing slash '/'. "
        )
        logger.error(f"JSON files in S3: {json_files}")
        raise ValueError("No ECR images with S3 json scan results found.")

    logger.info(f"Processing {len(matches)} ECR images with S3 scan results")
    for image_uri, s3_object_key in matches:
        sync_single_image_from_s3(
            neo4j_session,
            image_uri,
            update_tag,
            trivy_s3_bucket,
            s3_object_key,
            boto3_session,
        )

    cleanup(neo4j_session, common_job_parameters)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@timeit
def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
    """
    Entry point for ingesting Trivy scan results from S3.

    Does nothing (apart from logging) when `config.trivy_s3_bucket` is unset.
    A `None` prefix is replaced by an empty string on the config object itself.

    Args:
        neo4j_session: Neo4j session for database operations
        config: Configuration object containing S3 settings
    """
    # Without a bucket there is nothing to ingest.
    if not config.trivy_s3_bucket:
        logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
        return

    # Normalise a missing prefix to the empty string.
    if config.trivy_s3_prefix is None:
        config.trivy_s3_prefix = ""

    job_parameters = {"UPDATE_TAG": config.update_tag}
    session = boto3.Session()

    sync_trivy_aws_ecr_from_s3(
        neo4j_session,
        config.trivy_s3_bucket,
        config.trivy_s3_prefix,
        config.update_tag,
        job_parameters,
        session,
    )

    # Support other Trivy resource types here e.g. if Google Cloud has images.
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import boto3
|
|
6
|
+
from neo4j import Session
|
|
7
|
+
|
|
8
|
+
from cartography.client.core.tx import load
|
|
9
|
+
from cartography.graph.job import GraphJob
|
|
10
|
+
from cartography.models.trivy.findings import TrivyImageFindingSchema
|
|
11
|
+
from cartography.models.trivy.fix import TrivyFixSchema
|
|
12
|
+
from cartography.models.trivy.package import TrivyPackageSchema
|
|
13
|
+
from cartography.stats import get_stats_client
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
stat_handler = get_stats_client(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _validate_packages(package_list: list[dict]) -> list[dict]:
|
|
21
|
+
"""
|
|
22
|
+
Validates that each package has the required fields.
|
|
23
|
+
Returns only packages that have both InstalledVersion and PkgName.
|
|
24
|
+
"""
|
|
25
|
+
validated_packages: list[dict] = []
|
|
26
|
+
for pkg in package_list:
|
|
27
|
+
if (
|
|
28
|
+
"InstalledVersion" in pkg
|
|
29
|
+
and pkg["InstalledVersion"]
|
|
30
|
+
and "PkgName" in pkg
|
|
31
|
+
and pkg["PkgName"]
|
|
32
|
+
):
|
|
33
|
+
validated_packages.append(pkg)
|
|
34
|
+
else:
|
|
35
|
+
logger.warning(
|
|
36
|
+
"Package object does not have required fields `InstalledVersion` or `PkgName` - skipping."
|
|
37
|
+
)
|
|
38
|
+
return validated_packages
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def transform_scan_results(
    results: list[dict], image_digest: str
) -> tuple[list[dict], list[dict], list[dict]]:
    """
    Flatten raw Trivy scan results into finding, package and fix records.

    One finding and one package record are produced per vulnerability entry;
    a fix record is added when the entry has a non-None `FixedVersion`.
    Package records are passed through `_validate_packages` before returning.

    Args:
        results: Scan classes taken from the Trivy report
        image_digest: Digest stored on each finding and package record

    Returns:
        Tuple of (findings_list, packages_list, fixes_list)

    Raises:
        KeyError: If a vulnerability lacks `VulnerabilityID`, `Severity`,
            `InstalledVersion` or `PkgName`, or a scan class with
            vulnerabilities lacks `Class` or `Type`.
    """
    findings_list: list[dict] = []
    packages_list: list[dict] = []
    fixes_list: list[dict] = []

    for scan_class in results:
        # Sometimes a scan class will have no vulns and Trivy will leave the key undefined instead of showing [].
        if not ("Vulnerabilities" in scan_class and scan_class["Vulnerabilities"]):
            continue

        for vuln in scan_class["Vulnerabilities"]:
            vuln_id = vuln["VulnerabilityID"]
            finding_id = f"TIF|{vuln_id}"

            # Finding record; CVSS fields start as None and are filled below.
            finding = {
                "id": finding_id,
                "VulnerabilityID": vuln_id,
                "cve_id": vuln_id,
                "Description": vuln.get("Description"),
                "LastModifiedDate": vuln.get("LastModifiedDate"),
                "PrimaryURL": vuln.get("PrimaryURL"),
                "PublishedDate": vuln.get("PublishedDate"),
                "Severity": vuln["Severity"],
                "SeveritySource": vuln.get("SeveritySource"),
                "Title": vuln.get("Title"),
                "nvd_v2_score": None,
                "nvd_v2_vector": None,
                "nvd_v3_score": None,
                "nvd_v3_vector": None,
                "redhat_v3_score": None,
                "redhat_v3_vector": None,
                "ubuntu_v3_score": None,
                "ubuntu_v3_vector": None,
                "Class": scan_class["Class"],
                "Type": scan_class["Type"],
                "ImageDigest": image_digest,  # For AFFECTS relationship
            }

            # Copy over whichever CVSS sources the entry provides.
            if "CVSS" in vuln:
                cvss = vuln["CVSS"]
                if "nvd" in cvss:
                    nvd = cvss["nvd"]
                    finding["nvd_v2_score"] = nvd.get("V2Score")
                    finding["nvd_v2_vector"] = nvd.get("V2Vector")
                    finding["nvd_v3_score"] = nvd.get("V3Score")
                    finding["nvd_v3_vector"] = nvd.get("V3Vector")
                if "redhat" in cvss:
                    redhat = cvss["redhat"]
                    finding["redhat_v3_score"] = redhat.get("V3Score")
                    finding["redhat_v3_vector"] = redhat.get("V3Vector")
                if "ubuntu" in cvss:
                    ubuntu = cvss["ubuntu"]
                    finding["ubuntu_v3_score"] = ubuntu.get("V3Score")
                    finding["ubuntu_v3_vector"] = ubuntu.get("V3Vector")

            findings_list.append(finding)

            # Package record, keyed by installed version and package name.
            installed_version = vuln["InstalledVersion"]
            pkg_name = vuln["PkgName"]
            package_id = f"{installed_version}|{pkg_name}"
            package = {
                "id": package_id,
                "InstalledVersion": installed_version,
                "PkgName": pkg_name,
                "Class": scan_class["Class"],
                "Type": scan_class["Type"],
                "ImageDigest": image_digest,  # For DEPLOYED relationship
                "FindingId": finding_id,  # For AFFECTS relationship
            }
            packages_list.append(package)

            # Fix record, only when Trivy reports a fixed version.
            fixed_version = vuln.get("FixedVersion")
            if fixed_version is not None:
                fix = {
                    "id": f"{fixed_version}|{pkg_name}",
                    "FixedVersion": fixed_version,
                    "PackageId": package_id,
                    "FindingId": finding_id,
                }
                fixes_list.append(fix)

    # Validate packages before returning
    packages_list = _validate_packages(packages_list)
    return findings_list, packages_list, fixes_list
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@timeit
def get_json_files_in_s3(
    s3_bucket: str, s3_prefix: str, boto3_session: boto3.Session
) -> set[str]:
    """
    Collect the keys of the JSON objects stored under an S3 prefix.

    Args:
        s3_bucket: S3 bucket name containing scan results
        s3_prefix: S3 prefix path containing scan results
        boto3_session: boto3 session for dependency injection

    Returns:
        Set of S3 object keys that end with `.json` and start with `s3_prefix`

    Raises:
        Exception: Any error raised while listing is logged and re-raised.
    """
    s3_client = boto3_session.client("s3")

    try:
        pages = s3_client.get_paginator("list_objects_v2").paginate(
            Bucket=s3_bucket,
            Prefix=s3_prefix,
        )
        results = set()

        for page in pages:
            # Pages without a "Contents" entry contribute nothing.
            for obj in page.get("Contents", []):
                key = obj["Key"]
                # Keep JSON files only, and only those under our prefix.
                if key.endswith(".json") and key.startswith(s3_prefix):
                    results.add(key)

    except Exception as e:
        logger.error(
            f"Error listing S3 objects in bucket {s3_bucket} with prefix {s3_prefix}: {e}"
        )
        raise

    logger.info(f"Found {len(results)} json files in s3://{s3_bucket}/{s3_prefix}")
    return results
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@timeit
def cleanup(neo4j_session: Session, common_job_parameters: dict[str, Any]) -> None:
    """
    Run the schema-derived cleanup job for each Trivy node type.

    Jobs run in order: findings, then packages, then fixes.

    Args:
        neo4j_session: Neo4j session the cleanup jobs run against
        common_job_parameters: Parameters handed to each cleanup job
    """
    logger.info("Running Trivy cleanup")
    schema_classes = (TrivyImageFindingSchema, TrivyPackageSchema, TrivyFixSchema)
    for schema_cls in schema_classes:
        job = GraphJob.from_node_schema(schema_cls(), common_job_parameters)
        job.run(neo4j_session)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@timeit
def load_scan_vulns(
    neo4j_session: Session,
    findings_list: list[dict[str, Any]],
    update_tag: int,
) -> None:
    """
    Write finding records to Neo4j using `TrivyImageFindingSchema`.

    Args:
        neo4j_session: Neo4j session for database operations
        findings_list: Finding records to load
        update_tag: Value passed to `load` as `lastupdated`
    """
    schema = TrivyImageFindingSchema()
    load(neo4j_session, schema, findings_list, lastupdated=update_tag)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@timeit
def load_scan_packages(
    neo4j_session: Session,
    packages_list: list[dict[str, Any]],
    update_tag: int,
) -> None:
    """
    Write package records to Neo4j using `TrivyPackageSchema`.

    Args:
        neo4j_session: Neo4j session for database operations
        packages_list: Package records to load
        update_tag: Value passed to `load` as `lastupdated`
    """
    schema = TrivyPackageSchema()
    load(neo4j_session, schema, packages_list, lastupdated=update_tag)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@timeit
def load_scan_fixes(
    neo4j_session: Session,
    fixes_list: list[dict[str, Any]],
    update_tag: int,
) -> None:
    """
    Write fix records to Neo4j using `TrivyFixSchema`.

    Args:
        neo4j_session: Neo4j session for database operations
        fixes_list: Fix records to load
        update_tag: Value passed to `load` as `lastupdated`
    """
    schema = TrivyFixSchema()
    load(neo4j_session, schema, fixes_list, lastupdated=update_tag)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
@timeit
def read_scan_results_from_s3(
    boto3_session: boto3.Session,
    s3_bucket: str,
    s3_object_key: str,
    image_uri: str,
) -> tuple[list[dict], str | None]:
    """
    Download one Trivy JSON report from S3 and pull out what the sync needs.

    Args:
        boto3_session: boto3 session for S3 operations
        s3_bucket: S3 bucket containing scan results
        s3_object_key: S3 object key for the scan results
        image_uri: ECR image URI (for logging purposes)

    Returns:
        Tuple of (scan results, image digest). The results are the value of
        the report's "Results" key, or an empty list when that key is missing
        or empty. The digest is the part after "@" of the first entry in
        `Metadata.RepoDigests`, or None when it cannot be determined.
    """
    s3_client = boto3_session.client("s3")

    # Fetch the object and decode it as UTF-8 JSON.
    logger.debug(f"Reading scan results from S3: s3://{s3_bucket}/{s3_object_key}")
    body = s3_client.get_object(Bucket=s3_bucket, Key=s3_object_key)["Body"]
    trivy_data = json.loads(body.read().decode("utf-8"))

    # A missing and an empty "Results" value are handled the same way.
    results = trivy_data["Results"] if "Results" in trivy_data else None
    if not results:
        stat_handler.incr("image_scan_no_results_count")
        logger.warning(
            f"S3 scan data did not contain a `Results` key for URI = {image_uri}; continuing."
        )
        results = []

    image_digest = None
    metadata = trivy_data["Metadata"] if "Metadata" in trivy_data else None
    if metadata:
        repo_digests = metadata.get("RepoDigests", [])
        # Sample input: 000000000000.dkr.ecr.us-east-1.amazonaws.com/test-repository@sha256:88016
        # Sample output: sha256:88016
        if repo_digests and "@" in repo_digests[0]:
            image_digest = repo_digests[0].split("@")[1]

    return results, image_digest
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
@timeit
def sync_single_image_from_s3(
    neo4j_session: Session,
    image_uri: str,
    update_tag: int,
    s3_bucket: str,
    s3_object_key: str,
    boto3_session: boto3.Session,
) -> None:
    """
    Ingest the Trivy report of a single image from S3 into Neo4j.

    The report is read, transformed, and loaded as findings, packages and
    fixes (in that order). Images whose report yields no digest are skipped
    with a warning.

    Args:
        neo4j_session: Neo4j session for database operations
        image_uri: ECR image URI
        update_tag: Update tag for tracking
        s3_bucket: S3 bucket containing scan results
        s3_object_key: S3 object key for this image's scan results
        boto3_session: boto3 session for S3 operations

    Raises:
        Exception: Any failure while reading, transforming or loading is
            logged and re-raised.
    """
    try:
        results, image_digest = read_scan_results_from_s3(
            boto3_session,
            s3_bucket,
            s3_object_key,
            image_uri,
        )
        if not image_digest:
            logger.warning(f"No image digest found for {image_uri}; skipping over.")
            return

        findings, packages, fixes = transform_scan_results(results, image_digest)

        stat_handler.incr("image_scan_cve_count", len(findings))

        # Load order: findings first, then packages, then fixes.
        load_scan_vulns(neo4j_session, findings, update_tag=update_tag)
        load_scan_packages(neo4j_session, packages, update_tag=update_tag)
        load_scan_fixes(neo4j_session, fixes, update_tag=update_tag)
        stat_handler.incr("images_processed_count")

    except Exception as e:
        logger.error(
            f"Failed to process S3 scan results for {image_uri} from {s3_object_key}: {e}"
        )
        raise
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
|
+
from cartography.models.core.relationships import LinkDirection
|
|
9
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import OtherRelationships
|
|
11
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class ACMCertificateNodeProperties(CartographyNodeProperties):
    """
    Property mapping for `ACMCertificate` nodes.

    Each attribute names a node property; its `PropertyRef` names the field
    of the input record (or keyword argument) that the value is read from.
    """

    # `id` and `arn` are both read from "Arn"; only `arn` asks for an extra index.
    id: PropertyRef = PropertyRef("Arn")
    arn: PropertyRef = PropertyRef("Arn", extra_index=True)

    # Certificate attributes read from the input record.
    domainname: PropertyRef = PropertyRef("DomainName")
    type: PropertyRef = PropertyRef("Type")
    status: PropertyRef = PropertyRef("Status")
    key_algorithm: PropertyRef = PropertyRef("KeyAlgorithm")
    signature_algorithm: PropertyRef = PropertyRef("SignatureAlgorithm")
    not_before: PropertyRef = PropertyRef("NotBefore")
    not_after: PropertyRef = PropertyRef("NotAfter")
    in_use_by: PropertyRef = PropertyRef("InUseBy")

    # Values marked `set_in_kwargs=True`.
    region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class ACMCertificateToAWSAccountRelProperties(CartographyRelProperties):
    """Relationship properties used by `ACMCertificateToAWSAccountRel`."""

    # Marked `set_in_kwargs=True`, like the other `lastupdated` refs here.
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class ACMCertificateToAWSAccountRel(CartographyRelSchema):
    """
    `RESOURCE` relationship (direction INWARD) between an ACM certificate
    and the `AWSAccount` node whose `id` equals the `AWS_ID` kwarg.
    """

    target_node_label: str = "AWSAccount"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("AWS_ID", set_in_kwargs=True)},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: ACMCertificateToAWSAccountRelProperties = ACMCertificateToAWSAccountRelProperties()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class ACMCertificateToELBV2ListenerRelProperties(CartographyRelProperties):
    """Relationship properties used by `ACMCertificateToELBV2ListenerRel`."""

    # Marked `set_in_kwargs=True`, like the other `lastupdated` refs here.
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class ACMCertificateToELBV2ListenerRel(CartographyRelSchema):
    """
    `USED_BY` relationship (direction OUTWARD) between an ACM certificate and
    `ELBV2Listener` nodes.

    Targets are matched on `id` against the record's `ELBV2ListenerArns`
    field, which is declared `one_to_many`.
    """

    target_node_label: str = "ELBV2Listener"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("ELBV2ListenerArns", one_to_many=True)},
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "USED_BY"
    properties: ACMCertificateToELBV2ListenerRelProperties = ACMCertificateToELBV2ListenerRelProperties()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class ACMCertificateSchema(CartographyNodeSchema):
    """
    Node schema for `ACMCertificate`.

    Combines the node properties, the AWS account relationship as the
    sub-resource relationship, and the ELBV2 listener relationship as the
    only additional relationship.
    """

    label: str = "ACMCertificate"
    properties: ACMCertificateNodeProperties = ACMCertificateNodeProperties()
    sub_resource_relationship: ACMCertificateToAWSAccountRel = ACMCertificateToAWSAccountRel()
    other_relationships: OtherRelationships = OtherRelationships(
        [
            ACMCertificateToELBV2ListenerRel(),
        ],
    )
|
|
@@ -7,6 +7,7 @@ from cartography.models.core.relationships import CartographyRelProperties
|
|
|
7
7
|
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
8
|
from cartography.models.core.relationships import LinkDirection
|
|
9
9
|
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import OtherRelationships
|
|
10
11
|
from cartography.models.core.relationships import TargetNodeMatcher
|
|
11
12
|
|
|
12
13
|
|
|
@@ -54,8 +55,31 @@ class CloudTrailToAWSAccountRel(CartographyRelSchema):
|
|
|
54
55
|
)
|
|
55
56
|
|
|
56
57
|
|
|
58
|
+
@dataclass(frozen=True)
class CloudTrailTrailToS3BucketRelProperties(CartographyRelProperties):
    """Relationship properties used by `CloudTrailTrailToS3BucketRel`."""

    # Marked `set_in_kwargs=True`, like the other `lastupdated` refs here.
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass(frozen=True)
class CloudTrailTrailToS3BucketRel(CartographyRelSchema):
    """
    `LOGS_TO` relationship (direction OUTWARD) between a CloudTrail trail and
    the `S3Bucket` node whose `name` equals the record's `S3BucketName`.
    """

    target_node_label: str = "S3Bucket"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"name": PropertyRef("S3BucketName")},
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "LOGS_TO"
    properties: CloudTrailTrailToS3BucketRelProperties = CloudTrailTrailToS3BucketRelProperties()
|
|
74
|
+
|
|
75
|
+
|
|
57
76
|
@dataclass(frozen=True)
class CloudTrailTrailSchema(CartographyNodeSchema):
    """
    Node schema for `CloudTrailTrail`.

    Combines the node properties, the AWS account relationship as the
    sub-resource relationship, and the S3 bucket relationship as the only
    additional relationship.
    """

    label: str = "CloudTrailTrail"
    properties: CloudTrailTrailNodeProperties = CloudTrailTrailNodeProperties()
    sub_resource_relationship: CloudTrailToAWSAccountRel = CloudTrailToAWSAccountRel()
    other_relationships: OtherRelationships = OtherRelationships(
        [CloudTrailTrailToS3BucketRel()],
    )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
5
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
6
|
+
from cartography.models.core.relationships import LinkDirection
|
|
7
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
8
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class S3BucketToSNSTopicRelProperties(CartographyRelProperties):
    """Relationship properties used by `S3BucketToSNSTopicRel`."""

    # Marked `set_in_kwargs=True`, like the other `lastupdated` refs here.
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class S3BucketToSNSTopicRel(CartographyRelSchema):
    """
    `NOTIFIES` relationship (direction OUTWARD) between an S3 bucket and the
    `SNSTopic` node whose `arn` equals the record's `TopicArn`.
    """

    target_node_label: str = "SNSTopic"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"arn": PropertyRef("TopicArn")},
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "NOTIFIES"
    properties: S3BucketToSNSTopicRelProperties = (
        S3BucketToSNSTopicRelProperties()
    )
|