regscale-cli 6.27.3.0__py3-none-any.whl → 6.28.0.0__py3-none-any.whl
Potentially problematic release.
- regscale/_version.py +1 -1
- regscale/core/app/utils/app_utils.py +11 -2
- regscale/dev/cli.py +26 -0
- regscale/dev/version.py +72 -0
- regscale/integrations/commercial/__init__.py +15 -1
- regscale/integrations/commercial/amazon/amazon/__init__.py +0 -0
- regscale/integrations/commercial/amazon/amazon/common.py +204 -0
- regscale/integrations/commercial/amazon/common.py +48 -58
- regscale/integrations/commercial/aws/audit_manager_compliance.py +2671 -0
- regscale/integrations/commercial/aws/cli.py +3093 -55
- regscale/integrations/commercial/aws/cloudtrail_control_mappings.py +333 -0
- regscale/integrations/commercial/aws/cloudtrail_evidence.py +501 -0
- regscale/integrations/commercial/aws/cloudwatch_control_mappings.py +357 -0
- regscale/integrations/commercial/aws/cloudwatch_evidence.py +490 -0
- regscale/integrations/commercial/aws/config_compliance.py +914 -0
- regscale/integrations/commercial/aws/conformance_pack_mappings.py +198 -0
- regscale/integrations/commercial/aws/evidence_generator.py +283 -0
- regscale/integrations/commercial/aws/guardduty_control_mappings.py +340 -0
- regscale/integrations/commercial/aws/guardduty_evidence.py +1053 -0
- regscale/integrations/commercial/aws/iam_control_mappings.py +368 -0
- regscale/integrations/commercial/aws/iam_evidence.py +574 -0
- regscale/integrations/commercial/aws/inventory/__init__.py +223 -22
- regscale/integrations/commercial/aws/inventory/base.py +107 -5
- regscale/integrations/commercial/aws/inventory/resources/audit_manager.py +513 -0
- regscale/integrations/commercial/aws/inventory/resources/cloudtrail.py +315 -0
- regscale/integrations/commercial/aws/inventory/resources/cloudtrail_logs_metadata.py +476 -0
- regscale/integrations/commercial/aws/inventory/resources/cloudwatch.py +191 -0
- regscale/integrations/commercial/aws/inventory/resources/compute.py +66 -9
- regscale/integrations/commercial/aws/inventory/resources/config.py +464 -0
- regscale/integrations/commercial/aws/inventory/resources/containers.py +74 -9
- regscale/integrations/commercial/aws/inventory/resources/database.py +106 -31
- regscale/integrations/commercial/aws/inventory/resources/guardduty.py +286 -0
- regscale/integrations/commercial/aws/inventory/resources/iam.py +470 -0
- regscale/integrations/commercial/aws/inventory/resources/inspector.py +476 -0
- regscale/integrations/commercial/aws/inventory/resources/integration.py +175 -61
- regscale/integrations/commercial/aws/inventory/resources/kms.py +447 -0
- regscale/integrations/commercial/aws/inventory/resources/networking.py +103 -67
- regscale/integrations/commercial/aws/inventory/resources/s3.py +394 -0
- regscale/integrations/commercial/aws/inventory/resources/security.py +268 -72
- regscale/integrations/commercial/aws/inventory/resources/securityhub.py +473 -0
- regscale/integrations/commercial/aws/inventory/resources/storage.py +53 -29
- regscale/integrations/commercial/aws/inventory/resources/systems_manager.py +657 -0
- regscale/integrations/commercial/aws/inventory/resources/vpc.py +655 -0
- regscale/integrations/commercial/aws/kms_control_mappings.py +288 -0
- regscale/integrations/commercial/aws/kms_evidence.py +879 -0
- regscale/integrations/commercial/aws/ocsf/__init__.py +7 -0
- regscale/integrations/commercial/aws/ocsf/constants.py +115 -0
- regscale/integrations/commercial/aws/ocsf/mapper.py +435 -0
- regscale/integrations/commercial/aws/org_control_mappings.py +286 -0
- regscale/integrations/commercial/aws/org_evidence.py +666 -0
- regscale/integrations/commercial/aws/s3_control_mappings.py +356 -0
- regscale/integrations/commercial/aws/s3_evidence.py +632 -0
- regscale/integrations/commercial/aws/scanner.py +851 -206
- regscale/integrations/commercial/aws/security_hub.py +319 -0
- regscale/integrations/commercial/aws/session_manager.py +282 -0
- regscale/integrations/commercial/aws/ssm_control_mappings.py +291 -0
- regscale/integrations/commercial/aws/ssm_evidence.py +492 -0
- regscale/integrations/compliance_integration.py +308 -38
- regscale/integrations/due_date_handler.py +3 -0
- regscale/integrations/scanner_integration.py +399 -84
- regscale/models/integration_models/cisa_kev_data.json +34 -4
- regscale/models/integration_models/synqly_models/capabilities.json +1 -1
- regscale/models/integration_models/synqly_models/connectors/vulnerabilities.py +17 -9
- regscale/models/regscale_models/assessment.py +2 -1
- regscale/models/regscale_models/control_objective.py +74 -5
- regscale/models/regscale_models/file.py +2 -0
- regscale/models/regscale_models/issue.py +2 -5
- {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.0.0.dist-info}/METADATA +1 -1
- {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.0.0.dist-info}/RECORD +112 -33
- tests/regscale/integrations/commercial/aws/__init__.py +0 -0
- tests/regscale/integrations/commercial/aws/test_audit_manager_compliance.py +1304 -0
- tests/regscale/integrations/commercial/aws/test_audit_manager_evidence_aggregation.py +341 -0
- tests/regscale/integrations/commercial/aws/test_aws_audit_manager_collector.py +1155 -0
- tests/regscale/integrations/commercial/aws/test_aws_cloudtrail_collector.py +534 -0
- tests/regscale/integrations/commercial/aws/test_aws_config_collector.py +400 -0
- tests/regscale/integrations/commercial/aws/test_aws_guardduty_collector.py +315 -0
- tests/regscale/integrations/commercial/aws/test_aws_iam_collector.py +458 -0
- tests/regscale/integrations/commercial/aws/test_aws_inspector_collector.py +353 -0
- tests/regscale/integrations/commercial/aws/test_aws_inventory_integration.py +530 -0
- tests/regscale/integrations/commercial/aws/test_aws_kms_collector.py +919 -0
- tests/regscale/integrations/commercial/aws/test_aws_s3_collector.py +722 -0
- tests/regscale/integrations/commercial/aws/test_aws_scanner_integration.py +722 -0
- tests/regscale/integrations/commercial/aws/test_aws_securityhub_collector.py +792 -0
- tests/regscale/integrations/commercial/aws/test_aws_systems_manager_collector.py +918 -0
- tests/regscale/integrations/commercial/aws/test_aws_vpc_collector.py +996 -0
- tests/regscale/integrations/commercial/aws/test_cli_evidence.py +431 -0
- tests/regscale/integrations/commercial/aws/test_cloudtrail_control_mappings.py +452 -0
- tests/regscale/integrations/commercial/aws/test_cloudtrail_evidence.py +788 -0
- tests/regscale/integrations/commercial/aws/test_config_compliance.py +298 -0
- tests/regscale/integrations/commercial/aws/test_conformance_pack_mappings.py +200 -0
- tests/regscale/integrations/commercial/aws/test_evidence_generator.py +386 -0
- tests/regscale/integrations/commercial/aws/test_guardduty_control_mappings.py +564 -0
- tests/regscale/integrations/commercial/aws/test_guardduty_evidence.py +1041 -0
- tests/regscale/integrations/commercial/aws/test_iam_control_mappings.py +718 -0
- tests/regscale/integrations/commercial/aws/test_iam_evidence.py +1375 -0
- tests/regscale/integrations/commercial/aws/test_kms_control_mappings.py +656 -0
- tests/regscale/integrations/commercial/aws/test_kms_evidence.py +1163 -0
- tests/regscale/integrations/commercial/aws/test_ocsf_mapper.py +370 -0
- tests/regscale/integrations/commercial/aws/test_org_control_mappings.py +546 -0
- tests/regscale/integrations/commercial/aws/test_org_evidence.py +1240 -0
- tests/regscale/integrations/commercial/aws/test_s3_control_mappings.py +672 -0
- tests/regscale/integrations/commercial/aws/test_s3_evidence.py +987 -0
- tests/regscale/integrations/commercial/aws/test_scanner_evidence.py +373 -0
- tests/regscale/integrations/commercial/aws/test_security_hub_config_filtering.py +539 -0
- tests/regscale/integrations/commercial/aws/test_session_manager.py +516 -0
- tests/regscale/integrations/commercial/aws/test_ssm_control_mappings.py +588 -0
- tests/regscale/integrations/commercial/aws/test_ssm_evidence.py +735 -0
- tests/regscale/integrations/commercial/test_aws.py +55 -56
- {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.0.0.dist-info}/LICENSE +0 -0
- {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.0.0.dist-info}/WHEEL +0 -0
- {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.0.0.dist-info}/entry_points.txt +0 -0
- {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.0.0.dist-info}/top_level.txt +0 -0
regscale/integrations/commercial/aws/inventory/resources/cloudtrail_logs_metadata.py
@@ -0,0 +1,476 @@
+"""AWS CloudTrail S3 Logs Metadata Collector.
+
+This collector retrieves metadata about CloudTrail log files stored in S3
+WITHOUT downloading or unzipping the files. It collects:
+- File names
+- File sizes
+- Last modified dates
+- S3 keys/paths
+
+This metadata can then be saved as evidence in RegScale.
+"""
+
+import json
+import logging
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+from botocore.exceptions import ClientError
+
+from regscale.integrations.commercial.aws.inventory.base import BaseCollector
+
+logger = logging.getLogger("regscale")
+
+
+class CloudTrailLogsMetadataCollector(BaseCollector):
+    """Collector for CloudTrail log file metadata from S3."""
+
+    def __init__(
+        self,
+        session: Any,
+        region: str,
+        account_id: Optional[str] = None,
+        days_back: int = 30,
+        max_files: Optional[int] = None,
+        tags: Optional[Dict[str, str]] = None,
+    ):
+        """
+        Initialize CloudTrail logs metadata collector.
+
+        :param session: AWS session to use for API calls
+        :param str region: AWS region to collect from
+        :param str account_id: Optional AWS account ID to filter resources
+        :param int days_back: Number of days back to collect logs (default: 30)
+        :param int max_files: Optional maximum number of files to collect metadata for
+        :param dict tags: Optional tags to filter trails (key-value pairs)
+        """
+        super().__init__(session, region)
+        self.account_id = account_id
+        self.days_back = days_back
+        self.max_files = max_files
+        self.tags = tags or {}
+
+    def collect(self) -> Dict[str, Any]:
+        """
+        Collect CloudTrail log file metadata from S3 buckets.
+
+        :return: Dictionary containing log file metadata
+        :rtype: Dict[str, Any]
+        """
+        result = {
+            "SnapshotDate": datetime.now(timezone.utc).isoformat(),
+            "Region": self.region,
+            "AccountId": self.account_id,
+            "Trails": [],
+            "TotalFiles": 0,
+            "TotalSize": 0,
+            "CollectionPeriodDays": self.days_back,
+        }
+
+        try:
+            # First, get all CloudTrail trails to find S3 buckets
+            cloudtrail_client = self._get_client("cloudtrail")
+            trails = self._list_trails(cloudtrail_client)
+
+            for trail in trails:
+                trail_arn = trail.get("TrailARN", "")
+                trail_name = trail.get("Name", "")
+
+                # Get detailed trail information to find S3 bucket
+                trail_details = self._describe_trail(cloudtrail_client, trail_arn)
+                if not trail_details:
+                    continue
+
+                # Filter by tags if specified
+                if self.tags:
+                    trail_tags = self._get_trail_tags(cloudtrail_client, trail_arn)
+                    if not self._matches_tags(trail_tags):
+                        logger.debug(f"Skipping trail {trail_name} - does not match tag filters")
+                        continue
+
+                s3_bucket_name = trail_details.get("S3BucketName")
+                s3_prefix = trail_details.get("S3KeyPrefix", "")
+
+                if not s3_bucket_name:
+                    logger.warning(f"Trail {trail_name} does not have an S3 bucket configured")
+                    continue
+
+                logger.info(f"Collecting log metadata from S3 bucket: {s3_bucket_name} for trail: {trail_name}")
+
+                # Collect metadata for this trail's logs
+                trail_log_metadata = self._collect_s3_log_metadata(
+                    s3_bucket_name=s3_bucket_name,
+                    s3_prefix=s3_prefix,
+                    trail_name=trail_name,
+                    trail_arn=trail_arn,
+                )
+
+                result["Trails"].append(trail_log_metadata)
+                result["TotalFiles"] += trail_log_metadata["FileCount"]
+                result["TotalSize"] += trail_log_metadata["TotalSize"]
+
+            logger.info(
+                f"Collected metadata for {result['TotalFiles']} CloudTrail log files "
+                f"({self._format_size(result['TotalSize'])}) from {len(result['Trails'])} trail(s)"
+            )
+
+        except ClientError as e:
+            self._handle_error(e, "CloudTrail logs metadata")
+        except Exception as e:
+            logger.error(f"Unexpected error collecting CloudTrail logs metadata: {e}", exc_info=True)
+
+        return result
+
+    def _list_trails(self, client: Any) -> List[Dict[str, Any]]:
+        """
+        List all CloudTrail trails.
+
+        :param client: CloudTrail client
+        :return: List of trail summaries
+        :rtype: List[Dict[str, Any]]
+        """
+        try:
+            response = client.list_trails()
+            return response.get("Trails", [])
+        except ClientError as e:
+            if e.response["Error"]["Code"] == "AccessDeniedException":
+                logger.warning(f"Access denied to list CloudTrail trails in {self.region}")
+                return []
+            raise
+
+    def _describe_trail(self, client: Any, trail_arn: str) -> Optional[Dict[str, Any]]:
+        """
+        Get detailed information about a specific trail.
+
+        :param client: CloudTrail client
+        :param str trail_arn: Trail ARN
+        :return: Trail details or None if not found
+        :rtype: Optional[Dict[str, Any]]
+        """
+        try:
+            response = client.describe_trails(trailNameList=[trail_arn])
+            trails = response.get("trailList", [])
+            if trails:
+                trail = trails[0]
+                # Remove AWS response metadata for consistency
+                trail.pop("ResponseMetadata", None)
+                return trail
+            return None
+        except ClientError as e:
+            logger.error(f"Error describing trail {trail_arn}: {e}")
+            return None
+
+    def _collect_s3_log_metadata(
+        self, s3_bucket_name: str, s3_prefix: str, trail_name: str, trail_arn: str
+    ) -> Dict[str, Any]:
+        """
+        Collect metadata for CloudTrail log files in an S3 bucket.
+
+        This recursively collects ALL log files across all regions and dates
+        within the CloudTrail folder structure.
+
+        :param str s3_bucket_name: Name of the S3 bucket
+        :param str s3_prefix: S3 key prefix for the trail logs
+        :param str trail_name: Name of the CloudTrail trail
+        :param str trail_arn: ARN of the CloudTrail trail
+        :return: Dictionary containing log file metadata
+        :rtype: Dict[str, Any]
+        """
+        s3_client = self._get_client("s3")
+        full_prefix = self._build_s3_prefix(s3_prefix)
+        trail_metadata = self._initialize_trail_metadata(trail_name, trail_arn, s3_bucket_name, s3_prefix, full_prefix)
+
+        try:
+            logger.info(f"Scanning S3 bucket {s3_bucket_name} with prefix: {full_prefix}")
+            self._scan_s3_objects(s3_client, s3_bucket_name, full_prefix, trail_metadata)
+            self._finalize_trail_metadata(trail_metadata, trail_name)
+
+        except ClientError as e:
+            self._handle_s3_client_error(e, s3_bucket_name)
+        except Exception as e:
+            logger.error(f"Unexpected error collecting S3 log metadata: {e}", exc_info=True)
+
+        return trail_metadata
+
+    def _build_s3_prefix(self, s3_prefix: str) -> str:
+        """
+        Build the full S3 prefix for CloudTrail log files.
+
+        :param str s3_prefix: S3 key prefix for the trail logs
+        :return: Full S3 prefix
+        :rtype: str
+        """
+        full_prefix = f"{s3_prefix}/AWSLogs/" if s3_prefix else "AWSLogs/"
+
+        if self.account_id:
+            full_prefix = f"{full_prefix}{self.account_id}/CloudTrail/"
+        else:
+            full_prefix = f"{full_prefix}*/CloudTrail/"
+
+        return full_prefix
+
+    def _initialize_trail_metadata(
+        self, trail_name: str, trail_arn: str, s3_bucket_name: str, s3_prefix: str, full_prefix: str
+    ) -> Dict[str, Any]:
+        """
+        Initialize the trail metadata dictionary.
+
+        :param str trail_name: Name of the CloudTrail trail
+        :param str trail_arn: ARN of the CloudTrail trail
+        :param str s3_bucket_name: Name of the S3 bucket
+        :param str s3_prefix: S3 key prefix for the trail logs
+        :param str full_prefix: Full S3 prefix for log files
+        :return: Initialized trail metadata dictionary
+        :rtype: Dict[str, Any]
+        """
+        return {
+            "TrailName": trail_name,
+            "TrailARN": trail_arn,
+            "S3BucketName": s3_bucket_name,
+            "S3Prefix": s3_prefix,
+            "FullLogPrefix": full_prefix,
+            "FileCount": 0,
+            "TotalSize": 0,
+            "Files": [],
+            "FilesByRegion": {},
+            "FilesByDate": {},
+        }
+
+    def _scan_s3_objects(
+        self, s3_client: Any, s3_bucket_name: str, full_prefix: str, trail_metadata: Dict[str, Any]
+    ) -> None:
+        """
+        Scan S3 objects and collect metadata for CloudTrail log files.
+
+        :param s3_client: S3 client
+        :param str s3_bucket_name: Name of the S3 bucket
+        :param str full_prefix: Full S3 prefix for log files
+        :param dict trail_metadata: Trail metadata dictionary to populate
+        """
+        paginator = s3_client.get_paginator("list_objects_v2")
+        file_count = 0
+
+        for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=full_prefix):
+            if "Contents" not in page:
+                logger.info(f"No log files found in S3 bucket {s3_bucket_name} with prefix {full_prefix}")
+                break
+
+            for obj in page["Contents"]:
+                if not obj["Key"].endswith(".json.gz"):
+                    continue
+
+                if self._max_files_reached(file_count):
+                    return
+
+                self._process_s3_object(obj, trail_metadata)
+                file_count += 1
+
+            if self._max_files_reached(file_count):
+                break
+
+    def _max_files_reached(self, file_count: int) -> bool:
+        """
+        Check if the maximum file limit has been reached.
+
+        :param int file_count: Current file count
+        :return: True if max files reached
+        :rtype: bool
+        """
+        if self.max_files and file_count >= self.max_files:
+            logger.info(f"Reached maximum file limit: {self.max_files}")
+            return True
+        return False
+
+    def _process_s3_object(self, obj: Dict[str, Any], trail_metadata: Dict[str, Any]) -> None:
+        """
+        Process a single S3 object and add its metadata to the trail metadata.
+
+        :param dict obj: S3 object metadata
+        :param dict trail_metadata: Trail metadata dictionary to update
+        """
+        key = obj["Key"]
+        file_region, file_date = self._extract_region_and_date(key)
+
+        file_metadata = {
+            "Key": key,
+            "FileName": key.split("/")[-1],
+            "Region": file_region,
+            "Date": file_date,
+            "Size": obj["Size"],
+            "SizeFormatted": self._format_size(obj["Size"]),
+            "LastModified": obj["LastModified"].isoformat(),
+            "LastModifiedTimestamp": obj["LastModified"].timestamp(),
+            "ETag": obj.get("ETag", "").strip('"'),
+            "StorageClass": obj.get("StorageClass", "STANDARD"),
+        }
+
+        trail_metadata["Files"].append(file_metadata)
+        trail_metadata["TotalSize"] += obj["Size"]
+
+        self._organize_by_region(trail_metadata, file_region, obj["Size"], file_metadata["FileName"])
+        self._organize_by_date(trail_metadata, file_date, obj["Size"], file_metadata["FileName"])
+
+    def _extract_region_and_date(self, key: str) -> tuple:
+        """
+        Extract region and date from S3 key.
+
+        :param str key: S3 object key
+        :return: Tuple of (region, date)
+        :rtype: tuple
+        """
+        key_parts = key.split("/")
+        try:
+            ct_index = key_parts.index("CloudTrail")
+            file_region = key_parts[ct_index + 1] if len(key_parts) > ct_index + 1 else "unknown"
+            file_year = key_parts[ct_index + 2] if len(key_parts) > ct_index + 2 else ""
+            file_month = key_parts[ct_index + 3] if len(key_parts) > ct_index + 3 else ""
+            file_day = key_parts[ct_index + 4] if len(key_parts) > ct_index + 4 else ""
+            file_date = f"{file_year}-{file_month}-{file_day}" if file_year else "unknown"
+        except (ValueError, IndexError):
+            file_region = "unknown"
+            file_date = "unknown"
+
+        return file_region, file_date
+
+    def _organize_by_region(
+        self, trail_metadata: Dict[str, Any], file_region: str, file_size: int, file_name: str
+    ) -> None:
+        """
+        Organize file metadata by region.
+
+        :param dict trail_metadata: Trail metadata dictionary to update
+        :param str file_region: AWS region
+        :param int file_size: File size in bytes
+        :param str file_name: File name
+        """
+        if file_region not in trail_metadata["FilesByRegion"]:
+            trail_metadata["FilesByRegion"][file_region] = {"Count": 0, "TotalSize": 0, "Files": []}
+
+        trail_metadata["FilesByRegion"][file_region]["Count"] += 1
+        trail_metadata["FilesByRegion"][file_region]["TotalSize"] += file_size
+        trail_metadata["FilesByRegion"][file_region]["Files"].append(file_name)
+
+    def _organize_by_date(self, trail_metadata: Dict[str, Any], file_date: str, file_size: int, file_name: str) -> None:
+        """
+        Organize file metadata by date.
+
+        :param dict trail_metadata: Trail metadata dictionary to update
+        :param str file_date: File date (YYYY-MM-DD)
+        :param int file_size: File size in bytes
+        :param str file_name: File name
+        """
+        if file_date not in trail_metadata["FilesByDate"]:
+            trail_metadata["FilesByDate"][file_date] = {"Count": 0, "TotalSize": 0, "Files": []}
+
+        trail_metadata["FilesByDate"][file_date]["Count"] += 1
+        trail_metadata["FilesByDate"][file_date]["TotalSize"] += file_size
+        trail_metadata["FilesByDate"][file_date]["Files"].append(file_name)
+
+    def _finalize_trail_metadata(self, trail_metadata: Dict[str, Any], trail_name: str) -> None:
+        """
+        Finalize trail metadata by calculating date ranges and logging results.
+
+        :param dict trail_metadata: Trail metadata dictionary to finalize
+        :param str trail_name: Name of the CloudTrail trail
+        """
+        trail_metadata["FileCount"] = len(trail_metadata["Files"])
+
+        if trail_metadata["Files"]:
+            timestamps = [f["LastModifiedTimestamp"] for f in trail_metadata["Files"]]
+            trail_metadata["OldestLogDate"] = datetime.fromtimestamp(min(timestamps)).isoformat()
+            trail_metadata["NewestLogDate"] = datetime.fromtimestamp(max(timestamps)).isoformat()
+
+        logger.info(
+            f"Found {trail_metadata['FileCount']} log files "
+            f"({self._format_size(trail_metadata['TotalSize'])}) for trail {trail_name}"
+        )
+
+    def _handle_s3_client_error(self, error: ClientError, s3_bucket_name: str) -> None:
+        """
+        Handle S3 client errors.
+
+        :param ClientError error: The client error
+        :param str s3_bucket_name: Name of the S3 bucket
+        """
+        error_code = error.response["Error"]["Code"]
+        if error_code == "NoSuchBucket":
+            logger.error(f"S3 bucket does not exist: {s3_bucket_name}")
+        elif error_code == "AccessDenied":
+            logger.error(f"Access denied to S3 bucket: {s3_bucket_name}")
+        else:
+            logger.error(f"Error accessing S3 bucket {s3_bucket_name}: {error}")
+
+    def _get_trail_tags(self, client: Any, trail_arn: str) -> Dict[str, str]:
+        """
+        Get tags for a CloudTrail trail.
+
+        :param client: CloudTrail client
+        :param str trail_arn: Trail ARN
+        :return: Dictionary of tags (Key -> Value)
+        :rtype: Dict[str, str]
+        """
+        try:
+            response = client.list_tags(ResourceIdList=[trail_arn])
+            resource_tag_list = response.get("ResourceTagList", [])
+            if resource_tag_list and "TagsList" in resource_tag_list[0]:
+                tags_list = resource_tag_list[0]["TagsList"]
+                # Convert list of {"Key": "k", "Value": "v"} to {"k": "v"}
+                return {tag.get("Key", ""): tag.get("Value", "") for tag in tags_list}
+            return {}
+        except ClientError as e:
+            logger.debug(f"Error getting tags for trail {trail_arn}: {e}")
+            return {}
+
+    def _matches_tags(self, resource_tags: Dict[str, str]) -> bool:
+        """
+        Check if resource tags match the specified filter tags.
+
+        :param dict resource_tags: Tags on the resource
+        :return: True if all filter tags match
+        :rtype: bool
+        """
+        if not self.tags:
+            return True
+
+        # All filter tags must match
+        for key, value in self.tags.items():
+            if resource_tags.get(key) != value:
+                return False
+
+        return True
+
+    @staticmethod
+    def _format_size(size_bytes: int) -> str:
+        """
+        Format bytes into human-readable size.
+
+        :param int size_bytes: Size in bytes
+        :return: Formatted size string (e.g., "1.5 MB")
+        :rtype: str
+        """
+        for unit in ["B", "KB", "MB", "GB", "TB"]:
+            if size_bytes < 1024.0:
+                return f"{size_bytes:.2f} {unit}"
+            size_bytes /= 1024.0
+        return f"{size_bytes:.2f} PB"
+
+    def export_to_json(self, output_file: str = "cloudtrail_logs_metadata.json") -> str:
+        """
+        Collect metadata and export to JSON file.
+
+        :param str output_file: Path to output JSON file
+        :return: Path to the created JSON file
+        :rtype: str
+        """
+        metadata = self.collect()
+
+        try:
+            with open(output_file, "w", encoding="utf-8") as f:
+                json.dump(metadata, f, indent=2, ensure_ascii=False)
+
+            logger.info(f"CloudTrail logs metadata exported to: {output_file}")
+            return output_file
+
+        except Exception as e:
+            logger.error(f"Error exporting metadata to JSON: {e}")
+            raise
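A minimal usage sketch for the new collector (not part of the diff): it assumes a plain boto3.Session satisfies the `session` parameter, which the file types as `Any`. The profile name, account ID, and tag filter below are illustrative only.

import boto3

from regscale.integrations.commercial.aws.inventory.resources.cloudtrail_logs_metadata import (
    CloudTrailLogsMetadataCollector,
)

# Hypothetical values -- substitute your own profile, account, and tags.
collector = CloudTrailLogsMetadataCollector(
    session=boto3.Session(profile_name="audit"),
    region="us-east-1",
    account_id="123456789012",     # narrows the scan to AWSLogs/123456789012/CloudTrail/
    max_files=500,                 # stop after 500 .json.gz objects
    tags={"Environment": "prod"},  # only trails carrying this tag are scanned
)
output_path = collector.export_to_json("cloudtrail_logs_metadata.json")

Given the standard CloudTrail key layout the parser expects (AWSLogs/&lt;account-id&gt;/CloudTrail/&lt;region&gt;/&lt;yyyy&gt;/&lt;mm&gt;/&lt;dd&gt;/&lt;file&gt;.json.gz), a key such as AWSLogs/123456789012/CloudTrail/us-east-1/2024/05/01/trail.json.gz is bucketed under region "us-east-1" and date "2024-05-01" in FilesByRegion and FilesByDate.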
regscale/integrations/commercial/aws/inventory/resources/cloudwatch.py
@@ -0,0 +1,191 @@
+"""AWS CloudWatch Logs resource collection."""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from botocore.exceptions import ClientError
+
+from regscale.integrations.commercial.aws.inventory.base import BaseCollector
+
+logger = logging.getLogger("regscale")
+
+
+class CloudWatchLogsCollector(BaseCollector):
+    """Collector for AWS CloudWatch Logs resources."""
+
+    def __init__(
+        self, session: Any, region: str, account_id: Optional[str] = None, tags: Optional[Dict[str, str]] = None
+    ):
+        """
+        Initialize CloudWatch Logs collector.
+
+        :param session: AWS session to use for API calls
+        :param str region: AWS region to collect from
+        :param str account_id: Optional AWS account ID to filter resources
+        :param dict tags: Optional tags to filter resources (key-value pairs)
+        """
+        super().__init__(session, region)
+        self.account_id = account_id
+        self.tags = tags or {}
+
+    def collect(self) -> Dict[str, Any]:
+        """
+        Collect CloudWatch Logs resources.
+
+        :return: Dictionary containing CloudWatch log groups and their configurations
+        :rtype: Dict[str, Any]
+        """
+        result = {"LogGroups": [], "LogGroupMetrics": {}, "RetentionPolicies": {}}
+
+        try:
+            client = self._get_client("logs")
+
+            # List all log groups
+            log_groups = self._list_log_groups(client)
+
+            # Get detailed information for each log group
+            for log_group in log_groups:
+                log_group_name = log_group.get("logGroupName", "")
+                log_group_arn = log_group.get("arn", "")
+
+                # Filter by account ID if specified
+                if self.account_id and log_group_arn and not self._matches_account_id(log_group_arn):
+                    logger.debug(f"Skipping log group {log_group_name} - does not match account ID {self.account_id}")
+                    continue
+
+                # Get tags for filtering
+                log_group_tags = self._get_log_group_tags(client, log_group_name)
+
+                # Filter by tags if specified
+                if self.tags and not self._matches_tags(log_group_tags):
+                    logger.debug(f"Skipping log group {log_group_name} - does not match tag filters")
+                    continue
+
+                log_group["Tags"] = log_group_tags
+
+                # Get metric filters for this log group
+                metric_filters = self._get_metric_filters(client, log_group_name)
+                log_group["MetricFilters"] = metric_filters
+
+                # Get subscription filters
+                subscription_filters = self._get_subscription_filters(client, log_group_name)
+                log_group["SubscriptionFilters"] = subscription_filters
+
+                # Get retention policy
+                retention_days = log_group.get("retentionInDays")
+                if retention_days:
+                    result["RetentionPolicies"][log_group_name] = retention_days
+
+                # Add region information
+                log_group["Region"] = self.region
+
+                # Get storage bytes
+                stored_bytes = log_group.get("storedBytes", 0)
+                result["LogGroupMetrics"][log_group_name] = {
+                    "StoredBytes": stored_bytes,
+                    "MetricFilterCount": len(metric_filters),
+                    "SubscriptionFilterCount": len(subscription_filters),
+                }
+
+                result["LogGroups"].append(log_group)
+
+            logger.info(f"Collected {len(result['LogGroups'])} CloudWatch log group(s) from {self.region}")
+
+        except ClientError as e:
+            self._handle_error(e, "CloudWatch Logs log groups")
+        except Exception as e:
+            logger.error(f"Unexpected error collecting CloudWatch Logs: {e}", exc_info=True)
+
+        return result
+
+    def _list_log_groups(self, client: Any) -> List[Dict[str, Any]]:
+        """
+        List all CloudWatch log groups with pagination.
+
+        :param client: CloudWatch Logs client
+        :return: List of log groups
+        :rtype: List[Dict[str, Any]]
+        """
+        try:
+            log_groups = []
+            paginator = client.get_paginator("describe_log_groups")
+
+            for page in paginator.paginate():
+                log_groups.extend(page.get("logGroups", []))
+
+            logger.debug(f"Found {len(log_groups)} log groups in {self.region}")
+            return log_groups
+
+        except ClientError as e:
+            if e.response["Error"]["Code"] == "AccessDeniedException":
+                logger.warning(f"Access denied to list CloudWatch log groups in {self.region}")
+                return []
+            raise
+
+    def _get_log_group_tags(self, client: Any, log_group_name: str) -> Dict[str, str]:
+        """
+        Get tags for a log group.
+
+        :param client: CloudWatch Logs client
+        :param str log_group_name: Log group name
+        :return: Dictionary of tags
+        :rtype: Dict[str, str]
+        """
+        try:
+            response = client.list_tags_for_resource(resourceArn=self._build_log_group_arn(log_group_name))
+            return response.get("tags", {})
+        except ClientError as e:
+            if e.response["Error"]["Code"] in ["ResourceNotFoundException", "AccessDeniedException"]:
+                logger.debug(f"Cannot get tags for log group {log_group_name}: {e}")
+                return {}
+            logger.error(f"Error getting tags for log group {log_group_name}: {e}")
+            return {}
+
+    def _build_log_group_arn(self, log_group_name: str) -> str:
+        """
+        Build ARN for a log group.
+
+        :param str log_group_name: Log group name
+        :return: Log group ARN
+        :rtype: str
+        """
+        account_id = self.account_id or self.session.client("sts").get_caller_identity()["Account"]
+        return f"arn:aws:logs:{self.region}:{account_id}:log-group:{log_group_name}"
+
+    def _get_metric_filters(self, client: Any, log_group_name: str) -> List[Dict[str, Any]]:
+        """
+        Get metric filters for a log group.
+
+        :param client: CloudWatch Logs client
+        :param str log_group_name: Log group name
+        :return: List of metric filters
+        :rtype: List[Dict[str, Any]]
+        """
+        try:
+            response = client.describe_metric_filters(logGroupName=log_group_name)
+            return response.get("metricFilters", [])
+        except ClientError as e:
+            if e.response["Error"]["Code"] in ["ResourceNotFoundException", "AccessDeniedException"]:
+                logger.debug(f"Cannot get metric filters for log group {log_group_name}: {e}")
+                return []
+            logger.error(f"Error getting metric filters for log group {log_group_name}: {e}")
+            return []
+
+    def _get_subscription_filters(self, client: Any, log_group_name: str) -> List[Dict[str, Any]]:
+        """
+        Get subscription filters for a log group.
+
+        :param client: CloudWatch Logs client
+        :param str log_group_name: Log group name
+        :return: List of subscription filters
+        :rtype: List[Dict[str, Any]]
+        """
+        try:
+            response = client.describe_subscription_filters(logGroupName=log_group_name)
+            return response.get("subscriptionFilters", [])
+        except ClientError as e:
+            if e.response["Error"]["Code"] in ["ResourceNotFoundException", "AccessDeniedException"]:
+                logger.debug(f"Cannot get subscription filters for log group {log_group_name}: {e}")
+                return []
+            logger.error(f"Error getting subscription filters for log group {log_group_name}: {e}")
+            return []