regscale-cli 6.27.3.0__py3-none-any.whl → 6.28.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of regscale-cli might be problematic. Click here for more details.

Files changed (113):
  1. regscale/_version.py +1 -1
  2. regscale/core/app/utils/app_utils.py +11 -2
  3. regscale/dev/cli.py +26 -0
  4. regscale/dev/version.py +72 -0
  5. regscale/integrations/commercial/__init__.py +15 -1
  6. regscale/integrations/commercial/amazon/amazon/__init__.py +0 -0
  7. regscale/integrations/commercial/amazon/amazon/common.py +204 -0
  8. regscale/integrations/commercial/amazon/common.py +48 -58
  9. regscale/integrations/commercial/aws/audit_manager_compliance.py +2671 -0
  10. regscale/integrations/commercial/aws/cli.py +3093 -55
  11. regscale/integrations/commercial/aws/cloudtrail_control_mappings.py +333 -0
  12. regscale/integrations/commercial/aws/cloudtrail_evidence.py +501 -0
  13. regscale/integrations/commercial/aws/cloudwatch_control_mappings.py +357 -0
  14. regscale/integrations/commercial/aws/cloudwatch_evidence.py +490 -0
  15. regscale/integrations/commercial/aws/config_compliance.py +914 -0
  16. regscale/integrations/commercial/aws/conformance_pack_mappings.py +198 -0
  17. regscale/integrations/commercial/aws/evidence_generator.py +283 -0
  18. regscale/integrations/commercial/aws/guardduty_control_mappings.py +340 -0
  19. regscale/integrations/commercial/aws/guardduty_evidence.py +1053 -0
  20. regscale/integrations/commercial/aws/iam_control_mappings.py +368 -0
  21. regscale/integrations/commercial/aws/iam_evidence.py +574 -0
  22. regscale/integrations/commercial/aws/inventory/__init__.py +223 -22
  23. regscale/integrations/commercial/aws/inventory/base.py +107 -5
  24. regscale/integrations/commercial/aws/inventory/resources/audit_manager.py +513 -0
  25. regscale/integrations/commercial/aws/inventory/resources/cloudtrail.py +315 -0
  26. regscale/integrations/commercial/aws/inventory/resources/cloudtrail_logs_metadata.py +476 -0
  27. regscale/integrations/commercial/aws/inventory/resources/cloudwatch.py +191 -0
  28. regscale/integrations/commercial/aws/inventory/resources/compute.py +66 -9
  29. regscale/integrations/commercial/aws/inventory/resources/config.py +464 -0
  30. regscale/integrations/commercial/aws/inventory/resources/containers.py +74 -9
  31. regscale/integrations/commercial/aws/inventory/resources/database.py +106 -31
  32. regscale/integrations/commercial/aws/inventory/resources/guardduty.py +286 -0
  33. regscale/integrations/commercial/aws/inventory/resources/iam.py +470 -0
  34. regscale/integrations/commercial/aws/inventory/resources/inspector.py +476 -0
  35. regscale/integrations/commercial/aws/inventory/resources/integration.py +175 -61
  36. regscale/integrations/commercial/aws/inventory/resources/kms.py +447 -0
  37. regscale/integrations/commercial/aws/inventory/resources/networking.py +103 -67
  38. regscale/integrations/commercial/aws/inventory/resources/s3.py +394 -0
  39. regscale/integrations/commercial/aws/inventory/resources/security.py +268 -72
  40. regscale/integrations/commercial/aws/inventory/resources/securityhub.py +473 -0
  41. regscale/integrations/commercial/aws/inventory/resources/storage.py +53 -29
  42. regscale/integrations/commercial/aws/inventory/resources/systems_manager.py +657 -0
  43. regscale/integrations/commercial/aws/inventory/resources/vpc.py +655 -0
  44. regscale/integrations/commercial/aws/kms_control_mappings.py +288 -0
  45. regscale/integrations/commercial/aws/kms_evidence.py +879 -0
  46. regscale/integrations/commercial/aws/ocsf/__init__.py +7 -0
  47. regscale/integrations/commercial/aws/ocsf/constants.py +115 -0
  48. regscale/integrations/commercial/aws/ocsf/mapper.py +435 -0
  49. regscale/integrations/commercial/aws/org_control_mappings.py +286 -0
  50. regscale/integrations/commercial/aws/org_evidence.py +666 -0
  51. regscale/integrations/commercial/aws/s3_control_mappings.py +356 -0
  52. regscale/integrations/commercial/aws/s3_evidence.py +632 -0
  53. regscale/integrations/commercial/aws/scanner.py +851 -206
  54. regscale/integrations/commercial/aws/security_hub.py +319 -0
  55. regscale/integrations/commercial/aws/session_manager.py +282 -0
  56. regscale/integrations/commercial/aws/ssm_control_mappings.py +291 -0
  57. regscale/integrations/commercial/aws/ssm_evidence.py +492 -0
  58. regscale/integrations/commercial/synqly/ticketing.py +27 -0
  59. regscale/integrations/compliance_integration.py +308 -38
  60. regscale/integrations/due_date_handler.py +3 -0
  61. regscale/integrations/scanner_integration.py +399 -84
  62. regscale/models/integration_models/cisa_kev_data.json +65 -5
  63. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  64. regscale/models/integration_models/synqly_models/connectors/vulnerabilities.py +17 -9
  65. regscale/models/regscale_models/assessment.py +2 -1
  66. regscale/models/regscale_models/control_objective.py +74 -5
  67. regscale/models/regscale_models/file.py +2 -0
  68. regscale/models/regscale_models/issue.py +2 -5
  69. {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.1.0.dist-info}/METADATA +1 -1
  70. {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.1.0.dist-info}/RECORD +113 -34
  71. tests/regscale/integrations/commercial/aws/__init__.py +0 -0
  72. tests/regscale/integrations/commercial/aws/test_audit_manager_compliance.py +1304 -0
  73. tests/regscale/integrations/commercial/aws/test_audit_manager_evidence_aggregation.py +341 -0
  74. tests/regscale/integrations/commercial/aws/test_aws_audit_manager_collector.py +1155 -0
  75. tests/regscale/integrations/commercial/aws/test_aws_cloudtrail_collector.py +534 -0
  76. tests/regscale/integrations/commercial/aws/test_aws_config_collector.py +400 -0
  77. tests/regscale/integrations/commercial/aws/test_aws_guardduty_collector.py +315 -0
  78. tests/regscale/integrations/commercial/aws/test_aws_iam_collector.py +458 -0
  79. tests/regscale/integrations/commercial/aws/test_aws_inspector_collector.py +353 -0
  80. tests/regscale/integrations/commercial/aws/test_aws_inventory_integration.py +530 -0
  81. tests/regscale/integrations/commercial/aws/test_aws_kms_collector.py +919 -0
  82. tests/regscale/integrations/commercial/aws/test_aws_s3_collector.py +722 -0
  83. tests/regscale/integrations/commercial/aws/test_aws_scanner_integration.py +722 -0
  84. tests/regscale/integrations/commercial/aws/test_aws_securityhub_collector.py +792 -0
  85. tests/regscale/integrations/commercial/aws/test_aws_systems_manager_collector.py +918 -0
  86. tests/regscale/integrations/commercial/aws/test_aws_vpc_collector.py +996 -0
  87. tests/regscale/integrations/commercial/aws/test_cli_evidence.py +431 -0
  88. tests/regscale/integrations/commercial/aws/test_cloudtrail_control_mappings.py +452 -0
  89. tests/regscale/integrations/commercial/aws/test_cloudtrail_evidence.py +788 -0
  90. tests/regscale/integrations/commercial/aws/test_config_compliance.py +298 -0
  91. tests/regscale/integrations/commercial/aws/test_conformance_pack_mappings.py +200 -0
  92. tests/regscale/integrations/commercial/aws/test_evidence_generator.py +386 -0
  93. tests/regscale/integrations/commercial/aws/test_guardduty_control_mappings.py +564 -0
  94. tests/regscale/integrations/commercial/aws/test_guardduty_evidence.py +1041 -0
  95. tests/regscale/integrations/commercial/aws/test_iam_control_mappings.py +718 -0
  96. tests/regscale/integrations/commercial/aws/test_iam_evidence.py +1375 -0
  97. tests/regscale/integrations/commercial/aws/test_kms_control_mappings.py +656 -0
  98. tests/regscale/integrations/commercial/aws/test_kms_evidence.py +1163 -0
  99. tests/regscale/integrations/commercial/aws/test_ocsf_mapper.py +370 -0
  100. tests/regscale/integrations/commercial/aws/test_org_control_mappings.py +546 -0
  101. tests/regscale/integrations/commercial/aws/test_org_evidence.py +1240 -0
  102. tests/regscale/integrations/commercial/aws/test_s3_control_mappings.py +672 -0
  103. tests/regscale/integrations/commercial/aws/test_s3_evidence.py +987 -0
  104. tests/regscale/integrations/commercial/aws/test_scanner_evidence.py +373 -0
  105. tests/regscale/integrations/commercial/aws/test_security_hub_config_filtering.py +539 -0
  106. tests/regscale/integrations/commercial/aws/test_session_manager.py +516 -0
  107. tests/regscale/integrations/commercial/aws/test_ssm_control_mappings.py +588 -0
  108. tests/regscale/integrations/commercial/aws/test_ssm_evidence.py +735 -0
  109. tests/regscale/integrations/commercial/test_aws.py +55 -56
  110. {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.1.0.dist-info}/LICENSE +0 -0
  111. {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.1.0.dist-info}/WHEEL +0 -0
  112. {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.1.0.dist-info}/entry_points.txt +0 -0
  113. {regscale_cli-6.27.3.0.dist-info → regscale_cli-6.28.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,476 @@
1
+ """AWS CloudTrail S3 Logs Metadata Collector.
2
+
3
+ This collector retrieves metadata about CloudTrail log files stored in S3
4
+ WITHOUT downloading or unzipping the files. It collects:
5
+ - File names
6
+ - File sizes
7
+ - Last modified dates
8
+ - S3 keys/paths
9
+
10
+ This metadata can then be saved as evidence in RegScale.
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from datetime import datetime, timezone
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ from botocore.exceptions import ClientError
19
+
20
+ from regscale.integrations.commercial.aws.inventory.base import BaseCollector
21
+
22
+ logger = logging.getLogger("regscale")
23
+
24
+
25
class CloudTrailLogsMetadataCollector(BaseCollector):
    """Collector for CloudTrail log file metadata from S3.

    Lists the log objects each CloudTrail trail writes to its S3 bucket and
    records file names, sizes, last-modified dates, and S3 keys WITHOUT
    downloading or unzipping any file.
    """

    def __init__(
        self,
        session: Any,
        region: str,
        account_id: Optional[str] = None,
        days_back: int = 30,
        max_files: Optional[int] = None,
        tags: Optional[Dict[str, str]] = None,
    ):
        """
        Initialize CloudTrail logs metadata collector.

        :param session: AWS session to use for API calls
        :param str region: AWS region to collect from
        :param str account_id: Optional AWS account ID to filter resources
        :param int days_back: Only include log files last modified within this many days (default: 30)
        :param int max_files: Optional maximum number of files to collect metadata for (applied per trail)
        :param dict tags: Optional tags to filter trails (key-value pairs)
        """
        super().__init__(session, region)
        self.account_id = account_id
        self.days_back = days_back
        self.max_files = max_files
        self.tags = tags or {}

    def collect(self) -> Dict[str, Any]:
        """
        Collect CloudTrail log file metadata from S3 buckets.

        :return: Dictionary containing log file metadata
        :rtype: Dict[str, Any]
        """
        result = {
            "SnapshotDate": datetime.now(timezone.utc).isoformat(),
            "Region": self.region,
            "AccountId": self.account_id,
            "Trails": [],
            "TotalFiles": 0,
            "TotalSize": 0,
            "CollectionPeriodDays": self.days_back,
        }

        try:
            # First, get all CloudTrail trails to find S3 buckets
            cloudtrail_client = self._get_client("cloudtrail")
            trails = self._list_trails(cloudtrail_client)

            for trail in trails:
                trail_arn = trail.get("TrailARN", "")
                trail_name = trail.get("Name", "")

                # Get detailed trail information to find S3 bucket
                trail_details = self._describe_trail(cloudtrail_client, trail_arn)
                if not trail_details:
                    continue

                # Filter by tags if specified
                if self.tags:
                    trail_tags = self._get_trail_tags(cloudtrail_client, trail_arn)
                    if not self._matches_tags(trail_tags):
                        logger.debug(f"Skipping trail {trail_name} - does not match tag filters")
                        continue

                s3_bucket_name = trail_details.get("S3BucketName")
                s3_prefix = trail_details.get("S3KeyPrefix", "")

                if not s3_bucket_name:
                    logger.warning(f"Trail {trail_name} does not have an S3 bucket configured")
                    continue

                logger.info(f"Collecting log metadata from S3 bucket: {s3_bucket_name} for trail: {trail_name}")

                # Collect metadata for this trail's logs
                trail_log_metadata = self._collect_s3_log_metadata(
                    s3_bucket_name=s3_bucket_name,
                    s3_prefix=s3_prefix,
                    trail_name=trail_name,
                    trail_arn=trail_arn,
                )

                result["Trails"].append(trail_log_metadata)
                result["TotalFiles"] += trail_log_metadata["FileCount"]
                result["TotalSize"] += trail_log_metadata["TotalSize"]

            logger.info(
                f"Collected metadata for {result['TotalFiles']} CloudTrail log files "
                f"({self._format_size(result['TotalSize'])}) from {len(result['Trails'])} trail(s)"
            )

        except ClientError as e:
            self._handle_error(e, "CloudTrail logs metadata")
        except Exception as e:
            logger.error(f"Unexpected error collecting CloudTrail logs metadata: {e}", exc_info=True)

        return result

    def _list_trails(self, client: Any) -> List[Dict[str, Any]]:
        """
        List all CloudTrail trails, following pagination.

        list_trails is a paginated API; reading only the first response would
        silently drop trails in accounts with many of them.

        :param client: CloudTrail client
        :return: List of trail summaries
        :rtype: List[Dict[str, Any]]
        """
        try:
            trails: List[Dict[str, Any]] = []
            for page in client.get_paginator("list_trails").paginate():
                trails.extend(page.get("Trails", []))
            return trails
        except ClientError as e:
            if e.response["Error"]["Code"] == "AccessDeniedException":
                logger.warning(f"Access denied to list CloudTrail trails in {self.region}")
                return []
            raise

    def _describe_trail(self, client: Any, trail_arn: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed information about a specific trail.

        :param client: CloudTrail client
        :param str trail_arn: Trail ARN
        :return: Trail details or None if not found
        :rtype: Optional[Dict[str, Any]]
        """
        try:
            response = client.describe_trails(trailNameList=[trail_arn])
            trails = response.get("trailList", [])
            if trails:
                trail = trails[0]
                # Remove AWS response metadata for consistency
                trail.pop("ResponseMetadata", None)
                return trail
            return None
        except ClientError as e:
            logger.error(f"Error describing trail {trail_arn}: {e}")
            return None

    def _collect_s3_log_metadata(
        self, s3_bucket_name: str, s3_prefix: str, trail_name: str, trail_arn: str
    ) -> Dict[str, Any]:
        """
        Collect metadata for CloudTrail log files in an S3 bucket.

        This recursively collects log files across all regions and dates
        within the CloudTrail folder structure.

        :param str s3_bucket_name: Name of the S3 bucket
        :param str s3_prefix: S3 key prefix for the trail logs
        :param str trail_name: Name of the CloudTrail trail
        :param str trail_arn: ARN of the CloudTrail trail
        :return: Dictionary containing log file metadata
        :rtype: Dict[str, Any]
        """
        s3_client = self._get_client("s3")
        full_prefix = self._build_s3_prefix(s3_prefix)
        trail_metadata = self._initialize_trail_metadata(trail_name, trail_arn, s3_bucket_name, s3_prefix, full_prefix)

        try:
            logger.info(f"Scanning S3 bucket {s3_bucket_name} with prefix: {full_prefix}")
            self._scan_s3_objects(s3_client, s3_bucket_name, full_prefix, trail_metadata)
            self._finalize_trail_metadata(trail_metadata, trail_name)

        except ClientError as e:
            self._handle_s3_client_error(e, s3_bucket_name)
        except Exception as e:
            logger.error(f"Unexpected error collecting S3 log metadata: {e}", exc_info=True)

        return trail_metadata

    def _build_s3_prefix(self, s3_prefix: str) -> str:
        """
        Build the S3 key prefix under which CloudTrail log files live.

        S3 list operations treat prefixes literally — wildcards such as "*"
        are NOT supported and would match nothing. When no account ID is
        known we can therefore only narrow the listing to the "AWSLogs/"
        root; _scan_s3_objects then filters keys down to CloudTrail folders.

        :param str s3_prefix: S3 key prefix configured on the trail
        :return: S3 prefix to list under
        :rtype: str
        """
        base_prefix = f"{s3_prefix}/AWSLogs/" if s3_prefix else "AWSLogs/"

        if self.account_id:
            # With a known account we can point directly at its CloudTrail folder.
            return f"{base_prefix}{self.account_id}/CloudTrail/"
        return base_prefix

    def _initialize_trail_metadata(
        self, trail_name: str, trail_arn: str, s3_bucket_name: str, s3_prefix: str, full_prefix: str
    ) -> Dict[str, Any]:
        """
        Initialize the trail metadata dictionary.

        :param str trail_name: Name of the CloudTrail trail
        :param str trail_arn: ARN of the CloudTrail trail
        :param str s3_bucket_name: Name of the S3 bucket
        :param str s3_prefix: S3 key prefix for the trail logs
        :param str full_prefix: Full S3 prefix for log files
        :return: Initialized trail metadata dictionary
        :rtype: Dict[str, Any]
        """
        return {
            "TrailName": trail_name,
            "TrailARN": trail_arn,
            "S3BucketName": s3_bucket_name,
            "S3Prefix": s3_prefix,
            "FullLogPrefix": full_prefix,
            "FileCount": 0,
            "TotalSize": 0,
            "Files": [],
            "FilesByRegion": {},
            "FilesByDate": {},
        }

    def _scan_s3_objects(
        self, s3_client: Any, s3_bucket_name: str, full_prefix: str, trail_metadata: Dict[str, Any]
    ) -> None:
        """
        Scan S3 objects and collect metadata for CloudTrail log files.

        Only gzipped CloudTrail log objects (".json.gz" under a
        "/CloudTrail/" folder) last modified within the configured
        ``days_back`` window are recorded.

        :param s3_client: S3 client
        :param str s3_bucket_name: Name of the S3 bucket
        :param str full_prefix: Full S3 prefix for log files
        :param dict trail_metadata: Trail metadata dictionary to populate
        """
        paginator = s3_client.get_paginator("list_objects_v2")
        file_count = 0
        # LastModified cutoff implementing the documented collection window
        # (days_back is reported as CollectionPeriodDays in the result).
        cutoff_ts = datetime.now(timezone.utc).timestamp() - self.days_back * 86400

        for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=full_prefix):
            if "Contents" not in page:
                logger.info(f"No log files found in S3 bucket {s3_bucket_name} with prefix {full_prefix}")
                break

            for obj in page["Contents"]:
                key = obj["Key"]
                # Skip non-log objects and (when listing from the AWSLogs/
                # root) folders such as CloudTrail-Digest.
                if not key.endswith(".json.gz") or "/CloudTrail/" not in f"/{key}":
                    continue

                # Enforce the collection window.
                if obj["LastModified"].timestamp() < cutoff_ts:
                    continue

                if self._max_files_reached(file_count):
                    return

                self._process_s3_object(obj, trail_metadata)
                file_count += 1

            if self._max_files_reached(file_count):
                break

    def _max_files_reached(self, file_count: int) -> bool:
        """
        Check if the maximum file limit has been reached.

        :param int file_count: Current file count
        :return: True if max files reached
        :rtype: bool
        """
        if self.max_files and file_count >= self.max_files:
            logger.info(f"Reached maximum file limit: {self.max_files}")
            return True
        return False

    def _process_s3_object(self, obj: Dict[str, Any], trail_metadata: Dict[str, Any]) -> None:
        """
        Process a single S3 object and add its metadata to the trail metadata.

        :param dict obj: S3 object metadata
        :param dict trail_metadata: Trail metadata dictionary to update
        """
        key = obj["Key"]
        file_region, file_date = self._extract_region_and_date(key)

        file_metadata = {
            "Key": key,
            "FileName": key.split("/")[-1],
            "Region": file_region,
            "Date": file_date,
            "Size": obj["Size"],
            "SizeFormatted": self._format_size(obj["Size"]),
            "LastModified": obj["LastModified"].isoformat(),
            "LastModifiedTimestamp": obj["LastModified"].timestamp(),
            "ETag": obj.get("ETag", "").strip('"'),
            "StorageClass": obj.get("StorageClass", "STANDARD"),
        }

        trail_metadata["Files"].append(file_metadata)
        trail_metadata["TotalSize"] += obj["Size"]

        self._organize_by_region(trail_metadata, file_region, obj["Size"], file_metadata["FileName"])
        self._organize_by_date(trail_metadata, file_date, obj["Size"], file_metadata["FileName"])

    def _extract_region_and_date(self, key: str) -> tuple:
        """
        Extract region and date from S3 key.

        CloudTrail keys follow .../CloudTrail/<region>/<yyyy>/<mm>/<dd>/<file>.

        :param str key: S3 object key
        :return: Tuple of (region, date)
        :rtype: tuple
        """
        key_parts = key.split("/")
        try:
            ct_index = key_parts.index("CloudTrail")
            file_region = key_parts[ct_index + 1] if len(key_parts) > ct_index + 1 else "unknown"
            file_year = key_parts[ct_index + 2] if len(key_parts) > ct_index + 2 else ""
            file_month = key_parts[ct_index + 3] if len(key_parts) > ct_index + 3 else ""
            file_day = key_parts[ct_index + 4] if len(key_parts) > ct_index + 4 else ""
            file_date = f"{file_year}-{file_month}-{file_day}" if file_year else "unknown"
        except (ValueError, IndexError):
            file_region = "unknown"
            file_date = "unknown"

        return file_region, file_date

    def _organize_by_region(
        self, trail_metadata: Dict[str, Any], file_region: str, file_size: int, file_name: str
    ) -> None:
        """
        Organize file metadata by region.

        :param dict trail_metadata: Trail metadata dictionary to update
        :param str file_region: AWS region
        :param int file_size: File size in bytes
        :param str file_name: File name
        """
        if file_region not in trail_metadata["FilesByRegion"]:
            trail_metadata["FilesByRegion"][file_region] = {"Count": 0, "TotalSize": 0, "Files": []}

        trail_metadata["FilesByRegion"][file_region]["Count"] += 1
        trail_metadata["FilesByRegion"][file_region]["TotalSize"] += file_size
        trail_metadata["FilesByRegion"][file_region]["Files"].append(file_name)

    def _organize_by_date(self, trail_metadata: Dict[str, Any], file_date: str, file_size: int, file_name: str) -> None:
        """
        Organize file metadata by date.

        :param dict trail_metadata: Trail metadata dictionary to update
        :param str file_date: File date (YYYY-MM-DD)
        :param int file_size: File size in bytes
        :param str file_name: File name
        """
        if file_date not in trail_metadata["FilesByDate"]:
            trail_metadata["FilesByDate"][file_date] = {"Count": 0, "TotalSize": 0, "Files": []}

        trail_metadata["FilesByDate"][file_date]["Count"] += 1
        trail_metadata["FilesByDate"][file_date]["TotalSize"] += file_size
        trail_metadata["FilesByDate"][file_date]["Files"].append(file_name)

    def _finalize_trail_metadata(self, trail_metadata: Dict[str, Any], trail_name: str) -> None:
        """
        Finalize trail metadata by calculating date ranges and logging results.

        :param dict trail_metadata: Trail metadata dictionary to finalize
        :param str trail_name: Name of the CloudTrail trail
        """
        trail_metadata["FileCount"] = len(trail_metadata["Files"])

        if trail_metadata["Files"]:
            timestamps = [f["LastModifiedTimestamp"] for f in trail_metadata["Files"]]
            # Use UTC: the timestamps come from timezone-aware S3 LastModified
            # values; a naive fromtimestamp would render them in local time.
            trail_metadata["OldestLogDate"] = datetime.fromtimestamp(min(timestamps), tz=timezone.utc).isoformat()
            trail_metadata["NewestLogDate"] = datetime.fromtimestamp(max(timestamps), tz=timezone.utc).isoformat()

        logger.info(
            f"Found {trail_metadata['FileCount']} log files "
            f"({self._format_size(trail_metadata['TotalSize'])}) for trail {trail_name}"
        )

    def _handle_s3_client_error(self, error: ClientError, s3_bucket_name: str) -> None:
        """
        Handle S3 client errors.

        :param ClientError error: The client error
        :param str s3_bucket_name: Name of the S3 bucket
        """
        error_code = error.response["Error"]["Code"]
        if error_code == "NoSuchBucket":
            logger.error(f"S3 bucket does not exist: {s3_bucket_name}")
        elif error_code == "AccessDenied":
            logger.error(f"Access denied to S3 bucket: {s3_bucket_name}")
        else:
            logger.error(f"Error accessing S3 bucket {s3_bucket_name}: {error}")

    def _get_trail_tags(self, client: Any, trail_arn: str) -> Dict[str, str]:
        """
        Get tags for a CloudTrail trail.

        :param client: CloudTrail client
        :param str trail_arn: Trail ARN
        :return: Dictionary of tags (Key -> Value)
        :rtype: Dict[str, str]
        """
        try:
            response = client.list_tags(ResourceIdList=[trail_arn])
            resource_tag_list = response.get("ResourceTagList", [])
            if resource_tag_list and "TagsList" in resource_tag_list[0]:
                tags_list = resource_tag_list[0]["TagsList"]
                # Convert list of {"Key": "k", "Value": "v"} to {"k": "v"}
                return {tag.get("Key", ""): tag.get("Value", "") for tag in tags_list}
            return {}
        except ClientError as e:
            logger.debug(f"Error getting tags for trail {trail_arn}: {e}")
            return {}

    def _matches_tags(self, resource_tags: Dict[str, str]) -> bool:
        """
        Check if resource tags match the specified filter tags.

        :param dict resource_tags: Tags on the resource
        :return: True if all filter tags match
        :rtype: bool
        """
        if not self.tags:
            return True

        # All filter tags must match
        for key, value in self.tags.items():
            if resource_tags.get(key) != value:
                return False

        return True

    @staticmethod
    def _format_size(size_bytes: int) -> str:
        """
        Format bytes into human-readable size.

        :param int size_bytes: Size in bytes
        :return: Formatted size string (e.g., "1.5 MB")
        :rtype: str
        """
        for unit in ["B", "KB", "MB", "GB", "TB"]:
            if size_bytes < 1024.0:
                return f"{size_bytes:.2f} {unit}"
            size_bytes /= 1024.0
        return f"{size_bytes:.2f} PB"

    def export_to_json(self, output_file: str = "cloudtrail_logs_metadata.json") -> str:
        """
        Collect metadata and export to JSON file.

        :param str output_file: Path to output JSON file
        :return: Path to the created JSON file
        :rtype: str
        """
        metadata = self.collect()

        try:
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)

            logger.info(f"CloudTrail logs metadata exported to: {output_file}")
            return output_file

        except Exception as e:
            logger.error(f"Error exporting metadata to JSON: {e}")
            raise
@@ -0,0 +1,191 @@
1
+ """AWS CloudWatch Logs resource collection."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from botocore.exceptions import ClientError
7
+
8
+ from regscale.integrations.commercial.aws.inventory.base import BaseCollector
9
+
10
+ logger = logging.getLogger("regscale")
11
+
12
+
13
class CloudWatchLogsCollector(BaseCollector):
    """Collector for AWS CloudWatch Logs resources."""

    def __init__(
        self, session: Any, region: str, account_id: Optional[str] = None, tags: Optional[Dict[str, str]] = None
    ):
        """
        Initialize CloudWatch Logs collector.

        :param session: AWS session to use for API calls
        :param str region: AWS region to collect from
        :param str account_id: Optional AWS account ID to filter resources
        :param dict tags: Optional tags to filter resources (key-value pairs)
        """
        super().__init__(session, region)
        self.account_id = account_id
        self.tags = tags or {}
        # Lazily resolved via STS when account_id is not supplied; cached so
        # STS is queried at most once instead of once per log group.
        self._resolved_account_id: Optional[str] = account_id

    def collect(self) -> Dict[str, Any]:
        """
        Collect CloudWatch Logs resources.

        :return: Dictionary containing CloudWatch log groups and their configurations
        :rtype: Dict[str, Any]
        """
        result = {"LogGroups": [], "LogGroupMetrics": {}, "RetentionPolicies": {}}

        try:
            client = self._get_client("logs")

            # List all log groups
            log_groups = self._list_log_groups(client)

            # Get detailed information for each log group
            for log_group in log_groups:
                log_group_name = log_group.get("logGroupName", "")
                log_group_arn = log_group.get("arn", "")

                # Filter by account ID if specified
                # (_matches_account_id is inherited from BaseCollector)
                if self.account_id and log_group_arn and not self._matches_account_id(log_group_arn):
                    logger.debug(f"Skipping log group {log_group_name} - does not match account ID {self.account_id}")
                    continue

                # Get tags for filtering
                log_group_tags = self._get_log_group_tags(client, log_group_name)

                # Filter by tags if specified
                if self.tags and not self._matches_tags(log_group_tags):
                    logger.debug(f"Skipping log group {log_group_name} - does not match tag filters")
                    continue

                log_group["Tags"] = log_group_tags

                # Get metric filters for this log group
                metric_filters = self._get_metric_filters(client, log_group_name)
                log_group["MetricFilters"] = metric_filters

                # Get subscription filters
                subscription_filters = self._get_subscription_filters(client, log_group_name)
                log_group["SubscriptionFilters"] = subscription_filters

                # Get retention policy
                retention_days = log_group.get("retentionInDays")
                if retention_days:
                    result["RetentionPolicies"][log_group_name] = retention_days

                # Add region information
                log_group["Region"] = self.region

                # Get storage bytes
                stored_bytes = log_group.get("storedBytes", 0)
                result["LogGroupMetrics"][log_group_name] = {
                    "StoredBytes": stored_bytes,
                    "MetricFilterCount": len(metric_filters),
                    "SubscriptionFilterCount": len(subscription_filters),
                }

                result["LogGroups"].append(log_group)

            logger.info(f"Collected {len(result['LogGroups'])} CloudWatch log group(s) from {self.region}")

        except ClientError as e:
            self._handle_error(e, "CloudWatch Logs log groups")
        except Exception as e:
            logger.error(f"Unexpected error collecting CloudWatch Logs: {e}", exc_info=True)

        return result

    def _list_log_groups(self, client: Any) -> List[Dict[str, Any]]:
        """
        List all CloudWatch log groups with pagination.

        :param client: CloudWatch Logs client
        :return: List of log groups
        :rtype: List[Dict[str, Any]]
        """
        try:
            log_groups = []
            paginator = client.get_paginator("describe_log_groups")

            for page in paginator.paginate():
                log_groups.extend(page.get("logGroups", []))

            logger.debug(f"Found {len(log_groups)} log groups in {self.region}")
            return log_groups

        except ClientError as e:
            if e.response["Error"]["Code"] == "AccessDeniedException":
                logger.warning(f"Access denied to list CloudWatch log groups in {self.region}")
                return []
            raise

    def _get_log_group_tags(self, client: Any, log_group_name: str) -> Dict[str, str]:
        """
        Get tags for a log group.

        :param client: CloudWatch Logs client
        :param str log_group_name: Log group name
        :return: Dictionary of tags
        :rtype: Dict[str, str]
        """
        try:
            response = client.list_tags_for_resource(resourceArn=self._build_log_group_arn(log_group_name))
            return response.get("tags", {})
        except ClientError as e:
            if e.response["Error"]["Code"] in ["ResourceNotFoundException", "AccessDeniedException"]:
                logger.debug(f"Cannot get tags for log group {log_group_name}: {e}")
                return {}
            logger.error(f"Error getting tags for log group {log_group_name}: {e}")
            return {}

    def _build_log_group_arn(self, log_group_name: str) -> str:
        """
        Build ARN for a log group.

        The account ID is resolved through STS at most once and then cached,
        instead of issuing a get_caller_identity call per log group.

        :param str log_group_name: Log group name
        :return: Log group ARN
        :rtype: str
        """
        if not self._resolved_account_id:
            self._resolved_account_id = self.session.client("sts").get_caller_identity()["Account"]
        return f"arn:aws:logs:{self.region}:{self._resolved_account_id}:log-group:{log_group_name}"

    def _get_metric_filters(self, client: Any, log_group_name: str) -> List[Dict[str, Any]]:
        """
        Get metric filters for a log group, following pagination.

        :param client: CloudWatch Logs client
        :param str log_group_name: Log group name
        :return: List of metric filters
        :rtype: List[Dict[str, Any]]
        """
        try:
            filters: List[Dict[str, Any]] = []
            for page in client.get_paginator("describe_metric_filters").paginate(logGroupName=log_group_name):
                filters.extend(page.get("metricFilters", []))
            return filters
        except ClientError as e:
            if e.response["Error"]["Code"] in ["ResourceNotFoundException", "AccessDeniedException"]:
                logger.debug(f"Cannot get metric filters for log group {log_group_name}: {e}")
                return []
            logger.error(f"Error getting metric filters for log group {log_group_name}: {e}")
            return []

    def _get_subscription_filters(self, client: Any, log_group_name: str) -> List[Dict[str, Any]]:
        """
        Get subscription filters for a log group, following pagination.

        :param client: CloudWatch Logs client
        :param str log_group_name: Log group name
        :return: List of subscription filters
        :rtype: List[Dict[str, Any]]
        """
        try:
            filters: List[Dict[str, Any]] = []
            for page in client.get_paginator("describe_subscription_filters").paginate(logGroupName=log_group_name):
                filters.extend(page.get("subscriptionFilters", []))
            return filters
        except ClientError as e:
            if e.response["Error"]["Code"] in ["ResourceNotFoundException", "AccessDeniedException"]:
                logger.debug(f"Cannot get subscription filters for log group {log_group_name}: {e}")
                return []
            logger.error(f"Error getting subscription filters for log group {log_group_name}: {e}")
            return []