regscale-cli 6.21.0.0__py3-none-any.whl → 6.21.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. regscale/_version.py +1 -1
  2. regscale/core/app/application.py +7 -0
  3. regscale/integrations/commercial/__init__.py +9 -10
  4. regscale/integrations/commercial/amazon/common.py +79 -2
  5. regscale/integrations/commercial/aws/cli.py +183 -9
  6. regscale/integrations/commercial/aws/scanner.py +544 -9
  7. regscale/integrations/commercial/cpe.py +18 -1
  8. regscale/integrations/commercial/import_all/import_all_cmd.py +2 -2
  9. regscale/integrations/commercial/microsoft_defender/__init__.py +0 -0
  10. regscale/integrations/commercial/{defender.py → microsoft_defender/defender.py} +38 -612
  11. regscale/integrations/commercial/microsoft_defender/defender_api.py +286 -0
  12. regscale/integrations/commercial/microsoft_defender/defender_constants.py +80 -0
  13. regscale/integrations/commercial/microsoft_defender/defender_scanner.py +168 -0
  14. regscale/integrations/commercial/qualys/__init__.py +24 -86
  15. regscale/integrations/commercial/qualys/containers.py +2 -0
  16. regscale/integrations/commercial/qualys/scanner.py +7 -2
  17. regscale/integrations/commercial/sonarcloud.py +110 -71
  18. regscale/integrations/commercial/tenablev2/jsonl_scanner.py +2 -1
  19. regscale/integrations/commercial/wizv2/async_client.py +10 -3
  20. regscale/integrations/commercial/wizv2/click.py +105 -26
  21. regscale/integrations/commercial/wizv2/constants.py +249 -1
  22. regscale/integrations/commercial/wizv2/data_fetcher.py +401 -0
  23. regscale/integrations/commercial/wizv2/finding_processor.py +295 -0
  24. regscale/integrations/commercial/wizv2/issue.py +2 -2
  25. regscale/integrations/commercial/wizv2/parsers.py +3 -2
  26. regscale/integrations/commercial/wizv2/policy_compliance.py +3057 -0
  27. regscale/integrations/commercial/wizv2/policy_compliance_helpers.py +564 -0
  28. regscale/integrations/commercial/wizv2/scanner.py +19 -25
  29. regscale/integrations/commercial/wizv2/utils.py +258 -85
  30. regscale/integrations/commercial/wizv2/variables.py +4 -3
  31. regscale/integrations/compliance_integration.py +1607 -0
  32. regscale/integrations/public/fedramp/fedramp_five.py +93 -8
  33. regscale/integrations/public/fedramp/markdown_parser.py +7 -1
  34. regscale/integrations/scanner_integration.py +57 -6
  35. regscale/models/__init__.py +1 -1
  36. regscale/models/app_models/__init__.py +1 -0
  37. regscale/models/integration_models/cisa_kev_data.json +103 -4
  38. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  39. regscale/{integrations/commercial/wizv2/models.py → models/integration_models/wizv2.py} +4 -12
  40. regscale/models/regscale_models/file.py +4 -0
  41. regscale/models/regscale_models/issue.py +151 -8
  42. regscale/models/regscale_models/regscale_model.py +4 -2
  43. regscale/models/regscale_models/security_plan.py +1 -1
  44. regscale/utils/graphql_client.py +3 -1
  45. {regscale_cli-6.21.0.0.dist-info → regscale_cli-6.21.2.0.dist-info}/METADATA +9 -9
  46. {regscale_cli-6.21.0.0.dist-info → regscale_cli-6.21.2.0.dist-info}/RECORD +52 -44
  47. tests/regscale/core/test_version_regscale.py +5 -3
  48. tests/regscale/integrations/test_wiz_policy_compliance_affected_controls.py +154 -0
  49. tests/regscale/test_authorization.py +0 -65
  50. tests/regscale/test_init.py +0 -96
  51. {regscale_cli-6.21.0.0.dist-info → regscale_cli-6.21.2.0.dist-info}/LICENSE +0 -0
  52. {regscale_cli-6.21.0.0.dist-info → regscale_cli-6.21.2.0.dist-info}/WHEEL +0 -0
  53. {regscale_cli-6.21.0.0.dist-info → regscale_cli-6.21.2.0.dist-info}/entry_points.txt +0 -0
  54. {regscale_cli-6.21.0.0.dist-info → regscale_cli-6.21.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,3057 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Wiz Policy Compliance Integration for RegScale CLI."""
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ import re
9
+ from datetime import datetime
10
+ from typing import Dict, List, Optional, Iterator, Any
11
+
12
+ from regscale.core.app.application import Application
13
+ from regscale.core.app.utils.app_utils import error_and_exit, check_license, get_current_datetime
14
+ from regscale.integrations.commercial.wizv2.async_client import run_async_queries
15
+ from regscale.integrations.commercial.wizv2.constants import (
16
+ WizVulnerabilityType,
17
+ WIZ_POLICY_QUERY,
18
+ WIZ_FRAMEWORK_QUERY,
19
+ FRAMEWORK_MAPPINGS,
20
+ FRAMEWORK_SHORTCUTS,
21
+ FRAMEWORK_CATEGORIES,
22
+ )
23
+ from regscale.integrations.commercial.wizv2.data_fetcher import PolicyAssessmentFetcher
24
+ from regscale.integrations.commercial.wizv2.finding_processor import (
25
+ FindingConsolidator,
26
+ FindingToIssueProcessor,
27
+ )
28
+ from regscale.integrations.commercial.wizv2.policy_compliance_helpers import (
29
+ ControlImplementationCache,
30
+ AssetConsolidator,
31
+ IssueFieldSetter,
32
+ ControlAssessmentProcessor,
33
+ )
34
+ from regscale.integrations.commercial.wizv2.wiz_auth import wiz_authenticate
35
+ from regscale.integrations.compliance_integration import ComplianceIntegration, ComplianceItem
36
+ from regscale.integrations.scanner_integration import (
37
+ ScannerIntegrationType,
38
+ IntegrationAsset,
39
+ IntegrationFinding,
40
+ issue_due_date,
41
+ )
42
+ from regscale.models import regscale_models
43
+
44
+ logger = logging.getLogger("regscale")
45
+
46
+
47
# Constants for file operations
JSON_FILE_EXT = ".json"
JSONL_FILE_EXT = ".jsonl"

# Maximum number of asset names to display in descriptions
MAX_DISPLAY_ASSETS = 10

# Number of recent cache files to keep during cleanup
CACHE_CLEANUP_KEEP_COUNT = 5

# Wiz GraphQL endpoint
WIZ_URL = "https://api.wiz.io/graphql"

# Pattern used for control-id normalization.
#
# Group 1 captures the base control ('AC-4'); exactly one of groups 2-4 captures
# an optional enhancement number, depending on how it was written:
#   group 2 -> parenthesised:        'AC-4(2)', 'AC-4 (2)'
#   group 3 -> dash separated:       'AC-4-2'
#   group 4 -> whitespace separated: 'AC-4 2'
# The optional suffix is a flat alternation with no nested quantified groups.
_CONTROL_ID_PATTERN = r"^([A-Za-z]{2}-\d+)(?:\s*\(\s*(\d+)\s*\)|-\s*(\d+)|\s+(\d+))?$"
SAFE_CONTROL_ID_RE = re.compile(_CONTROL_ID_PATTERN, re.IGNORECASE)  # NOSONAR
63
+
64
+
65
class WizComplianceItem(ComplianceItem):
    """
    Wiz implementation of ComplianceItem.

    Wraps one raw policy-assessment record from the Wiz GraphQL response and exposes
    its resource, control, framework and result through read-only properties.
    """

    def __init__(self, raw_data: Dict[str, Any], integration: Optional["WizPolicyComplianceIntegration"] = None):
        """
        Initialize WizComplianceItem from raw GraphQL response.

        :param Dict[str, Any] raw_data: Raw policy assessment data from Wiz
        :param Optional['WizPolicyComplianceIntegration'] integration: Integration instance for framework mapping
        """
        self.id = raw_data.get("id", "")
        self.result = raw_data.get("result", "")
        # dict.get() only falls back to its default when the key is absent. A key
        # that is present with an explicit null would otherwise leave None here and
        # break every property that calls .get() on these attributes.
        self.policy = raw_data.get("policy") or {}
        self.resource = raw_data.get("resource") or {}
        self.output = raw_data.get("output") or {}
        self._integration = integration

    @staticmethod
    def _framework_of(subcategory: Dict[str, Any]) -> Dict[str, Any]:
        """
        Return the framework dict nested under a security subcategory.

        Tolerates a missing or null ``category`` / ``framework`` entry.

        :param Dict[str, Any] subcategory: One entry of the policy's securitySubCategories
        :return: The nested framework dict, or an empty dict when unavailable
        :rtype: Dict[str, Any]
        """
        category = subcategory.get("category") or {}
        return category.get("framework") or {}

    def _get_filtered_subcategories(self) -> List[Dict[str, Any]]:
        """
        Return only subcategories that belong to the selected framework.

        If no integration or framework filter is available, return all.

        :return: List of filtered security subcategories
        :rtype: List[Dict[str, Any]]
        """
        subcategories = self.policy.get("securitySubCategories") or []
        if not subcategories or not self._integration or not getattr(self._integration, "framework_id", None):
            return subcategories

        target_framework_id = self._integration.framework_id
        # An empty result is intentional: it leaves control_id empty, which marks
        # the item as not belonging to the selected framework.
        return [sc for sc in subcategories if self._framework_of(sc).get("id") == target_framework_id]

    @property
    def resource_id(self) -> str:
        """Unique identifier for the resource being assessed."""
        return self.resource.get("id", "")

    @property
    def resource_name(self) -> str:
        """Human-readable name of the resource."""
        return self.resource.get("name", "")

    @property
    def control_id(self) -> str:
        """Control identifier (e.g., AC-3, SI-2) of the first matching subcategory, or ''."""
        subcategories = self._get_filtered_subcategories()
        if not subcategories:
            return ""
        # `or ""` keeps the declared str return type when externalId is null.
        return subcategories[0].get("externalId") or ""

    @property
    def compliance_result(self) -> str:
        """Result of compliance check (PASS, FAIL, etc)."""
        return self.result

    @property
    def severity(self) -> Optional[str]:
        """Severity level of the compliance violation (if failed)."""
        return self.policy.get("severity")

    @property
    def description(self) -> str:
        """Description of the compliance check, falling back to a generated sentence."""
        desc = self.policy.get("description") or self.policy.get("ruleDescription", "")
        if not desc:
            desc = f"Compliance check for {self.policy.get('name', 'unknown policy')}"
        return desc

    @property
    def framework(self) -> str:
        """Compliance framework (e.g., NIST800-53R5, CSF)."""
        subcategories = self._get_filtered_subcategories()
        if not subcategories:
            return ""

        framework = self._framework_of(subcategories[0])
        framework_id = framework.get("id", "")

        # Prefer integration mapping using the actual framework id from the item
        if self._integration and framework_id:
            return self._integration.get_framework_name(framework_id)

        return framework.get("name") or ""

    @property
    def framework_id(self) -> Optional[str]:
        """Framework ID of the first matching subcategory, or None."""
        subcategories = self._get_filtered_subcategories()
        if not subcategories:
            return None
        return self._framework_of(subcategories[0]).get("id")

    @property
    def is_pass(self) -> bool:
        """Check if assessment result is PASS."""
        return self.result == "PASS"

    @property
    def is_fail(self) -> bool:
        """Check if assessment result is FAIL."""
        return self.result == "FAIL"
182
+
183
+
184
+ class WizPolicyComplianceIntegration(ComplianceIntegration):
185
+ """
186
+ Wiz Policy Compliance Integration for syncing policy assessments from Wiz to RegScale.
187
+
188
+ This integration fetches policy assessment data from Wiz, processes the results,
189
+ and creates control assessments in RegScale based on compliance status.
190
+ """
191
+
192
+ title = "Wiz Policy Compliance Integration"
193
+ type = ScannerIntegrationType.CONTROL_TEST
194
+ # Use wizId field for asset identification (matches other Wiz integrations)
195
+ asset_identifier_field = "wizId"
196
+ # Do not create assets - they come from separate inventory import
197
+ options_map_assets_to_components: bool = False
198
+ # Do not create vulnerabilities from compliance policy results
199
+ create_vulnerabilities: bool = False
200
+ # Do not create scan history - this is compliance report ingest, not a vulnerability scan
201
+ enable_scan_history: bool = False
202
+
203
+ # Control whether JSONL control-centric export is written alongside JSON
204
+ write_jsonl_output: bool = False
205
+
206
def __init__(
    self,
    plan_id: int,
    wiz_project_id: str,
    client_id: str,
    client_secret: str,
    framework_id: str = "wf-id-4",  # Default to NIST SP 800-53 Revision 5
    catalog_id: Optional[int] = None,
    tenant_id: int = 1,
    create_issues: bool = True,
    update_control_status: bool = True,
    create_poams: bool = False,
    regscale_module: Optional[str] = "securityplans",
    **kwargs,
):
    """
    Set up the Wiz Policy Compliance Integration.

    :param int plan_id: RegScale Security Plan ID
    :param str wiz_project_id: Wiz Project ID to query
    :param str client_id: Wiz API client ID
    :param str client_secret: Wiz API client secret
    :param str framework_id: Wiz framework ID to filter by (default: wf-id-4)
    :param Optional[int] catalog_id: RegScale catalog ID
    :param int tenant_id: RegScale tenant ID
    :param bool create_issues: Whether to create issues for failed compliance
    :param bool update_control_status: Whether to update control implementation status
    :param bool create_poams: Whether to mark issues as POAMs
    :param Optional[str] regscale_module: RegScale module string (overrides default parent_module)
    :param kwargs: Forwarded to the base class; ``cache_duration_minutes`` and
        ``force_refresh`` are also read here
    """
    framework_name = self._map_framework_id_to_name(framework_id)
    # Helper classes fall back to the security-plan module when none is given
    helper_module = regscale_module or "securityplans"

    super().__init__(
        plan_id=plan_id,
        parent_module=regscale_module,
        catalog_id=catalog_id,
        framework=framework_name,
        create_issues=create_issues,
        update_control_status=update_control_status,
        create_poams=create_poams,
        tenant_id=tenant_id,
        **kwargs,
    )

    # Override parent_module if regscale_module is provided
    if regscale_module:
        self.parent_module = regscale_module

    # Wiz connection details
    self.wiz_project_id = wiz_project_id
    self.client_id = client_id
    self.client_secret = client_secret
    self.framework_id = framework_id
    self.wiz_endpoint = ""
    self.access_token = ""

    # Framework mapping and raw results
    self.framework_mapping: Dict[str, str] = {}
    self.framework_cache_file = os.path.join("artifacts", "wiz", "framework_mapping.json")
    self.raw_policy_assessments: List[Dict[str, Any]] = []

    # Caching configuration for policy assessments
    # Default: disabled for tests; CLI enables via --cache-duration
    self.cache_duration_minutes: int = int(kwargs.get("cache_duration_minutes", 0))
    self.force_refresh: bool = bool(kwargs.get("force_refresh", False))
    self.policy_cache_dir: str = os.path.join("artifacts", "wiz")
    cache_file_name = f"policy_assessments_{wiz_project_id}_{framework_id}.json"
    self.policy_cache_file: str = os.path.join(self.policy_cache_dir, cache_file_name)

    # Helper objects that carry most of the processing logic
    self._control_cache = ControlImplementationCache()
    self._asset_consolidator = AssetConsolidator()
    self._issue_field_setter = IssueFieldSetter(self._control_cache, plan_id, helper_module)
    self._finding_consolidator = FindingConsolidator(self)
    self._finding_processor = FindingToIssueProcessor(self)
    self._assessment_processor = ControlAssessmentProcessor(
        plan_id,
        helper_module,
        self.scan_date,
        self.title,
        framework_name,
    )
284
+
285
def fetch_compliance_data(self) -> List[Any]:
    """
    Retrieve policy assessments from the Wiz GraphQL API and keep only the
    framework-specific items whose assets already exist in RegScale.

    The result is also stored on ``self.raw_policy_assessments``.

    :return: List of filtered raw compliance data
    :rtype: List[Any]
    """
    # Authenticate lazily; a token may already be present
    if not self.access_token:
        self.authenticate_wiz()

    # The asset lookup must be ready before filtering
    self._load_regscale_assets()

    endpoint = self.wiz_endpoint or WIZ_URL
    assessment_fetcher = PolicyAssessmentFetcher(
        wiz_endpoint=endpoint,
        access_token=self.access_token,
        wiz_project_id=self.wiz_project_id,
        framework_id=self.framework_id,
        cache_duration_minutes=self.cache_duration_minutes,
    )
    fetched = assessment_fetcher.fetch_policy_assessments()

    if fetched:
        # Filter to only items with existing assets in RegScale
        kept = self._filter_assessments_to_existing_assets(fetched)
        self.raw_policy_assessments = kept
        return kept

    logger.info("No policy assessments fetched from Wiz")
    self.raw_policy_assessments = []
    return []
321
+
322
def _filter_assessments_to_existing_assets(self, assessments: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Keep only assessments that have a control ID and a matching RegScale asset.

    :param assessments: List of raw assessments from Wiz
    :return: Filtered list of assessments
    """
    # Falls back to an empty lookup when the asset cache was never loaded
    assets_exist = getattr(self, "_regscale_assets_by_wiz_id", {})
    skipped_no_control = 0
    skipped_no_asset = 0
    filtered_assessments = []

    for raw in assessments:
        # Wrap the record so the framework-aware properties can be used
        candidate = WizComplianceItem(raw, self)

        if not candidate.control_id:
            # No control ID means the item is not in the selected framework
            skipped_no_control += 1
        elif candidate.resource_id not in assets_exist:
            # Asset is unknown to RegScale (cached lookup)
            skipped_no_asset += 1
        else:
            filtered_assessments.append(raw)

    logger.debug(f"Skipped {skipped_no_control} assessments with no control ID for framework.")
    logger.debug(f"Skipped {skipped_no_asset} assessments with no existing asset in RegScale.")
    return filtered_assessments
352
+
353
def create_compliance_item(self, raw_data: Any) -> ComplianceItem:
    """
    Wrap one raw Wiz record in a compliance item bound to this integration.

    :param Any raw_data: Raw compliance data from Wiz
    :return: ComplianceItem instance
    :rtype: ComplianceItem
    """
    item = WizComplianceItem(raw_data, self)
    return item
362
+
363
def _map_resource_type_to_asset_type(self, compliance_item: ComplianceItem) -> str:
    """
    Map Wiz resource type to RegScale asset type.

    Items that are not WizComplianceItem instances, and resource types that are
    missing, null or not in the mapping, all resolve to "Cloud Resource".

    :param ComplianceItem compliance_item: Compliance item
    :return: Asset type string
    :rtype: str
    """
    if not isinstance(compliance_item, WizComplianceItem):
        return "Cloud Resource"

    # `.get("type", "")` would still return None for an explicit null, and the
    # resource itself may be null; normalize both before upper-casing.
    resource = compliance_item.resource or {}
    resource_type = str(resource.get("type") or "").upper()

    # Minimal mapping expected by tests; default to generic type name
    name_mapping = {
        "VIRTUAL_MACHINE": "Virtual Machine",
        "CONTAINER": "Container",
        "DATABASE": "Database",
        "BUCKET": "Storage",
    }
    return name_mapping.get(resource_type, "Cloud Resource")
385
+
386
def _get_component_name_from_source_type(self, compliance_item: WizComplianceItem) -> str:
    """
    Build a component name from the original Wiz resource type (source type).

    Example: "STORAGE_ACCOUNT" -> "Storage Account"

    A missing, null or empty resource type yields "Unknown Resource".

    :param WizComplianceItem compliance_item: Compliance item containing resource information
    :return: Human-readable component name derived from resource type
    :rtype: str
    """
    # `or` covers both an absent key and an explicit null/empty value, which a
    # plain .get() default would let through.
    raw_type = (compliance_item.resource or {}).get("type") or "Unknown Resource"
    return str(raw_type).replace("_", " ").title()
398
+
399
def fetch_assets(self, *args, **kwargs) -> Iterator[IntegrationAsset]:
    """
    Return an empty iterator: this integration never creates assets.

    Assets come from separate Wiz inventory import.
    """
    return iter(())
405
+
406
def fetch_findings(self, *args, **kwargs) -> Iterator[IntegrationFinding]:
    """
    Yield consolidated findings grouped by control.

    Failed compliance items are handed to the finding consolidator, which groups
    them by control so that each control yields one finding covering all of its
    affected resources. Nothing is yielded when there are no failed items.
    """
    failed_items = self.failed_compliance_items
    if failed_items:
        yield from self._finding_consolidator.create_consolidated_findings(failed_items)
418
+
419
def _get_all_control_ids_for_compliance_item(self, compliance_item: WizComplianceItem) -> List[str]:
    """
    Collect every control ID a compliance item maps to.

    Wiz policies can map to multiple controls (e.g., one policy failure might affect
    AC-4(2), AC-4(4), and SC-28(1) controls). Duplicates and empty IDs are dropped
    and first-seen order is kept.

    :param WizComplianceItem compliance_item: Compliance item to extract control IDs from
    :return: List of control IDs this policy maps to
    :rtype: List[str]
    """
    if not compliance_item.policy:
        return []

    subcategories = compliance_item._get_filtered_subcategories()
    if not subcategories:
        return []

    # dict keys are unique and remember insertion order, so this deduplicates
    # while preserving the original ordering.
    candidate_ids = (subcat.get("externalId", "") for subcat in subcategories)
    return list(dict.fromkeys(cid for cid in candidate_ids if cid))
448
+
449
def _group_compliance_items_by_control(self) -> Dict[str, Dict[str, WizComplianceItem]]:
    """
    Index failed compliance items by control ID and then by resource ID.

    Control IDs are upper-cased and resource IDs lower-cased to form the keys.

    :return: Dictionary mapping control IDs to resource dictionaries
    :rtype: Dict[str, Dict[str, WizComplianceItem]]
    """
    grouped: Dict[str, Dict[str, WizComplianceItem]] = {}  # {control_id: {resource_id: compliance_item}}

    for item in self.failed_compliance_items:
        if not isinstance(item, WizComplianceItem):
            continue

        resource_key = (item.resource_id or "").lower()
        if not resource_key:
            continue

        # A policy assessment may map to several controls; register the resource
        # under each of them (an empty list simply registers nothing).
        for control_id in self._get_all_control_ids_for_compliance_item(item):
            per_control = grouped.setdefault(control_id.upper(), {})
            # setdefault keeps the first item seen for a resource-control pair
            # (there might be duplicates from multiple policy assessments)
            per_control.setdefault(resource_key, item)

    return grouped
484
+
485
def _create_consolidated_findings(
    self, control_to_resources: Dict[str, Dict[str, WizComplianceItem]]
) -> Iterator[IntegrationFinding]:
    """
    Create consolidated findings from grouped control-resource mappings.

    Controls whose resource mapping is empty are skipped.

    :param Dict[str, Dict[str, WizComplianceItem]] control_to_resources: Control groupings
    :yield: Consolidated findings
    :rtype: Iterator[IntegrationFinding]
    """
    for control_id, resources in control_to_resources.items():
        if not resources:
            # next() on an empty iterator raises StopIteration, which inside a
            # generator surfaces as RuntimeError (PEP 479) and would abort the
            # remaining controls as well.
            continue

        # Use the first compliance item as the base for this control's finding
        base_compliance_item = next(iter(resources.values()))

        finding = self._create_consolidated_finding_for_control(
            control_id=control_id,
            compliance_item=base_compliance_item,
            affected_resources=list(resources),
        )

        if finding:
            yield finding
507
+
508
def _create_consolidated_finding_for_control(
    self, control_id: str, compliance_item: WizComplianceItem, affected_resources: List[str]
) -> Optional[IntegrationFinding]:
    """
    Build one finding for a control that covers every affected resource.

    :param str control_id: The control ID (e.g., 'AC-4(2)')
    :param WizComplianceItem compliance_item: Base compliance item for this control
    :param List[str] affected_resources: List of Wiz resource IDs that fail this control
    :return: Consolidated finding with all affected resources, or None when no
        resource exists in RegScale or the base finding could not be created
    :rtype: Optional[IntegrationFinding]
    """
    # Only resources that exist as assets in RegScale are kept
    known_assets = self._build_asset_mappings(affected_resources)
    if known_assets:
        # Base finding carries the specific control rather than the item's first one
        control_finding = self._create_finding_for_specific_control(compliance_item, control_id)
        if control_finding:
            # Fold the asset identifier and description for all resources into it
            self._update_finding_with_consolidated_assets(control_finding, known_assets)
            return control_finding
    return None
534
+
535
def _build_asset_mappings(self, resource_ids: List[str]) -> Dict[str, Dict[str, str]]:
    """
    Map each resource that exists in RegScale to its display information.

    :param List[str] resource_ids: List of Wiz resource IDs
    :return: Mapping of resource IDs to asset information (``name`` and ``wiz_id``)
    :rtype: Dict[str, Dict[str, str]]
    """
    mappings: Dict[str, Dict[str, str]] = {}

    for wiz_id in resource_ids:
        if not self._asset_exists_in_regscale(wiz_id):
            continue

        asset = self.get_asset_by_identifier(wiz_id)
        # Fall back to the resource ID when no asset name is available
        display_name = asset.name if asset and asset.name else wiz_id
        mappings[wiz_id] = {"name": display_name, "wiz_id": wiz_id}

    return mappings
555
+
556
def _update_finding_with_consolidated_assets(
    self, finding: IntegrationFinding, asset_mappings: Dict[str, Dict[str, str]]
) -> None:
    """
    Write the consolidated asset information onto a finding, in place.

    :param IntegrationFinding finding: Finding to update
    :param Dict[str, Dict[str, str]] asset_mappings: Asset mapping information
    :return: None
    :rtype: None
    """
    # Asset identifier covers all asset names (clean format for POAMs)
    finding.asset_identifier = self._create_consolidated_asset_identifier(asset_mappings)

    asset_names = [info["name"] for info in asset_mappings.values()]
    if len(asset_names) <= 1:
        # A single asset needs no "affects N assets" note
        return

    # Describe the spread of the failure, listing at most MAX_DISPLAY_ASSETS names
    finding.description = f"{finding.description}\n\nThis control failure affects {len(asset_names)} assets: {', '.join(asset_names[:MAX_DISPLAY_ASSETS])}"
    if len(asset_names) > MAX_DISPLAY_ASSETS:
        finding.description += f" (and {len(asset_names) - MAX_DISPLAY_ASSETS} more)"
577
+
578
def _create_finding_for_specific_control(
    self, compliance_item: WizComplianceItem, control_id: str
) -> Optional[IntegrationFinding]:
    """
    Build a finding that is tied to one explicit control ID.

    Unlike create_finding_from_compliance_item, the finding uses the control ID
    passed in rather than the first control of the compliance item.

    :param WizComplianceItem compliance_item: Source compliance item
    :param str control_id: Specific control ID to create finding for
    :return: Integration finding for this specific control, or None when building it fails
    :rtype: Optional[IntegrationFinding]
    """
    try:
        labels = [control_id] if control_id else []
        mapped_severity = self._map_severity(compliance_item.severity)
        policy_name = self._get_policy_name(compliance_item)
        finding_title = f"{policy_name} ({control_id})" if control_id else policy_name
        finding_description = self._compose_description(policy_name, compliance_item)

        finding = self._build_finding(
            control_labels=labels,
            title=finding_title,
            description=finding_description,
            severity=mapped_severity,
            compliance_item=compliance_item,
        )

        # Pin the finding to the requested control
        finding.rule_id = control_id
        finding.affected_controls = self._normalize_control_id_string(control_id)

        # Ensure unique external_id for each control to prevent unwanted updates
        finding.external_id = f"wiz-policy-control-{control_id.upper()}-{self.framework_id}"

        self._set_assessment_id_if_available(finding, compliance_item)
    except Exception as e:
        logger.error(f"Error creating finding for control {control_id}: {e}")
        return None
    return finding
620
+
621
def _asset_exists_in_regscale(self, resource_id: str) -> bool:
    """
    Tell whether RegScale holds an asset for the given Wiz resource ID.

    :param str resource_id: Wiz resource ID to check (stored in RegScale asset wizId field)
    :return: True if asset exists, False otherwise (including when the lookup fails)
    :rtype: bool
    """
    if not resource_id:
        return False

    try:
        # Build the cached lookup on first use
        if not hasattr(self, "_regscale_assets_by_wiz_id"):
            self._load_regscale_assets()
        found = resource_id in self._regscale_assets_by_wiz_id
    except Exception:
        found = False
    return found
640
+
641
def _load_regscale_assets(self) -> None:
    """
    Populate the Wiz ID-based asset lookup cache from RegScale.

    Assets of the current plan are fetched and indexed by their wizId field,
    which stores the Wiz resource ID. Assets without a wizId are left out. On any
    error the cache is set to an empty dict.
    """
    try:
        logger.info("Loading existing assets from RegScale for asset existence checks...")
        plan_assets = regscale_models.Asset.get_all_by_parent(
            parent_id=self.plan_id,
            parent_module=self.parent_module,
        )

        # Wiz resource ID -> RegScale asset
        lookup = {}
        for asset in plan_assets:
            if asset.wizId:
                lookup[asset.wizId] = asset
        self._regscale_assets_by_wiz_id = lookup
        logger.info(f"Loaded {len(self._regscale_assets_by_wiz_id)} existing assets for lookup")
    except Exception as e:
        logger.error(f"Error loading RegScale assets: {e}")
        # An empty cache avoids repeated failing loads
        self._regscale_assets_by_wiz_id = {}
662
+
663
def _map_framework_id_to_name(self, framework_id: str) -> str:
    """
    Translate a Wiz framework ID into a framework name.

    IDs without a known mapping are returned unchanged.

    :param str framework_id: Framework ID to map
    :return: Human-readable framework name
    :rtype: str
    """
    # Default mappings - will be enhanced with cached data
    known_frameworks = {
        "wf-id-4": "NIST800-53R5",
        "wf-id-48": "NIST800-53R4",
        "wf-id-5": "FedRAMP",
    }

    if framework_id in known_frameworks:
        return known_frameworks[framework_id]
    return framework_id
679
+
680
def create_finding_from_compliance_item(self, compliance_item: ComplianceItem) -> Optional[IntegrationFinding]:
    """
    Turn a failed compliance item into an IntegrationFinding with proper asset/issue matching.

    Items that are not WizComplianceItem instances are delegated to the base class.

    :param ComplianceItem compliance_item: The compliance item
    :return: IntegrationFinding or None
    :rtype: Optional[IntegrationFinding]
    """
    if isinstance(compliance_item, WizComplianceItem):
        try:
            labels = self._get_control_labels(compliance_item)
            mapped_severity = self._map_severity(compliance_item.severity)
            policy_name = self._get_policy_name(compliance_item)
            finding_title = self._compose_title(policy_name, compliance_item)
            finding_description = self._compose_description(policy_name, compliance_item)
            finding = self._build_finding(
                control_labels=labels,
                title=finding_title,
                description=finding_description,
                severity=mapped_severity,
                compliance_item=compliance_item,
            )
            self._set_affected_controls(finding, compliance_item)
            self._set_assessment_id_if_available(finding, compliance_item)
        except Exception as e:
            logger.error(f"Error creating finding from Wiz compliance item: {e}")
            return None
        return finding

    return super().create_finding_from_compliance_item(compliance_item)
710
+
711
+ # ---------- Private helpers (low-complexity building blocks) ----------
712
+
713
+ @staticmethod
714
+ def _get_control_labels(item: WizComplianceItem) -> List[str]:
715
+ """
716
+ Extract control labels from a Wiz compliance item.
717
+
718
+ :param WizComplianceItem item: Compliance item to extract labels from
719
+ :return: List of control labels
720
+ :rtype: List[str]
721
+ """
722
+ return [item.control_id] if item.control_id else []
723
+
724
+ @staticmethod
725
+ def _get_policy_name(item: WizComplianceItem) -> str:
726
+ """
727
+ Extract policy name from a Wiz compliance item.
728
+
729
+ :param WizComplianceItem item: Compliance item to extract policy name from
730
+ :return: Policy name or 'Unknown Policy' if not found
731
+ :rtype: str
732
+ """
733
+ return (item.policy.get("name") or "Unknown Policy").strip()
734
+
735
+ @staticmethod
736
+ def _compose_title(policy_name: str, item: WizComplianceItem) -> str:
737
+ """
738
+ Compose a finding title from policy name and control information.
739
+
740
+ :param str policy_name: Name of the policy
741
+ :param WizComplianceItem item: Compliance item with control information
742
+ :return: Formatted title for the finding
743
+ :rtype: str
744
+ """
745
+ return f"{policy_name} ({item.control_id})" if item.control_id else policy_name
746
+
747
+ def _compose_description(self, policy_name: str, item: WizComplianceItem) -> str:
748
+ """
749
+ Compose a detailed description for a compliance finding.
750
+
751
+ :param str policy_name: Name of the policy that failed
752
+ :param WizComplianceItem item: Compliance item with resource and policy details
753
+ :return: Formatted markdown description
754
+ :rtype: str
755
+ """
756
+ parts: List[str] = [
757
+ f"Policy compliance failure detected by Wiz for resource '{item.resource_name}'.",
758
+ "",
759
+ f"**Policy:** {policy_name}",
760
+ f"**Resource:** {item.resource_name} ({item.resource.get('type', 'Unknown')})",
761
+ f"**Control:** {item.control_id}",
762
+ f"**Framework:** {item.framework}",
763
+ f"**Result:** {item.result}",
764
+ ]
765
+
766
+ # Policy/Remediation details
767
+ policy_desc = item.policy.get("description") or item.policy.get("ruleDescription")
768
+ if policy_desc:
769
+ parts.extend(["", "**Policy Description:**", policy_desc])
770
+
771
+ remediation = item.policy.get("remediationInstructions")
772
+ if remediation:
773
+ parts.extend(["", "**Remediation Instructions:**", remediation])
774
+
775
+ # Location details
776
+ if item.resource.get("region"):
777
+ parts.append(f"**Region:** {item.resource['region']}")
778
+ if item.resource.get("subscription"):
779
+ sub = item.resource["subscription"]
780
+ parts.append(
781
+ f"**Cloud Provider:** {sub.get('cloudProvider', 'Unknown')} "
782
+ f"(Subscription: {sub.get('name', 'Unknown')})"
783
+ )
784
+
785
+ return "\n".join(parts)
786
+
787
def _build_finding(
    self,
    *,
    control_labels: List[str],
    title: str,
    description: str,
    severity: regscale_models.IssueSeverity,
    compliance_item: WizComplianceItem,
) -> IntegrationFinding:
    """
    Assemble the IntegrationFinding for a Wiz compliance item.

    :param List[str] control_labels: Control labels attached to the finding
    :param str title: Base finding title; prefixed with "Policy Compliance Failure: " when the item is a failure
    :param str description: Finding description
    :param regscale_models.IssueSeverity severity: Finding severity, also used to derive the priority
    :param WizComplianceItem compliance_item: Source compliance item
    :return: Constructed integration finding with status Open
    :rtype: IntegrationFinding
    """
    # Control ID doubles as the stable rule key in plugin_id and the fallback external_id
    rule_key = compliance_item.control_id or ""

    finding_title = title
    if compliance_item.is_fail:
        finding_title = f"Policy Compliance Failure: {title}"

    finding_fields = {
        "control_labels": control_labels,
        "title": finding_title,
        "category": "Policy Compliance",
        "plugin_name": f"{self.title}",
        "severity": severity,
        "description": description,
        "status": regscale_models.IssueStatus.Open,
        "priority": self._map_severity_to_priority(severity),
        "plugin_id": f"policy-control:{self.framework_id}:{rule_key}",
        # Prefer the Wiz assessment ID; fall back to a control-scoped identifier
        "external_id": (
            f"wiz-policy-{compliance_item.id}" if compliance_item.id else f"wiz-policy-control-{rule_key}"
        ),
        "identification": "Security Control Assessment",
        "first_seen": self.scan_date,
        "last_seen": self.scan_date,
        "scan_date": self.scan_date,
        "asset_identifier": self._get_regscale_asset_identifier(compliance_item),
        "vulnerability_type": "Policy Compliance Violation",
        "rule_id": compliance_item.control_id,
        "baseline": compliance_item.framework,
        "remediation": compliance_item.policy.get("remediationInstructions") or "",
    }
    return IntegrationFinding(**finding_fields)
831
+
832
+ def _set_affected_controls(self, finding: IntegrationFinding, item: WizComplianceItem) -> None:
833
+ """
834
+ Set the affected controls field on a finding from a compliance item.
835
+
836
+ :param IntegrationFinding finding: Finding to update
837
+ :param WizComplianceItem item: Compliance item with control information
838
+ :return: None
839
+ :rtype: None
840
+ """
841
+ if item.control_id:
842
+ finding.affected_controls = self._normalize_control_id_string(item.control_id)
843
+
844
+ def _set_assessment_id_if_available(self, finding: IntegrationFinding, item: WizComplianceItem) -> None:
845
+ """
846
+ Set the assessment ID on a finding if available from cached mappings.
847
+
848
+ :param IntegrationFinding finding: Finding to update with assessment ID
849
+ :param WizComplianceItem item: Compliance item with control information
850
+ :return: None
851
+ :rtype: None
852
+ """
853
+ try:
854
+ ctrl_norm = self._normalize_control_id_string(item.control_id)
855
+ if ctrl_norm and hasattr(self, "_impl_id_by_control"):
856
+ impl_id = self._impl_id_by_control.get(ctrl_norm)
857
+ if impl_id and hasattr(self, "_assessment_by_impl_today"):
858
+ assess = self._assessment_by_impl_today.get(impl_id)
859
+ if assess:
860
+ finding.assessment_id = assess.id
861
+ except Exception:
862
+ pass
863
+
864
def create_asset_from_compliance_item(self, compliance_item: ComplianceItem) -> Optional[IntegrationAsset]:
    """
    Build an IntegrationAsset for the resource referenced by a Wiz compliance item.

    Items that are not Wiz compliance items are delegated to the parent implementation.

    :param ComplianceItem compliance_item: The compliance item
    :return: The asset, or None when construction raises
    :rtype: Optional[IntegrationAsset]
    """
    if not isinstance(compliance_item, WizComplianceItem):
        return super().create_asset_from_compliance_item(compliance_item)

    try:
        wiz_resource = compliance_item.resource
        mapped_type = self._map_resource_type_to_asset_type(compliance_item)

        # Description: fixed header lines, then whichever cloud metadata is present
        summary_lines = [
            "Cloud resource from Wiz compliance scan",
            f"Type: {wiz_resource.get('type', 'Unknown')}",
        ]

        region = wiz_resource.get("region")
        if region:
            summary_lines.append(f"Region: {region}")

        subscription = wiz_resource.get("subscription")
        if subscription:
            summary_lines.append(
                f"Cloud Provider: {subscription.get('cloudProvider', 'Unknown')} "
                f"(Subscription: {subscription.get('name', 'Unknown')})"
            )

        # Tags render as key:value pairs; entries without a key are skipped
        tag_entries = wiz_resource.get("tags", [])
        rendered_tags = (
            [f"{tag.get('key')}:{tag.get('value')}" for tag in tag_entries if tag.get("key")] if tag_entries else []
        )
        if rendered_tags:
            summary_lines.append(f"Tags: {', '.join(rendered_tags)}")

        # Asset owner is taken directly from the application config
        owner_id = Application().config.get("userId")

        return IntegrationAsset(
            name=compliance_item.resource_name,
            identifier=f"{compliance_item.resource_name} ({compliance_item.resource_id})",
            external_id=compliance_item.resource_id,
            other_tracking_number=compliance_item.resource_id,  # For deduplication
            asset_type=mapped_type,
            asset_category=regscale_models.AssetCategory.Hardware,
            description="\n".join(summary_lines),
            parent_id=self.plan_id,
            parent_module=self.parent_module,
            status=regscale_models.AssetStatus.Active,
            date_last_updated=self.scan_date,
            notes=self._create_asset_notes(compliance_item),
            asset_owner_id=owner_id,
            # Enable component mapping flow downstream
            component_names=[],
        )

    except Exception as e:
        logger.error(f"Error creating asset from Wiz compliance item: {e}")
        return None
931
+
932
def create_scan_history(self):  # type: ignore[override]
    """Skip scan-history creation: compliance report ingest records none, so this always returns None."""
    return None
935
+
936
+ def _create_asset_notes(self, compliance_item: WizComplianceItem) -> str:
937
+ """
938
+ Create detailed notes for asset with compliance context.
939
+
940
+ :param WizComplianceItem compliance_item: Compliance item with resource details
941
+ :return: Formatted asset notes in markdown
942
+ :rtype: str
943
+ """
944
+ resource = compliance_item.resource
945
+ notes_parts = [
946
+ "# Wiz Asset Details",
947
+ f"**Resource ID:** {compliance_item.resource_id}",
948
+ f"**Resource Type:** {resource.get('type', 'Unknown')}",
949
+ ]
950
+
951
+ # Add subscription details
952
+ if resource.get("subscription"):
953
+ sub = resource["subscription"]
954
+ notes_parts.extend(
955
+ [
956
+ "",
957
+ "## Cloud Provider Details",
958
+ f"**Provider:** {sub.get('cloudProvider', 'Unknown')}",
959
+ f"**Subscription Name:** {sub.get('name', 'Unknown')}",
960
+ f"**Subscription ID:** {sub.get('externalId', 'Unknown')}",
961
+ ]
962
+ )
963
+
964
+ # Add compliance summary
965
+ total_items = len(self.asset_compliance_map.get(compliance_item.resource_id, []))
966
+ failed_items = len(
967
+ [
968
+ item
969
+ for item in self.asset_compliance_map.get(compliance_item.resource_id, [])
970
+ if item.compliance_result in self.FAIL_STATUSES
971
+ ]
972
+ )
973
+
974
+ if total_items > 0:
975
+ notes_parts.extend(
976
+ [
977
+ "",
978
+ "## Compliance Summary",
979
+ f"**Total Assessments:** {total_items}",
980
+ f"**Failed Assessments:** {failed_items}",
981
+ f"**Compliance Rate:** {((total_items - failed_items) / total_items * 100):.1f}%",
982
+ ]
983
+ )
984
+
985
+ return "\n".join(notes_parts)
986
+
987
def authenticate_wiz(self) -> str:
    """
    Obtain a Wiz access token and record the Wiz endpoint from configuration.

    On success ``self.wiz_endpoint`` and ``self.access_token`` are set. A missing
    token, a missing ``wizUrl`` config entry, or any raised exception is reported
    through ``error_and_exit``.

    :return: Wiz access token
    :rtype: str
    """
    logger.info("Authenticating with Wiz...")
    try:
        access_token = wiz_authenticate(client_id=self.client_id, client_secret=self.client_secret)
        if not access_token:
            error_and_exit("Failed to authenticate with Wiz")

        # The Wiz endpoint is read from the config of the app returned by check_license()
        self.wiz_endpoint = check_license().config.get("wizUrl", "")
        if not self.wiz_endpoint:
            error_and_exit("No Wiz URL found in configuration")

        self.access_token = access_token
        logger.info("Successfully authenticated with Wiz")
        return access_token

    except Exception as e:
        failure = f"Wiz authentication failed: {str(e)}"
        logger.error(failure)
        error_and_exit(failure)
1014
+
1015
def _fetch_policy_assessments_from_wiz(self) -> List[Dict[str, Any]]:
    """
    Fetch policy assessments from Wiz GraphQL API.

    Sources are tried in this order:

    1. the local cache file (skipped on forced refresh or when caching is disabled),
    2. the async client with a single unfiltered query,
    3. the requests-based paginated fetch, which tries several ``filterBy`` shapes.

    Results obtained from the API are filtered to the configured framework and
    written back to the cache. The requests session is only created when the
    fallback path is actually needed, and it is closed afterwards.

    :return: List of raw policy assessment data
    :rtype: List[Dict[str, Any]]
    """
    logger.info("Fetching policy assessments from Wiz...")

    # Authenticate if not already done
    if not self.access_token:
        self.authenticate_wiz()

    # Try cache first unless forced refresh; a cache hit needs no HTTP resources at all
    cached_nodes = self._load_assessments_from_cache()
    if cached_nodes is not None:
        logger.info("Using cached Wiz policy assessments")
        return cached_nodes

    headers = self._build_wiz_headers()

    # Only include variables supported by the query (avoid validation errors)
    page_size = 100
    base_variables = {"first": page_size}

    # Try multiple filter key variants to avoid schema differences across tenants
    filter_variants = [
        {"project": [self.wiz_project_id]},
        {"projectId": [self.wiz_project_id]},
        {"projects": [self.wiz_project_id]},
        {},  # Empty filterBy
        None,  # Omit filterBy entirely
    ]

    # First, try async client (unit tests patch this path)
    # NOTE(review): this query passes no filterBy, so it is not scoped to wiz_project_id,
    # unlike the first variants of the fallback below - confirm that is intended.
    try:
        from regscale.integrations.commercial.wizv2.utils import compliance_job_progress

        with compliance_job_progress:
            task = compliance_job_progress.add_task(
                f"[#f68d1f]Fetching Wiz policy assessments (async, page size: {page_size})...",
                total=1,
            )
            results = run_async_queries(
                endpoint=self.wiz_endpoint or WIZ_URL,
                headers=headers,
                query_configs=[
                    {
                        "type": WizVulnerabilityType.CONFIGURATION,
                        "query": WIZ_POLICY_QUERY,
                        "topic_key": "policyAssessments",
                        "variables": {"first": page_size},
                    }
                ],
                progress_tracker=compliance_job_progress,
                max_concurrent=1,
            )
            compliance_job_progress.update(task, completed=1, advance=1)
            # Expected result shape: [(type, nodes, error)]
            if results and len(results) == 1 and not results[0][2]:
                nodes = results[0][1] or []
                filtered = self._filter_nodes_to_framework(nodes)
                self._write_assessments_cache(filtered)
                return filtered
    except Exception as exc:
        # Fall back to requests-based method below, but keep the reason discoverable
        logger.debug(f"Async Wiz policy assessment fetch failed, falling back to requests: {exc}")

    session = self._prepare_wiz_requests_session()
    try:
        filtered_nodes = self._fetch_assessments_with_variants(
            session=session,
            headers=headers,
            base_variables=base_variables,
            page_size=page_size,
            filter_variants=filter_variants,
        )
    finally:
        # Release pooled connections; tolerate stand-in session objects without close()
        close_session = getattr(session, "close", None)
        if callable(close_session):
            close_session()

    self._write_assessments_cache(filtered_nodes)
    return filtered_nodes
1092
+
1093
+ def _build_wiz_headers(self) -> Dict[str, str]:
1094
+ """
1095
+ Build HTTP headers for Wiz GraphQL API requests.
1096
+
1097
+ :return: Dictionary of HTTP headers including authorization
1098
+ :rtype: Dict[str, str]
1099
+ """
1100
+ return {
1101
+ "Authorization": f"Bearer {self.access_token}",
1102
+ "Content-Type": "application/json",
1103
+ }
1104
+
1105
def _prepare_wiz_requests_session(self):
    """
    Create a requests session whose POST requests are retried on transient failures.

    Retries: up to 5 in total (5 connect, 5 read), backoff factor 0.5, on HTTP
    statuses 429/500/502/503/504. The same adapter is mounted for https and http.

    :return: Configured requests session with retry adapter
    :rtype: requests.Session
    """
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    retry_policy = Retry(
        total=5,
        connect=5,
        read=5,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["POST"],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    for scheme in ("https://", "http://"):
        session.mount(scheme, retrying_adapter)
    return session
1129
+
1130
def _fetch_assessments_with_variants(
    self,
    *,
    session,
    headers: Dict[str, str],
    base_variables: Dict[str, Any],
    page_size: int,
    filter_variants: List[Optional[Dict[str, Any]]],
) -> List[Dict[str, Any]]:
    """
    Try each ``filterBy`` variant in turn and return the first result that succeeds.

    When no Wiz endpoint is set, the async client is tried first against the default
    URL. Otherwise, or when that call reports an error, the requests-based paginated
    fetch is used. A variant that raises is skipped; if every variant raises, the
    last error is logged and passed to ``error_and_exit``.

    :param session: HTTP session handed to the paginated fetch
    :param Dict[str, str] headers: HTTP headers for the Wiz API
    :param Dict[str, Any] base_variables: GraphQL variables shared by every variant
    :param int page_size: Page size, used in the progress description
    :param List[Optional[Dict[str, Any]]] filter_variants: Candidate ``filterBy`` values; None omits the key
    :return: Assessment nodes filtered to the configured framework
    :rtype: List[Dict[str, Any]]
    """
    from regscale.integrations.commercial.wizv2.utils import compliance_job_progress

    most_recent_error: Optional[Exception] = None

    # In unit tests, the async client is patched and we should not hit network.

    with compliance_job_progress:
        task = compliance_job_progress.add_task(
            f"[#f68d1f]Fetching Wiz policy assessments (page size: {page_size})...",
            total=None,
        )
        for variant in filter_variants:
            try:
                # If endpoint is not set (tests), short-circuit to async path mock
                if not self.wiz_endpoint:
                    query_variables = {**base_variables}
                    if variant is not None:
                        query_variables["filterBy"] = variant
                    async_results = run_async_queries(
                        endpoint=WIZ_URL,
                        headers=headers,
                        query_configs=[
                            {
                                "type": WizVulnerabilityType.CONFIGURATION,
                                "query": WIZ_POLICY_QUERY,
                                "topic_key": "policyAssessments",
                                "variables": query_variables,
                            }
                        ],
                        progress_tracker=compliance_job_progress,
                        max_concurrent=1,
                    )
                    # Expected mocked structure: [(type, nodes, error)]
                    if async_results and len(async_results) == 1 and not async_results[0][2]:
                        return self._filter_nodes_to_framework(async_results[0][1] or [])

                return self._fetch_with_filter_variant(
                    session=session,
                    headers=headers,
                    base_variables=base_variables,
                    filter_variant=variant,
                    page_size=page_size,
                    progress=compliance_job_progress,
                    task=task,
                )
            except Exception as exc:  # noqa: BLE001 - propagate last error
                most_recent_error = exc

    msg = f"Failed to fetch policy assessments after trying all filter variants: {most_recent_error}"
    logger.error(msg)
    error_and_exit(msg)
1188
+
1189
+ def _variant_name(self, fv: Optional[Dict[str, Any]]) -> str:
1190
+ """
1191
+ Get a human-readable name for a filter variant.
1192
+
1193
+ :param Optional[Dict[str, Any]] fv: Filter variant dictionary
1194
+ :return: Human-readable variant name
1195
+ :rtype: str
1196
+ """
1197
+ if fv is None:
1198
+ return "omitted"
1199
+ if fv == {}:
1200
+ return "empty"
1201
+ try:
1202
+ return next(iter(fv.keys()))
1203
+ except Exception:
1204
+ return "unknown"
1205
+
1206
def _fetch_with_filter_variant(
    self,
    *,
    session,
    headers: Dict[str, str],
    base_variables: Dict[str, Any],
    filter_variant: Optional[Dict[str, Any]],
    page_size: int,
    progress,
    task,
) -> List[Dict[str, Any]]:
    """
    Run the paginated policy-assessment query for one ``filterBy`` variant.

    Progress is reported on ``task`` before the fetch, after each page, and on completion.

    :param session: HTTP session used for the requests
    :param Dict[str, str] headers: HTTP headers for the Wiz API
    :param Dict[str, Any] base_variables: GraphQL variables shared by every variant
    :param Optional[Dict[str, Any]] filter_variant: ``filterBy`` value; None leaves the key out
    :param int page_size: Page size, used in the progress description
    :param progress: Progress tracker exposing ``update``
    :param task: Task handle within the progress tracker
    :return: Assessment nodes filtered to the configured framework
    :rtype: List[Dict[str, Any]]
    """
    label = self._variant_name(filter_variant)
    progress.update(
        task,
        description=(f"[#f68d1f]Fetching Wiz policy assessments (limit: {page_size}, variant: {label})..."),
        advance=1,
    )

    # Always work on a copy so the caller's shared variables are not mutated
    query_variables = base_variables.copy()
    if filter_variant is not None:
        query_variables["filterBy"] = filter_variant

    def report_page(page_idx: int, page_count: int, total_nodes: int) -> None:
        progress.update(
            task,
            description=(
                f"[cyan]Fetching policy assessments: page {page_idx}, "
                f"fetched {total_nodes} nodes (last page: {page_count})"
            ),
            advance=1,
        )

    fetched = self._execute_wiz_policy_query_paginated(
        session=session, headers=headers, variables=query_variables, on_page=report_page
    )
    in_framework = self._filter_nodes_to_framework(fetched)
    progress.update(
        task,
        description=f"[green]✓ Completed Wiz policy assessments: {len(in_framework)} nodes",
        completed=1,
        total=1,
    )
    logger.info("Successfully fetched Wiz policy assessments")

    return in_framework
1249
+
1250
def _execute_wiz_policy_query_paginated(
    self,
    *,
    session,
    headers: Dict[str, str],
    variables: Dict[str, Any],
    on_page=None,
) -> List[Dict[str, Any]]:
    """
    Run the Wiz policy-assessment GraphQL query and follow cursor pagination.

    Pagination stops when the response reports no further page, or when it reports
    one without supplying a new cursor. Without that second check the same page
    would be requested indefinitely.

    :param session: Object exposing ``post(url, json=..., headers=..., timeout=...)``
    :param Dict[str, str] headers: HTTP headers for the Wiz API
    :param Dict[str, Any] variables: GraphQL variables; an ``after`` entry, if present, is the starting cursor
    :param on_page: Optional callback ``(page_index, page_node_count, total_node_count)``; errors it raises are ignored
    :raises requests.HTTPError: When the response status is 400 or above
    :raises RuntimeError: When the GraphQL response contains an ``errors`` key
    :return: Nodes collected from all pages
    :rtype: List[Dict[str, Any]]
    """
    import requests

    nodes: List[Dict[str, Any]] = []
    after_cursor: Optional[str] = variables.get("after")
    page_index = 0
    while True:
        payload_vars = variables.copy()
        payload_vars["after"] = after_cursor
        payload = {"query": WIZ_POLICY_QUERY, "variables": payload_vars}
        resp = session.post(self.wiz_endpoint, json=payload, headers=headers, timeout=300)
        if resp.status_code >= 400:
            raise requests.HTTPError(f"{resp.status_code} {resp.text[:500]}")
        data = resp.json()
        if "errors" in data:
            raise RuntimeError(str(data["errors"]))

        # Any of these may be an explicit JSON null, so ``or`` is used instead of .get defaults
        topic = (data.get("data") or {}).get("policyAssessments") or {}
        page_nodes = topic.get("nodes") or []
        page_info = topic.get("pageInfo") or {}
        nodes.extend(page_nodes)
        page_index += 1

        if on_page:
            try:
                on_page(page_index, len(page_nodes), len(nodes))
            except Exception:
                # Progress reporting is cosmetic; it must not abort the fetch
                pass

        if not page_info.get("hasNextPage", False):
            break

        next_cursor = page_info.get("endCursor")
        if not next_cursor or next_cursor == after_cursor:
            # A "next page" without a fresh cursor would re-request the same page forever
            logger.warning(
                "Wiz reported another page of policy assessments without a new cursor; stopping pagination"
            )
            break
        after_cursor = next_cursor
    return nodes
1288
+
1289
+ def _filter_nodes_to_framework(self, nodes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
1290
+ filtered_nodes: List[Dict[str, Any]] = []
1291
+ for n in nodes:
1292
+ try:
1293
+ subcats = ((n or {}).get("policy") or {}).get("securitySubCategories", [])
1294
+ # If no subcategories info is present, include the node (cannot evaluate framework)
1295
+ if not subcats:
1296
+ filtered_nodes.append(n)
1297
+ continue
1298
+ if any((sc.get("category", {}).get("framework", {}).get("id") == self.framework_id) for sc in subcats):
1299
+ filtered_nodes.append(n)
1300
+ except Exception:
1301
+ filtered_nodes.append(n)
1302
+ return filtered_nodes
1303
+
1304
+ def _get_assessments_cache_path(self) -> str:
1305
+ """
1306
+ Get the file path for policy assessments cache.
1307
+
1308
+ :return: Full path to cache file
1309
+ :rtype: str
1310
+ """
1311
+ try:
1312
+ os.makedirs(self.policy_cache_dir, exist_ok=True)
1313
+ except Exception:
1314
+ pass
1315
+ return self.policy_cache_file
1316
+
1317
+ def _load_assessments_from_cache(self) -> Optional[List[Dict[str, Any]]]:
1318
+ """
1319
+ Load policy assessments from cache file if valid and within TTL.
1320
+
1321
+ :return: Cached assessment nodes or None if cache is invalid/expired
1322
+ :rtype: Optional[List[Dict[str, Any]]]
1323
+ """
1324
+ if self.force_refresh or self.cache_duration_minutes <= 0:
1325
+ return None
1326
+ try:
1327
+ path = self._get_assessments_cache_path()
1328
+ if not os.path.exists(path):
1329
+ return None
1330
+ # File age check
1331
+ max_age_seconds = max(0, int(self.cache_duration_minutes)) * 60
1332
+ age = max(0.0, (datetime.now().timestamp() - os.path.getmtime(path)))
1333
+ if age > max_age_seconds:
1334
+ return None
1335
+ with open(path, "r", encoding="utf-8") as f:
1336
+ data = json.load(f)
1337
+ nodes = data.get("nodes") or data.get("assessments") or []
1338
+ # Defensive: ensure list
1339
+ if not isinstance(nodes, list):
1340
+ return None
1341
+ return nodes
1342
+ except Exception:
1343
+ return None
1344
+
1345
+ def _write_assessments_cache(self, nodes: List[Dict[str, Any]]) -> None:
1346
+ """
1347
+ Write policy assessment nodes to cache file.
1348
+
1349
+ :param List[Dict[str, Any]] nodes: Assessment nodes to cache
1350
+ :return: None
1351
+ :rtype: None
1352
+ """
1353
+ # Only write cache when enabled
1354
+ if self.cache_duration_minutes <= 0:
1355
+ return None
1356
+ try:
1357
+ path = self._get_assessments_cache_path()
1358
+ payload = {
1359
+ "timestamp": datetime.now().isoformat(),
1360
+ "wiz_project_id": self.wiz_project_id,
1361
+ "framework_id": self.framework_id,
1362
+ "nodes": nodes,
1363
+ }
1364
+ with open(path, "w", encoding="utf-8") as f:
1365
+ json.dump(payload, f, ensure_ascii=False)
1366
+ except Exception:
1367
+ # Cache write failures should not interrupt flow
1368
+ pass
1369
+
1370
def write_policy_data_to_json(self) -> str:
    """
    Export the collected policy assessments to a timestamped JSON file under ``artifacts/wiz``.

    When ``write_jsonl_output`` is truthy, a JSONL file aggregated per control is
    written next to it. Older report files are pruned afterwards. Any failure while
    writing is reported through ``error_and_exit``.

    :return: Path to the written JSON file
    :rtype: str
    """
    output_dir = os.path.join("artifacts", "wiz")
    os.makedirs(output_dir, exist_ok=True)

    # One timestamp names both files
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_path = os.path.join(output_dir, f"policy_compliance_report_{timestamp}.json")
    file_path_jsonl = os.path.join(output_dir, f"policy_compliance_report_{timestamp}.jsonl")

    export_data = {
        "metadata": {
            "timestamp": timestamp,
            "wiz_project_id": self.wiz_project_id,
            "framework_id": self.framework_id,
            "framework_name": self.get_framework_name(self.framework_id),
            "total_assessments": len(self.all_compliance_items),
            "pass_count": len(self.all_compliance_items) - len(self.failed_compliance_items),
            "fail_count": len(self.failed_compliance_items),
            "unique_controls": len({item.control_id for item in self.all_compliance_items if item.control_id}),
        },
        "framework_mapping": self.framework_mapping,
        "policy_assessments": [],
    }

    # Only Wiz compliance items are exported
    for entry in self.all_compliance_items:
        if not isinstance(entry, WizComplianceItem):
            continue

        # Narrow the policy's subcategories to the selected framework to avoid noise
        scoped_policy = dict(entry.policy) if entry.policy else {}
        if scoped_policy:
            subcategories = scoped_policy.get("securitySubCategories", [])
            if subcategories:
                wanted_framework = self.framework_id
                matching = [
                    sc for sc in subcategories if sc.get("category", {}).get("framework", {}).get("id") == wanted_framework
                ]
                # If nothing matches, the original list stays in place to retain context
                if matching:
                    scoped_policy["securitySubCategories"] = matching

        export_data["policy_assessments"].append(
            {
                "id": entry.id,
                "result": entry.result,
                "control_id": entry.control_id,
                "framework_name": entry.framework,
                "framework_id": entry.framework_id,
                "policy": scoped_policy or entry.policy,
                "resource": entry.resource,
                "output": entry.output,
            }
        )

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(export_data, f, indent=2, ensure_ascii=False)

        logger.info(f"Policy compliance data written to: {file_path}")

        # JSONL: aggregated by control_id (optional)
        if getattr(self, "write_jsonl_output", False):
            aggregated_controls = self._build_control_aggregation()
            with open(file_path_jsonl, "w", encoding="utf-8") as jf:
                for control_record in aggregated_controls.values():
                    jf.write(json.dumps(control_record, ensure_ascii=False) + "\n")
            logger.info(f"Policy compliance JSONL written to: {file_path_jsonl}")

        self._cleanup_artifacts(output_dir, keep=CACHE_CLEANUP_KEEP_COUNT)
        return file_path

    except Exception as e:
        error_and_exit(f"Failed to write policy data to JSON: {str(e)}")
1453
+
1454
def _build_control_aggregation(self) -> Dict[str, Dict[str, Any]]:
    """
    Build an aggregated view per control_id for JSONL export.

    Creates a control-centric view with assets affected and policy checks. Only Wiz
    compliance items with a control ID contribute. A control is marked failed as soon
    as one of its items has a failing result. Within an asset, checks are deduplicated
    by policy title (the first occurrence wins).

    :return: Dictionary mapping control IDs to aggregated data
    :rtype: Dict[str, Dict[str, Any]]

    {
        control_id: {
            "control_id": "AC-2(1)",
            "framework_id": "wf-id-4",
            "framework_name": "NIST SP 800-53 Revision 5",
            "failed": true,
            "assets_affected": [
                {
                    "resource_id": "...",
                    "resource_name": "...",
                    "resource_type": "...",
                    "region": "...",
                    "subscription": "...",
                    "checks": [
                        {"title": "Policy name", "result": "FAIL", "remediation": "..."}
                    ]
                }
            ]
        }
    }
    """
    control_map: Dict[str, Dict[str, Any]] = {}
    # Bookkeeping lives outside the output dicts, so nothing has to be stripped
    # from them before they are serialized.
    assets_by_control: Dict[str, Dict[Any, Dict[str, Any]]] = {}
    seen_titles: Dict[Any, set] = {}

    for item in self.all_compliance_items:
        if not isinstance(item, WizComplianceItem):
            # Skip non-wiz items in this aggregation
            continue

        ctrl_id = self._normalize_control_id_string(item.control_id)
        if not ctrl_id:
            continue

        ctrl_entry = control_map.get(ctrl_id)
        if ctrl_entry is None:
            ctrl_entry = {
                "control_id": ctrl_id,
                "framework_id": self.framework_id,
                "framework_name": self.get_framework_name(self.framework_id),
                "failed": False,
                "assets_affected": [],
            }
            control_map[ctrl_id] = ctrl_entry
            assets_by_control[ctrl_id] = {}

        # Determine fail/pass at control level
        if item.compliance_result in self.FAIL_STATUSES:
            ctrl_entry["failed"] = True

        resource = item.resource or {}
        policy = item.policy or {}

        # Asset bucket
        asset_id = item.resource_id
        assets_idx = assets_by_control[ctrl_id]
        asset_entry = assets_idx.get(asset_id)
        if asset_entry is None:
            asset_entry = {
                "resource_id": item.resource_id,
                "resource_name": item.resource_name,
                "resource_type": resource.get("type"),
                "region": resource.get("region"),
                "subscription": (resource.get("subscription") or {}).get("name"),
                "checks": [],
            }
            assets_idx[asset_id] = asset_entry

        # Policy name, falling back to the host configuration rule name.
        # ``hostConfigurationRule`` may be present but null, hence the ``or {}``.
        policy_name = policy.get("name") or (policy.get("hostConfigurationRule") or {}).get("name")
        if not policy_name:
            continue

        # Deduplicate identical checks by title within an asset
        titles = seen_titles.setdefault((ctrl_id, asset_id), set())
        if policy_name not in titles:
            titles.add(policy_name)
            asset_entry["checks"].append(
                {
                    "title": policy_name,
                    "result": item.compliance_result,
                    "remediation": policy.get("remediationInstructions"),
                }
            )

    # Materialize each control's asset index as a list for the final output
    for ctrl_id, ctrl in control_map.items():
        ctrl["assets_affected"] = list(assets_by_control[ctrl_id].values())

    return control_map
1549
+
1550
@staticmethod
def _normalize_control_id_string(control_id: Optional[str]) -> Optional[str]:
    """
    Normalize control id variants to a canonical form, e.g. 'AC-4(2)'.
    Accepts 'ac-4 (2)', 'AC-4-2', 'AC-4(2)'. Returns uppercase base with optional '(sub)'.

    :param Optional[str] control_id: Raw control identifier
    :return: Canonical ID, the uppercased stripped input when the pattern does not match, or None for empty input
    :rtype: Optional[str]
    """
    if not control_id:
        return None

    trimmed = control_id.strip()
    # Precompiled safe regex, to avoid catastrophic backtracking on crafted input
    matched = SAFE_CONTROL_ID_RE.match(trimmed)
    if matched is None:
        return trimmed.upper()

    family_and_number = matched.group(1).upper()
    # The sub-control is captured by group 2, 3 or 4 depending on the branch matched
    enhancement = matched.group(2) or matched.group(3) or matched.group(4)
    if enhancement:
        return f"{family_and_number}({enhancement})"
    return family_and_number
1567
+
1568
+ @staticmethod
1569
+ def parse_control_jsonl(jsonl_path: str) -> Dict[str, Dict[str, Any]]:
1570
+ """
1571
+ Parse the aggregated control JSONL back into a dict keyed by control_id.
1572
+ """
1573
+ aggregated: Dict[str, Dict[str, Any]] = {}
1574
+ try:
1575
+ with open(jsonl_path, "r", encoding="utf-8") as jf:
1576
+ for line in jf:
1577
+ line = line.strip()
1578
+ if not line:
1579
+ continue
1580
+ obj = json.loads(line)
1581
+ ctrl_id = obj.get("control_id")
1582
+ if ctrl_id:
1583
+ aggregated[ctrl_id] = obj
1584
+ except Exception as exc:
1585
+ logger.error(f"Error parsing JSONL {jsonl_path}: {exc}")
1586
+ return aggregated
1587
+
1588
def _cleanup_artifacts(self, dir_path: str, keep: int = CACHE_CLEANUP_KEEP_COUNT) -> None:
    """
    Prune old ``policy_compliance_report_*`` files, keeping the newest ones per extension.

    JSON and JSONL reports are handled separately, newest first by modification time.
    All errors are ignored; cleanup is best-effort.

    :param str dir_path: Directory containing artifacts to clean
    :param int keep: Number of most recent files per extension to keep
    :return: None
    :rtype: None
    """
    try:
        # Bucket report files by extension so each type keeps its own newest files
        grouped: Dict[str, List[tuple[str, str]]] = {JSON_FILE_EXT: [], JSONL_FILE_EXT: []}
        for file_name in os.listdir(dir_path):
            if not file_name.startswith("policy_compliance_report_"):
                continue
            if file_name.endswith(JSONL_FILE_EXT):
                bucket = JSONL_FILE_EXT
            elif file_name.endswith(JSON_FILE_EXT):
                bucket = JSON_FILE_EXT
            else:
                continue
            grouped[bucket].append((file_name, os.path.join(dir_path, file_name)))

        for candidates in grouped.values():
            candidates.sort(key=lambda pair: os.path.getmtime(pair[1]), reverse=True)
            for _, stale_path in candidates[keep:]:
                try:
                    os.remove(stale_path)
                except Exception:
                    # Non-fatal; continue cleanup
                    pass
    except Exception:
        pass
1620
+
1621
def load_or_create_framework_mapping(self) -> Dict[str, str]:
    """
    Return the framework mapping, preferring the on-disk cache over a Wiz API fetch.

    :return: Framework ID to name mapping dictionary
    :rtype: Dict[str, str]
    """
    cache_present = os.path.exists(self.framework_cache_file)
    if not cache_present:
        # No cache file yet: fetch from Wiz (the fetch path also writes the cache)
        logger.info("Framework mapping cache not found, fetching from Wiz API")
        return self._fetch_and_cache_framework_mapping()

    logger.info("Loading framework mapping from cache file")
    return self._load_framework_mapping_from_cache()
1635
+
1636
def _load_framework_mapping_from_cache(self) -> Dict[str, str]:
    """
    Load framework mapping from existing JSON cache file.

    The mapping is read from the "framework_mapping" key and stored on
    ``self.framework_mapping``. If the file cannot be read or parsed, the mapping is
    fetched fresh via ``_fetch_and_cache_framework_mapping``.

    :return: Framework ID to name mapping
    :rtype: Dict[str, str]
    """
    try:
        with open(self.framework_cache_file, "r", encoding="utf-8") as f:
            cache_data = json.load(f)

        framework_mapping = cache_data.get("framework_mapping", {})

        # The cache writer stores the creation time under "metadata"; the top-level key is
        # still consulted as a fallback for files that carry it there.
        metadata = cache_data.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}
        cache_timestamp = metadata.get("timestamp") or cache_data.get("timestamp", "")

        logger.info(f"Loaded {len(framework_mapping)} frameworks from cache (created: {cache_timestamp})")
        self.framework_mapping = framework_mapping
        return framework_mapping

    except Exception as e:
        logger.error(f"Error loading framework mapping from cache: {str(e)}")
        logger.info("Falling back to fetching fresh framework data")
        return self._fetch_and_cache_framework_mapping()
1658
+
1659
def _fetch_and_cache_framework_mapping(self) -> Dict[str, str]:
    """
    Fetch frameworks from Wiz, build the ID-to-name mapping, and write it to the cache file.

    The mapping is also stored on ``self.framework_mapping``.

    :return: Framework ID to name mapping
    :rtype: Dict[str, str]
    """
    raw_frameworks = self._fetch_security_frameworks()
    mapping = self._create_framework_mapping(raw_frameworks)

    # Write the cache before publishing the mapping on the instance
    self._write_framework_mapping_to_json(mapping, raw_frameworks)
    self.framework_mapping = mapping
    return mapping
1672
+
1673
def _fetch_security_frameworks(self) -> List[Dict[str, Any]]:
    """
    Query the Wiz GraphQL API for security frameworks.

    Authenticates first when no access token is present. An empty result set yields an
    empty list; a reported query error or any raised exception is passed to
    ``error_and_exit``.

    :return: List of framework data
    :rtype: List[Dict[str, Any]]
    """
    logger.info("Fetching security frameworks from Wiz...")

    # Authenticate if not already done
    if not self.access_token:
        self.authenticate_wiz()

    request_headers = {
        "Authorization": f"Bearer {self.access_token}",
        "Content-Type": "application/json",
    }
    framework_query = {
        "type": WizVulnerabilityType.CONFIGURATION,  # Using existing enum type
        "query": WIZ_FRAMEWORK_QUERY,
        "topic_key": "securityFrameworks",
        "variables": {"first": 200, "filterBy": {}},  # Empty filter: no framework filtering
    }

    try:
        # Imported here, as in the original, rather than at module level
        from regscale.integrations.commercial.wizv2.utils import compliance_job_progress

        # Run the single query through the async client with visible progress
        with compliance_job_progress:
            progress_task = compliance_job_progress.add_task(
                "[#f68d1f]Fetching Wiz security frameworks...", total=1
            )
            query_results = run_async_queries(
                endpoint=self.wiz_endpoint,
                headers=request_headers,
                query_configs=[framework_query],
                progress_tracker=compliance_job_progress,
                max_concurrent=1,
            )
            compliance_job_progress.update(progress_task, completed=1, advance=1)

        if not query_results:
            logger.warning("No framework results returned from Wiz")
            return []

        # Each result is a 3-tuple; the first element is not needed here
        _, framework_nodes, query_error = query_results[0]

        if query_error:
            logger.error(f"Error fetching security frameworks: {query_error}")
            error_and_exit(f"Error fetching security frameworks: {query_error}")

        logger.info(f"Successfully fetched {len(framework_nodes)} security frameworks")
        return framework_nodes

    except Exception as e:
        error_and_exit(f"Failed to fetch security frameworks: {str(e)}")
1728
+
1729
def _create_framework_mapping(self, frameworks: List[Dict[str, Any]]) -> Dict[str, str]:
    """
    Build a framework ID to name lookup from raw framework records.

    Records missing a truthy "id" or "name" are left out; when an ID repeats, the last
    record wins.

    :param List[Dict[str, Any]] frameworks: Raw framework data from Wiz API
    :return: Dictionary mapping framework IDs to human-readable names
    :rtype: Dict[str, str]
    """
    framework_mapping = {
        record.get("id"): record.get("name")
        for record in frameworks
        if record.get("id") and record.get("name")
    }

    logger.info(f"Created mapping for {len(framework_mapping)} frameworks")
    return framework_mapping
1748
+
1749
def _write_framework_mapping_to_json(
    self, framework_mapping: Dict[str, str], raw_frameworks: List[Dict[str, Any]]
) -> None:
    """
    Write framework mapping and raw data to JSON cache file.

    Caching is not critical to the sync, so any failure to create the directory or write
    the file is logged and swallowed rather than raised.

    :param Dict[str, str] framework_mapping: Framework ID to name mapping dictionary
    :param List[Dict[str, Any]] raw_frameworks: Raw framework data from Wiz API
    :return: None
    :rtype: None
    """
    # Prepare data for JSON export
    cache_data = {
        "metadata": {
            "timestamp": datetime.now().isoformat(),
            "total_frameworks": len(framework_mapping),
            "enabled_frameworks": sum(1 for f in raw_frameworks if f.get("enabled", False)),
            "builtin_frameworks": sum(1 for f in raw_frameworks if f.get("builtin", False)),
            "description": "Cached Wiz security framework mappings",
        },
        "framework_mapping": framework_mapping,
        "raw_frameworks": raw_frameworks,
    }

    try:
        # Create the parent directory if the cache path has one. A bare filename has an
        # empty dirname, and os.makedirs("") would raise.
        artifacts_dir = os.path.dirname(self.framework_cache_file)
        if artifacts_dir:
            os.makedirs(artifacts_dir, exist_ok=True)

        with open(self.framework_cache_file, "w", encoding="utf-8") as f:
            json.dump(cache_data, f, indent=2, ensure_ascii=False)

        logger.info(f"Framework mapping cached to: {self.framework_cache_file}")

    except Exception as e:
        # Don't exit here - this is not critical to the main functionality
        logger.error(f"Failed to write framework mapping to cache: {str(e)}")
1787
+
1788
def get_framework_name(self, framework_id: str) -> str:
    """
    Resolve a framework ID to its name, loading the mapping first if it is empty.

    :param str framework_id: Framework ID
    :return: Framework name or ID if not found
    :rtype: str
    """
    if not self.framework_mapping:
        # Mapping not loaded yet (or empty): load it from cache or from Wiz
        self.load_or_create_framework_mapping()

    known_names = self.framework_mapping
    return known_names.get(framework_id, framework_id)
1801
+
1802
def sync_compliance(self) -> None:
    """
    Run the compliance sync in the order needed for controlId/assessmentId assignment.

    Control assessments are created, and the assessment cache refreshed, BEFORE issues
    are processed so that issues can reference them. Any exception is passed to
    ``error_and_exit``.
    """
    logger.info(f"Starting {self.title} compliance sync with proper assessment ordering...")

    # (log message, method name) pairs, executed strictly in this order after assets sync.
    # Methods are looked up by name at call time, one step at a time.
    ordered_steps = (
        (
            "🔧 Pre-populating control implementation cache for issue processing...",
            "_populate_control_implementation_cache",
        ),
        (
            "🔧 Creating control assessments BEFORE issue processing...",
            "_sync_control_assessments",
        ),
        (
            "🔧 Refreshing assessment cache with newly created assessments...",
            "_refresh_assessment_cache_after_creation",
        ),
        (
            "🔧 Processing issues with control and assessment IDs available...",
            "_sync_issues",
        ),
    )

    try:
        scan_history = self.create_scan_history()
        self.process_compliance_data()

        # Assets are synced first, before any cache or assessment work
        self._sync_assets()

        for announcement, method_name in ordered_steps:
            logger.info(announcement)
            getattr(self, method_name)()

        self._finalize_scan_history(scan_history)

        logger.info(f"Completed {self.title} compliance sync with proper assessment ordering")

    except Exception as e:
        error_and_exit(f"Error during compliance sync: {e}")
1840
+
1841
def sync_policy_compliance(
    self, create_issues: Optional[bool] = None, update_control_status: Optional[bool] = None
) -> None:
    """
    Main method to sync policy compliance data from Wiz.

    Authenticates, loads the framework mapping, runs ``sync_compliance`` and finally writes
    the policy data to a JSON file. Any exception is passed to ``error_and_exit``.

    :param Optional[bool] create_issues: Whether to create issues for failed assessments
        (uses instance default if None)
    :param Optional[bool] update_control_status: Whether to update control implementation
        status (uses instance default if None)
    :return: None
    :rtype: None
    """
    logger.info("Starting Wiz policy compliance sync...")

    try:
        # Use instance defaults if not specified
        if create_issues is None:
            create_issues = self.create_issues
        if update_control_status is None:
            update_control_status = self.update_control_status

        # Step 1: Authenticate with Wiz
        self.authenticate_wiz()

        # Step 2: Load or create framework mapping cache
        self.load_or_create_framework_mapping()

        # Persist flags on the instance for downstream logic (only after the steps above
        # succeeded, and only when a concrete value is available)
        if create_issues is not None:
            self.create_issues = create_issues
        if update_control_status is not None:
            self.update_control_status = update_control_status

        # Step 3: Sync using the overridden method (which ensures proper ordering)
        logger.info(
            f"🔧 Sync parameters: create_issues={self.create_issues}, update_control_status={self.update_control_status}"
        )

        self.sync_compliance()

        # Step 4: Write data to JSON file for reference (post-processing)
        json_file = self.write_policy_data_to_json()
        logger.info(f"Policy compliance data saved to: {json_file}")

        logger.info("Policy compliance sync completed successfully")

    except Exception as e:
        error_and_exit(f"Policy compliance sync failed: {str(e)}")
1884
+
1885
def sync_wiz_compliance(self) -> None:
    """
    Backward-compatible alias that runs ``sync_policy_compliance`` with its defaults.

    :return: None
    :rtype: None
    """
    run_sync = self.sync_policy_compliance
    run_sync()
1893
+
1894
def is_poam(self, finding: IntegrationFinding) -> bool:  # type: ignore[override]
    """
    Decide whether the issue for this finding should be a POAM.

    When ``self.create_poams`` is truthy (the CLI flag `--create-poams/-cp` maps to it),
    every created/updated issue is forced to be a POAM. Otherwise the decision is left
    to the parent scanner implementation.
    """
    force_poam = False
    try:
        force_poam = bool(getattr(self, "create_poams", False))
    except Exception:
        # Any failure reading the flag falls through to the default behavior
        pass

    if force_poam:
        return True
    return super().is_poam(finding)
1908
+
1909
def create_or_update_issue_from_finding(
    self,
    title: str,
    finding: IntegrationFinding,
) -> regscale_models.Issue:
    """
    Create or update the issue for a finding with all compliance fields set before saving.

    Flow:
    1. Load the existing-records cache used for duplicate detection
    2. Pre-populate compliance fields on the finding
    3. If an issue with the same external_id is cached, update it in place
    4. Otherwise delegate to the parent class, which saves with all fields set

    :param str title: Issue title
    :param IntegrationFinding finding: Finding to create or update an issue for
    :return: The created or updated issue
    :rtype: regscale_models.Issue
    """
    # Load cache if not already loaded for duplicate detection
    self._load_existing_records_cache()

    # Compliance fields must be on the finding BEFORE the parent call so the parent
    # saves the issue with them already set
    self._populate_compliance_fields_on_finding(finding)

    assessment_id = getattr(finding, "assessment_id", None)
    if assessment_id:
        logger.debug(f"🔄 PRE-SETTING ASSESSMENT PARENT: assessmentId={assessment_id}")

        # Parent override markers, read back by _create_or_update_issue
        finding._override_parent_id = assessment_id
        finding._override_parent_module = "assessments"

        logger.debug(f" ✅ Finding will use parent: assessments #{assessment_id}")

    # Duplicate detection is keyed on external_id
    known_issue = self._find_existing_issue_cached(finding.external_id)
    if known_issue:
        return self._update_existing_issue_with_compliance_fields(known_issue, title, finding)

    # Expose the finding to get_control_implementation_id_for_cci for the duration of
    # the parent call
    self._current_finding_context = finding
    try:
        return super().create_or_update_issue_from_finding(title, finding)
    finally:
        # Clean up context
        if hasattr(self, "_current_finding_context"):
            del self._current_finding_context
1959
+
1960
def _update_existing_issue_with_compliance_fields(
    self, existing_issue: regscale_models.Issue, title: str, finding: IntegrationFinding
) -> regscale_models.Issue:
    """
    Refresh an existing issue from a finding, add compliance fields, and save it.

    :param existing_issue: The existing issue to update
    :param title: New issue title
    :param finding: Finding with updated data
    :return: Updated issue with all fields set
    """
    # Basic fields
    existing_issue.title = title
    existing_issue.description = finding.description
    # NOTE(review): _create_or_update_issue writes severity to `severityLevel`; confirm
    # that `severity` is the intended Issue field here.
    existing_issue.severity = finding.severity
    existing_issue.status = finding.status
    existing_issue.dateLastUpdated = self.scan_date

    # Control-related field: control_labels take precedence over affected_controls.
    # NOTE(review): _create_or_update_issue uses the opposite precedence and a ", "
    # separator; confirm which is intended.
    labels = getattr(finding, "control_labels", None)
    if labels:
        existing_issue.affectedControls = ",".join(labels)
    else:
        affected = getattr(finding, "affected_controls", None)
        if affected:
            existing_issue.affectedControls = affected

    # controlId / assessmentId via the issue field setter
    self._enhance_issue_with_compliance_fields(existing_issue, finding)

    # Re-parent the issue under the assessment when the finding carries one
    assessment_id = getattr(finding, "assessment_id", None)
    if assessment_id:
        existing_issue.parentId = assessment_id
        existing_issue.parentModule = "assessments"
        existing_issue.assessmentId = assessment_id

    existing_issue.save()

    return existing_issue
2000
+
2001
def _create_or_update_issue(
    self,
    finding: IntegrationFinding,
    issue_status,
    title: str,
    existing_issue=None,
):
    """
    Override parent method to handle assessment parenting correctly.

    Populates every issue field from the finding, then saves the existing issue or creates
    a new one. If the finding carries ``_override_parent_id`` / ``_override_parent_module``
    (set in ``create_or_update_issue_from_finding``), those are used as the issue's parent
    instead of the plan.

    ``affectedControls`` is taken from ``finding.affected_controls`` when set, otherwise
    from the sorted, de-duplicated ``finding.control_labels``. It is assigned once, so the
    labels fallback is no longer overwritten later in the method.

    :param IntegrationFinding finding: Finding that supplies the issue data
    :param issue_status: Status to set on the issue
    :param str title: Fallback title when ``get_issue_title`` returns nothing
    :param existing_issue: Issue to update in place; a new Issue is built when None
    :return: The saved or created issue
    """
    # Get consolidated asset identifier
    asset_identifier = self.get_consolidated_asset_identifier(finding, existing_issue)

    # Prepare issue data
    issue_title = self.get_issue_title(finding) or title
    description = finding.description or ""
    remediation_description = finding.recommendation_for_mitigation or finding.remediation or ""
    is_poam = self.is_poam(finding)

    if existing_issue:
        logger.debug(
            "Updating existing issue %s with assetIdentifier %s", existing_issue.id, finding.asset_identifier
        )

    # If we have an existing issue, update its fields instead of creating a new one
    issue = existing_issue or regscale_models.Issue()

    # Parent overrides from the finding take precedence over the plan
    if hasattr(finding, "_override_parent_id") and hasattr(finding, "_override_parent_module"):
        parent_id = finding._override_parent_id
        parent_module = finding._override_parent_module
        logger.debug(f"🔄 USING OVERRIDE PARENT: {parent_module} #{parent_id}")
    else:
        parent_id = self.plan_id
        parent_module = self.parent_module

    # Update all fields (copying from ScannerIntegration but with override parent)
    issue.parentId = parent_id
    issue.parentModule = parent_module
    issue.vulnerabilityId = finding.vulnerability_id
    issue.title = issue_title
    issue.dateCreated = finding.date_created
    issue.status = issue_status
    issue.dateCompleted = (
        self.get_date_completed(finding, issue_status)
        if issue_status == regscale_models.IssueStatus.Closed
        else None
    )
    issue.severityLevel = finding.severity
    issue.issueOwnerId = self.assessor_id
    issue.securityPlanId = self.plan_id if not self.is_component else None
    issue.identification = finding.identification
    issue.dateFirstDetected = finding.first_seen

    # Ensure a due date is always set using configured policy defaults (e.g., FedRAMP)
    if not finding.due_date:
        base_created = finding.date_created or issue.dateCreated
        try:
            finding.due_date = issue_due_date(
                severity=finding.severity,
                created_date=base_created,
                title=self.title,
            )
        except Exception:
            # Final fallback to a Low severity default if anything goes wrong
            finding.due_date = issue_due_date(
                severity=regscale_models.IssueSeverity.Low,
                created_date=base_created,
                title=self.title,
            )
    issue.dueDate = finding.due_date
    issue.description = description
    issue.sourceReport = finding.source_report or self.title
    issue.recommendedActions = finding.recommendation_for_mitigation
    issue.assetIdentifier = asset_identifier
    issue.securityChecks = finding.security_check or finding.external_id
    issue.remediationDescription = remediation_description
    issue.integrationFindingId = self.get_finding_identifier(finding)
    issue.poamComments = finding.poam_comments
    issue.cve = finding.cve

    # Set assessmentId from the finding
    issue.assessmentId = finding.assessment_id
    logger.debug(f"✅ SETTING assessmentId = {finding.assessment_id} with parent = {parent_module} #{parent_id}")

    control_id = self.get_control_implementation_id_for_cci(finding.cci_ref) if finding.cci_ref else None
    issue.controlId = control_id

    # Add the control implementation ids and the cci ref if it exists
    cci_control_ids = [control_id] if control_id is not None else []
    if finding.affected_controls:
        issue.affectedControls = finding.affected_controls
    elif finding.control_labels:
        issue.affectedControls = ", ".join(sorted({cl for cl in finding.control_labels if cl}))
    else:
        # Neither source is set: mirror the finding's (empty) value, as before
        issue.affectedControls = finding.affected_controls

    issue.controlImplementationIds = list(set(finding._control_implementation_ids + cci_control_ids))  # noqa
    issue.isPoam = is_poam
    issue.basisForAdjustment = (
        finding.basis_for_adjustment if finding.basis_for_adjustment else f"{self.title} import"
    )
    issue.pluginId = finding.plugin_id
    issue.originalRiskRating = regscale_models.Issue.assign_risk_rating(finding.severity)
    issue.changes = "<p>Current: {}</p><p>Planned: {}</p>".format(
        finding.milestone_changes, finding.planned_milestone_changes
    )
    issue.adjustedRiskRating = finding.adjusted_risk_rating
    issue.riskAdjustment = finding.risk_adjustment
    issue.operationalRequirement = finding.operational_requirements
    issue.deviationRationale = finding.deviation_rationale
    issue.dateLastUpdated = get_current_datetime()

    if finding.cve:
        issue = self.lookup_kev_and_update_issue(cve=finding.cve, issue=issue, cisa_kevs=self._kev_data)

    if existing_issue:
        logger.debug(f"💾 Saving existing issue {issue.id} with assessmentId={issue.assessmentId}")
        issue.save(bulk=True)
    else:
        logger.info(f"💾 Creating new issue with assessmentId={issue.assessmentId}")
        issue = issue.create_or_update(
            bulk_update=True, defaults={"otherIdentifier": self._get_other_identifier(finding, is_poam)}
        )
        self.extra_data_to_properties(finding, issue.id)

    self._handle_property_and_milestone_creation(issue, finding, existing_issue)
    return issue
2131
+
2132
def _populate_compliance_fields_on_finding(self, finding: IntegrationFinding) -> None:
    """
    Pre-populate compliance-specific fields on the finding before issue creation.

    When the finding's rule_id normalizes to a control ID that has a control
    implementation, this sets:
    - finding.cci_ref to the normalized control ID
    - finding._wiz_control_implementation_id to the implementation ID
    - finding.assessment_id to the implementation's assessment ID, when one is found

    Per the original design notes, the parent class copies finding.assessment_id to
    issue.assessmentId and resolves finding.cci_ref through
    get_control_implementation_id_for_cci() to set issue.controlId.

    This is best-effort: failures are logged at debug level and never raised.

    :param finding: Finding to populate with compliance fields
    """
    try:
        rule_id = getattr(finding, "rule_id", None)
        if not rule_id:
            return

        control_id = self._normalize_control_id_string(rule_id)
        if not control_id:
            return

        # Get control implementation ID
        impl_id = self._issue_field_setter._get_or_find_implementation_id(control_id)
        if not impl_id:
            return

        # cci_ref makes the issue-building code call get_control_implementation_id_for_cci;
        # the cached implementation ID is what that override returns
        finding.cci_ref = control_id
        finding._wiz_control_implementation_id = impl_id

        # Get assessment ID and set it on the finding
        assess_id = self._issue_field_setter._get_or_find_assessment_id(impl_id)
        if assess_id:
            finding.assessment_id = assess_id
    except Exception as exc:
        # Keep issue creation going, but leave a trace of why the fields are missing
        logger.debug("Could not pre-populate compliance fields on finding: %s", exc)
2165
+
2166
def _enhance_issue_with_compliance_fields(self, issue: regscale_models.Issue, finding: IntegrationFinding) -> None:
    """
    Enhance an issue with compliance-specific fields (controlId and assessmentId).

    Within this chunk the method is called from the existing-issue update path; new issues
    get their fields via _populate_compliance_fields_on_finding.

    This is best-effort: a reported failure from the field setter is logged as a warning,
    and unexpected exceptions are logged at debug level and never raised.

    :param issue: Issue object to enhance
    :param finding: Finding with control data
    """
    try:
        rule_id = getattr(finding, "rule_id", None)
        if not rule_id:
            return

        control_id = self._normalize_control_id_string(rule_id)
        if not control_id:
            return

        # Set control implementation and assessment IDs using the field setter
        result = self._issue_field_setter.set_control_and_assessment_ids(issue, control_id)
        if not result.success:
            logger.warning(f"Failed to set compliance fields for '{control_id}': {result.error_message}")
    except Exception as exc:
        # Keep the update going, but leave a trace of why the fields are missing
        logger.debug("Could not enhance issue with compliance fields: %s", exc)
2186
+
2187
def get_control_implementation_id_for_cci(self, cci: Optional[str]) -> Optional[int]:
    """
    Resolve a Wiz control identifier to a control implementation ID.

    Resolution order:
    1. The implementation ID cached on the finding currently being processed, when its
       cci_ref equals ``cci``
    2. A lookup of the normalized control ID through the issue field setter
    3. The parent class implementation

    :param cci: Control identifier (e.g., 'AC-2(1)') stored in finding.cci_ref
    :return: Control implementation ID if found, None otherwise
    """
    # A missing context resolves to None, which fails both hasattr checks below
    active_finding = getattr(self, "_current_finding_context", None)
    if (
        hasattr(active_finding, "_wiz_control_implementation_id")
        and hasattr(active_finding, "cci_ref")
        and active_finding.cci_ref == cci
    ):
        return active_finding._wiz_control_implementation_id

    # Fallback: try to look it up directly (for edge cases)
    normalized = self._normalize_control_id_string(cci) if cci else None
    resolved = self._issue_field_setter._get_or_find_implementation_id(normalized) if normalized else None
    if resolved:
        return resolved

    # Final fallback to parent class behavior
    return super().get_control_implementation_id_for_cci(cci)
2219
+
2220
def _populate_control_implementation_cache(self) -> None:
    """
    Fill the control-implementation and assessment caches used during issue processing.

    For every control implementation of this plan whose security control has a normalizable
    control ID, ``self._impl_id_by_control`` maps that ID to the implementation ID, and
    ``self._assessment_by_impl_today`` receives the implementation's assessment with the
    highest id (when it has any). Per-implementation failures are skipped silently; a
    failure of the overall load is logged as an error.

    :return: None
    :rtype: None
    """
    try:
        from regscale.models import regscale_models

        logger.info("🔍 Pre-populating control implementation cache for issue processing...")

        # All control implementations for this plan
        implementations = regscale_models.ControlImplementation.get_all_by_parent(
            parent_id=self.plan_id, parent_module=self.parent_module
        )
        if not implementations:
            logger.warning("No control implementations found for this plan")
            return

        logger.info(f"Found {len(implementations)} control implementations to cache")

        # SecurityControl objects already fetched, keyed by controlID, to avoid repeat lookups
        fetched_controls = {}
        controls_mapped = 0
        assessments_mapped = 0

        for impl in implementations:
            try:
                control_ref = getattr(impl, "controlID", None)
                if not control_ref:
                    # No controlID reference
                    continue

                if control_ref in fetched_controls:
                    security_control = fetched_controls[control_ref]
                else:
                    security_control = regscale_models.SecurityControl.get_object(object_id=control_ref)
                    fetched_controls[control_ref] = security_control

                if not security_control or not hasattr(security_control, "controlId"):
                    continue

                normalized_id = self._normalize_control_id_string(security_control.controlId)
                if not normalized_id:
                    continue

                self._impl_id_by_control[normalized_id] = impl.id
                controls_mapped += 1

                # Also try to cache the most recent assessment (highest id)
                try:
                    assessments = regscale_models.Assessment.get_all_by_parent(
                        parent_id=impl.id, parent_module="controls"
                    )
                    if assessments:
                        assessments.sort(key=lambda a: getattr(a, "id", 0), reverse=True)
                        self._assessment_by_impl_today[impl.id] = assessments[0]
                        assessments_mapped += 1
                except Exception:
                    # Assessment caching is optional; the control mapping above still stands
                    pass

            except Exception:
                continue

        logger.info("✓ Control implementation cache populated:")
        logger.info(f" - {controls_mapped} control ID mappings")
        logger.info(f" - {assessments_mapped} assessment mappings")

    except Exception as e:
        logger.error(f"Error populating control implementation cache: {e}")
2294
+
2295
def _refresh_assessment_cache_after_creation(self) -> None:
    """
    Rebuild ``self._assessment_by_impl_today`` once control assessments have been created.

    For each implementation ID in ``self._impl_id_by_control``, the cache entry becomes the
    highest-id assessment dated today, or the highest-id assessment overall when none is
    dated today. An assessment's date is taken from the first non-empty field among
    actualFinish, plannedFinish and dateCreated. Per-implementation failures are skipped
    silently; a failure of the overall refresh is logged as an error.

    :return: None
    :rtype: None
    """
    try:
        from regscale.models import regscale_models
        from datetime import datetime

        logger.info("🔄 Refreshing assessment cache with newly created assessments...")

        def _assessment_day(record):
            """Return the date taken from the first populated date field, or None."""
            for field_name in ("actualFinish", "plannedFinish", "dateCreated"):
                raw_value = getattr(record, field_name, None)
                if not raw_value:
                    continue
                if isinstance(raw_value, str):
                    from regscale.core.app.utils.app_utils import regscale_string_to_datetime

                    return regscale_string_to_datetime(raw_value).date()
                if hasattr(raw_value, "date"):
                    return raw_value.date()
                return raw_value
            return None

        def _by_id(record):
            return getattr(record, "id", 0)

        refreshed_count = 0
        today = datetime.now().date()

        # Only refresh assessments for implementations we know about
        for impl_id in self._impl_id_by_control.values():
            try:
                assessments = regscale_models.Assessment.get_all_by_parent(
                    parent_id=impl_id, parent_module="controls"
                )
                if not assessments:
                    continue

                # Collect the assessments dated today
                today_assessments = []
                for assessment in assessments:
                    try:
                        if _assessment_day(assessment) == today:
                            today_assessments.append(assessment)
                    except Exception:
                        # Unparseable date: this assessment cannot count as today's
                        continue

                # Prefer today's assessments; otherwise fall back to all of them
                best_assessment = max(today_assessments or assessments, key=_by_id)

                self._assessment_by_impl_today[impl_id] = best_assessment
                refreshed_count += 1

            except Exception:
                continue

        logger.info(f"✓ Assessment cache refreshed: {refreshed_count} assessments updated")

    except Exception as e:
        logger.error(f"Error refreshing assessment cache: {e}")
2368
+
2369
def _find_control_implementation_id(self, control_id: str) -> Optional[int]:
    """
    Find the control implementation ID for a normalized control ID.

    The ``_impl_id_by_control`` cache is consulted first. On a miss, all control
    implementations under ``self.plan_id`` are fetched and each one's
    SecurityControl is resolved (memoized by ``controlID`` for the duration of
    this call) until one whose normalized ``controlId`` equals ``control_id``
    is found. The match is stored in ``_impl_id_by_control`` before returning.

    :param str control_id: Normalized control ID (e.g., 'AC-2(1)')
    :return: Control implementation ID if found, None if not found or on error
    :rtype: Optional[int]
    """
    try:
        # Answer from the cache before doing anything else (including the lazy import)
        if hasattr(self, "_impl_id_by_control") and control_id in self._impl_id_by_control:
            return self._impl_id_by_control[control_id]

        from regscale.models import regscale_models

        # Get all control implementations for this plan
        implementations = regscale_models.ControlImplementation.get_all_by_parent(
            parent_id=self.plan_id, parent_module=self.parent_module
        )

        # Per-call memo so each SecurityControl is fetched at most once
        security_control_cache = {}

        for impl in implementations:
            try:
                # controlID references the SecurityControl record
                control_ref = getattr(impl, "controlID", None)
                if not control_ref:
                    continue

                if control_ref not in security_control_cache:
                    security_control_cache[control_ref] = regscale_models.SecurityControl.get_object(
                        object_id=control_ref
                    )
                security_control = security_control_cache[control_ref]

                if not security_control or not hasattr(security_control, "controlId"):
                    continue

                if self._normalize_control_id_string(security_control.controlId) != control_id:
                    continue

                logger.info(f"✓ Found control implementation {impl.id} for control {control_id}")
                # Cache it for future lookups
                if not hasattr(self, "_impl_id_by_control"):
                    self._impl_id_by_control = {}
                self._impl_id_by_control[control_id] = impl.id
                return impl.id
            except Exception as e:
                # Best effort: one bad implementation must not abort the search,
                # but leave a trace instead of swallowing the error silently
                logger.debug(f"Skipping control implementation while resolving control {control_id}: {e}")
                continue

        logger.warning(
            f"⚠️ No control implementation found for control {control_id} among {len(implementations)} implementations"
        )
        return None
    except Exception as e:
        logger.error(f"Error finding control implementation for {control_id}: {e}")
        return None
2427
+
2428
def _find_assessment_id_for_implementation(self, implementation_id: int) -> Optional[int]:
    """
    Find the most relevant assessment ID for a control implementation.

    The ``_assessment_by_impl_today`` cache is consulted first. Otherwise the
    implementation's assessments are fetched and dated from the first populated
    field among ``plannedStart``, ``actualFinish``, ``plannedFinish`` and
    ``dateCreated``. An assessment dated today wins (highest ID on ties);
    failing that, the newest dated assessment wins, with undated ones last.
    The chosen assessment is stored in ``_assessment_by_impl_today``.

    :param int implementation_id: Control implementation ID
    :return: Assessment ID if found, None if there are no assessments or on error
    :rtype: Optional[int]
    """
    try:
        # Check cache first (before the lazy imports below)
        if hasattr(self, "_assessment_by_impl_today") and implementation_id in self._assessment_by_impl_today:
            cached_assessment = self._assessment_by_impl_today[implementation_id]
            if cached_assessment and hasattr(cached_assessment, "id"):
                logger.debug(
                    f"Found cached assessment {cached_assessment.id} for implementation {implementation_id}"
                )
                return cached_assessment.id

        from datetime import date, datetime

        from regscale.core.app.utils.app_utils import regscale_string_to_datetime
        from regscale.models import regscale_models

        # Get assessments for this control implementation
        assessments = regscale_models.Assessment.get_all_by_parent(
            parent_id=implementation_id, parent_module="controls"
        )

        if not assessments:
            logger.warning(f"No assessments found for control implementation {implementation_id}")
            return None

        def _as_date(value):
            """Coerce a raw field value to a date; None when it is not date-like."""
            if isinstance(value, str):
                return regscale_string_to_datetime(value).date()
            if hasattr(value, "date"):
                return value.date()
            # Plain date objects pass through; anything else would break the sort below
            return value if isinstance(value, date) else None

        def _sortable_id(assessment):
            """Return the assessment id, or 0 when it is missing or None, so ids always compare."""
            return getattr(assessment, "id", None) or 0

        def _remember(assessment):
            """Store the chosen assessment in the per-implementation cache."""
            if not hasattr(self, "_assessment_by_impl_today"):
                self._assessment_by_impl_today = {}
            self._assessment_by_impl_today[implementation_id] = assessment

        # Try multiple date fields in order of preference
        date_fields = ["plannedStart", "actualFinish", "plannedFinish", "dateCreated"]
        today = datetime.now().date()
        today_assessments = []
        recent_assessments = []

        for assessment in assessments:
            assessment_date = None
            try:
                for field in date_fields:
                    raw_value = getattr(assessment, field, None)
                    if raw_value:
                        assessment_date = _as_date(raw_value)
                        break
            except Exception as e:
                # Unparseable date: keep the assessment as an undated fallback candidate
                logger.debug(f"Could not determine date for assessment {getattr(assessment, 'id', None)}: {e}")
                assessment_date = None

            if assessment_date == today:
                today_assessments.append(assessment)
            else:
                recent_assessments.append((assessment, assessment_date))

        # Prefer today's assessments (highest/newest ID if several)
        if today_assessments:
            assessment = max(today_assessments, key=_sortable_id)
            logger.info(
                f"✓ Found today's assessment {assessment.id} for control implementation {implementation_id}"
            )
            _remember(assessment)
            return assessment.id

        # Fall back to the most recent assessment; undated entries sort as the oldest
        if recent_assessments:
            assessment = max(
                recent_assessments,
                key=lambda pair: (pair[1] or date.min, _sortable_id(pair[0])),
            )[0]
            logger.info(f"✓ Found recent assessment {assessment.id} for control implementation {implementation_id}")
            # Cache it even if not today's
            _remember(assessment)
            return assessment.id

        logger.warning(f"⚠️ No usable assessments found for control implementation {implementation_id}")
        return None
    except Exception as e:
        logger.error(f"Error finding assessment for control implementation {implementation_id}: {e}")
        return None
2527
+
2528
def _reparent_issue_to_asset(self, issue: regscale_models.Issue) -> None:
    """
    Re-home an issue under its control implementation when one is set.

    If ``issue.controlId`` is truthy it becomes the issue's ``parentId`` and
    ``parentModule`` is set to "controls". Otherwise the issue is left
    untouched and keeps whatever parent it already had.

    :param regscale_models.Issue issue: Issue to reparent (mutated in place)
    :return: None
    :rtype: None
    """
    if not issue.controlId:
        # No control implementation linked: keep the existing parent
        return

    issue.parentId = issue.controlId
    issue.parentModule = "controls"
2545
+
2546
def _update_scan_history(self, scan_history: regscale_models.ScanHistory) -> None:
    """
    Intentionally do nothing: compliance report ingest keeps no scan history.

    :param regscale_models.ScanHistory scan_history: Scan history record (ignored)
    :rtype: None
    """
    return None
2554
+
2555
def _process_control_assessments(self) -> None:
    """
    Create control assessments only for controls backed by assets in RegScale.

    Every control ID found in ``self.passing_controls`` or
    ``self.failing_controls`` is checked: it is kept only when at least one of
    its validated compliance items names a resource that exists in RegScale.
    Each kept control is passed to ``_process_single_control_assessment`` and
    the returned counts are summed for the summary log lines.

    :return: None
    :rtype: None
    """
    logger.info("🎯 Starting control assessment processing for Wiz compliance integration")

    # Ensure existing records cache is loaded
    self._load_existing_records_cache()

    implementations = self._get_control_implementations()
    if not implementations:
        logger.warning("No control implementations found for assessment processing")
        return

    # Get all potential control IDs from compliance data
    all_potential_controls = set(self.passing_controls) | set(self.failing_controls)
    logger.debug(
        f"Found {len(all_potential_controls)} potential controls from compliance data: {sorted(all_potential_controls)}"
    )

    # control_id -> names of the assets that justify assessing it
    validated_controls_with_assets = {}

    for control_id in all_potential_controls:
        control_items = self._get_validated_control_compliance_items(control_id)
        if not control_items:
            continue

        asset_identifiers = set()
        assets_found = 0

        for item in control_items:
            if not getattr(item, "resource_name", None):
                continue
            resource_id = getattr(item, "resource_id", "")
            # Verify the asset actually exists in RegScale
            if self._asset_exists_in_regscale(resource_id):
                asset_identifiers.add(item.resource_name)
                assets_found += 1
            else:
                logger.debug(
                    f"Control {control_id}: Asset {resource_id} ({item.resource_name}) not found in RegScale"
                )

        logger.debug(f"Found {assets_found} valid assets for control {control_id}")
        if asset_identifiers:
            validated_controls_with_assets[control_id] = list(asset_identifiers)

    if not validated_controls_with_assets:
        logger.warning("❌ No controls have assets in RegScale boundary - no control assessments will be created")
        logger.info("📊 SUMMARY: 0 control assessments created (no assets exist in RegScale)")
        return

    assessments_created = 0
    processed_impl_today: set[int] = set()

    # Only process validated controls that have assets in our boundary
    for control_id in validated_controls_with_assets:
        assessments_created += self._process_single_control_assessment(
            control_id=control_id,
            implementations=implementations,
            processed_impl_today=processed_impl_today,
        )

    # A validated control counts as failing when it is listed in failing_controls,
    # otherwise as passing
    failing_assessments = sum(1 for cid in validated_controls_with_assets if cid in self.failing_controls)
    passing_assessments = len(validated_controls_with_assets) - failing_assessments

    if assessments_created > 0:
        logger.info(
            f"✅ Created {assessments_created} control assessments: {passing_assessments} passing, {failing_assessments} failing"
        )
    else:
        logger.warning(
            f"⚠️ No control assessments were actually created (0 assessments) despite finding {len(validated_controls_with_assets)} controls with assets"
        )

    logger.info(
        f"📊 CONTROL ASSESSMENT SUMMARY: {assessments_created} assessments created for {len(validated_controls_with_assets)} validated controls"
    )
2651
+
2652
def _sync_assessment_cache_from_base_class(self) -> None:
    """
    Mirror ``_assessment_by_impl_today`` into the control cache.

    Each (implementation ID, assessment) pair found in
    ``self._assessment_by_impl_today`` (an empty mapping when the attribute is
    absent) is handed to ``self._control_cache.set_assessment``. Any failure is
    logged as a warning rather than raised.

    :return: None
    :rtype: None
    """
    try:
        source_cache = getattr(self, "_assessment_by_impl_today", {})

        synced_count = 0
        for synced_count, (impl_id, assessment) in enumerate(source_cache.items(), start=1):
            self._control_cache.set_assessment(impl_id, assessment)

        logger.info(f"✅ Synced {synced_count} assessments from base class cache to control cache")

    except Exception as e:
        logger.warning(f"⚠️ Failed to sync assessment cache: {e}")
2672
+
2673
def _get_validated_control_compliance_items(self, control_id: str) -> List[ComplianceItem]:
    """
    Get compliance items for a control whose asset exists in RegScale.

    An item matches when ``control_id`` equals (case-insensitively) one of its
    ``control_ids``, or its single ``control_id`` when it has no ``control_ids``
    attribute. A missing, None or non-string control value is treated as a
    non-match instead of raising. Matching items are kept only when they have a
    ``resource_id`` for which ``_asset_exists_in_regscale`` returns true.

    :param str control_id: Control identifier to filter by
    :return: List of validated compliance items for the control
    :rtype: List[ComplianceItem]
    """
    wanted = control_id.lower()  # invariant across items, so computed once
    validated_items: List[ComplianceItem] = []

    for item in self.all_compliance_items:
        if hasattr(item, "control_ids"):
            # None/empty control_ids means "no controls", not an error
            candidates = item.control_ids or []
        else:
            single_id = getattr(item, "control_id", None)
            candidates = [] if single_id is None else [single_id]

        if not any(isinstance(cid, str) and cid.lower() == wanted for cid in candidates):
            continue

        # Additional validation: ensure the asset exists in RegScale
        resource_id = getattr(item, "resource_id", "")
        if resource_id and self._asset_exists_in_regscale(resource_id):
            validated_items.append(item)
        else:
            logger.debug(
                f"Filtered out compliance item for control {control_id} - asset {resource_id} not in RegScale"
            )

    return validated_items
2707
+
2708
def _get_control_compliance_items(self, control_id: str) -> List[ComplianceItem]:
    """
    Get all compliance items in ``self.all_compliance_items`` for a control.

    An item matches when ``control_id`` equals (case-insensitively) one of its
    ``control_ids``, or its single ``control_id`` when it has no ``control_ids``
    attribute. A missing, None or non-string control value is treated as a
    non-match instead of raising.

    :param str control_id: Control identifier to filter by
    :return: List of compliance items for the control
    :rtype: List[ComplianceItem]
    """
    wanted = control_id.lower()  # invariant across items, so computed once

    def _matches(item) -> bool:
        """Return True when the item references the wanted control."""
        if hasattr(item, "control_ids"):
            # None/empty control_ids means "no controls", not an error
            candidates = item.control_ids or []
        else:
            single_id = getattr(item, "control_id", None)
            candidates = [] if single_id is None else [single_id]
        return any(isinstance(cid, str) and cid.lower() == wanted for cid in candidates)

    return [item for item in self.all_compliance_items if _matches(item)]
2733
+
2734
+ # flake8: noqa: C901
2735
def get_asset_by_identifier(self, identifier: str) -> Optional["regscale_models.Asset"]:
    """
    Override asset lookup for Wiz policy compliance integration.

    Lookup order:
    1. the parent class lookup;
    2. a direct key hit in ``self._regscale_assets_by_wiz_id``;
    3. a scan of that mapping for the first asset whose ``name``,
       ``identifier`` or ``otherTrackingNumber`` equals ``identifier``.

    Errors raised during steps 2-3 are logged at debug level and treated as
    "not found".

    :param str identifier: Asset identifier (should be Wiz resource ID)
    :return: Asset if found, None otherwise
    :rtype: Optional[regscale_models.Asset]
    """
    # First try the standard lookup provided by the parent class
    asset = super().get_asset_by_identifier(identifier)
    if asset:
        return asset

    # If not found, try our cached RegScale assets keyed by Wiz ID
    try:
        if hasattr(self, "_regscale_assets_by_wiz_id") and self._regscale_assets_by_wiz_id:
            # Direct lookup by Wiz ID (most common case)
            if identifier in self._regscale_assets_by_wiz_id:
                return self._regscale_assets_by_wiz_id[identifier]

            # Fallback: check all assets for name/identifier matches
            for wiz_id, regscale_asset in self._regscale_assets_by_wiz_id.items():
                if regscale_asset.name == identifier:
                    return regscale_asset

                if hasattr(regscale_asset, "identifier") and regscale_asset.identifier == identifier:
                    return regscale_asset

                if (
                    hasattr(regscale_asset, "otherTrackingNumber")
                    and regscale_asset.otherTrackingNumber == identifier
                ):
                    logger.debug(
                        f"Found asset via otherTrackingNumber match: {regscale_asset.name} (Wiz ID: {wiz_id})"
                    )
                    return regscale_asset

    except Exception as e:
        # Best-effort fallback: report the failure instead of hiding it, then give up
        logger.debug(f"Asset lookup by Wiz ID failed for identifier {identifier}: {e}")

    # Asset not found
    return None
2785
+
2786
def _ensure_asset_for_finding(self, finding: IntegrationFinding) -> Optional["regscale_models.Asset"]:
    """
    Never create an asset for a finding in the policy compliance integration.

    This override always returns None; the finding argument is not inspected.
    Assets are expected to exist already (they are not created here).

    :param IntegrationFinding finding: Finding that needs an asset (ignored)
    :return: Always None
    :rtype: Optional[regscale_models.Asset]
    """
    # Deliberate no-op override: asset creation is out of scope here
    return None
2798
+
2799
def _process_consolidated_issues(self, findings: List[IntegrationFinding]) -> None:
    """
    Create or update one issue per pre-consolidated finding.

    For each finding an issue title is obtained from ``get_issue_title`` and
    passed to ``create_or_update_issue_from_finding``. A failure on one finding
    is logged and does not stop the others. The number of findings that yielded
    an issue is stored in ``self._issues_processed_count``; nothing is stored
    when ``findings`` is empty.

    :param List[IntegrationFinding] findings: List of pre-consolidated findings to process
    :return: None
    :rtype: None
    """
    if not findings:
        return

    issues_processed = 0

    for finding in findings:
        # Bind before the try block so the error handler never sees an unbound
        # (or stale, from the previous iteration) control_id
        control_id = getattr(finding, "rule_id", "unknown")
        try:
            control_id = self._normalize_control_id_string(finding.rule_id) or finding.rule_id

            # Create issue title
            issue_title = self.get_issue_title(finding)

            # Create issue directly from the consolidated finding
            issue = self.create_or_update_issue_from_finding(title=issue_title, finding=finding)
            if issue:
                issues_processed += 1
            else:
                logger.debug(
                    f"Failed to create issue for control {control_id} - create_or_update_issue_from_finding returned None"
                )

        except Exception as e:
            logger.error(f"Error processing consolidated issue for control {control_id}: {e}")

    # Store the count for summary reporting
    self._issues_processed_count = issues_processed
2835
+
2836
def _find_existing_issue_for_control(self) -> Optional["regscale_models.Issue"]:
    """
    Placeholder lookup for an existing control-specific issue.

    No search is performed: the method takes no control argument and always
    returns None, so callers always proceed as if no issue exists yet.

    :return: Always None
    :rtype: Optional[regscale_models.Issue]
    """
    # Simplified stub - a real lookup might match on external_id or another
    # field that uniquely identifies control-specific issues
    return None
2847
+
2848
def sync_compliance(self, *args, **kwargs) -> None:
    """
    Override sync to use consolidated issue processing and add summary reporting.

    Steps: initialise caches, pre-populate the control implementation cache,
    run the parent ``sync_compliance`` with ``create_issues`` temporarily
    disabled, copy the assessment cache into the control cache, then (when
    ``create_issues`` is set) build issues from ``fetch_findings()`` and log a
    summary. Any exception is reported through ``error_and_exit``.

    ``*args`` and ``**kwargs`` are accepted but not forwarded to the parent call.

    :return: None
    :rtype: None
    """
    # Reset per-run counters. The summary below reads _issues_processed_count,
    # so it must not carry a value over from an earlier run.
    self._issues_created_count = 0
    self._issues_processed_count = 0

    try:
        # Initialize cache dictionaries if not already initialized
        if not hasattr(self, "_impl_id_by_control"):
            self._impl_id_by_control = {}
        if not hasattr(self, "_assessment_by_impl_today"):
            self._assessment_by_impl_today = {}

        # Ensure existing records cache is loaded before processing
        self._load_existing_records_cache()

        # CRITICAL: Pre-populate control implementation cache before any processing
        logger.info("🎯 Pre-populating control implementation cache for reliable issue linking...")
        self._populate_control_implementation_cache()

        # Run the parent's compliance processing (assessments, etc.) with issue
        # creation disabled; restore the flag even if the parent call raises
        original_create_issues = self.create_issues
        self.create_issues = False
        try:
            super().sync_compliance()
        finally:
            self.create_issues = original_create_issues

        # CRITICAL: Copy assessments from base class cache to our cache so IssueFieldSetter can find them
        self._sync_assessment_cache_from_base_class()

        # Now handle issue creation with consolidated logic
        if self.create_issues:
            findings = list(self.fetch_findings())
            if findings:
                self._process_consolidated_issues(findings)

        # Provide concise summary
        issues_processed = getattr(self, "_issues_processed_count", 0)

        if issues_processed > 0:
            # Count actual unique issues in the database for this security plan
            from regscale.models import regscale_models

            actual_issues = len(
                regscale_models.Issue.get_all_by_parent(parent_id=self.plan_id, parent_module=self.parent_module)
            )

            logger.info(
                f"📊 SUMMARY: Processed {issues_processed} policy violations resulting in {actual_issues} consolidated issues for failed controls for assets in RegScale"
            )
        else:
            logger.info("📊 SUMMARY: No issues processed - no failed controls with existing assets")

    except Exception as e:
        error_and_exit(f"Error during Wiz compliance sync: {e}")
2901
+
2902
def _get_regscale_asset_identifier(self, compliance_item: "WizComplianceItem") -> str:
    """
    Pick the identifier used to match a compliance item to a RegScale asset.

    Preference order: the item's ``resource_id``, then its ``resource_name``,
    then the literal "Unknown Resource". Missing attributes are treated as
    empty strings.

    :param WizComplianceItem compliance_item: Compliance item with resource information
    :return: First non-empty value among resource_id, resource_name, "Unknown Resource"
    :rtype: str
    """
    # The resource ID comes first; the name is only a fallback for items without one
    for attribute in ("resource_id", "resource_name"):
        value = getattr(compliance_item, attribute, "")
        if value:
            return value

    return "Unknown Resource"
2923
+
2924
def _create_consolidated_asset_identifier(self, asset_mappings: Dict[str, Dict[str, str]]) -> str:
    """
    Create a consolidated asset identifier with only asset names (one per line).

    Format: "Asset Name 1\\nAsset Name 2\\nAsset Name 3", sorted by name.
    Only each entry's ``"name"`` value is used; the mapping keys and any other
    fields in the entries are ignored.

    :param Dict[str, Dict[str, str]] asset_mappings: Map of Wiz resource IDs to asset info
    :return: Consolidated identifier string with asset names only ("" when empty)
    :rtype: str
    """
    if not asset_mappings:
        return ""

    # Sort by asset name for consistent ordering
    identifier_parts = sorted(asset_info["name"] for asset_info in asset_mappings.values())

    # Join with newlines for multi-asset issues
    consolidated_identifier = "\n".join(identifier_parts)
    logger.debug(
        f"Created consolidated asset identifier with {len(identifier_parts)} assets: {consolidated_identifier}"
    )
    return consolidated_identifier
2963
+
2964
+
2965
def resolve_framework_id(framework_input: str) -> str:
    """
    Resolve user-supplied framework text to a Wiz framework ID.

    The input is lower-cased and stripped, then tried in this order:
    1. a direct ``wf-id-*`` ID present in FRAMEWORK_MAPPINGS;
    2. an exact key in FRAMEWORK_SHORTCUTS;
    3. a substring of a FRAMEWORK_SHORTCUTS key (first hit wins);
    4. a substring of a lower-cased FRAMEWORK_MAPPINGS name (first hit wins).

    Empty input, an unknown ``wf-id-*`` ID and an unresolvable name are all
    reported through ``error_and_exit``.

    :param str framework_input: User input for framework
    :return: Resolved framework ID
    :rtype: str
    """
    if not (framework_input and framework_input.strip()):
        error_and_exit("Framework input cannot be empty. Use --list-frameworks to see available options.")

    query = framework_input.lower().strip()

    # Direct framework ID
    if query.startswith("wf-id-"):
        if query not in FRAMEWORK_MAPPINGS:
            error_and_exit(f"Unknown framework ID: {query}")
        else:
            return query

    # Exact shorthand
    if query in FRAMEWORK_SHORTCUTS:
        return FRAMEWORK_SHORTCUTS[query]

    def _searchable():
        """Yield (text, framework_id) pairs: shortcuts first, then full names."""
        yield from FRAMEWORK_SHORTCUTS.items()
        for mapped_id, full_name in FRAMEWORK_MAPPINGS.items():
            yield full_name.lower(), mapped_id

    # Partial match against shorthands, then against full framework names
    for text, candidate_id in _searchable():
        if query in text:
            return candidate_id

    error_and_exit(f"Could not resolve framework: '{query}'. Use --list-frameworks to see available options.")
3006
+
3007
+
3008
def list_available_frameworks() -> str:
    """
    Build the help text listing the available Wiz compliance frameworks.

    Sections, in order: a header, the first ten shortcuts (alphabetical) with a
    "more" line when others exist, every FRAMEWORK_CATEGORIES entry with its
    frameworks that are present in FRAMEWORK_MAPPINGS, and usage examples.

    :return: Formatted framework list, lines joined with newlines
    :rtype: str
    """
    preview_limit = 10
    shortcut_items = sorted(FRAMEWORK_SHORTCUTS.items())

    # Header and shorthand preview
    lines = [
        "🔒 Available Wiz Compliance Frameworks",
        "=" * 50,
        "\n📋 Quick Shortcuts:",
        "-" * 20,
    ]
    lines.extend(
        f" {shorthand:<15} → {FRAMEWORK_MAPPINGS.get(framework_id, 'Unknown')}"
        for shorthand, framework_id in shortcut_items[:preview_limit]
    )
    if len(shortcut_items) > preview_limit:
        lines.append(f" ... and {len(shortcut_items) - preview_limit} more shortcuts")

    # Frameworks grouped by category; IDs without a known name are skipped
    lines += ["\n📚 All Frameworks by Category:", "-" * 35]
    for category, framework_ids in FRAMEWORK_CATEGORIES.items():
        lines.append(f"\n🏷️ {category}:")
        lines.extend(
            f" {framework_id:<12} → {FRAMEWORK_MAPPINGS[framework_id]}"
            for framework_id in framework_ids
            if framework_id in FRAMEWORK_MAPPINGS
        )

    # Usage examples
    lines += [
        "\n💡 Usage Examples:",
        "-" * 18,
        " # Using shortcuts:",
        " regscale wiz sync-policy-compliance -f nist",
        " regscale wiz sync-policy-compliance -f aws",
        " regscale wiz sync-policy-compliance -f soc2",
        "",
        " # Using full framework IDs:",
        " regscale wiz sync-policy-compliance -f wf-id-4",
        " regscale wiz sync-policy-compliance -f wf-id-197",
        "",
        " # Using partial names (case insensitive):",
        " regscale wiz sync-policy-compliance -f 'nist 800-53'",
        " regscale wiz sync-policy-compliance -f kubernetes",
    ]

    return "\n".join(lines)