regscale-cli 6.17.0.0-py3-none-any.whl → 6.19.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (48)
  1. regscale/__init__.py +1 -1
  2. regscale/core/app/api.py +5 -0
  3. regscale/core/login.py +3 -0
  4. regscale/integrations/api_paginator.py +932 -0
  5. regscale/integrations/api_paginator_example.py +348 -0
  6. regscale/integrations/commercial/__init__.py +11 -10
  7. regscale/integrations/commercial/burp.py +4 -0
  8. regscale/integrations/commercial/{qualys.py → qualys/__init__.py} +756 -105
  9. regscale/integrations/commercial/qualys/scanner.py +1051 -0
  10. regscale/integrations/commercial/qualys/variables.py +21 -0
  11. regscale/integrations/commercial/sicura/api.py +1 -0
  12. regscale/integrations/commercial/stigv2/click_commands.py +36 -8
  13. regscale/integrations/commercial/stigv2/stig_integration.py +63 -9
  14. regscale/integrations/commercial/tenablev2/__init__.py +9 -0
  15. regscale/integrations/commercial/tenablev2/authenticate.py +23 -2
  16. regscale/integrations/commercial/tenablev2/commands.py +779 -0
  17. regscale/integrations/commercial/tenablev2/jsonl_scanner.py +1999 -0
  18. regscale/integrations/commercial/tenablev2/sc_scanner.py +600 -0
  19. regscale/integrations/commercial/tenablev2/scanner.py +7 -5
  20. regscale/integrations/commercial/tenablev2/utils.py +21 -4
  21. regscale/integrations/commercial/tenablev2/variables.py +4 -0
  22. regscale/integrations/jsonl_scanner_integration.py +523 -142
  23. regscale/integrations/scanner_integration.py +102 -26
  24. regscale/integrations/transformer/__init__.py +17 -0
  25. regscale/integrations/transformer/data_transformer.py +445 -0
  26. regscale/integrations/transformer/mappings/__init__.py +8 -0
  27. regscale/integrations/variables.py +2 -0
  28. regscale/models/__init__.py +5 -2
  29. regscale/models/integration_models/cisa_kev_data.json +63 -7
  30. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  31. regscale/models/regscale_models/asset.py +5 -2
  32. regscale/models/regscale_models/file.py +5 -2
  33. regscale/regscale.py +3 -1
  34. {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/METADATA +1 -1
  35. {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/RECORD +47 -31
  36. tests/regscale/core/test_version.py +22 -0
  37. tests/regscale/integrations/__init__.py +0 -0
  38. tests/regscale/integrations/test_api_paginator.py +597 -0
  39. tests/regscale/integrations/test_integration_mapping.py +60 -0
  40. tests/regscale/integrations/test_issue_creation.py +317 -0
  41. tests/regscale/integrations/test_issue_due_date.py +46 -0
  42. tests/regscale/integrations/transformer/__init__.py +0 -0
  43. tests/regscale/integrations/transformer/test_data_transformer.py +850 -0
  44. regscale/integrations/commercial/tenablev2/click.py +0 -1637
  45. {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/LICENSE +0 -0
  46. {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/WHEEL +0 -0
  47. {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/entry_points.txt +0 -0
  48. {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/top_level.txt +0 -0
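
The largest single addition in this release is the new Tenable SC JSONL scanner (item 17 above), whose full diff follows. For orientation, here is a minimal, hypothetical sketch of how the new class might be driven directly, based only on the constructor and methods visible in the diff below; the plan_id and query_id values are illustrative, and the usual entry point is presumably the new tenablev2/commands.py CLI rather than direct calls:

    from regscale.integrations.commercial.tenablev2.jsonl_scanner import TenableSCJsonlScanner

    # Hypothetical IDs; substitute a real RegScale security plan and Tenable SC query.
    scanner = TenableSCJsonlScanner(plan_id=123, query_id=42, batch_size=1000)

    if scanner.authenticate():                   # builds the SC client via gen_tsc()
        files = scanner.find_or_download_data()  # writes sc_vulns.json / sc_assets.json
        scanner.process_source_files(
            files,
            assets_output_file=scanner.ASSETS_FILE,      # ./artifacts/tenable_sc_assets.jsonl
            findings_output_file=scanner.FINDINGS_FILE,  # ./artifacts/tenable_sc_findings.jsonl
        )
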
regscale/integrations/commercial/tenablev2/jsonl_scanner.py
@@ -0,0 +1,1999 @@
+"""
+Integration class for Tenable SC vulnerability scanning using JSONLScannerIntegration.
+
+This module provides a direct implementation of JSONLScannerIntegration for Tenable SC,
+optimized for processing large volumes of scan data.
+"""
+
+import dataclasses
+import inspect
+import json
+import logging
+import os
+import re
+import sys
+import tempfile
+from datetime import datetime
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+
+from pathlib import Path
+
+from regscale.core.app.utils.app_utils import epoch_to_datetime, get_current_datetime
+from regscale.core.app.utils.file_utils import find_files
+from regscale.exceptions.validation_exception import ValidationException
+from regscale.integrations.commercial.tenablev2.authenticate import gen_tsc
+from regscale.integrations.commercial.tenablev2.utils import get_filtered_severities
+from regscale.integrations.commercial.tenablev2.variables import TenableVariables
+from regscale.integrations.integration_override import IntegrationOverride
+from regscale.integrations.jsonl_scanner_integration import JSONLScannerIntegration
+from regscale.integrations.scanner_integration import IntegrationAsset, IntegrationFinding, issue_due_date
+from regscale.integrations.transformer.data_transformer import DataTransformer
+from regscale.integrations.variables import ScannerVariables
+from regscale.models import regscale_models, AssetStatus, AssetType
+from regscale.models.integration_models.tenable_models.models import TenableAsset
+
+logger = logging.getLogger("regscale")
+
+FILE_TYPE = ".jsonl"
+UNKNOWN_PLUGIN = "Unknown Plugin"
+
+
+class TenableSCJsonlScanner(JSONLScannerIntegration):
+    """
+    Integration class for Tenable SC vulnerability scanning using JSONLScannerIntegration.
+
+    This class provides functionality for processing Tenable SC data files and
+    syncing assets and findings to RegScale.
+    """
+
+    # Class attributes - customized for Tenable SC
+    title: str = "Tenable SC Vulnerability Scanner"
+    asset_identifier_field: str = "tenableId"
+
+    # Custom file paths for Tenable SC data
+    ASSETS_FILE = "./artifacts/tenable_sc_assets.jsonl"
+    FINDINGS_FILE = "./artifacts/tenable_sc_findings.jsonl"
+    file_pattern = "sc_*.*"  # Match both JSON and JSONL files
+
+    # Severity mapping dictionary
+    finding_severity_map = {
+        "5": regscale_models.IssueSeverity.Critical,  # Critical
+        "4": regscale_models.IssueSeverity.High,  # High
+        "3": regscale_models.IssueSeverity.Moderate,  # Medium
+        "2": regscale_models.IssueSeverity.Low,  # Low
+        "1": regscale_models.IssueSeverity.Low,  # Info
+        "0": regscale_models.IssueSeverity.Low,  # None
+        "Info": regscale_models.IssueSeverity.NotAssigned,
+        "Low": regscale_models.IssueSeverity.Low,
+        "Medium": regscale_models.IssueSeverity.Moderate,
+        "High": regscale_models.IssueSeverity.High,
+        "Critical": regscale_models.IssueSeverity.Critical,
+    }
+
+    def __init__(
+        self,
+        plan_id: int,
+        tenant_id: int = 1,
+        scan_date: datetime = None,
+        query_id: int = None,
+        batch_size: int = None,
+        optimize_memory: bool = True,
+        force_download: bool = False,
+        **kwargs,
+    ):
+        """
+        Initialize the Tenable SC JSONLScannerIntegration.
+
+        :param int plan_id: The ID of the security plan
+        :param int tenant_id: The ID of the tenant, defaults to 1
+        :param datetime scan_date: The date of the scan, defaults to None
+        :param int query_id: The ID of the query to use, defaults to None
+        :param int batch_size: Batch size for API requests, defaults to None (1000 is used)
+        :param bool optimize_memory: Whether to optimize memory usage, defaults to True
+        :param bool force_download: Whether to force download data from Tenable SC, defaults to False
+        """
+        # Set specific file pattern for Tenable SC files
+        kwargs["file_pattern"] = self.file_pattern
+        kwargs["read_files_only"] = True
+        # Pass scan_date through kwargs to parent class
+        if scan_date:
+            kwargs["scan_date"] = scan_date
+
+        super().__init__(plan_id=plan_id, tenant_id=tenant_id, **kwargs)
+
+        self.query_id = query_id
+        self.batch_size = batch_size or 1000
+        self.optimize_memory = optimize_memory
+        self.force_download = force_download
+        self.auth_token = None
+        self.base_url = None
+        self.username = None
+        self.password = None
+        self.verify_ssl = None
+        self.client = None
+        self.scan_date = scan_date or get_current_datetime()
+        self.closed_count = 0
+        self.app = kwargs.get("app")
+        self.temp_dir = None
+
+    def authenticate(self) -> bool:
+        """
+        Authenticate to Tenable SC.
+
+        :return: True if authentication was successful, False otherwise
+        :rtype: bool
+        """
+        try:
+            # Log Tenable URL and other settings
+            logger.info(f"Authenticating to Tenable SC with URL: {TenableVariables.tenableUrl}")
+            logger.info(f"Batch size: {self.batch_size}")
+
+            # Log other relevant connection settings
+            ssl_verify = getattr(ScannerVariables, "sslVerify", True)
+            logger.info(f"Using SSL verification: {ssl_verify}")
+
+            # Initialize the Tenable SC client
+            self.client = gen_tsc()
+
+            # Test authentication by making a simple API call
+            if self.client:
+                # Try a simple API call to verify connection
+                try:
+                    # Get Tenable SC version to verify connection - fixed to use the proper API
+                    status = self.client.status.status()  # Use status.status() instead of status()
+                    version = status.get("version", "unknown")
+                    logger.info(f"Successfully authenticated to Tenable SC (version: {version})")
+
+                    # Set client timeout for large queries if supported
+                    if hasattr(self.client, "timeout"):
+                        logger.info("Setting increased timeout for large queries")
+                        self.client.timeout = 300  # 5 minutes
+
+                    return True
+                except Exception as e:
+                    logger.error(f"Authentication successful but API test failed: {str(e)}", exc_info=True)
+                    # Still return True as we authenticated, even if the test call failed
+                    return True
+            else:
+                logger.error("Failed to create Tenable SC client")
+                return False
+        except Exception as e:
+            logger.error(f"Error authenticating to Tenable SC: {str(e)}", exc_info=True)
+            return False
+
+    def find_valid_files(self, path: Union[Path, str]) -> Iterator[Tuple[Union[Path, str], Dict[str, Any]]]:
+        """
+        Find all valid Tenable SC data files in the given path.
+
+        :param Union[Path, str] path: Path to search for files
+        :return: Iterator of (file_path, data) tuples
+        :rtype: Iterator[Tuple[Union[Path, str], Dict[str, Any]]]
+        """
+        if not path or path == "":
+            # If no specific path provided, search artifacts directory
+            path = self.create_artifacts_dir()
+
+        # Add debug logging
+        logger.info(f"Looking for files in path: {path}")
+
+        # Add support for JSONL files
+        jsonl_pattern = "sc_*.jsonl"
+
+        # Find both JSON and JSONL files
+        found_files = 0
+
+        # First yield regular JSON files using parent implementation
+        for file_data in super().find_valid_files(path):
+            found_files += 1
+            if isinstance(file_data, tuple) and len(file_data) >= 2:
+                file_path = file_data[0]
+                logger.info(f"Found valid file: {file_path}")
+
+                # Check if it's an assets file and log details
+                str_path = str(file_path)
+                if "sc_assets" in str_path:
+                    data = file_data[1]
+                    if data and isinstance(data, dict):
+                        assets = data.get("response", {}).get("usable", [])
+                        logger.info(f"Assets file contains {len(assets)} assets")
+            yield file_data
+
+        # Now look for JSONL files
+        for file_path in find_files(path, jsonl_pattern):
+            found_files += 1
+            logger.info(f"Found JSONL file: {file_path}")
+
+            # For JSONL files, create an empty dict as placeholder
+            # Actual processing is handled in _process_jsonl_findings
+            yield file_path, {}
+
+        logger.info(f"Total valid files found: {found_files}")
+
+    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
+        """
+        Validate Tenable SC data file structure.
+
+        :param Any data: Data from the file
+        :param Union[Path, str] file_path: Path to the file
+        :return: Tuple of (is_valid, validated_data)
+        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
+        """
+        # Handle JSONL files separately
+        str_path = str(file_path)
+        if str_path.endswith(FILE_TYPE):
+            logger.info(f"Validating JSONL file: {file_path}")
+            # For JSONL files, we just verify the file exists and is readable
+            if os.path.exists(str_path) and os.path.getsize(str_path) > 0:
+                return True, {}
+            return False, None
+
+        # First use parent validation to ensure it's a non-empty dict
+        is_valid, data = super().is_valid_file(data, file_path)
+        if not is_valid or not data:
+            return False, None
+
+        # Now check for Tenable SC specific structures
+        if "sc_assets" in str_path:
+            if "response" not in data or "usable" not in data.get("response", {}):
+                logger.warning(f"Invalid Tenable SC assets file format: {file_path}")
+                return False, None
+            return True, data
+
+        # Validate vulnerabilities file
+        if "sc_vulns" in str_path:
+            if "response" not in data or "results" not in data.get("response", {}):
+                logger.warning(f"Invalid Tenable SC vulnerabilities file format: {file_path}")
+                return False, None
+            return True, data
+
+        # File doesn't match our expected patterns
+        logger.warning(f"File doesn't appear to be a Tenable SC data file: {file_path}")
+        return False, None
+
+    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
+        """
+        Parse a Tenable SC asset from source data.
+
+        :param Union[Path, str] file_path: Path to the file
+        :param Dict[str, Any] data: Parsed data
+        :return: IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        if not data:
+            logger.warning("Empty data provided to parse_asset")
+            # Return a minimal valid asset to avoid NoneType errors
+            return IntegrationAsset(
+                identifier="unknown",
+                name="Unknown Asset",
+                ip_address="",
+                status=AssetStatus.Active,
+                asset_type="Other",
+                asset_category="Software",
+                parent_id=self.plan_id,
+                parent_module=regscale_models.SecurityPlan.get_module_slug(),
+            )
+
+        try:
+            # Attempt to convert to TenableAsset object if from vulnerability data
+            if "response" in data and "results" in data.get("response", {}):
+                results = data.get("response", {}).get("results", [])
+                if results:
+                    try:
+                        vuln_data = TenableAsset(**results[0])
+                        return self.to_integration_asset(
+                            vuln_data, app=self.app, override=IntegrationOverride(self.app)
+                        )
+                    except Exception as e:
+                        logger.warning(f"Could not parse vulnerability as TenableAsset: {str(e)}")
+                        # Continue to parse as a basic asset instead of returning None
+
+            # If we reach here, it's either an assets file or no valid data was found
+            return self._parse_asset_from_assets_file(data)
+        except Exception as e:
+            logger.error(f"Error parsing Tenable SC asset: {str(e)}", exc_info=True)
+            # Return a minimal valid asset to avoid NoneType errors
+            return IntegrationAsset(
+                identifier="error",
+                name=f"Error Asset ({str(file_path)})",
+                ip_address="",
+                status=AssetStatus.Active,
+                asset_type="Other",
+                asset_category="Software",
+                parent_id=self.plan_id,
+                parent_module=regscale_models.SecurityPlan.get_module_slug(),
+            )
+
+    def _parse_asset_from_assets_file(self, data: Dict[str, Any]) -> IntegrationAsset:
+        """
+        Parse asset from a Tenable SC assets file.
+
+        :param Dict[str, Any] data: Assets file data
+        :return: IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        # Get first asset from usable list
+        assets = data.get("response", {}).get("usable", [])
+        if not assets:
+            logger.warning("No assets found in Tenable SC assets file")
+            return IntegrationAsset(
+                identifier="unknown",
+                name="Unknown Asset",
+                ip_address="",
+                status=AssetStatus.Active,
+                asset_type="Other",
+                asset_category="Software",
+                parent_id=self.plan_id,
+                parent_module=regscale_models.SecurityPlan.get_module_slug(),
+            )
+
+        asset = assets[0]
+
+        # Extract asset data
+        asset_id = asset.get("id", "")
+        asset_name = asset.get("name", "")
+
+        if not asset_id:
+            logger.warning("Asset is missing ID, using default ID")
+            asset_id = "missing_id"
+
+        # Extract IP information if available
+        ip_info = ""
+        definition = asset.get("definition", "")
+        if "ip=" in definition:
+            ip_parts = definition.split("ip=")[1].split("&")[0]
+            ip_info = ip_parts.replace("%3B", ";")
+
+        # Use IP address as the identifier for consistency
+        # If we can extract an IP from the definition, use that as the identifier
+        identifier = ip_info if ip_info else asset_id
+
+        return IntegrationAsset(
+            identifier=identifier,
+            name=asset_name or f"Asset {asset_id}",
+            ip_address=ip_info,
+            parent_id=self.plan_id,
+            parent_module=regscale_models.SecurityPlan.get_module_slug(),
+            asset_owner_id=ScannerVariables.userId,
+            asset_category=regscale_models.AssetCategory.Hardware,
+            asset_type=regscale_models.AssetType.Other,
+            status=AssetStatus.Active,
+            date_last_updated=get_current_datetime(),
+        )
+
+    def to_integration_asset(self, asset: TenableAsset, **kwargs: dict) -> IntegrationAsset:
+        """Converts a TenableAsset object to an IntegrationAsset object
+
+        :param TenableAsset asset: The Tenable SC asset
+        :param dict **kwargs: Additional keyword arguments
+        :return: An IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        override = kwargs.get("override")
+
+        validated_match = None
+        if override:
+            validated_match = override.field_map_validation(obj=asset, model_type="asset")
+
+        # Use IP as the primary identifier for consistency between assets and findings
+        asset_identifier = asset.ip
+        # If no IP, fall back to other identifiers
+        if not asset_identifier:
+            asset_identifier = validated_match or asset.dnsName or asset.dns or "unknown"
+
+        name = asset.dnsName or asset.ip
+
+        return IntegrationAsset(
+            name=name,
+            identifier=asset_identifier,
+            ip_address=asset.ip,
+            mac_address=asset.macAddress,
+            asset_owner_id=ScannerVariables.userId,
+            status=(
+                AssetStatus.Active
+                if getattr(asset, "family", None) and getattr(asset.family, "type", None)
+                else AssetStatus.Inactive
+            ),
+            asset_type=AssetType.Other,
+            asset_category="Hardware",
+        )
+
+    def parse_finding(
+        self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]
+    ) -> Optional[IntegrationFinding]:
+        """
+        Parse a finding from a Tenable SC vulnerability.
+
+        :param str asset_identifier: Asset identifier
+        :param Dict[str, Any] data: Asset data
+        :param Dict[str, Any] item: Finding data (vulnerability)
+        :return: IntegrationFinding object
+        :rtype: Optional[IntegrationFinding]
+        """
+        if not item:
+            return None
+
+        try:
+            # Try to convert to TenableAsset for consistent processing
+            try:
+                vuln = TenableAsset(**item)
+            except Exception as e:
+                logger.warning(f"Could not create TenableAsset from finding data: {str(e)}")
+                # Get the IP from the vulnerability item directly rather than using passed asset_identifier
+                finding_asset_id = item.get("ip", asset_identifier)
+
+                # Create a minimal finding since TenableAsset creation failed
+                return IntegrationFinding(
+                    control_labels=[],  # Add an empty list for control_labels
+                    title=item.get("pluginName", "Unknown Finding"),
+                    description=item.get("description", "No description available"),
+                    severity=regscale_models.IssueSeverity.Low,
+                    status=regscale_models.IssueStatus.Open,
+                    asset_identifier=finding_asset_id,  # Use the IP from the finding
+                    category="Vulnerability",
+                    scan_date=self.scan_date,
+                    plugin_name=item.get("pluginName", UNKNOWN_PLUGIN),
+                )
+
+            # Use the integration_mapping if available
+            integration_mapping = IntegrationOverride(self.app) if self.app else None
+
+            # Process findings similar to SC scanner
+            findings = self.parse_findings(vuln, integration_mapping)
+
+            if findings:
+                return findings[0]  # Return the first finding
+
+            # If no findings were created, return a basic finding
+            # Get the IP from the vulnerability directly rather than using passed asset_identifier
+            finding_asset_id = vuln.ip or asset_identifier
+
+            return IntegrationFinding(
+                control_labels=[],  # Add an empty list for control_labels
+                title=item.get("pluginName", "Unknown Finding"),
+                description=item.get("description", "No description available"),
+                severity=regscale_models.IssueSeverity.Low,
+                status=regscale_models.IssueStatus.Open,
+                asset_identifier=finding_asset_id,  # Use the IP from the finding
+                category="Vulnerability",
+                scan_date=self.scan_date,
+                plugin_name=item.get("pluginName", UNKNOWN_PLUGIN),
+            )
+
+        except Exception as e:
+            logger.error(f"Error parsing Tenable SC finding: {str(e)}", exc_info=True)
+            # Return a minimal finding on error
+            return IntegrationFinding(
+                control_labels=[],  # Add an empty list for control_labels
+                title="Error Finding",
+                description=f"Error parsing finding: {str(e)}",
+                severity=regscale_models.IssueSeverity.Low,
+                status=regscale_models.IssueStatus.Open,
+                asset_identifier=asset_identifier,
+                category="Vulnerability",
+                scan_date=self.scan_date,
+                plugin_name=UNKNOWN_PLUGIN,
+            )
+
+    def parse_findings(self, vuln: TenableAsset, integration_mapping: Any) -> List[IntegrationFinding]:
+        """
+        Parses a TenableAsset into a list of IntegrationFinding objects
+
+        :param TenableAsset vuln: The Tenable SC finding
+        :param Any integration_mapping: The IntegrationMapping object
+        :return: A list of IntegrationFinding objects
+        :rtype: List[IntegrationFinding]
+        """
+        findings = []
+        try:
+            severity = self.finding_severity_map.get(vuln.severity.name, regscale_models.IssueSeverity.Low)
+            cve_set = set(vuln.cve.split(",")) if vuln.cve else set()
+            if severity in get_filtered_severities():
+                if cve_set:
+                    for cve in cve_set:
+                        findings.append(
+                            self._create_finding(vuln=vuln, cve=cve, integration_mapping=integration_mapping)
+                        )
+                else:
+                    findings.append(self._create_finding(vuln=vuln, cve="", integration_mapping=integration_mapping))
+        except (KeyError, TypeError, ValueError) as e:
+            logger.error("Error parsing Tenable SC finding: %s", str(e), exc_info=True)
+
+        return findings
+
+    def _create_finding(
+        self, vuln: TenableAsset, cve: str, integration_mapping: IntegrationOverride
+    ) -> IntegrationFinding:
+        """
+        Helper method to create an IntegrationFinding object
+
+        :param TenableAsset vuln: The Tenable SC finding
+        :param str cve: The CVE identifier
+        :param IntegrationOverride integration_mapping: The IntegrationMapping object
+        :return: An IntegrationFinding object
+        :rtype: IntegrationFinding
+        """
+
+        # Extract helper method to simplify the main method
+        def getter(field_name: str) -> Optional[str]:
+            """
+            Helper method to get the field value from the integration mapping
+
+            :param str field_name: The field name to get the value for
+            :return: The field value
+            :rtype: Optional[str]
+            """
+            if integration_mapping and (val := integration_mapping.load("tenable_sc", field_name)):
+                return getattr(vuln, val, None)
+            return None
+
+        # Get asset identifier
+        asset_identifier = self._get_asset_identifier(vuln, integration_mapping)
+
+        # Get CVSS scores
+        cvss_scores = self.get_cvss_scores(vuln)
+
+        # Map severity
+        severity = self.finding_severity_map.get(vuln.severity.name, regscale_models.IssueSeverity.Low)
+
+        # Extract version information
+        installed_versions_str, fixed_versions_str, package_path_str = self._extract_version_info(vuln)
+
+        # Handle dates
+        first_seen = epoch_to_datetime(vuln.firstSeen) if vuln.firstSeen else self.scan_date
+        last_seen = epoch_to_datetime(vuln.lastSeen) if vuln.lastSeen else self.scan_date
+
+        # Create finding title
+        title = self._create_finding_title(vuln, cve, getter)
+
+        # Create and return the finding
+        return IntegrationFinding(
+            control_labels=[],  # Add an empty list for control_labels
+            category="Tenable SC Vulnerability",  # Add a default category
+            dns=vuln.dnsName,
+            title=title,
+            description=getter("description") or (vuln.description or vuln.pluginInfo),
+            severity=severity,
+            status=regscale_models.IssueStatus.Open,  # Findings of > Low are considered as FAIL
+            asset_identifier=asset_identifier,
+            external_id=vuln.pluginID,  # Weakness Source Identifier
+            first_seen=first_seen,
+            last_seen=last_seen,
+            date_created=first_seen,
+            date_last_updated=last_seen,
+            recommendation_for_mitigation=vuln.solution,
+            cve=cve,
+            cvss_v3_score=cvss_scores.get("cvss_v3_base_score", 0.0),
+            cvss_score=cvss_scores.get("cvss_v3_base_score", 0.0),
+            cvss_v3_vector=vuln.cvssV3Vector,
+            cvss_v2_score=cvss_scores.get("cvss_v2_base_score", 0.0),
+            cvss_v2_vector=vuln.cvssVector,
+            vpr_score=float(vuln.vprScore) if vuln.vprScore else None,
+            comments=vuln.cvssV3Vector,
+            plugin_id=vuln.pluginID,
+            plugin_name=vuln.pluginName,
+            rule_id=vuln.pluginID,
+            rule_version=vuln.pluginName,
+            basis_for_adjustment="Tenable SC import",
+            vulnerability_type="Tenable SC Vulnerability",
+            vulnerable_asset=vuln.dnsName,
+            build_version="",
+            affected_os=vuln.operatingSystem,
+            affected_packages=vuln.pluginName,
+            package_path=package_path_str,
+            installed_versions=installed_versions_str,
+            fixed_versions=fixed_versions_str,
+            fix_status="",
+            scan_date=self.scan_date,
+            due_date=issue_due_date(
+                severity=severity, created_date=first_seen, title="tenable", config=self.app.config if self.app else {}
+            ),
+        )
+
+    def _get_asset_identifier(self, vuln: TenableAsset, integration_mapping: IntegrationOverride) -> str:
+        """
+        Extract asset identifier from vulnerability data
+
+        :param TenableAsset vuln: The Tenable SC finding
+        :param IntegrationOverride integration_mapping: The IntegrationMapping object
+        :return: Asset identifier
+        :rtype: str
+        """
+        validated_match = None
+        if integration_mapping:
+            validated_match = integration_mapping.field_map_validation(obj=vuln, model_type="asset")
+
+        # Use IP as the primary identifier for consistency between assets and findings
+        if vuln.ip:
+            return vuln.ip
+
+        # If no IP, fall back to other identifiers
+        return validated_match or vuln.dnsName or vuln.dns or "unknown"
+
+    def _extract_version_info(self, vuln: TenableAsset) -> Tuple[str, str, str]:
+        """
+        Extract version information from vulnerability plugin text
+
+        :param TenableAsset vuln: The Tenable SC finding
+        :return: Tuple of (installed_versions, fixed_versions, package_path)
+        :rtype: Tuple[str, str, str]
+        """
+        installed_versions_str = ""
+        fixed_versions_str = ""
+        package_path_str = ""
+
+        if not hasattr(vuln, "pluginText"):
+            return installed_versions_str, fixed_versions_str, package_path_str
+
+        plugin_text = vuln.pluginText
+
+        # Extract installed package information
+        if "Installed package" in plugin_text:
+            installed_versions = re.findall(r"Installed package\s*:\s*(\S+)", plugin_text)
+            installed_versions_str = ", ".join(installed_versions)
+        elif "Installed version" in plugin_text:
+            installed_versions = re.findall(r"Installed version\s*:\s*(.+)", plugin_text)
+            installed_versions_str = ", ".join(installed_versions)
+
+        # Extract fixed package information
+        if "Fixed package" in plugin_text:
+            fixed_versions = re.findall(r"Fixed package\s*:\s*(\S+)", plugin_text)
+            fixed_versions_str = ", ".join(fixed_versions)
+        elif "Fixed version" in plugin_text:
+            fixed_versions = re.findall(r"Fixed version\s*:\s*(.+)", plugin_text)
+            fixed_versions_str = ", ".join(fixed_versions)
+
+        # Extract package path
+        if "Path" in plugin_text:
+            package_path = re.findall(r"Path\s*:\s*(\S+)", plugin_text)
+            package_path_str = ", ".join(package_path)
+
+        return installed_versions_str, fixed_versions_str, package_path_str
+
+    def _create_finding_title(self, vuln: TenableAsset, cve: str, getter_func) -> str:
+        """
+        Create a title for the finding
+
+        :param TenableAsset vuln: The Tenable SC finding
+        :param str cve: The CVE identifier
+        :param callable getter_func: Function to get mapped fields
+        :return: Finding title
+        :rtype: str
+        """
+        # First try to get title from mapping
+        title = getter_func("title")
+        if title:
+            return title
+
+        # Fall back to constructing title from CVE and synopsis
+        if cve:
+            return f"{cve}: {vuln.synopsis}"
+
+        # Last resort: use synopsis or plugin name
+        return vuln.synopsis or vuln.pluginName
+
+    def get_cvss_scores(self, vuln: TenableAsset) -> dict:
+        """
+        Returns the CVSS v2 and v3 base scores for the finding
+
+        :param TenableAsset vuln: The Tenable SC finding
+        :return: Dictionary of CVSS base scores
+        :rtype: dict
+        """
+        res = {}
+        try:
+            res["cvss_v3_base_score"] = float(vuln.cvssV3BaseScore) if vuln.cvssV3BaseScore else 0.0
+            res["cvss_v2_base_score"] = float(vuln.baseScore) if vuln.baseScore else 0.0
+        except (ValueError, TypeError):
+            res["cvss_v3_base_score"] = 0.0
+            res["cvss_v2_base_score"] = 0.0
+
+        return res
+
+    def process_source_files(self, file_paths: List[str], assets_output_file: str, findings_output_file: str) -> None:
+        """
+        Process source files to extract assets and findings.
+
+        :param List[str] file_paths: List of file paths to process
+        :param str assets_output_file: Path to write assets to
+        :param str findings_output_file: Path to write findings to
+        """
+        # Ensure output directories exist
+        os.makedirs(os.path.dirname(assets_output_file), exist_ok=True)
+        os.makedirs(os.path.dirname(findings_output_file), exist_ok=True)
+
+        # Prepare output files
+        asset_info = self._prepare_output_file(assets_output_file, True, "asset")
+        finding_info = self._prepare_output_file(findings_output_file, True, "finding")
+
+        # Process each file
+        for file_path in file_paths:
+            file_path_str = str(file_path)
+            logger.info(f"Processing file: {file_path_str}")
+
+            try:
+                # Read and parse the file
+                with open(file_path_str, "r") as f:
+                    data = json.load(f)
+
+                # Validate the file
+                is_valid, validated_data = self.is_valid_file(data, file_path_str)
+                if not is_valid or validated_data is None:
+                    logger.warning(f"Invalid file: {file_path_str}")
+                    continue
+
+                # Process assets or findings based on file path
+                if "sc_assets" in file_path_str:
+                    # Process assets file
+                    with open(assets_output_file, asset_info.get("mode", "w")) as output_f:
+                        self._process_asset_file(
+                            file_path_str, validated_data, output_f, asset_info.get("existing_items", {})
+                        )
+                    # Use append mode for subsequent files
+                    asset_info["mode"] = "a"
+
+                elif "sc_vulns" in file_path_str:
+                    # Process findings file
+                    with open(findings_output_file, finding_info.get("mode", "w")) as output_f:
+                        self._process_finding_file(
+                            file_path_str, validated_data, output_f, finding_info.get("existing_items", {})
+                        )
+                    # Use append mode for subsequent files
+                    finding_info["mode"] = "a"
+
+            except Exception as e:
+                logger.error(f"Error processing file {file_path_str}: {str(e)}", exc_info=True)
+
+    def _download_sc_data(self, output_dir: str) -> List[str]:
+        """
+        Download Tenable SC data using the SC client and save it to files.
+
+        This method fetches vulnerabilities from the Tenable SC API
+        using the Tenable SC client library for proper authentication and
+        API access. Assets are derived from the vulnerability data.
+
+        :param str output_dir: Directory to save the files to
+        :return: List of file paths that were created
+        :rtype: List[str]
+        """
+        logger.info("Downloading Tenable SC data...")
+        files_created = []
+
+        try:
+            # Ensure authentication and directory setup
+            if not self._initialize_client_and_directory(output_dir):
+                return files_created
+
+            # Define output files
+            vulns_file = os.path.join(output_dir, "sc_vulns.json")
+
+            # Fetch vulnerabilities if query_id is available
+            files_created = self._fetch_vulnerabilities(vulns_file, files_created)
+
+            # Create assets file if needed
+            assets_file = os.path.join(output_dir, "sc_assets.json")
+            if not os.path.exists(assets_file):
+                self._create_assets_file_from_vulns(assets_file, vulns_file, files_created)
+
+        except Exception as e:
+            logger.error(f"Error downloading Tenable SC data: {str(e)}", exc_info=True)
+
+        return files_created
+
+    def _initialize_client_and_directory(self, output_dir: str) -> bool:
+        """
+        Initialize the client and ensure the output directory exists
+
+        :param str output_dir: Directory to save files to
+        :return: True if initialization successful, False otherwise
+        :rtype: bool
+        """
+        # Ensure client is initialized
+        if not self.client:
+            logger.info("Authenticating to Tenable SC...")
+            if not self.authenticate():
+                logger.error("Failed to authenticate to Tenable SC")
+                return False
+
+        # Ensure output directory exists
+        os.makedirs(output_dir, exist_ok=True)
+        logger.info(f"Output directory: {output_dir}")
+        return True
+
+    def _fetch_vulnerabilities(self, vulns_file: str, files_created: List[str]) -> List[str]:
+        """
+        Fetch vulnerabilities from Tenable SC
+
+        :param str vulns_file: Path to save vulnerabilities to
+        :param List[str] files_created: Current list of created files
+        :return: Updated list of created files
+        :rtype: List[str]
+        """
+        # Make a copy of the list to avoid modifying the original
+        updated_files = files_created.copy()
+
+        if not self.query_id:
+            logger.warning("No query_id provided, skipping vulnerability download")
+            return updated_files
+
+        logger.info(f"Fetching vulnerabilities using query ID: {self.query_id}")
+        vulns_count = self._fetch_vulns_with_client(vulns_file, self.query_id)
+
+        if vulns_count > 0:
+            updated_files.append(vulns_file)
+            logger.info(f"Successfully downloaded {vulns_count} vulnerabilities to {vulns_file}")
+        else:
+            # Create an empty file to avoid errors later
+            self._create_empty_vulns_file(vulns_file, updated_files)
+
+        return updated_files
+
+    def _create_empty_vulns_file(self, vulns_file: str, files_created: List[str]) -> None:
+        """
+        Create an empty vulnerabilities file
+
+        :param str vulns_file: Path to create file at
+        :param List[str] files_created: List to append the file path to
+        """
+        logger.warning(f"No vulnerabilities found for query ID: {self.query_id}")
+        # Create an empty file to avoid errors later
+        with open(vulns_file, "w") as f:
+            json.dump({"response": {"results": []}}, f)
+        files_created.append(vulns_file)
+        logger.info(f"Created empty vulnerabilities file: {vulns_file}")
+
+    def _create_assets_file_from_vulns(self, assets_file: str, vulns_file: str, files_created: List[str]) -> None:
+        """
+        Create an assets file from vulnerability data
+
+        :param str assets_file: Path to create assets file
+        :param str vulns_file: Path to vulnerabilities file
+        :param List[str] files_created: List to append file path to
+        """
+        logger.info("Creating assets file from vulnerability results...")
+        # Create a minimal assets file from the vulnerabilities data
+        asset_data = {"response": {"usable": []}}
+
+        # Try to extract unique assets from vulnerability data
+        if not os.path.exists(vulns_file):
+            self._write_assets_file(assets_file, asset_data, files_created)
+            return
+
+        try:
+            unique_assets = self._extract_assets_from_vulns(vulns_file)
+
+            # Add unique assets to the asset data
+            asset_data["response"]["usable"] = list(unique_assets.values())
+            logger.info(f"Extracted {len(unique_assets)} unique assets from vulnerability data")
+        except Exception as e:
+            logger.error(f"Error extracting assets from vulnerability data: {str(e)}", exc_info=True)
+
+        # Write the assets file
+        self._write_assets_file(assets_file, asset_data, files_created)
+
+    def _extract_assets_from_vulns(self, vulns_file: str) -> Dict[str, Dict]:
+        """
+        Extract unique assets from vulnerability data
+
+        :param str vulns_file: Path to vulnerability file
+        :return: Dictionary of unique assets
+        :rtype: Dict[str, Dict]
+        """
+        unique_assets = {}
+
+        # Read vulnerabilities file
+        with open(vulns_file, "r") as f:
+            vuln_data = json.load(f)
+
+        # Extract unique assets from vulnerability results
+        for vuln in vuln_data.get("response", {}).get("results", []):
+            # Use IP or hostname as identifier
+            identifier = vuln.get("ip", "") or vuln.get("dnsName", "")
+            if identifier and identifier not in unique_assets:
+                # Create an asset entry
+                asset_entry = {
+                    "id": identifier,
+                    "name": vuln.get("dnsName", identifier),
+                    "definition": f"ip={identifier}",
+                    "description": "Asset created from vulnerability data",
+                }
+                unique_assets[identifier] = asset_entry
+
+        return unique_assets
+
+    def _write_assets_file(self, assets_file: str, asset_data: Dict, files_created: List[str]) -> None:
+        """
+        Write asset data to file
+
+        :param str assets_file: File path to write to
+        :param Dict asset_data: Asset data to write
+        :param List[str] files_created: List to append file path to
+        """
+        with open(assets_file, "w") as f:
+            json.dump(asset_data, f)
+
+        files_created.append(assets_file)
+        logger.info(f"Created assets file: {assets_file} with {len(asset_data['response']['usable'])} assets")
+
+    def _fetch_vulns_with_client(self, output_file: str, query_id: int) -> int:
+        """
+        Fetch vulnerabilities from Tenable SC using the client library.
+
+        This version writes results incrementally to avoid memory issues with large datasets.
+
+        :param str output_file: File to save the vulnerabilities to
+        :param int query_id: ID of the query to use
+        :return: Number of vulnerabilities fetched
+        :rtype: int
+        """
+        logger.info(f"Fetching vulnerabilities from Tenable SC using query ID: {query_id}...")
+
+        # Check TenableVariables for minimum severity filter
+        min_severity = getattr(TenableVariables, "tenableMinimumSeverityFilter", "critical").lower()
+        logger.info(f"Using minimum severity filter: {min_severity}")
+
+        # Initialize counters
+        total_vulns = 0
+
+        try:
+            # Set up for processing
+            temp_dir = os.path.dirname(output_file)
+            findings_jsonl = self._initialize_jsonl_file(temp_dir)
+
+            # Get and process vuln data
+            vulns_iterator = self._create_vulns_iterator(query_id)
+
+            # Process the vulnerabilities
+            total_vulns = self._process_vuln_iterator(vulns_iterator, findings_jsonl, output_file)
+
+            # Log completion
+            logger.info(f"Successfully processed {total_vulns} vulnerabilities")
+            logger.info(f"Data written to temporary JSONL file: {findings_jsonl}")
+
+        except Exception as e:
+            self._handle_vuln_fetch_error(e, output_file, query_id)
+
+        return total_vulns
+
+    def _initialize_jsonl_file(self, temp_dir: str) -> str:
+        """
+        Initialize JSONL file for findings
+
+        :param str temp_dir: Directory to create the file in
+        :return: Path to the JSONL file
+        :rtype: str
+        """
+        # Create a temp JSONL file for processing
+        findings_jsonl = os.path.join(temp_dir, "sc_findings.jsonl")
+        logger.info("Starting to process vulnerability data...")
+        logger.info(f"Creating temporary JSONL file: {findings_jsonl}")
+        return findings_jsonl
+
+    def _create_vulns_iterator(self, query_id: int):
+        """
+        Create an iterator for Tenable SC vulnerabilities
+
+        :param int query_id: Query ID to use
+        :return: Iterator for vulnerabilities
+        """
+        # For large queries, we need to use pagination
+        logger.info(f"Using analysis.vulns with query_id={query_id}")
+
+        # Set up query parameters
+        query_params = {"query_id": query_id, "tool": "vulndetails", "sourceType": "cumulative"}
+
+        # Log the query parameters for debugging
+        logger.info(f"Query parameters: {query_params}")
+
+        # The client library handles pagination internally via an iterator
+        return self.client.analysis.vulns(**query_params)
+
+    def _process_vuln_iterator(self, vulns_iterator, findings_jsonl: str, output_file: str) -> int:
+        """
+        Process vulnerability iterator and write data to files
+
+        :param vulns_iterator: Iterator for vulnerabilities
+        :param str findings_jsonl: Path to JSONL file for findings
+        :param str output_file: Path to output file
+        :return: Number of vulnerabilities processed
+        :rtype: int
+        """
+        # Process results in batches, writing directly to files
+        batch = []
+        batch_size = self.batch_size or 1000
+        batch_count = 0
+        total_vulns = 0
+        unique_assets = {}
+
+        # Open the findings JSONL file for writing
+        with open(findings_jsonl, "w") as jsonl_file:
+            for vuln in vulns_iterator:
+                # Write this vulnerability to the JSONL file immediately
+                jsonl_file.write(json.dumps(vuln) + "\n")
+
+                # Extract asset information
+                self._extract_asset_from_vuln(vuln, unique_assets)
+
+                # Count processed items
+                total_vulns += 1
+                batch.append(vuln)
+
+                # Log progress on batch completion
+                if len(batch) >= batch_size:
+                    batch_count += 1
+                    logger.info(f"Processed batch {batch_count} - {total_vulns} vulnerabilities so far...")
+                    # Clear the batch but don't keep results in memory
+                    batch = []
+
+        # Log final count
+        if batch:
+            logger.info(f"Processed final batch - total: {total_vulns} vulnerabilities")
+
+        # Write the output file
+        self._write_output_file(output_file, total_vulns)
+
+        # Create assets file if needed
+        self._create_assets_file_from_unique(unique_assets, output_file)
+
+        return total_vulns
+
+    def _extract_asset_from_vuln(self, vuln: Dict, unique_assets: Dict) -> None:
+        """
+        Extract asset information from vulnerability data
+
+        :param Dict vuln: Vulnerability data
+        :param Dict unique_assets: Dictionary to store unique assets
+        """
+        identifier = vuln.get("ip", "") or vuln.get("dnsName", "")
+        if identifier and identifier not in unique_assets:
+            # Create an asset entry
+            asset_entry = {
+                "id": identifier,
+                "name": vuln.get("dnsName", identifier),
+                "definition": f"ip={identifier}",
+                "description": "Asset created from vulnerability data",
+            }
+            unique_assets[identifier] = asset_entry
+
+    def _write_output_file(self, output_file: str, total_vulns: int) -> None:
+        """
+        Write output file with vulnerability data
+
+        :param str output_file: Path to output file
+        :param int total_vulns: Number of vulnerabilities processed
+        """
+        logger.info(f"Writing {total_vulns} vulnerabilities to output file: {output_file}")
+        with open(output_file, "w") as f:
+            # Write the header only - we'll read from the JSONL for actual data processing
+            f.write('{"response": {"results": []}}')
+
+    def _create_assets_file_from_unique(self, unique_assets: Dict, output_file: str) -> None:
+        """
+        Create assets file from unique assets
+
+        :param Dict unique_assets: Dictionary of unique assets
+        :param str output_file: Path to output file used to determine directory
+        """
+        if not unique_assets:
+            return
+
+        temp_dir = os.path.dirname(output_file)
+        assets_file = os.path.join(temp_dir, "sc_assets.json")
+
+        asset_data = {"response": {"usable": list(unique_assets.values())}}
+        with open(assets_file, "w") as f:
+            json.dump(asset_data, f)
+        logger.info(f"Created assets file with {len(unique_assets)} unique assets: {assets_file}")
+
+    def _handle_vuln_fetch_error(self, error: Exception, output_file: str, query_id: int) -> None:
+        """
+        Handle errors during vulnerability fetching
+
+        :param Exception error: The exception that occurred
+        :param str output_file: Path to output file
+        :param int query_id: Query ID that was used
+        """
+        logger.error(f"Error fetching vulnerabilities: {str(error)}", exc_info=True)
+
+        # Try to provide more helpful error messages
+        if "unauthorized" in str(error).lower() or "401" in str(error):
+            logger.error("Authentication error. Please check credentials.")
+        elif "not found" in str(error).lower() or "404" in str(error):
+            logger.error(f"Query ID {query_id} not found. Please verify the query exists.")
+        elif "timeout" in str(error).lower():
+            logger.error("Request timed out. The query may be too large or the server is busy.")
+
+        # Create an empty result if we couldn't get any data
+        with open(output_file, "w") as f:
+            json.dump({"response": {"results": []}}, f)
+        logger.info(f"Created empty results file {output_file}")
+
+    def find_or_download_data(self) -> List[str]:
+        """
+        Find existing Tenable SC data files or download new ones.
+
+        :return: List of file paths
+        :rtype: List[str]
+        """
+        # Create temporary directory if needed
+        self._ensure_temp_directory()
+        artifacts_dir = self.create_artifacts_dir()
+
+        # Check for existing files and clean them up
+        self._find_existing_files(artifacts_dir)
+
+        # Download new data
+        return self._download_data_files(artifacts_dir)
+
+    def _ensure_temp_directory(self) -> None:
+        """
+        Ensure a temporary directory exists for processing files.
+        """
+        if not self.temp_dir:
+            self.temp_dir = tempfile.mkdtemp(prefix="tenable_sc_")
+            logger.info(f"Created temporary directory: {self.temp_dir}")
+
+    def _find_existing_files(self, artifacts_dir: str) -> List[str]:
+        """
+        Find existing Tenable SC data files in the artifacts directory.
+        Always returns an empty list since we want to force a download of fresh data.
+
+        :param str artifacts_dir: Path to the artifacts directory
+        :return: Empty list (never use existing files)
+        :rtype: List[str]
+        """
+        # Identify any existing files
+        existing_files = list(find_files(artifacts_dir, self.file_pattern))
+        if existing_files:
+            logger.info(f"Found {len(existing_files)} existing Tenable SC data files to clean up")
+            # Clean up existing files
+            for file_path in existing_files:
+                try:
+                    os.remove(file_path)
+                    logger.info(f"Removed existing file: {file_path}")
+                except OSError as e:
+                    logger.warning(f"Failed to remove file {file_path}: {e}")
+
+        # Always return an empty list to force a fresh download
+        return []
+
+    def _download_data_files(self, artifacts_dir: str) -> List[str]:
+        """
+        Download Tenable SC data files to the artifacts directory.
+
+        :param str artifacts_dir: Path to the artifacts directory
+        :return: List of downloaded file paths
+        :rtype: List[str]
+        """
+        logger.info("Downloading new Tenable SC data...")
+
+        # Clean up any existing output JSONL files
+        for output_file in [self.ASSETS_FILE, self.FINDINGS_FILE]:
+            if os.path.exists(output_file):
+                try:
+                    os.remove(output_file)
+                    logger.info(f"Removed existing output file: {output_file}")
+                except OSError as e:
+                    logger.warning(f"Failed to remove output file {output_file}: {e}")
+
+        # Verify query_id is provided for downloading
+        if not self.query_id:
+            logger.error("No query_id provided and no existing files found")
+            raise ValidationException("Cannot download data: No query_id provided and no existing files found")
+
+        logger.info(f"Downloading data using query_id: {self.query_id}")
+        downloaded_files = self._download_sc_data(artifacts_dir)
+
+        # Create placeholder file if no files were downloaded
+        if not downloaded_files:
+            downloaded_files = [self._create_placeholder_file(artifacts_dir)]
+
+        logger.info(f"Downloaded {len(downloaded_files)} files:")
+        for file_path in downloaded_files:
+            logger.info(f"  - {file_path}")
+
+        return downloaded_files
+
+    def _create_placeholder_file(self, artifacts_dir: str) -> str:
+        """
+        Create a placeholder file for debugging when no files are downloaded.
+
+        :param str artifacts_dir: Path to the artifacts directory
+        :return: Path to the created placeholder file
+        :rtype: str
+        """
+        logger.warning("No files were downloaded. Creating a placeholder file for debugging.")
+        debug_file = os.path.join(artifacts_dir, "sc_vulns.json")
+        with open(debug_file, "w") as f:
+            json.dump({"response": {"results": []}}, f)
+        logger.info(f"Created placeholder file: {debug_file}")
+        return debug_file
+
+    def _process_asset_file(self, file, data, output_f, existing_items):
+        """
+        Process a Tenable SC data file for assets with mapping and validation.
+        Overrides the parent method to handle multiple assets in a single file.
+
+        :param file: The file being processed
+        :param data: The data from the file
+        :param output_f: The output file handle
+        :param existing_items: Dictionary of existing items
+        :return: Number of assets processed
+        :rtype: int
+        """
+        # Check if this is an assets file with a "usable" array
+        assets_list = data.get("response", {}).get("usable", [])
+
+        # Process multiple assets if available
+        if assets_list and len(assets_list) > 0:
+            return self._process_multiple_assets(file, assets_list, output_f, existing_items)
+        else:
+            # For non-assets files or empty assets files, process as a single asset
+            return self._process_single_file_asset(file, data, output_f, existing_items)
+
+    def _process_multiple_assets(self, file, assets_list, output_f, existing_items):
+        """
+        Process multiple assets from an assets file.
+
+        :param file: The file being processed
+        :param assets_list: List of asset data
+        :param output_f: The output file handle
+        :param existing_items: Dictionary of existing items
+        :return: Number of assets processed
+        :rtype: int
+        """
+        assets_added = 0
+        logger.info(f"Processing {len(assets_list)} assets from file {file}")
+
+        for asset_data in assets_list:
+            # Extract asset data and create asset
+            asset_id, asset_name, ip_info = self._extract_asset_info(asset_data)
+            identifier = ip_info if ip_info else asset_id
+            asset = self._create_basic_asset(identifier, asset_name or f"Asset {asset_id}", ip_info)
+
+            # Apply mapping if needed and validate
+            mapped_asset = self._apply_asset_mapping(asset, asset_data, asset_id, asset_name, ip_info)
+
+            try:
+                # Validate and write to output
+                if self._validate_and_write_asset(mapped_asset, existing_items, output_f):
+                    assets_added += 1
+            except Exception as e:
+                logger.error(f"Error processing asset {asset_id}: {str(e)}")
+
+        logger.info(f"Added {assets_added} assets from file {file}")
+        return assets_added
+
+    def _process_single_file_asset(self, file, data, output_f, existing_items):
+        """
+        Process a single asset from a file.
+
+        :param file: The file being processed
+        :param data: The data from the file
+        :param output_f: The output file handle
+        :param existing_items: Dictionary of existing items
+        :return: Number of assets processed (0 or 1)
+        :rtype: int
+        """
+        try:
+            # Parse asset from file
+            asset = self.parse_asset(file, data)
+
+            # Apply mapping if needed
+            mapped_asset = self._apply_asset_mapping(asset, data)
+
+            # Validate and write to output
+            if self._validate_and_write_asset(mapped_asset, existing_items, output_f):
+                return 1
+            return 0
+        except Exception as e:
+            logger.error(f"Error processing asset from file {file}: {str(e)}")
+            return 0
+
+    def _extract_asset_info(self, asset_data):
+        """
+        Extract asset information from the asset data.
+
+        :param asset_data: The asset data
+        :return: Tuple of (asset_id, asset_name, ip_info)
+        :rtype: Tuple[str, str, str]
+        """
+        asset_id = asset_data.get("id", "")
+        asset_name = asset_data.get("name", "")
+
+        # Extract IP information if available
+        ip_info = ""
+        definition = asset_data.get("definition", "")
+        if "ip=" in definition:
+            ip_parts = definition.split("ip=")[1].split("&")[0]
+            ip_info = ip_parts.replace("%3B", ";")
+
+        return asset_id, asset_name, ip_info
+
+    def _create_basic_asset(self, identifier, name, ip_address):
+        """
+        Create a basic IntegrationAsset object.
+
+        :param str identifier: Asset identifier
+        :param str name: Asset name
+        :param str ip_address: Asset IP address
+        :return: IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        return IntegrationAsset(
+            identifier=identifier,
+            name=name,
+            ip_address=ip_address,
+            parent_id=self.plan_id,
+            parent_module=regscale_models.SecurityPlan.get_module_slug(),
+            asset_owner_id=ScannerVariables.userId,
+            asset_category=regscale_models.AssetCategory.Hardware,
+            asset_type=regscale_models.AssetType.Other,
+            status=AssetStatus.Active,
+            date_last_updated=get_current_datetime(),
+        )
+
+    def _apply_asset_mapping(self, asset, source_data, asset_id=None, asset_name=None, ip_info=None):
+        """
+        Apply field mapping to an asset.
+
+        :param IntegrationAsset asset: The asset to apply mapping to
+        :param dict source_data: Source data for mapping
+        :param str asset_id: Optional asset ID for single-asset mapping
+        :param str asset_name: Optional asset name for single-asset mapping
+        :param str ip_info: Optional IP info for single-asset mapping
+        :return: Mapped IntegrationAsset
+        :rtype: IntegrationAsset
+        """
+        asset_dict = dataclasses.asdict(asset)
+
+        # For single assets from the assets file, build a simplified data structure
+        if asset_id is not None:
+            source_data = {"id": asset_id, "name": asset_name, "ip": ip_info}
+
+        if self.disable_mapping:
+            return asset
+
+        mapping = getattr(self.mapping, "fields", {}).get("asset_mapping", {}) if self.mapping else {}
+        mapped_asset_dict = self._apply_mapping(source_data or {}, asset_dict, mapping)
+
+        # Ensure we only pass valid fields to IntegrationAsset
+        valid_fields = {}
+        for field, value in mapped_asset_dict.items():
+            if hasattr(IntegrationAsset, field) or field in inspect.signature(IntegrationAsset.__init__).parameters:
+                valid_fields[field] = value
+
+        return IntegrationAsset(**valid_fields)
+
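The constructor-parameter filter above generalizes to any dataclass; a self-contained sketch with a stand-in class (ToyAsset is illustrative, not part of the package):

# Sketch of the field-filtering step: only keys accepted by the constructor
# survive, so stray mapping output cannot raise a TypeError.
# ToyAsset is a stand-in for IntegrationAsset.
import dataclasses
import inspect


@dataclasses.dataclass
class ToyAsset:
    identifier: str
    name: str = ""


mapped = {"identifier": "a-1", "name": "web01", "bogus_field": 42}
params = inspect.signature(ToyAsset.__init__).parameters
valid_fields = {k: v for k, v in mapped.items() if k in params}
print(ToyAsset(**valid_fields))  # ToyAsset(identifier='a-1', name='web01')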
+    def _validate_and_write_asset(self, asset, existing_items, output_f):
+        """
+        Validate an asset and write it to the output file if valid.
+
+        :param IntegrationAsset asset: The asset to validate and write
+        :param dict existing_items: Dictionary of existing items
+        :param file output_f: The output file handle
+        :return: True if asset was written, False otherwise
+        :rtype: bool
+        """
+        self._validate_fields(asset, self.required_asset_fields)
+
+        # Check if asset already exists
+        key = self._get_item_key(dataclasses.asdict(asset), "asset")
+        if key in existing_items:
+            logger.debug(f"Asset with identifier {key} already exists, skipping")
+            return False
+
+        # Write to output
+        output_f.write(json.dumps(dataclasses.asdict(asset)) + "\n")
+        output_f.flush()
+        existing_items[key] = True
+        return True
+
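The dedup-and-write step boils down to a keyed seen-dict over a JSONL stream; a toy sketch (the real key comes from self._get_item_key, a plain identifier stands in here):

# Toy sketch of the duplicate-suppressing JSONL write.
import json

records = [{"identifier": "a-1"}, {"identifier": "a-2"}, {"identifier": "a-1"}]
existing_items = {}
with open("assets.jsonl", "w") as output_f:
    for record in records:
        key = record["identifier"]  # stand-in for self._get_item_key(...)
        if key in existing_items:
            continue  # duplicate, already written
        output_f.write(json.dumps(record) + "\n")
        existing_items[key] = True
# assets.jsonl now contains two lines: a-1 and a-2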
+    def _process_finding_file(self, file, data, output_f, existing_items):
+        """
+        Process a single file for findings with memory-efficient streaming.
+
+        :param file: The file being processed
+        :param data: The data from the file
+        :param output_f: The output file handle
+        :param existing_items: Dictionary of existing items
+        :return: Number of findings processed
+        :rtype: int
+        """
+        file_path_str = str(file)
+
+        # Check if this is a JSONL file from our incremental processing
+        if file_path_str.endswith(FILE_TYPE):
+            logger.info(f"Processing JSONL findings file: {file_path_str}")
+            return self._process_jsonl_findings(file_path_str, output_f, existing_items)
+
+        # Get asset identifier from file
+        identifier = self._get_asset_identifier_from_file(file, data)
+
+        # Extract findings data
+        findings_data = data.get("response", {}).get("results", []) if data and "response" in data else []
+
+        # Process each finding
+        return self._process_findings_list(file, findings_data, identifier, output_f, existing_items)
+
+    def _get_asset_identifier_from_file(self, file, data):
+        """
+        Extract asset identifier from file data.
+
+        :param file: The file being processed
+        :param data: The data from the file
+        :return: Asset identifier
+        :rtype: str
+        """
+        try:
+            asset = self.parse_asset(file, data)
+            return asset.identifier
+        except Exception as e:
+            logger.error(f"Error parsing asset from file {file}: {str(e)}")
+            # Use a fallback identifier from the data if possible
+            identifier = "unknown"
+            if data and isinstance(data, dict):
+                # Try to extract IP from vuln data
+                if "response" in data and "results" in data.get("response", {}):
+                    results = data.get("response", {}).get("results", [])
+                    if results:
+                        identifier = results[0].get("ip", "unknown")
+            return identifier
+
+    def _process_findings_list(self, file, findings_data, default_identifier, output_f, existing_items):
+        """
+        Process a list of findings and write them to the output file.
+
+        :param file: The source file
+        :param list findings_data: List of finding data
+        :param str default_identifier: Default asset identifier to use
+        :param file output_f: Output file handle
+        :param dict existing_items: Dictionary of existing items
+        :return: Number of findings processed
+        :rtype: int
+        """
+        findings_in_file = 0
+
+        for finding_item in findings_data:
+            # Get IP directly from finding item if available
+            finding_asset_id = finding_item.get("ip", default_identifier)
+
+            # Process the individual finding
+            if self._process_individual_finding(file, finding_item, finding_asset_id, output_f, existing_items):
+                findings_in_file += 1
+
+        if findings_in_file > 0:
+            logger.info(f"Added {findings_in_file} new findings from file {file}")
+        return findings_in_file
+
+    def _process_jsonl_findings(self, jsonl_file_path, output_f, existing_items):
+        """
+        Process findings from a JSONL file in a memory-efficient way.
+
+        :param str jsonl_file_path: Path to the JSONL file
+        :param file output_f: Output file handle
+        :param dict existing_items: Dictionary of existing items
+        :return: Number of findings processed
+        :rtype: int
+        """
+        findings_in_file = 0
+        processed_count = 0
+
+        try:
+            # Process the JSONL file line by line
+            with open(jsonl_file_path, "r") as f:
+                for line_num, line in enumerate(f, 1):
+                    processed_count += 1
+
+                    # Log progress every 1000 lines
+                    if processed_count % 1000 == 0:
+                        logger.info(f"Processing finding {processed_count} from JSONL file...")
+
+                    try:
+                        # Parse the JSON line
+                        finding_item = json.loads(line)
+
+                        # Extract the asset identifier and process the finding
+                        finding_asset_id = finding_item.get("ip", "unknown")
+                        if self._process_individual_finding(
+                            jsonl_file_path, finding_item, finding_asset_id, output_f, existing_items, line_num
+                        ):
+                            findings_in_file += 1
+
+                    except json.JSONDecodeError as e:
+                        logger.warning(f"Invalid JSON at line {line_num} in {jsonl_file_path}: {e}")
+                    except Exception as e:
+                        logger.warning(f"Error processing finding at line {line_num} in {jsonl_file_path}: {e}")
+
+            logger.info(f"Processed {processed_count} total findings from JSONL, added {findings_in_file} new findings")
+
+        except Exception as e:
+            logger.error(f"Error processing JSONL file {jsonl_file_path}: {e}", exc_info=True)
+
+        return findings_in_file
+
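The line-at-a-time loop above is the standard tolerant-JSONL pattern; stripped of the integration plumbing, it reduces to a sketch like this:

# Generic sketch of the tolerant JSONL streaming used above: one line in
# memory at a time, bad lines reported and skipped rather than aborting.
import json


def iter_jsonl(path):
    with open(path, "r") as f:
        for line_num, line in enumerate(f, 1):
            try:
                yield json.loads(line)
            except json.JSONDecodeError as e:
                print(f"skipping invalid JSON at line {line_num}: {e}")


# usage: for finding in iter_jsonl("findings.jsonl"): ...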
+    def _process_individual_finding(self, file, finding_item, asset_id, output_f, existing_items, line_num=None):
+        """
+        Process an individual finding and write it to the output file if valid.
+
+        :param file: Source file or file path
+        :param dict finding_item: The finding data
+        :param str asset_id: Asset identifier
+        :param file output_f: Output file handle
+        :param dict existing_items: Dictionary of existing items
+        :param int line_num: Optional line number for JSONL processing
+        :return: True if finding was written, False otherwise
+        :rtype: bool
+        """
+        # Parse the finding
+        data = None  # Only needed for specific implementations
+        finding = self.parse_finding(asset_id, data, finding_item)
+
+        if not finding:
+            logger_fn = logger.debug if line_num else logger.warning
+            logger_fn(f"Failed to parse finding from {file}" + (f" at line {line_num}" if line_num else ""))
+            return False
+
+        # Apply mapping
+        mapped_finding = self._apply_finding_mapping(finding, finding_item)
+
+        # Validate and check for duplicates
+        try:
+            self._validate_fields(mapped_finding, self.required_finding_fields)
+
+            key = self._get_item_key(dataclasses.asdict(mapped_finding), "finding")
+            if key in existing_items:
+                logger.debug(f"Finding with key {key} already exists, skipping")
+                return False
+
+            # Write to output
+            output_f.write(json.dumps(dataclasses.asdict(mapped_finding)) + "\n")
+            output_f.flush()
+            existing_items[key] = True
+            return True
+        except Exception as e:
+            logger_fn = logger.debug if line_num else logger.error
+            logger_fn(f"Error processing finding: {e}")
+            return False
+
+    def _apply_finding_mapping(self, finding, finding_item):
+        """
+        Apply mapping to a finding.
+
+        :param IntegrationFinding finding: The finding to map
+        :param dict finding_item: The source finding data
+        :return: Mapped IntegrationFinding
+        :rtype: IntegrationFinding
+        """
+        if self.disable_mapping:
+            return finding
+
+        finding_dict = dataclasses.asdict(finding)
+        mapped_finding_dict = self._apply_mapping(
+            finding_item,
+            finding_dict,
+            getattr(self.mapping, "fields", {}).get("finding_mapping", {}) if self.mapping else {},
+        )
+
+        # Normalize field names - convert camelCase to snake_case and remove unknown fields
+        normalized_dict = {}
+        for key, value in mapped_finding_dict.items():
+            # Convert camelCase to snake_case
+            if key == "pluginID":
+                normalized_dict["plugin_id"] = value
+            elif key == "pluginName":
+                normalized_dict["plugin_name"] = value
+            # Only add known fields to avoid unexpected keyword argument errors
+            elif hasattr(IntegrationFinding, key) or key in inspect.signature(IntegrationFinding.__init__).parameters:
+                normalized_dict[key] = value
+
+        # Make sure required fields are present
+        for field in self.required_finding_fields:
+            if field not in normalized_dict and field in mapped_finding_dict:
+                normalized_dict[field] = mapped_finding_dict[field]
+
+        try:
+            return IntegrationFinding(**normalized_dict)
+        except TypeError as e:
+            logger.debug(f"Error creating IntegrationFinding: {e}. Using original finding.")
+            return finding
+
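The normalization above special-cases pluginID and pluginName; a generic camelCase-to-snake_case helper (a sketch only, not what the package ships) would cover arbitrary keys:

# Generic camelCase -> snake_case conversion via the classic two-pass regex.
import re


def to_snake_case(name: str) -> str:
    # Split before capital runs, then lowercase: pluginID -> plugin_id
    s1 = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
    return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1).lower()


print(to_snake_case("pluginID"), to_snake_case("pluginName"))
# plugin_id plugin_name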
+    def sync_assets_and_findings(self) -> None:
+        """
+        Process both assets and findings, downloading if necessary, and sync to RegScale.
+
+        This method overrides the parent method to handle the case where file_path is not provided
+        but query_id is, by first finding or downloading Tenable SC data files and then processing them.
+
+        :rtype: None
+        """
+        try:
+            # Ensure we have a valid file path, downloading data if needed
+            file_path = self._get_or_download_file_path()
+
+            # Process files into JSONL format for assets and findings
+            total_assets, total_findings = self._process_and_prepare_data(file_path)
+
+            # Sync assets and findings to RegScale
+            self._sync_data_to_regscale(total_assets, total_findings)
+
+        except Exception as e:
+            logger.error(f"Error in sync_assets_and_findings: {str(e)}", exc_info=True)
+            raise
+
+    def _get_or_download_file_path(self) -> str:
+        """
+        Get a valid file path, downloading data if necessary.
+
+        :return: Valid file path to process
+        :rtype: str
+        """
+        # If file_path is not provided, find or download files
+        if not self.file_path:
+            logger.info("No file path provided, finding or downloading Tenable SC data files")
+            found_files = self.find_or_download_data()
+
+            if not found_files:
+                logger.error("No Tenable SC data files found or downloaded")
+                raise ValidationException("No Tenable SC data files found or downloaded")
+
+            # Use the directory containing the first found file as the file_path
+            first_file = found_files[0]
+            self.file_path = os.path.dirname(first_file)
+            logger.info(f"Using directory containing found files as file_path: {self.file_path}")
+
+        # Validate the file path
+        return self._validate_file_path(self.file_path)
+
+    def _process_and_prepare_data(self, file_path: str) -> Tuple[int, int]:
+        """
+        Process files into JSONL format for assets and findings.
+
+        :param str file_path: Path to source files
+        :return: Tuple of (asset_count, finding_count)
+        :rtype: Tuple[int, int]
+        """
+        logger.info("Processing assets and findings together from %s", file_path)
+        return self._process_files(
+            file_path=file_path,
+            assets_output_file=self.ASSETS_FILE,
+            findings_output_file=self.FINDINGS_FILE,
+            empty_assets_file=self.empty_files,
+            empty_findings_file=self.empty_files,
+        )
+
+    def _sync_data_to_regscale(self, total_assets: int, total_findings: int) -> None:
+        """
+        Sync processed assets and findings to RegScale.
+
+        :param int total_assets: Number of assets to sync
+        :param int total_findings: Number of findings to sync
+        :rtype: None
+        """
+        # Sync assets
+        logger.info("Syncing %d assets to RegScale", total_assets)
+        self.sync_assets(
+            plan_id=self.plan_id,
+            file_path=self.file_path,
+            use_jsonl_file=True,
+            asset_count=total_assets,
+            scan_date=self.scan_date,
+        )
+
+        # Sync findings
+        logger.info("Syncing %d findings to RegScale", total_findings)
+        self.sync_findings(
+            plan_id=self.plan_id,
+            file_path=self.file_path,
+            use_jsonl_file=True,
+            finding_count=total_findings,
+            scan_date=self.scan_date,
+        )
+
+        logger.info("Assets and findings sync complete")
+
+    def check_data_file(self, data_files: List[str]) -> None:
+        """
+        Check whether any Tenable SC data files exist and exit cleanly if none do.
+
+        :param List[str] data_files: List of data file paths to check
+        :rtype: None
+        """
+        if not data_files:
+            logger.warning("No Tenable SC data files found, nothing to sync")
+            sys.exit(0)
+
+    def sync_with_transformer(self, mapping_file: Optional[str] = None) -> None:
+        """
+        Sync assets and findings to RegScale using the DataTransformer.
+
+        This method combines the ApiPaginator and DataTransformer to efficiently download
+        and transform Tenable SC data for RegScale integration.
+
+        :param Optional[str] mapping_file: Path to custom mapping file (uses default if None)
+        :raises Exception: If there is an error during synchronization
+        :rtype: None
+        """
+        try:
+            logger.info("Starting synchronization using DataTransformer...")
+
+            # Step 1: Download or find data files
+            data_files = self._get_data_files_for_sync()
+
+            # Step 2: Create transformer
+            transformer = self._create_transformer(mapping_file)
+
+            # Step 3: Load and process data from files
+            assets_list, findings_list = self._load_assets_and_findings(data_files)
+
+            # Step 4: Transform data
+            assets, findings = self._transform_data(transformer, assets_list, findings_list)
+
+            # Step 5: Sync with RegScale
+            self._sync_transformed_data(assets, findings)
+
+        except Exception as e:
+            logger.error(f"Error syncing with transformer: {str(e)}", exc_info=True)
+            raise
+
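A hypothetical caller-side invocation of this entry point (the class name and constructor arguments below are illustrative placeholders, not the package's documented API):

# Hypothetical usage - TenableSCScanner and its constructor arguments are
# placeholders; consult the package for the actual class and signature.
scanner = TenableSCScanner(plan_id=123, scan_date="2025-01-15")
scanner.sync_with_transformer(mapping_file="mappings/tenable_sc.json")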
+    def _get_data_files_for_sync(self) -> List[str]:
+        """
+        Get data files for synchronization.
+
+        :return: List of data file paths
+        :rtype: List[str]
+        """
+        data_files = self.find_or_download_data()
+        self.check_data_file(data_files)
+        logger.info(f"Processing {len(data_files)} Tenable SC data files")
+        return data_files
+
+    def _create_transformer(self, mapping_file: Optional[str] = None) -> DataTransformer:
+        """
+        Create a DataTransformer instance.
+
+        :param Optional[str] mapping_file: Path to custom mapping file
+        :return: Configured DataTransformer instance
+        :rtype: DataTransformer
+        """
+        transformer = DataTransformer(mapping_file=mapping_file)
+        transformer.scan_date = self.scan_date
+        return transformer
+
+    def _load_assets_and_findings(self, data_files: List[str]) -> Tuple[List[Dict], List[Dict]]:
+        """
+        Load assets and findings from data files.
+
+        :param List[str] data_files: List of data file paths
+        :return: Tuple of (assets_list, findings_list)
+        :rtype: Tuple[List[Dict], List[Dict]]
+        """
+        assets_list = []
+        findings_list = []
+
+        for file_path in data_files:
+            file_path_str = str(file_path)
+            logger.info(f"Processing file: {file_path_str}")
+
+            try:
+                self._process_data_file(file_path_str, assets_list, findings_list)
+            except Exception as e:
+                logger.error(f"Error processing file {file_path_str}: {str(e)}", exc_info=True)
+
+        return assets_list, findings_list
+
+    def _process_data_file(self, file_path: str, assets_list: List[Dict], findings_list: List[Dict]) -> None:
+        """
+        Process a single data file and extract assets and findings.
+
+        :param str file_path: Path to data file
+        :param List[Dict] assets_list: List to append assets to
+        :param List[Dict] findings_list: List to append findings to
+        :rtype: None
+        """
+        # Load the file data
+        with open(file_path, "r") as f:
+            data = json.load(f)
+
+        # Validate the file
+        is_valid, validated_data = self.is_valid_file(data, file_path)
+        if not is_valid or validated_data is None:
+            logger.warning(f"Invalid file: {file_path}")
+            return
+
+        # Process assets and findings based on file type
+        if "sc_assets" in file_path:
+            self._extract_assets(validated_data, assets_list, file_path)
+        elif "sc_vulns" in file_path:
+            self._extract_findings(validated_data, findings_list, file_path)
+
+    def _extract_assets(self, validated_data: Dict, assets_list: List[Dict], file_path: str) -> None:
+        """
+        Extract assets from validated data.
+
+        :param Dict validated_data: Validated data from file
+        :param List[Dict] assets_list: List to append assets to
+        :param str file_path: Path to source file (for logging)
+        :rtype: None
+        """
+        # Extract assets from the assets file
+        assets = validated_data.get("response", {}).get("usable", [])
+        assets_list.extend(assets)
+        logger.info(f"Added {len(assets)} assets from file: {file_path}")
+
+    def _extract_findings(self, validated_data: Dict, findings_list: List[Dict], file_path: str) -> None:
+        """
+        Extract findings from validated data.
+
+        :param Dict validated_data: Validated data from file
+        :param List[Dict] findings_list: List to append findings to
+        :param str file_path: Path to source file (for logging)
+        :rtype: None
+        """
+        # Extract findings from the vulnerabilities file
+        findings = validated_data.get("response", {}).get("results", [])
+        findings_list.extend(findings)
+        logger.info(f"Added {len(findings)} findings from file: {file_path}")
+
+    def _transform_data(
+        self, transformer: DataTransformer, assets_list: List[Dict], findings_list: List[Dict]
+    ) -> Tuple[List[IntegrationAsset], List[IntegrationFinding]]:
+        """
+        Transform raw data into IntegrationAsset and IntegrationFinding objects.
+
+        :param DataTransformer transformer: DataTransformer instance
+        :param List[Dict] assets_list: List of asset data
+        :param List[Dict] findings_list: List of finding data
+        :return: Tuple of (assets, findings)
+        :rtype: Tuple[List[IntegrationAsset], List[IntegrationFinding]]
+        """
+        # Transform assets
+        assets = list(transformer.batch_transform_to_assets(assets_list, plan_id=self.plan_id))
+
+        # Link findings to assets
+        self._link_findings_to_assets(assets, findings_list)
+
+        # Transform findings
+        findings = list(transformer.batch_transform_to_findings(findings_list))
+
+        logger.info(f"Transformed {len(assets)} assets and {len(findings)} findings")
+        return assets, findings
+
+    def _link_findings_to_assets(self, assets: List[IntegrationAsset], findings_list: List[Dict]) -> None:
+        """
+        Link findings to assets using IP address.
+
+        :param List[IntegrationAsset] assets: List of assets
+        :param List[Dict] findings_list: List of finding data to update
+        :rtype: None
+        """
+        # Create mapping from IP to asset identifier
+        asset_identifier_map = {asset.ip_address: asset.identifier for asset in assets if asset.ip_address}
+
+        # Add asset identifier to each finding, falling back to the raw IP when no asset matches
+        for finding_data in findings_list:
+            ip = finding_data.get("ip", "")
+            finding_data["asset_identifier"] = asset_identifier_map.get(ip, ip)
+
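Reduced to toy data, the IP join above behaves like this (ToyAsset again stands in for IntegrationAsset):

# Toy demo of the IP-to-identifier linking.
from dataclasses import dataclass


@dataclass
class ToyAsset:
    identifier: str
    ip_address: str


assets = [ToyAsset("asset-1", "192.0.2.10")]
findings = [{"ip": "192.0.2.10"}, {"ip": "192.0.2.99"}]

id_map = {a.ip_address: a.identifier for a in assets if a.ip_address}
for f in findings:
    # Fall back to the raw IP when no asset matches, as the method above does
    f["asset_identifier"] = id_map.get(f.get("ip", ""), f.get("ip", ""))

print(findings)
# [{'ip': '192.0.2.10', 'asset_identifier': 'asset-1'},
#  {'ip': '192.0.2.99', 'asset_identifier': '192.0.2.99'}]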
+    def _sync_transformed_data(self, assets: List[IntegrationAsset], findings: List[IntegrationFinding]) -> None:
+        """
+        Sync transformed data to RegScale.
+
+        :param List[IntegrationAsset] assets: List of assets
+        :param List[IntegrationFinding] findings: List of findings
+        :rtype: None
+        """
+        # Sync assets and findings to RegScale
+        asset_count = self.update_regscale_assets(iter(assets))
+        finding_count = self.update_regscale_findings(iter(findings))
+
+        logger.info(f"Synchronized {asset_count} assets and {finding_count} findings to RegScale")
+
+    def _process_files(
+        self,
+        file_path: Union[str, Path],
+        assets_output_file: str,
+        findings_output_file: str,
+        empty_assets_file: bool = True,
+        empty_findings_file: bool = True,
+    ) -> Tuple[int, int]:
+        """
+        Process source files to extract assets and findings.
+
+        :param Union[str, Path] file_path: Path to source file or directory
+        :param str assets_output_file: Path to write assets to
+        :param str findings_output_file: Path to write findings to
+        :param bool empty_assets_file: Whether to empty the assets file before writing
+        :param bool empty_findings_file: Whether to empty the findings file before writing
+        :return: Tuple of (asset_count, finding_count)
+        :rtype: Tuple[int, int]
+        """
+        # Ensure output directories exist (skip bare filenames with no directory part)
+        for output_file in (assets_output_file, findings_output_file):
+            output_dir = os.path.dirname(output_file)
+            if output_dir:
+                os.makedirs(output_dir, exist_ok=True)
+
+        # Prepare output files
+        asset_info = self._prepare_output_file(assets_output_file, empty_assets_file, "asset")
+        finding_info = self._prepare_output_file(findings_output_file, empty_findings_file, "finding")
+
+        # Initialize counters for memory-efficient tracking
+        asset_count = 0
+        finding_count = 0
+        processed_files = 0
+
+        # Log start of processing
+        logger.info(f"Starting to process files from {file_path}")
+
+        # Process each file
+        for file_path_obj, data in self.find_valid_files(file_path):
+            processed_files += 1
+            file_path_str = str(file_path_obj)
+            file_size_mb = os.path.getsize(file_path_str) / (1024 * 1024) if os.path.exists(file_path_str) else 0
+
+            logger.info(f"Processing file {processed_files}: {file_path_str} ({file_size_mb:.2f} MB)")
+
+            try:
+                # Check for JSONL files first - these are already in our optimized format
+                if file_path_str.endswith(FILE_TYPE):
+                    if "findings" in file_path_str.lower():
+                        # Process findings JSONL file
+                        with open(findings_output_file, finding_info.get("mode", "w")) as f:
+                            count = self._process_jsonl_findings(
+                                file_path_str, f, finding_info.get("existing_items", {})
+                            )
+                        finding_count += count
+                        logger.info(f"Added {count} findings from JSONL file {file_path_str}")
+                        # Use append mode after first file
+                        finding_info["mode"] = "a"
+                    # Skip JSON validation for JSONL files
+                    continue
+
+                # For JSON files, process normally
+                if not self.is_valid_file(data, file_path_str)[0]:
+                    logger.warning(f"Invalid file format: {file_path_str}")
+                    continue
+
+                # Process assets or findings based on file path
+                if "sc_assets" in file_path_str:
+                    # Process assets file
+                    with open(assets_output_file, asset_info.get("mode", "w")) as output_f:
+                        count = self._process_asset_file(
+                            file_path_str, data, output_f, asset_info.get("existing_items", {})
+                        )
+                    asset_count += count
+                    # Use append mode for subsequent files
+                    asset_info["mode"] = "a"
+
+                elif "sc_vulns" in file_path_str:
+                    # Process findings file
+                    with open(findings_output_file, finding_info.get("mode", "w")) as output_f:
+                        count = self._process_finding_file(
+                            file_path_str, data, output_f, finding_info.get("existing_items", {})
+                        )
+                    finding_count += count
+                    # Use append mode for subsequent files
+                    finding_info["mode"] = "a"
+
+            except Exception as e:
+                logger.error(f"Error processing file {file_path_str}: {str(e)}", exc_info=True)
+
+        # Log completion
+        logger.info(f"Finished processing {processed_files} files")
+        logger.info(f"Added {asset_count} assets and {finding_count} findings to JSONL files")
+
+        return asset_count, finding_count
+
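The write-then-append mode handoff used throughout _process_files, in isolation: the first file truncates the output with "w", every later file appends with "a":

# Sketch of the mode handoff: first open truncates, later opens append.
info = {"mode": "w"}
for chunk in (["first"], ["second", "third"]):
    with open("out.jsonl", info["mode"]) as f:
        f.writelines(line + "\n" for line in chunk)
    info["mode"] = "a"  # subsequent files must not clobber earlier output
# out.jsonl now holds: first, second, third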
+    # Add method to support scanner_integration.py
+    def _process_single_asset(self, asset, loading_assets=False):
+        """
+        Process a single asset for the scanner integration framework.
+        This method is called by scanner_integration.py's _process_assets method.
+
+        :param asset: The asset to process
+        :param loading_assets: Whether assets are being loaded
+        :return: True if successful, False otherwise
+        :rtype: bool
+        """
+        try:
+            # Process the asset as needed for integration
+            # This is a simplified version just to handle the scanner integration's expectations
+            return True
+        except Exception as e:
+            logger.error(f"Error processing asset: {str(e)}")
+            return False