regscale-cli 6.17.0.0__py3-none-any.whl → 6.19.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of regscale-cli might be problematic.
- regscale/__init__.py +1 -1
- regscale/core/app/api.py +5 -0
- regscale/core/login.py +3 -0
- regscale/integrations/api_paginator.py +932 -0
- regscale/integrations/api_paginator_example.py +348 -0
- regscale/integrations/commercial/__init__.py +11 -10
- regscale/integrations/commercial/burp.py +4 -0
- regscale/integrations/commercial/{qualys.py → qualys/__init__.py} +756 -105
- regscale/integrations/commercial/qualys/scanner.py +1051 -0
- regscale/integrations/commercial/qualys/variables.py +21 -0
- regscale/integrations/commercial/sicura/api.py +1 -0
- regscale/integrations/commercial/stigv2/click_commands.py +36 -8
- regscale/integrations/commercial/stigv2/stig_integration.py +63 -9
- regscale/integrations/commercial/tenablev2/__init__.py +9 -0
- regscale/integrations/commercial/tenablev2/authenticate.py +23 -2
- regscale/integrations/commercial/tenablev2/commands.py +779 -0
- regscale/integrations/commercial/tenablev2/jsonl_scanner.py +1999 -0
- regscale/integrations/commercial/tenablev2/sc_scanner.py +600 -0
- regscale/integrations/commercial/tenablev2/scanner.py +7 -5
- regscale/integrations/commercial/tenablev2/utils.py +21 -4
- regscale/integrations/commercial/tenablev2/variables.py +4 -0
- regscale/integrations/jsonl_scanner_integration.py +523 -142
- regscale/integrations/scanner_integration.py +102 -26
- regscale/integrations/transformer/__init__.py +17 -0
- regscale/integrations/transformer/data_transformer.py +445 -0
- regscale/integrations/transformer/mappings/__init__.py +8 -0
- regscale/integrations/variables.py +2 -0
- regscale/models/__init__.py +5 -2
- regscale/models/integration_models/cisa_kev_data.json +63 -7
- regscale/models/integration_models/synqly_models/capabilities.json +1 -1
- regscale/models/regscale_models/asset.py +5 -2
- regscale/models/regscale_models/file.py +5 -2
- regscale/regscale.py +3 -1
- {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/METADATA +1 -1
- {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/RECORD +47 -31
- tests/regscale/core/test_version.py +22 -0
- tests/regscale/integrations/__init__.py +0 -0
- tests/regscale/integrations/test_api_paginator.py +597 -0
- tests/regscale/integrations/test_integration_mapping.py +60 -0
- tests/regscale/integrations/test_issue_creation.py +317 -0
- tests/regscale/integrations/test_issue_due_date.py +46 -0
- tests/regscale/integrations/transformer/__init__.py +0 -0
- tests/regscale/integrations/transformer/test_data_transformer.py +850 -0
- regscale/integrations/commercial/tenablev2/click.py +0 -1637
- {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/LICENSE +0 -0
- {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/WHEEL +0 -0
- {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/entry_points.txt +0 -0
- {regscale_cli-6.17.0.0.dist-info → regscale_cli-6.19.0.0.dist-info}/top_level.txt +0 -0
@@ -55,6 +55,10 @@ class JSONLScannerIntegration(ScannerIntegration):
         """
         logger.info("Initializing JSONLScannerIntegration")
         self.plan_id = kwargs.get("plan_id", None)
+
+        # Pass vulnerability creation option to parent class
+        self.vulnerability_creation = kwargs.get("vulnerability_creation", None)
+
         # plan_id is required for all integrations
         super().__init__(**kwargs)
         # Extract S3-related kwargs
@@ -76,7 +80,8 @@ class JSONLScannerIntegration(ScannerIntegration):
         self.mapping = self._load_mapping() if not self.disable_mapping else None

         self.set_scan_date(kwargs.get("scan_date", get_current_datetime()))
-
+
+        self.existing_assets = {}

         self.s3_client = None
         if self.s3_bucket and not self.read_files_only:
@@ -290,10 +295,7 @@ class JSONLScannerIntegration(ScannerIntegration):

        with open(output_file, "a") as output_f:
            for file_data in self.find_valid_files(file_path):
-
-                    file, data = file_data[0], file_data[1]
-                else:
-                    file, data = file_data, None
+                file, data = self._extract_file_and_data(file_data)

                file_str = str(file)
                if file_str in processed_files:
@@ -301,23 +303,64 @@ class JSONLScannerIntegration(ScannerIntegration):

                processed_files.add(file_str)

-
-
-
-                    self._process_asset_file(file, data, output_f, existing_items)
-                    new_items_count += 1
-                    total_items_count += 1
-                else:
-                    findings_count = self._process_finding_file(file, data, output_f, existing_items)
-                    new_items_count += findings_count
-                    total_items_count += findings_count
+                items_added = self._process_file_by_type(file, data, output_f, existing_items, item_type)
+                new_items_count += items_added
+                total_items_count += items_added

-
-
+        self._log_items_added(new_items_count, item_type, output_file)
+        return total_items_count
+
+    def _extract_file_and_data(self, file_data: Any) -> Tuple[Union[Path, str], Optional[Dict[str, Any]]]:
+        """
+        Extract file path and data from file_data which might be a tuple or a single value.
+
+        :param Any file_data: File data from find_valid_files
+        :return: Tuple of (file_path, file_data)
+        :rtype: Tuple[Union[Path, str], Optional[Dict[str, Any]]]
+        """
+        if isinstance(file_data, tuple) and len(file_data) >= 2:
+            return file_data[0], file_data[1]
+        return file_data, None

+    def _process_file_by_type(
+        self,
+        file: Union[Path, str],
+        data: Optional[Dict[str, Any]],
+        output_f: Any,
+        existing_items: Dict[str, bool],
+        item_type: str,
+    ) -> int:
+        """
+        Process a file based on the item type (asset or finding).
+
+        :param Union[Path, str] file: File path
+        :param Optional[Dict[str, Any]] data: File data
+        :param Any output_f: Output file handle
+        :param Dict[str, bool] existing_items: Dictionary of existing item keys
+        :param str item_type: Type of items to process ('asset' or 'finding')
+        :return: Number of items added
+        :rtype: int
+        """
+        try:
+            logger.info(f"Processing file: {file}")
+            if item_type == "asset":
+                return self._process_asset_file(file, data, output_f, existing_items)
+            else:
+                return self._process_finding_file(file, data, output_f, existing_items)
+        except Exception as e:
+            logger.error(f"Error processing file {file}: {str(e)}")
+            return 0
+
+    def _log_items_added(self, new_items_count: int, item_type: str, output_file: str) -> None:
+        """
+        Log information about the number of items added.
+
+        :param int new_items_count: Number of new items added
+        :param str item_type: Type of items processed ('asset' or 'finding')
+        :param str output_file: Path to the output file
+        """
        item_type_label = "assets" if item_type == "asset" else "findings"
        logger.info(f"Added {new_items_count} new {item_type_label} to {output_file}")
-        return total_items_count

    def _process_asset_file(self, file, data, output_f, existing_items):
        """
@@ -350,8 +393,7 @@ class JSONLScannerIntegration(ScannerIntegration):
            logger.debug(f"Asset with identifier {key} already exists, skipping")
            return 0

-
-        output_f.flush()
+        self._write_item(output_f, mapped_asset)
        existing_items[key] = True
        return 1

@@ -392,8 +434,7 @@ class JSONLScannerIntegration(ScannerIntegration):
                logger.debug(f"Finding with key {key} already exists, skipping")
                continue

-
-            output_f.flush()
+            self._write_item(output_f, mapped_finding)
            existing_items[key] = True
            findings_in_file += 1

@@ -416,20 +457,54 @@ class JSONLScannerIntegration(ScannerIntegration):

    def _yield_items_from_jsonl(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
        """
-        Read items from JSONL file and yield them one by one.
+        Read items from JSONL file and yield them one by one with optimizations for large files.
+
+        This method automatically selects an appropriate processing strategy based on file size:
+        - Small files (<100MB): Simple line-by-line processing
+        - Medium files (100MB-500MB): Batch processing with increased buffer size
+        - Large files (>500MB): Parallel processing with multiprocessing

        :param str jsonl_file: Path to JSONL file containing items
        :param Type[ItemType] item_class: Class to convert dictionary items to (IntegrationAsset or IntegrationFinding)
        :yields: Items one at a time
        :rtype: Iterator[ItemType]
        """
+        # Standard library imports should be at the module level, but these are only needed here
+        # and having them at the top would create unnecessary dependencies for small files
+
        if not os.path.exists(jsonl_file):
            logger.warning(f"JSONL file {jsonl_file} does not exist")
            return

-
+        # Check file size to determine best strategy
+        file_size = os.path.getsize(jsonl_file)
+        file_size_mb = file_size / (1024 * 1024)
+        logger.info(f"Reading items from {jsonl_file} (size: {file_size_mb:.2f} MB)")
+
+        # Select processing strategy based on file size
+        if file_size < 100 * 1024 * 1024:  # < 100MB
+            yield from self._process_small_file(jsonl_file, item_class)
+        elif file_size < 500 * 1024 * 1024:  # 100MB-500MB
+            yield from self._process_medium_file(jsonl_file, item_class)
+        else:  # > 500MB
+            yield from self._process_large_file(jsonl_file, item_class)
+
+        logger.info(f"Finished reading items from {jsonl_file}")
+
+    def _process_small_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a small JSONL file (<100MB) using line-by-line processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
        with open(jsonl_file, "r") as f:
            for line_number, line in enumerate(f, 1):
+                if not line.strip():  # Skip empty lines
+                    continue
+
                try:
                    item_dict = json.loads(line.strip())
                    yield item_class(**item_dict)
@@ -438,6 +513,95 @@ class JSONLScannerIntegration(ScannerIntegration):
                except Exception as e:
                    logger.error(f"Error processing line {line_number} in {jsonl_file}: {str(e)}")

+    def _process_batch(self, batch: List[str], item_class: Type[ItemType]) -> List[ItemType]:
+        """
+        Process a batch of lines into item objects.
+
+        :param List[str] batch: List of JSON lines to process
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :return: List of processed items
+        :rtype: List[ItemType]
+        """
+        results = []
+        for line in batch:
+            if not line.strip():  # Skip empty lines
+                continue
+
+            try:
+                item_dict = json.loads(line.strip())
+                results.append(item_class(**item_dict))
+            except json.JSONDecodeError:
+                logger.warning("Could not parse line in batch")
+            except Exception as e:
+                logger.error(f"Error processing line in batch: {str(e)}")
+        return results
+
+    def _process_medium_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a medium-sized JSONL file (100MB-500MB) using batch processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
+        batch_size = 10000  # Process 10,000 lines at a time
+        buffer_size = 10 * 1024 * 1024  # 10MB buffer
+
+        with open(jsonl_file, "r", buffering=buffer_size) as f:
+            batch = []
+
+            for line in f:
+                batch.append(line)
+
+                if len(batch) >= batch_size:
+                    for item in self._process_batch(batch, item_class):
+                        yield item
+                    batch = []
+
+            # Process any remaining lines
+            if batch:
+                for item in self._process_batch(batch, item_class):
+                    yield item
+
+    def _process_large_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a large JSONL file (>500MB) using parallel processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial
+
+        max_workers = min(os.cpu_count() or 4, 8)
+        batch_size = 10000  # Process 10,000 lines at a time
+        buffer_size = 10 * 1024 * 1024  # 10MB buffer
+
+        logger.info(f"Processing large file with {max_workers} workers, batch size: {batch_size}")
+
+        with open(jsonl_file, "r", buffering=buffer_size) as f:
+            batch = []
+            process_func = partial(self._process_batch, item_class=item_class)
+
+            with ProcessPoolExecutor(max_workers=max_workers) as executor:
+                for line in f:
+                    batch.append(line)
+
+                    if len(batch) >= batch_size:
+                        future = executor.submit(process_func, batch)
+                        batch = []
+                        # Yield results as they complete
+                        for item in future.result():
+                            yield item
+
+                # Process any remaining lines
+                if batch:
+                    for item in executor.submit(process_func, batch).result():
+                        yield item
+
    def _process_files(
        self,
        file_path: Union[str, Path],
@@ -590,11 +754,52 @@ class JSONLScannerIntegration(ScannerIntegration):
        """
        findings_data = self._get_findings_data_from_file(data)
        logger.info(f"Found {len(findings_data)} findings in file: {file}")
+        self.existing_assets = existing_assets
+        asset_id = self._get_asset_id_from_assets()
+        findings_added = self._process_finding_items(findings_data, asset_id, data, findings_file, tracker)
+
+        if findings_added > 0:
+            logger.info(f"Added {findings_added} new findings from file {file}")
+
+    def _get_asset_id_from_assets(self) -> str:
+        """
+        Get the first asset ID from existing assets, or 'unknown' if none exist.
+
+        :return: The first asset ID found or 'unknown'
+        :rtype: str
+        """
+        return list(self.existing_assets.keys())[0] if self.existing_assets else "unknown"
+
+    def _process_finding_items(
+        self,
+        findings_data: List[Dict[str, Any]],
+        asset_id: str,
+        data: Optional[Dict[str, Any]],
+        findings_file: Any,
+        tracker: "CountTracker",
+    ) -> int:
+        """
+        Process individual finding items and write them to the findings file.
+
+        :param List[Dict[str, Any]] findings_data: List of findings data
+        :param str asset_id: Asset ID to associate with findings
+        :param Optional[Dict[str, Any]] data: Source data from the file
+        :param Any findings_file: Open file handle for writing findings
+        :param CountTracker tracker: Tracker for finding counts
+        :return: Number of findings added
+        :rtype: int
+        """
        findings_added = 0

-        asset_id
+        # Create a default asset_id to use only if absolutely necessary
+        default_asset_id = self._get_asset_id_from_assets()
+
+        # Process each finding individually
        for finding_item in findings_data:
-
+            # Let the parse_finding implementation determine the correct asset_identifier
+            # This relies on subclasses implementing parse_finding to extract the right asset ID
+            # from the finding_item directly
+            finding = self.parse_finding(default_asset_id, data, finding_item)
            finding_dict = dataclasses.asdict(finding)
            mapped_finding = self._map_item(finding_dict, "finding_mapping", IntegrationFinding)
            self._validate_fields(mapped_finding, self.required_finding_fields)
@@ -605,12 +810,47 @@ class JSONLScannerIntegration(ScannerIntegration):
            tracker.existing[finding_key] = True
            tracker.new_count += 1
            tracker.total_count += 1
+
+            if self._process_single_finding(finding_item, asset_id, data, findings_file, tracker):
                findings_added += 1
-            else:
-                logger.debug(f"Finding with key {finding_key} already exists, skipping")

-
-
+        return findings_added
+
+    def _process_single_finding(
+        self,
+        finding_item: Dict[str, Any],
+        asset_id: str,
+        data: Optional[Dict[str, Any]],
+        findings_file: Any,
+        tracker: "CountTracker",
+    ) -> bool:
+        """
+        Process a single finding item and write it if it's new.
+
+        :param Dict[str, Any] finding_item: Finding data
+        :param str asset_id: Asset ID to associate with the finding
+        :param Optional[Dict[str, Any]] data: Source data from the file
+        :param Any findings_file: Open file handle for writing findings
+        :param CountTracker tracker: Tracker for finding counts
+        :return: True if the finding was added, False otherwise
+        :rtype: bool
+        """
+        finding = self.parse_finding(asset_id, data, finding_item)
+        finding_dict = dataclasses.asdict(finding)
+        mapped_finding = self._map_item(finding_dict, "finding_mapping", IntegrationFinding)
+        self._validate_fields(mapped_finding, self.required_finding_fields)
+
+        finding_key = self._get_item_key(dataclasses.asdict(mapped_finding), "finding")
+
+        if finding_key in tracker.existing:
+            logger.debug(f"Finding with key {finding_key} already exists, skipping")
+            return False
+
+        self._write_item(findings_file, mapped_finding)
+        tracker.existing[finding_key] = True
+        tracker.new_count += 1
+        tracker.total_count += 1
+        return True

    def _map_item(self, item_dict: Dict[str, Any], mapping_key: str, item_class: Type) -> Any:
        """
@@ -629,16 +869,116 @@ class JSONLScannerIntegration(ScannerIntegration):
            return item_class(**mapped_dict)
        return item_class(**item_dict)

-    def _write_item(self,
+    def _write_item(self, file_handle_or_path: Any, item: Any) -> None:
        """
-        Write an item to
+        Write an item to a JSONL file.

-        :param Any
-        :param Any item: Item
-
+        :param Any file_handle_or_path: Open file handle or file path to write to
+        :param Any item: Item to write (IntegrationAsset or IntegrationFinding)
+        """
+        try:
+            item_dict = self._convert_item_to_dict(item)
+            item_dict = self._ensure_serializable(item_dict)
+            self._write_dict_to_file(file_handle_or_path, item_dict)
+        except Exception as e:
+            logger.error(f"Error writing item: {str(e)}")
+            logger.debug(f"Problem item: {str(item)}")
+            self._write_fallback_record(file_handle_or_path, item, e)
+
+    def _convert_item_to_dict(self, item: Any) -> Dict[str, Any]:
        """
-
-
+        Convert an item to a dictionary using the most appropriate method.
+
+        :param Any item: Item to convert
+        :return: Dictionary representation of the item
+        :rtype: Dict[str, Any]
+        """
+        if dataclasses.is_dataclass(item):
+            return dataclasses.asdict(item)
+
+        if hasattr(item, "to_dict") and callable(item.to_dict):
+            return item.to_dict()
+
+        if hasattr(item, "__dict__"):
+            return item.__dict__
+
+        if isinstance(item, dict):
+            return item
+
+        return {"value": str(item)}
+
+    def _write_dict_to_file(self, file_handle_or_path: Any, item_dict: Dict[str, Any]) -> None:
+        """
+        Write a dictionary to a file as JSON.
+
+        :param Any file_handle_or_path: Open file handle or file path
+        :param Dict[str, Any] item_dict: Dictionary to write
+        """
+        json_line = json.dumps(item_dict) + "\n"
+
+        if self._is_file_handle(file_handle_or_path):
+            file_handle_or_path.write(json_line)
+            file_handle_or_path.flush()
+        else:
+            with open(file_handle_or_path, "a") as f:
+                f.write(json_line)
+
+    def _is_file_handle(self, file_handle_or_path: Any) -> bool:
+        """
+        Check if the given object is a file handle.
+
+        :param Any file_handle_or_path: Object to check
+        :return: True if it's a file handle, False otherwise
+        :rtype: bool
+        """
+        return hasattr(file_handle_or_path, "write") and callable(file_handle_or_path.write)
+
+    def _write_fallback_record(self, file_handle_or_path: Any, item: Any, error: Exception) -> None:
+        """
+        Write a simplified fallback record when normal serialization fails.
+
+        :param Any file_handle_or_path: Open file handle or file path
+        :param Any item: Original item that failed to serialize
+        :param Exception error: The exception that occurred
+        """
+        try:
+            simplified = {
+                "error": "Failed to serialize original item",
+                "item_type": str(type(item)),
+                "error_message": str(error),
+            }
+
+            if hasattr(item, "__str__"):
+                simplified["item_string"] = str(item)
+
+            self._write_dict_to_file(file_handle_or_path, simplified)
+            logger.warning("Wrote simplified version of item after serialization error")
+        except Exception as e2:
+            logger.error(f"Failed to write simplified item: {str(e2)}")
+
+    def _ensure_serializable(self, obj: Any) -> Any:
+        """
+        Ensure all values in an object are JSON serializable.
+
+        :param Any obj: Object to make serializable
+        :return: Serializable object
+        """
+        if isinstance(obj, (str, int, float, bool, type(None))):
+            return obj
+        elif isinstance(obj, datetime):
+            return obj.isoformat()
+        elif isinstance(obj, dict):
+            return {k: self._ensure_serializable(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [self._ensure_serializable(i) for i in obj]
+        elif dataclasses.is_dataclass(obj):
+            return self._ensure_serializable(dataclasses.asdict(obj))
+        elif hasattr(obj, "to_dict") and callable(obj.to_dict):
+            return self._ensure_serializable(obj.to_dict())
+        elif hasattr(obj, "__dict__"):
+            return self._ensure_serializable(obj.__dict__)
+        else:
+            return str(obj)

    def _log_processing_results(self, new_count: int, output_file: str, item_type: str) -> None:
        """
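The `_write_item` helpers introduced above funnel every asset or finding through a recursive serialization pass before appending it as a single JSON line. The standalone sketch below is editorial illustration only (the `ExampleFinding` dataclass and file name are invented); it mirrors the same fallback chain: primitives pass through, datetimes become ISO-8601 strings, dataclasses are expanded with `dataclasses.asdict`, containers recurse, and anything else falls back to `str()`.

```python
import dataclasses
import json
from datetime import datetime


@dataclasses.dataclass
class ExampleFinding:
    title: str
    first_seen: datetime
    references: list


def ensure_serializable(obj):
    """Mirror of the recursive conversion applied before a record is written."""
    if isinstance(obj, (str, int, float, bool, type(None))):
        return obj
    if isinstance(obj, datetime):
        return obj.isoformat()  # datetimes become ISO-8601 strings
    if dataclasses.is_dataclass(obj):
        return ensure_serializable(dataclasses.asdict(obj))
    if isinstance(obj, dict):
        return {k: ensure_serializable(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [ensure_serializable(i) for i in obj]
    return str(obj)  # last-resort fallback, as in _ensure_serializable


record = ensure_serializable(ExampleFinding("Example finding", datetime(2025, 1, 1), ["https://example.com"]))
with open("findings.jsonl", "a") as fh:
    fh.write(json.dumps(record) + "\n")  # one JSON object per line, as _write_dict_to_file does
```

Keeping one self-contained JSON object per line is what lets `_yield_items_from_jsonl` stream the file back later without loading it all into memory.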
@@ -684,38 +1024,16 @@ class JSONLScannerIntegration(ScannerIntegration):
        :yields: Iterator[IntegrationAsset]
        """
        logger.info("Starting fetch_assets")
-        file_path = kwargs.get("file_path", self.file_path)
-        empty_file = kwargs.get("empty_file", True)
-        process_together = kwargs.get("process_together", False)
-        use_jsonl_file = kwargs.get("use_jsonl_file", False)
-
-        self.create_artifacts_dir()
-
-        if use_jsonl_file:
-            logger.info(f"Using existing JSONL file: {self.ASSETS_FILE}")
-            total_assets = sum(1 for _ in open(self.ASSETS_FILE, "r")) if os.path.exists(self.ASSETS_FILE) else 0
-            self.num_assets_to_process = total_assets
-            logger.info(f"Found {total_assets} assets in existing JSONL file")
-        else:
-            file_path = self._validate_file_path(file_path)
-            if process_together:
-                total_assets, _ = self._process_files(
-                    file_path,
-                    self.ASSETS_FILE,
-                    self.FINDINGS_FILE,
-                    empty_assets_file=empty_file,
-                    empty_findings_file=False,
-                )
-                self.num_assets_to_process = total_assets
-            else:
-                total_assets = self._write_items_to_jsonl(file_path, self.ASSETS_FILE, "asset", empty_file=empty_file)
-                self.num_assets_to_process = total_assets
-        logger.info(f"Total assets to process: {total_assets}")

-
-
-
-
+        return self._fetch_items(
+            "asset",
+            self.ASSETS_FILE,
+            IntegrationAsset,
+            kwargs.get("file_path", self.file_path),
+            kwargs.get("empty_file", True),
+            kwargs.get("process_together", False),
+            kwargs.get("use_jsonl_file", False),
+        )

    def fetch_findings(self, *args: Any, **kwargs: Any) -> Iterator[IntegrationFinding]:
        """
@@ -730,40 +1048,151 @@ class JSONLScannerIntegration(ScannerIntegration):
        :yields: Iterator[IntegrationFinding]
        """
        logger.info("Starting fetch_findings")
-        file_path = kwargs.get("file_path", self.file_path)
-        empty_file = kwargs.get("empty_file", True)
-        process_together = kwargs.get("process_together", False)
-        use_jsonl_file = kwargs.get("use_jsonl_file", False)

+        return self._fetch_items(
+            "finding",
+            self.FINDINGS_FILE,
+            IntegrationFinding,
+            kwargs.get("file_path", self.file_path),
+            kwargs.get("empty_file", True),
+            kwargs.get("process_together", False),
+            kwargs.get("use_jsonl_file", False),
+        )
+
+    def _fetch_items(
+        self,
+        item_type: str,
+        jsonl_file: str,
+        item_class: Type[ItemType],
+        file_path: Optional[str] = None,
+        empty_file: bool = True,
+        process_together: bool = False,
+        use_jsonl_file: bool = False,
+    ) -> Iterator[ItemType]:
+        """
+        Common method to fetch assets or findings from processed source files.
+
+        :param str item_type: Type of items to fetch ('asset' or 'finding')
+        :param str jsonl_file: Path to the JSONL file containing items
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :param Optional[str] file_path: Path to source file or directory
+        :param bool empty_file: Whether to empty the output file before writing
+        :param bool process_together: Whether to process assets and findings together
+        :param bool use_jsonl_file: Whether to use an existing JSONL file
+        :yields: Iterator[ItemType]
+        :rtype: Iterator[ItemType]
+        """
        self.create_artifacts_dir()
+        is_asset = item_type == "asset"
+        counter_attr = "num_assets_to_process" if is_asset else "num_findings_to_process"

        if use_jsonl_file:
-            logger.info(f"Using existing JSONL file: {
-
-            self
-            logger.info(f"Found {
+            logger.info(f"Using existing JSONL file: {jsonl_file}")
+            total_items = sum(1 for _ in open(jsonl_file, "r")) if os.path.exists(jsonl_file) else 0
+            setattr(self, counter_attr, total_items)
+            logger.info(f"Found {total_items} {item_type}s in existing JSONL file")
        else:
            file_path = self._validate_file_path(file_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            total_items = self._process_source_files(
+                file_path, jsonl_file, item_type, empty_file, process_together, counter_attr
+            )
+        logger.info(f"Total {item_type}s to process: {total_items}")
+
+        # Yield items from the JSONL file
+        for item in self._yield_items_from_jsonl(jsonl_file, item_class):
+            yield item
+
+        logger.info(
+            f"{item_type.capitalize()}s read from JSONL complete. Total {item_type}s identified: {getattr(self, counter_attr)}"
+        )
+
+    def _process_source_files(
+        self,
+        file_path: str,
+        jsonl_file: str,
+        item_type: str,
+        empty_file: bool,
+        process_together: bool,
+        counter_attr: str,
+    ) -> int:
+        """
+        Process source files and return the total count of items.
+
+        :param str file_path: Path to source file or directory
+        :param str jsonl_file: Path to the JSONL file to write
+        :param str item_type: Type of items to process ('asset' or 'finding')
+        :param bool empty_file: Whether to empty output files
+        :param bool process_together: Whether to process assets and findings together
+        :param str counter_attr: Attribute name for storing the count
+        :return: Total count of items
+        :rtype: int
+        """
+        is_asset = item_type == "asset"
+
+        if process_together:
+            # Handle joint processing of assets and findings
+            asset_count, finding_count = self._process_files(
+                file_path,
+                self.ASSETS_FILE,
+                self.FINDINGS_FILE,
+                empty_assets_file=empty_file if is_asset else False,
+                empty_findings_file=empty_file if not is_asset else False,
+            )
+            total_items = asset_count if is_asset else finding_count
+        else:
+            # Process just one type
+            total_items = self._write_items_to_jsonl(file_path, jsonl_file, item_type, empty_file=empty_file)

-
-
+        setattr(self, counter_attr, total_items)
+        return total_items

-
+    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
+        """
+        Parse a single asset from source data.
+
+        Subclasses must implement this method to parse assets from their specific file format.
+
+        :param Union[Path, str] file_path: Path to the file containing the asset data
+        :param Dict[str, Any] data: The parsed data
+        :return: IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        raise NotImplementedError("Subclasses must implement parse_asset")
+
+    def parse_finding(self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]) -> IntegrationFinding:
+        """Parse a single finding from source data.
+
+        Subclasses must implement this method to parse findings from their specific file format.
+
+        :param str asset_identifier: The identifier of the asset this finding belongs to
+        :param Dict[str, Any] data: The asset data
+        :param Dict[str, Any] item: The finding data
+        :return: IntegrationFinding object
+        :rtype: IntegrationFinding
+        """
+        raise NotImplementedError("Subclasses must implement parse_finding")
+
+    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
+        """
+        Check if the provided data is valid for processing.
+
+        This default implementation ensures the data is a non-empty dictionary.
+        Subclasses should override this to implement specific validation logic.
+
+        :param Any data: Data parsed from the file to validate
+        :param Union[Path, str] file_path: Path to the file being processed
+        :return: Tuple of (is_valid, data) where is_valid indicates validity and data is the validated content or None
+        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
+        """
+        if not isinstance(data, dict):
+            logger.warning(f"Data is not a dictionary for file {file_path}, skipping")
+            return False, None
+
+        if not data:
+            logger.warning(f"Data is an empty dictionary for file {file_path}, skipping")
+            return False, None
+
+        return True, data

    def fetch_assets_and_findings(
        self, file_path: str = None, empty_files: bool = True
@@ -923,51 +1352,3 @@ class JSONLScannerIntegration(ScannerIntegration):
            is_valid, validated_data = self.is_valid_file(data, file)
            if is_valid and validated_data is not None:
                yield file, validated_data
-
-    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
-        """
-        Parse a single asset from source data.
-
-        Subclasses must implement this method to parse assets from their specific file format.
-
-        :param Union[Path, str] file_path: Path to the file containing the asset data
-        :param Dict[str, Any] data: The parsed data
-        :return: IntegrationAsset object
-        :rtype: IntegrationAsset
-        """
-        raise NotImplementedError("Subclasses must implement parse_asset")
-
-    def parse_finding(self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]) -> IntegrationFinding:
-        """Parse a single finding from source data.
-
-        Subclasses must implement this method to parse findings from their specific file format.
-
-        :param str asset_identifier: The identifier of the asset this finding belongs to
-        :param Dict[str, Any] data: The asset data
-        :param Dict[str, Any] item: The finding data
-        :return: IntegrationFinding object
-        :rtype: IntegrationFinding
-        """
-        raise NotImplementedError("Subclasses must implement parse_finding")
-
-    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
-        """
-        Check if the provided data is valid for processing.
-
-        This default implementation ensures the data is a non-empty dictionary.
-        Subclasses should override this to implement specific validation logic.
-
-        :param Any data: Data parsed from the file to validate
-        :param Union[Path, str] file_path: Path to the file being processed
-        :return: Tuple of (is_valid, data) where is_valid indicates validity and data is the validated content or None
-        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
-        """
-        if not isinstance(data, dict):
-            logger.warning(f"Data is not a dictionary for file {file_path}, skipping")
-            return False, None
-
-        if not data:
-            logger.warning(f"Data is an empty dictionary for file {file_path}, skipping")
-            return False, None
-
-        return True, data