regscale-cli 6.18.0.0__py3-none-any.whl → 6.19.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of regscale-cli has been flagged as potentially problematic.

Files changed (47)
  1. regscale/__init__.py +1 -1
  2. regscale/integrations/api_paginator.py +932 -0
  3. regscale/integrations/api_paginator_example.py +348 -0
  4. regscale/integrations/commercial/__init__.py +11 -10
  5. regscale/integrations/commercial/{qualys.py → qualys/__init__.py} +756 -105
  6. regscale/integrations/commercial/qualys/scanner.py +1051 -0
  7. regscale/integrations/commercial/qualys/variables.py +21 -0
  8. regscale/integrations/commercial/sicura/api.py +1 -0
  9. regscale/integrations/commercial/stigv2/click_commands.py +36 -8
  10. regscale/integrations/commercial/stigv2/stig_integration.py +63 -9
  11. regscale/integrations/commercial/tenablev2/__init__.py +9 -0
  12. regscale/integrations/commercial/tenablev2/authenticate.py +23 -2
  13. regscale/integrations/commercial/tenablev2/commands.py +779 -0
  14. regscale/integrations/commercial/tenablev2/jsonl_scanner.py +1999 -0
  15. regscale/integrations/commercial/tenablev2/sc_scanner.py +600 -0
  16. regscale/integrations/commercial/tenablev2/scanner.py +7 -5
  17. regscale/integrations/commercial/tenablev2/utils.py +21 -4
  18. regscale/integrations/commercial/tenablev2/variables.py +4 -0
  19. regscale/integrations/jsonl_scanner_integration.py +523 -142
  20. regscale/integrations/scanner_integration.py +102 -26
  21. regscale/integrations/transformer/__init__.py +17 -0
  22. regscale/integrations/transformer/data_transformer.py +445 -0
  23. regscale/integrations/transformer/mappings/__init__.py +8 -0
  24. regscale/integrations/variables.py +2 -0
  25. regscale/models/__init__.py +5 -2
  26. regscale/models/integration_models/cisa_kev_data.json +6 -6
  27. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  28. regscale/models/regscale_models/asset.py +5 -2
  29. regscale/models/regscale_models/file.py +5 -2
  30. regscale/models/regscale_models/group.py +2 -1
  31. regscale/models/regscale_models/user_group.py +1 -1
  32. regscale/regscale.py +3 -1
  33. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/METADATA +1 -1
  34. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/RECORD +46 -30
  35. tests/regscale/core/test_version.py +22 -0
  36. tests/regscale/integrations/__init__.py +0 -0
  37. tests/regscale/integrations/test_api_paginator.py +597 -0
  38. tests/regscale/integrations/test_integration_mapping.py +60 -0
  39. tests/regscale/integrations/test_issue_creation.py +317 -0
  40. tests/regscale/integrations/test_issue_due_date.py +46 -0
  41. tests/regscale/integrations/transformer/__init__.py +0 -0
  42. tests/regscale/integrations/transformer/test_data_transformer.py +850 -0
  43. regscale/integrations/commercial/tenablev2/click.py +0 -1641
  44. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/LICENSE +0 -0
  45. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/WHEEL +0 -0
  46. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/entry_points.txt +0 -0
  47. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/top_level.txt +0 -0
regscale/integrations/jsonl_scanner_integration.py
@@ -55,6 +55,10 @@ class JSONLScannerIntegration(ScannerIntegration):
         """
         logger.info("Initializing JSONLScannerIntegration")
         self.plan_id = kwargs.get("plan_id", None)
+
+        # Pass vulnerability creation option to parent class
+        self.vulnerability_creation = kwargs.get("vulnerability_creation", None)
+
         # plan_id is required for all integrations
         super().__init__(**kwargs)
         # Extract S3-related kwargs
@@ -76,7 +80,8 @@ class JSONLScannerIntegration(ScannerIntegration):
         self.mapping = self._load_mapping() if not self.disable_mapping else None

         self.set_scan_date(kwargs.get("scan_date", get_current_datetime()))
-        # Initialize parent class
+
+        self.existing_assets = {}

         self.s3_client = None
         if self.s3_bucket and not self.read_files_only:
@@ -290,10 +295,7 @@ class JSONLScannerIntegration(ScannerIntegration):

         with open(output_file, "a") as output_f:
             for file_data in self.find_valid_files(file_path):
-                if isinstance(file_data, tuple) and len(file_data) >= 2:
-                    file, data = file_data[0], file_data[1]
-                else:
-                    file, data = file_data, None
+                file, data = self._extract_file_and_data(file_data)

                 file_str = str(file)
                 if file_str in processed_files:
@@ -301,23 +303,64 @@ class JSONLScannerIntegration(ScannerIntegration):

                 processed_files.add(file_str)

-                try:
-                    logger.info(f"Processing file: {file}")
-                    if item_type == "asset":
-                        self._process_asset_file(file, data, output_f, existing_items)
-                        new_items_count += 1
-                        total_items_count += 1
-                    else:
-                        findings_count = self._process_finding_file(file, data, output_f, existing_items)
-                        new_items_count += findings_count
-                        total_items_count += findings_count
+                items_added = self._process_file_by_type(file, data, output_f, existing_items, item_type)
+                new_items_count += items_added
+                total_items_count += items_added

-                except Exception as e:
-                    logger.error(f"Error processing file {file}: {str(e)}")
+        self._log_items_added(new_items_count, item_type, output_file)
+        return total_items_count
+
+    def _extract_file_and_data(self, file_data: Any) -> Tuple[Union[Path, str], Optional[Dict[str, Any]]]:
+        """
+        Extract file path and data from file_data which might be a tuple or a single value.
+
+        :param Any file_data: File data from find_valid_files
+        :return: Tuple of (file_path, file_data)
+        :rtype: Tuple[Union[Path, str], Optional[Dict[str, Any]]]
+        """
+        if isinstance(file_data, tuple) and len(file_data) >= 2:
+            return file_data[0], file_data[1]
+        return file_data, None

+    def _process_file_by_type(
+        self,
+        file: Union[Path, str],
+        data: Optional[Dict[str, Any]],
+        output_f: Any,
+        existing_items: Dict[str, bool],
+        item_type: str,
+    ) -> int:
+        """
+        Process a file based on the item type (asset or finding).
+
+        :param Union[Path, str] file: File path
+        :param Optional[Dict[str, Any]] data: File data
+        :param Any output_f: Output file handle
+        :param Dict[str, bool] existing_items: Dictionary of existing item keys
+        :param str item_type: Type of items to process ('asset' or 'finding')
+        :return: Number of items added
+        :rtype: int
+        """
+        try:
+            logger.info(f"Processing file: {file}")
+            if item_type == "asset":
+                return self._process_asset_file(file, data, output_f, existing_items)
+            else:
+                return self._process_finding_file(file, data, output_f, existing_items)
+        except Exception as e:
+            logger.error(f"Error processing file {file}: {str(e)}")
+            return 0
+
+    def _log_items_added(self, new_items_count: int, item_type: str, output_file: str) -> None:
+        """
+        Log information about the number of items added.
+
+        :param int new_items_count: Number of new items added
+        :param str item_type: Type of items processed ('asset' or 'finding')
+        :param str output_file: Path to the output file
+        """
         item_type_label = "assets" if item_type == "asset" else "findings"
         logger.info(f"Added {new_items_count} new {item_type_label} to {output_file}")
-        return total_items_count

     def _process_asset_file(self, file, data, output_f, existing_items):
         """
@@ -350,8 +393,7 @@ class JSONLScannerIntegration(ScannerIntegration):
             logger.debug(f"Asset with identifier {key} already exists, skipping")
             return 0

-        output_f.write(json.dumps(dataclasses.asdict(mapped_asset)) + "\n")
-        output_f.flush()
+        self._write_item(output_f, mapped_asset)
         existing_items[key] = True
         return 1

@@ -392,8 +434,7 @@ class JSONLScannerIntegration(ScannerIntegration):
                 logger.debug(f"Finding with key {key} already exists, skipping")
                 continue

-            output_f.write(json.dumps(dataclasses.asdict(mapped_finding)) + "\n")
-            output_f.flush()
+            self._write_item(output_f, mapped_finding)
             existing_items[key] = True
             findings_in_file += 1

@@ -416,20 +457,54 @@ class JSONLScannerIntegration(ScannerIntegration):

     def _yield_items_from_jsonl(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
         """
-        Read items from JSONL file and yield them one by one.
+        Read items from JSONL file and yield them one by one with optimizations for large files.
+
+        This method automatically selects an appropriate processing strategy based on file size:
+        - Small files (<100MB): Simple line-by-line processing
+        - Medium files (100MB-500MB): Batch processing with increased buffer size
+        - Large files (>500MB): Parallel processing with multiprocessing

         :param str jsonl_file: Path to JSONL file containing items
         :param Type[ItemType] item_class: Class to convert dictionary items to (IntegrationAsset or IntegrationFinding)
         :yields: Items one at a time
         :rtype: Iterator[ItemType]
         """
+        # Standard library imports should be at the module level, but these are only needed here
+        # and having them at the top would create unnecessary dependencies for small files
+
         if not os.path.exists(jsonl_file):
             logger.warning(f"JSONL file {jsonl_file} does not exist")
             return

-        logger.info(f"Reading items from {jsonl_file}")
+        # Check file size to determine best strategy
+        file_size = os.path.getsize(jsonl_file)
+        file_size_mb = file_size / (1024 * 1024)
+        logger.info(f"Reading items from {jsonl_file} (size: {file_size_mb:.2f} MB)")
+
+        # Select processing strategy based on file size
+        if file_size < 100 * 1024 * 1024:  # < 100MB
+            yield from self._process_small_file(jsonl_file, item_class)
+        elif file_size < 500 * 1024 * 1024:  # 100MB-500MB
+            yield from self._process_medium_file(jsonl_file, item_class)
+        else:  # > 500MB
+            yield from self._process_large_file(jsonl_file, item_class)
+
+        logger.info(f"Finished reading items from {jsonl_file}")
+
+    def _process_small_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a small JSONL file (<100MB) using line-by-line processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
         with open(jsonl_file, "r") as f:
             for line_number, line in enumerate(f, 1):
+                if not line.strip():  # Skip empty lines
+                    continue
+
                 try:
                     item_dict = json.loads(line.strip())
                     yield item_class(**item_dict)
@@ -438,6 +513,95 @@ class JSONLScannerIntegration(ScannerIntegration):
                 except Exception as e:
                     logger.error(f"Error processing line {line_number} in {jsonl_file}: {str(e)}")

+    def _process_batch(self, batch: List[str], item_class: Type[ItemType]) -> List[ItemType]:
+        """
+        Process a batch of lines into item objects.
+
+        :param List[str] batch: List of JSON lines to process
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :return: List of processed items
+        :rtype: List[ItemType]
+        """
+        results = []
+        for line in batch:
+            if not line.strip():  # Skip empty lines
+                continue
+
+            try:
+                item_dict = json.loads(line.strip())
+                results.append(item_class(**item_dict))
+            except json.JSONDecodeError:
+                logger.warning("Could not parse line in batch")
+            except Exception as e:
+                logger.error(f"Error processing line in batch: {str(e)}")
+        return results
+
+    def _process_medium_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a medium-sized JSONL file (100MB-500MB) using batch processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
+        batch_size = 10000  # Process 10,000 lines at a time
+        buffer_size = 10 * 1024 * 1024  # 10MB buffer
+
+        with open(jsonl_file, "r", buffering=buffer_size) as f:
+            batch = []
+
+            for line in f:
+                batch.append(line)
+
+                if len(batch) >= batch_size:
+                    for item in self._process_batch(batch, item_class):
+                        yield item
+                    batch = []
+
+            # Process any remaining lines
+            if batch:
+                for item in self._process_batch(batch, item_class):
+                    yield item
+
+    def _process_large_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a large JSONL file (>500MB) using parallel processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial
+
+        max_workers = min(os.cpu_count() or 4, 8)
+        batch_size = 10000  # Process 10,000 lines at a time
+        buffer_size = 10 * 1024 * 1024  # 10MB buffer
+
+        logger.info(f"Processing large file with {max_workers} workers, batch size: {batch_size}")
+
+        with open(jsonl_file, "r", buffering=buffer_size) as f:
+            batch = []
+            process_func = partial(self._process_batch, item_class=item_class)
+
+            with ProcessPoolExecutor(max_workers=max_workers) as executor:
+                for line in f:
+                    batch.append(line)
+
+                    if len(batch) >= batch_size:
+                        future = executor.submit(process_func, batch)
+                        batch = []
+                        # Yield results as they complete
+                        for item in future.result():
+                            yield item
+
+                # Process any remaining lines
+                if batch:
+                    for item in executor.submit(process_func, batch).result():
+                        yield item
+
     def _process_files(
         self,
         file_path: Union[str, Path],
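
The three size-dependent readers above are easier to follow outside the class. Below is a minimal, self-contained sketch of the same batched-reading idea (fixed-size batches, plus a process pool for very large files), assuming nothing from regscale-cli; the function names, the use of a single 500MB threshold, and the findings.jsonl input file are illustrative only.

import itertools
import json
import os
from concurrent.futures import ProcessPoolExecutor
from typing import Any, Dict, Iterator, List


def parse_batch(lines: List[str]) -> List[Dict[str, Any]]:
    """Parse one batch of JSON lines, skipping blank or malformed records."""
    records = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError:
            pass  # a real implementation would log and count these
    return records


def read_jsonl(path: str, batch_size: int = 10_000) -> Iterator[Dict[str, Any]]:
    """Yield records from a JSONL file, using worker processes for very large files."""
    large_file = os.path.getsize(path) > 500 * 1024 * 1024  # same 500MB cut-off as the diff above
    with open(path, "r", buffering=10 * 1024 * 1024) as f:
        batches = iter(lambda: list(itertools.islice(f, batch_size)), [])
        if not large_file:
            for batch in batches:
                yield from parse_batch(batch)
            return
        # For very large files, parse each batch in a separate process. Submitting a
        # batch and immediately waiting on it keeps ordering simple, at the cost of
        # limited overlap between reading and parsing (mirroring the approach in the diff).
        with ProcessPoolExecutor(max_workers=min(os.cpu_count() or 4, 8)) as pool:
            for batch in batches:
                yield from pool.submit(parse_batch, batch).result()


if __name__ == "__main__":
    for record in read_jsonl("findings.jsonl"):  # hypothetical input file
        print(record)
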
@@ -590,11 +754,52 @@ class JSONLScannerIntegration(ScannerIntegration):
         """
         findings_data = self._get_findings_data_from_file(data)
         logger.info(f"Found {len(findings_data)} findings in file: {file}")
+        self.existing_assets = existing_assets
+        asset_id = self._get_asset_id_from_assets()
+        findings_added = self._process_finding_items(findings_data, asset_id, data, findings_file, tracker)
+
+        if findings_added > 0:
+            logger.info(f"Added {findings_added} new findings from file {file}")
+
+    def _get_asset_id_from_assets(self) -> str:
+        """
+        Get the first asset ID from existing assets, or 'unknown' if none exist.
+
+        :return: The first asset ID found or 'unknown'
+        :rtype: str
+        """
+        return list(self.existing_assets.keys())[0] if self.existing_assets else "unknown"
+
+    def _process_finding_items(
+        self,
+        findings_data: List[Dict[str, Any]],
+        asset_id: str,
+        data: Optional[Dict[str, Any]],
+        findings_file: Any,
+        tracker: "CountTracker",
+    ) -> int:
+        """
+        Process individual finding items and write them to the findings file.
+
+        :param List[Dict[str, Any]] findings_data: List of findings data
+        :param str asset_id: Asset ID to associate with findings
+        :param Optional[Dict[str, Any]] data: Source data from the file
+        :param Any findings_file: Open file handle for writing findings
+        :param CountTracker tracker: Tracker for finding counts
+        :return: Number of findings added
+        :rtype: int
+        """
         findings_added = 0

-        asset_id = list(existing_assets.keys())[0] if existing_assets else "unknown"
+        # Create a default asset_id to use only if absolutely necessary
+        default_asset_id = self._get_asset_id_from_assets()
+
+        # Process each finding individually
         for finding_item in findings_data:
-            finding = self.parse_finding(asset_id, data, finding_item)
+            # Let the parse_finding implementation determine the correct asset_identifier
+            # This relies on subclasses implementing parse_finding to extract the right asset ID
+            # from the finding_item directly
+            finding = self.parse_finding(default_asset_id, data, finding_item)
             finding_dict = dataclasses.asdict(finding)
             mapped_finding = self._map_item(finding_dict, "finding_mapping", IntegrationFinding)
             self._validate_fields(mapped_finding, self.required_finding_fields)
@@ -605,12 +810,47 @@ class JSONLScannerIntegration(ScannerIntegration):
                 tracker.existing[finding_key] = True
                 tracker.new_count += 1
                 tracker.total_count += 1
+
+            if self._process_single_finding(finding_item, asset_id, data, findings_file, tracker):
                 findings_added += 1
-            else:
-                logger.debug(f"Finding with key {finding_key} already exists, skipping")

-        if findings_added > 0:
-            logger.info(f"Added {findings_added} new findings from file {file}")
+        return findings_added
+
+    def _process_single_finding(
+        self,
+        finding_item: Dict[str, Any],
+        asset_id: str,
+        data: Optional[Dict[str, Any]],
+        findings_file: Any,
+        tracker: "CountTracker",
+    ) -> bool:
+        """
+        Process a single finding item and write it if it's new.
+
+        :param Dict[str, Any] finding_item: Finding data
+        :param str asset_id: Asset ID to associate with the finding
+        :param Optional[Dict[str, Any]] data: Source data from the file
+        :param Any findings_file: Open file handle for writing findings
+        :param CountTracker tracker: Tracker for finding counts
+        :return: True if the finding was added, False otherwise
+        :rtype: bool
+        """
+        finding = self.parse_finding(asset_id, data, finding_item)
+        finding_dict = dataclasses.asdict(finding)
+        mapped_finding = self._map_item(finding_dict, "finding_mapping", IntegrationFinding)
+        self._validate_fields(mapped_finding, self.required_finding_fields)
+
+        finding_key = self._get_item_key(dataclasses.asdict(mapped_finding), "finding")
+
+        if finding_key in tracker.existing:
+            logger.debug(f"Finding with key {finding_key} already exists, skipping")
+            return False
+
+        self._write_item(findings_file, mapped_finding)
+        tracker.existing[finding_key] = True
+        tracker.new_count += 1
+        tracker.total_count += 1
+        return True

     def _map_item(self, item_dict: Dict[str, Any], mapping_key: str, item_class: Type) -> Any:
         """
@@ -629,16 +869,116 @@ class JSONLScannerIntegration(ScannerIntegration):
             return item_class(**mapped_dict)
         return item_class(**item_dict)

-    def _write_item(self, file_handle: Any, item: Any) -> None:
+    def _write_item(self, file_handle_or_path: Any, item: Any) -> None:
         """
-        Write an item to the specified file handle.
+        Write an item to a JSONL file.

-        :param Any file_handle: Open file handle to write to
-        :param Any item: Item object to write (IntegrationAsset or IntegrationFinding)
-        :rtype: None
+        :param Any file_handle_or_path: Open file handle or file path to write to
+        :param Any item: Item to write (IntegrationAsset or IntegrationFinding)
+        """
+        try:
+            item_dict = self._convert_item_to_dict(item)
+            item_dict = self._ensure_serializable(item_dict)
+            self._write_dict_to_file(file_handle_or_path, item_dict)
+        except Exception as e:
+            logger.error(f"Error writing item: {str(e)}")
+            logger.debug(f"Problem item: {str(item)}")
+            self._write_fallback_record(file_handle_or_path, item, e)
+
+    def _convert_item_to_dict(self, item: Any) -> Dict[str, Any]:
         """
-        file_handle.write(json.dumps(dataclasses.asdict(item)) + "\n")
-        file_handle.flush()
+        Convert an item to a dictionary using the most appropriate method.
+
+        :param Any item: Item to convert
+        :return: Dictionary representation of the item
+        :rtype: Dict[str, Any]
+        """
+        if dataclasses.is_dataclass(item):
+            return dataclasses.asdict(item)
+
+        if hasattr(item, "to_dict") and callable(item.to_dict):
+            return item.to_dict()
+
+        if hasattr(item, "__dict__"):
+            return item.__dict__
+
+        if isinstance(item, dict):
+            return item
+
+        return {"value": str(item)}
+
+    def _write_dict_to_file(self, file_handle_or_path: Any, item_dict: Dict[str, Any]) -> None:
+        """
+        Write a dictionary to a file as JSON.
+
+        :param Any file_handle_or_path: Open file handle or file path
+        :param Dict[str, Any] item_dict: Dictionary to write
+        """
+        json_line = json.dumps(item_dict) + "\n"
+
+        if self._is_file_handle(file_handle_or_path):
+            file_handle_or_path.write(json_line)
+            file_handle_or_path.flush()
+        else:
+            with open(file_handle_or_path, "a") as f:
+                f.write(json_line)
+
+    def _is_file_handle(self, file_handle_or_path: Any) -> bool:
+        """
+        Check if the given object is a file handle.
+
+        :param Any file_handle_or_path: Object to check
+        :return: True if it's a file handle, False otherwise
+        :rtype: bool
+        """
+        return hasattr(file_handle_or_path, "write") and callable(file_handle_or_path.write)
+
+    def _write_fallback_record(self, file_handle_or_path: Any, item: Any, error: Exception) -> None:
+        """
+        Write a simplified fallback record when normal serialization fails.
+
+        :param Any file_handle_or_path: Open file handle or file path
+        :param Any item: Original item that failed to serialize
+        :param Exception error: The exception that occurred
+        """
+        try:
+            simplified = {
+                "error": "Failed to serialize original item",
+                "item_type": str(type(item)),
+                "error_message": str(error),
+            }
+
+            if hasattr(item, "__str__"):
+                simplified["item_string"] = str(item)
+
+            self._write_dict_to_file(file_handle_or_path, simplified)
+            logger.warning("Wrote simplified version of item after serialization error")
+        except Exception as e2:
+            logger.error(f"Failed to write simplified item: {str(e2)}")
+
+    def _ensure_serializable(self, obj: Any) -> Any:
+        """
+        Ensure all values in an object are JSON serializable.
+
+        :param Any obj: Object to make serializable
+        :return: Serializable object
+        """
+        if isinstance(obj, (str, int, float, bool, type(None))):
+            return obj
+        elif isinstance(obj, datetime):
+            return obj.isoformat()
+        elif isinstance(obj, dict):
+            return {k: self._ensure_serializable(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [self._ensure_serializable(i) for i in obj]
+        elif dataclasses.is_dataclass(obj):
+            return self._ensure_serializable(dataclasses.asdict(obj))
+        elif hasattr(obj, "to_dict") and callable(obj.to_dict):
+            return self._ensure_serializable(obj.to_dict())
+        elif hasattr(obj, "__dict__"):
+            return self._ensure_serializable(obj.__dict__)
+        else:
+            return str(obj)

     def _log_processing_results(self, new_count: int, output_file: str, item_type: str) -> None:
         """
@@ -684,38 +1024,16 @@ class JSONLScannerIntegration(ScannerIntegration):
         :yields: Iterator[IntegrationAsset]
         """
         logger.info("Starting fetch_assets")
-        file_path = kwargs.get("file_path", self.file_path)
-        empty_file = kwargs.get("empty_file", True)
-        process_together = kwargs.get("process_together", False)
-        use_jsonl_file = kwargs.get("use_jsonl_file", False)
-
-        self.create_artifacts_dir()
-
-        if use_jsonl_file:
-            logger.info(f"Using existing JSONL file: {self.ASSETS_FILE}")
-            total_assets = sum(1 for _ in open(self.ASSETS_FILE, "r")) if os.path.exists(self.ASSETS_FILE) else 0
-            self.num_assets_to_process = total_assets
-            logger.info(f"Found {total_assets} assets in existing JSONL file")
-        else:
-            file_path = self._validate_file_path(file_path)
-            if process_together:
-                total_assets, _ = self._process_files(
-                    file_path,
-                    self.ASSETS_FILE,
-                    self.FINDINGS_FILE,
-                    empty_assets_file=empty_file,
-                    empty_findings_file=False,
-                )
-                self.num_assets_to_process = total_assets
-            else:
-                total_assets = self._write_items_to_jsonl(file_path, self.ASSETS_FILE, "asset", empty_file=empty_file)
-                self.num_assets_to_process = total_assets
-            logger.info(f"Total assets to process: {total_assets}")

-        for asset in self._yield_items_from_jsonl(self.ASSETS_FILE, IntegrationAsset):
-            yield asset
-
-        logger.info(f"Assets read from JSONL complete. Total assets identified: {self.num_assets_to_process}")
+        return self._fetch_items(
+            "asset",
+            self.ASSETS_FILE,
+            IntegrationAsset,
+            kwargs.get("file_path", self.file_path),
+            kwargs.get("empty_file", True),
+            kwargs.get("process_together", False),
+            kwargs.get("use_jsonl_file", False),
+        )

     def fetch_findings(self, *args: Any, **kwargs: Any) -> Iterator[IntegrationFinding]:
         """
@@ -730,40 +1048,151 @@ class JSONLScannerIntegration(ScannerIntegration):
         :yields: Iterator[IntegrationFinding]
         """
         logger.info("Starting fetch_findings")
-        file_path = kwargs.get("file_path", self.file_path)
-        empty_file = kwargs.get("empty_file", True)
-        process_together = kwargs.get("process_together", False)
-        use_jsonl_file = kwargs.get("use_jsonl_file", False)

+        return self._fetch_items(
+            "finding",
+            self.FINDINGS_FILE,
+            IntegrationFinding,
+            kwargs.get("file_path", self.file_path),
+            kwargs.get("empty_file", True),
+            kwargs.get("process_together", False),
+            kwargs.get("use_jsonl_file", False),
+        )
+
+    def _fetch_items(
+        self,
+        item_type: str,
+        jsonl_file: str,
+        item_class: Type[ItemType],
+        file_path: Optional[str] = None,
+        empty_file: bool = True,
+        process_together: bool = False,
+        use_jsonl_file: bool = False,
+    ) -> Iterator[ItemType]:
+        """
+        Common method to fetch assets or findings from processed source files.
+
+        :param str item_type: Type of items to fetch ('asset' or 'finding')
+        :param str jsonl_file: Path to the JSONL file containing items
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :param Optional[str] file_path: Path to source file or directory
+        :param bool empty_file: Whether to empty the output file before writing
+        :param bool process_together: Whether to process assets and findings together
+        :param bool use_jsonl_file: Whether to use an existing JSONL file
+        :yields: Iterator[ItemType]
+        :rtype: Iterator[ItemType]
+        """
         self.create_artifacts_dir()
+        is_asset = item_type == "asset"
+        counter_attr = "num_assets_to_process" if is_asset else "num_findings_to_process"

         if use_jsonl_file:
-            logger.info(f"Using existing JSONL file: {self.FINDINGS_FILE}")
-            total_findings = sum(1 for _ in open(self.FINDINGS_FILE, "r")) if os.path.exists(self.FINDINGS_FILE) else 0
-            self.num_findings_to_process = total_findings
-            logger.info(f"Found {total_findings} findings in existing JSONL file")
+            logger.info(f"Using existing JSONL file: {jsonl_file}")
+            total_items = sum(1 for _ in open(jsonl_file, "r")) if os.path.exists(jsonl_file) else 0
+            setattr(self, counter_attr, total_items)
+            logger.info(f"Found {total_items} {item_type}s in existing JSONL file")
         else:
             file_path = self._validate_file_path(file_path)
-            if process_together:
-                _, total_findings = self._process_files(
-                    file_path,
-                    self.ASSETS_FILE,
-                    self.FINDINGS_FILE,
-                    empty_assets_file=False,
-                    empty_findings_file=empty_file,
-                )
-                self.num_findings_to_process = total_findings
-            else:
-                total_findings = self._write_items_to_jsonl(
-                    file_path, self.FINDINGS_FILE, "finding", empty_file=empty_file
-                )
-                self.num_findings_to_process = total_findings
-            logger.info(f"Total findings to process: {total_findings}")
+            total_items = self._process_source_files(
+                file_path, jsonl_file, item_type, empty_file, process_together, counter_attr
+            )
+            logger.info(f"Total {item_type}s to process: {total_items}")
+
+        # Yield items from the JSONL file
+        for item in self._yield_items_from_jsonl(jsonl_file, item_class):
+            yield item
+
+        logger.info(
+            f"{item_type.capitalize()}s read from JSONL complete. Total {item_type}s identified: {getattr(self, counter_attr)}"
+        )
+
+    def _process_source_files(
+        self,
+        file_path: str,
+        jsonl_file: str,
+        item_type: str,
+        empty_file: bool,
+        process_together: bool,
+        counter_attr: str,
+    ) -> int:
+        """
+        Process source files and return the total count of items.
+
+        :param str file_path: Path to source file or directory
+        :param str jsonl_file: Path to the JSONL file to write
+        :param str item_type: Type of items to process ('asset' or 'finding')
+        :param bool empty_file: Whether to empty output files
+        :param bool process_together: Whether to process assets and findings together
+        :param str counter_attr: Attribute name for storing the count
+        :return: Total count of items
+        :rtype: int
+        """
+        is_asset = item_type == "asset"
+
+        if process_together:
+            # Handle joint processing of assets and findings
+            asset_count, finding_count = self._process_files(
+                file_path,
+                self.ASSETS_FILE,
+                self.FINDINGS_FILE,
+                empty_assets_file=empty_file if is_asset else False,
+                empty_findings_file=empty_file if not is_asset else False,
+            )
+            total_items = asset_count if is_asset else finding_count
+        else:
+            # Process just one type
+            total_items = self._write_items_to_jsonl(file_path, jsonl_file, item_type, empty_file=empty_file)

-        for finding in self._yield_items_from_jsonl(self.FINDINGS_FILE, IntegrationFinding):
-            yield finding
+        setattr(self, counter_attr, total_items)
+        return total_items

-        logger.info(f"Findings read from JSONL complete. Total findings identified: {self.num_findings_to_process}")
+    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
+        """
+        Parse a single asset from source data.
+
+        Subclasses must implement this method to parse assets from their specific file format.
+
+        :param Union[Path, str] file_path: Path to the file containing the asset data
+        :param Dict[str, Any] data: The parsed data
+        :return: IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        raise NotImplementedError("Subclasses must implement parse_asset")
+
+    def parse_finding(self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]) -> IntegrationFinding:
+        """Parse a single finding from source data.
+
+        Subclasses must implement this method to parse findings from their specific file format.
+
+        :param str asset_identifier: The identifier of the asset this finding belongs to
+        :param Dict[str, Any] data: The asset data
+        :param Dict[str, Any] item: The finding data
+        :return: IntegrationFinding object
+        :rtype: IntegrationFinding
+        """
+        raise NotImplementedError("Subclasses must implement parse_finding")
+
+    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
+        """
+        Check if the provided data is valid for processing.
+
+        This default implementation ensures the data is a non-empty dictionary.
+        Subclasses should override this to implement specific validation logic.
+
+        :param Any data: Data parsed from the file to validate
+        :param Union[Path, str] file_path: Path to the file being processed
+        :return: Tuple of (is_valid, data) where is_valid indicates validity and data is the validated content or None
+        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
+        """
+        if not isinstance(data, dict):
+            logger.warning(f"Data is not a dictionary for file {file_path}, skipping")
+            return False, None
+
+        if not data:
+            logger.warning(f"Data is an empty dictionary for file {file_path}, skipping")
+            return False, None
+
+        return True, data

     def fetch_assets_and_findings(
         self, file_path: str = None, empty_files: bool = True
@@ -923,51 +1352,3 @@ class JSONLScannerIntegration(ScannerIntegration):
             is_valid, validated_data = self.is_valid_file(data, file)
             if is_valid and validated_data is not None:
                 yield file, validated_data
-
-    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
-        """
-        Parse a single asset from source data.
-
-        Subclasses must implement this method to parse assets from their specific file format.
-
-        :param Union[Path, str] file_path: Path to the file containing the asset data
-        :param Dict[str, Any] data: The parsed data
-        :return: IntegrationAsset object
-        :rtype: IntegrationAsset
-        """
-        raise NotImplementedError("Subclasses must implement parse_asset")
-
-    def parse_finding(self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]) -> IntegrationFinding:
-        """Parse a single finding from source data.
-
-        Subclasses must implement this method to parse findings from their specific file format.
-
-        :param str asset_identifier: The identifier of the asset this finding belongs to
-        :param Dict[str, Any] data: The asset data
-        :param Dict[str, Any] item: The finding data
-        :return: IntegrationFinding object
-        :rtype: IntegrationFinding
-        """
-        raise NotImplementedError("Subclasses must implement parse_finding")
-
-    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
-        """
-        Check if the provided data is valid for processing.
-
-        This default implementation ensures the data is a non-empty dictionary.
-        Subclasses should override this to implement specific validation logic.
-
-        :param Any data: Data parsed from the file to validate
-        :param Union[Path, str] file_path: Path to the file being processed
-        :return: Tuple of (is_valid, data) where is_valid indicates validity and data is the validated content or None
-        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
-        """
-        if not isinstance(data, dict):
-            logger.warning(f"Data is not a dictionary for file {file_path}, skipping")
-            return False, None
-
-        if not data:
-            logger.warning(f"Data is an empty dictionary for file {file_path}, skipping")
-            return False, None
-
-        return True, data